www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'v28-range-hrtimers-for-linus-v2' of git://git.kernel.org/pub/scm/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Oct 2008 17:53:02 +0000 (10:53 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Oct 2008 17:53:02 +0000 (10:53 -0700)
* 'v28-range-hrtimers-for-linus-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (37 commits)
  hrtimers: add missing docbook comments to struct hrtimer
  hrtimers: simplify hrtimer_peek_ahead_timers()
  hrtimers: fix docbook comments
  DECLARE_PER_CPU needs linux/percpu.h
  hrtimers: fix typo
  rangetimers: fix the bug reported by Ingo for real
  rangetimer: fix BUG_ON reported by Ingo
  rangetimer: fix x86 build failure for the !HRTIMERS case
  select: fix alpha OSF wrapper
  select: fix alpha OSF wrapper
  hrtimer: peek at the timer queue just before going idle
  hrtimer: make the futex() system call use the per process slack value
  hrtimer: make the nanosleep() syscall use the per process slack
  hrtimer: fix signed/unsigned bug in slack estimator
  hrtimer: show the timer ranges in /proc/timer_list
  hrtimer: incorporate feedback from Peter Zijlstra
  hrtimer: add a hrtimer_start_range() function
  hrtimer: another build fix
  hrtimer: fix build bug found by Ingo
  hrtimer: make select() and poll() use the hrtimer range feature
  ...

arch/alpha/kernel/osf_sys.c
arch/powerpc/oprofile/cell/spu_profiler.c
drivers/cpuidle/cpuidle.c
fs/compat.c
include/linux/sched.h
kernel/sched.c
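
The recurring change in this merge is the timeout representation for poll(),
select() and friends: a relative, jiffies-granular s64 count is replaced by an
absolute struct timespec end time, computed once up front by
poll_select_set_timeout() and consumed by do_select()/do_sys_poll(). The helper
itself is not part of this combined diff; the following is a minimal sketch
reconstructed from its call sites below (seconds and nanoseconds in, -EINVAL on
invalid values, a monotonic end time out), and the actual fs/select.c code may
differ in detail:

    /*
     * Sketch: convert a relative (sec, nsec) timeout into an absolute
     * end time on the monotonic clock.  A zero timeout stays zero,
     * which the pollers treat as "scan once and return".
     */
    int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
    {
            struct timespec ts = { .tv_sec = sec, .tv_nsec = nsec };

            if (!timespec_valid(&ts))
                    return -EINVAL;

            if (!sec && !nsec) {
                    to->tv_sec = to->tv_nsec = 0;
            } else {
                    ktime_get_ts(to);                  /* now, CLOCK_MONOTONIC */
                    *to = timespec_add_safe(*to, ts);  /* saturates on overflow */
            }
            return 0;
    }

Every converted call site below follows the same shape: set to = &end_time,
then bail out with -EINVAL if poll_select_set_timeout() rejects the
user-supplied values.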

diff --combined arch/alpha/kernel/osf_sys.c
index f25f6c490952f79d55aab060e5e9c8d9abed9bed,8e19acbf288681634cef88f13e189fd2da2c6e49..18a3ea1aac51376882d0bd8cb50503b2d0442ee3
@@@ -165,11 -165,14 +165,11 @@@ osf_getdirentries(unsigned int fd, stru
        buf.error = 0;
  
        error = vfs_readdir(file, osf_filldir, &buf);
 -      if (error < 0)
 -              goto out_putf;
 -
 -      error = buf.error;
 +      if (error >= 0)
 +              error = buf.error;
        if (count != buf.count)
                error = count - buf.count;
  
 - out_putf:
        fput(file);
   out:
        return error;
@@@ -983,10 -986,12 +983,12 @@@ asmlinkage in
  osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
           struct timeval32 __user *tvp)
  {
-       s64 timeout = MAX_SCHEDULE_TIMEOUT;
+       struct timespec end_time, *to = NULL;
        if (tvp) {
                time_t sec, usec;
  
+               to = &end_time;
                if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
                    || __get_user(sec, &tvp->tv_sec)
                    || __get_user(usec, &tvp->tv_usec)) {
                if (sec < 0 || usec < 0)
                        return -EINVAL;
  
-               if ((unsigned long) sec < MAX_SELECT_SECONDS) {
-                       timeout = (usec + 1000000/HZ - 1) / (1000000/HZ);
-                       timeout += sec * (unsigned long) HZ;
-               }
+               if (poll_select_set_timeout(to, sec, usec * NSEC_PER_USEC))
+                       return -EINVAL;         
        }
  
        /* OSF does not copy back the remaining time.  */
-       return core_sys_select(n, inp, outp, exp, &timeout);
+       return core_sys_select(n, inp, outp, exp, to);
  }
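
Note how the MAX_SELECT_SECONDS capping and the jiffies rounding disappear from
the wrapper entirely, and since OSF select() never copies back the remaining
time, the absolute end time is passed straight through to core_sys_select().
The commit list above also mentions a slack estimator feeding the new hrtimer
ranges; its rough shape (an assumption reconstructed from the commit subjects,
not shown in this diff, with a hypothetical name) is to grant about 0.1% of the
remaining timeout as slack, capped at 100 ms:

    /*
     * Assumed sketch of the per-call slack estimator: long timeouts
     * tolerate proportionally more rounding; the upper bound keeps
     * wakeups reasonably prompt, and the lower bound is the fix named
     * in the commit list ("fix signed/unsigned bug in slack estimator").
     */
    static long estimate_accuracy(struct timespec *remaining)
    {
            int divfactor = 1000;                   /* ~0.1% of the timeout */
            long slack;

            slack  = remaining->tv_nsec / divfactor;
            slack += remaining->tv_sec * (NSEC_PER_SEC / divfactor);

            if (slack > 100 * NSEC_PER_MSEC)
                    slack = 100 * NSEC_PER_MSEC;    /* cap: 100 ms */
            if (slack < 0)                          /* deadline already passed */
                    slack = 0;
            return slack;
    }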
  
  struct rusage32 {
diff --combined arch/powerpc/oprofile/cell/spu_profiler.c
index 6edaebd5099a7104703bcc620ace0e6c3768b47a,02ffe060db51d4f449395ab3823579ba4c20e6af..dd499c3e9da769cb87109b910d3b8b36c79137a1
  
  static u32 *samples;
  
 -static int spu_prof_running;
 +int spu_prof_running;
  static unsigned int profiling_interval;
  
  #define NUM_SPU_BITS_TRBUF 16
  #define SPUS_PER_TB_ENTRY   4
 -#define SPUS_PER_NODE      8
  
  #define SPU_PC_MASK        0xFFFF
  
@@@ -195,7 -196,7 +195,7 @@@ int start_spu_profiling(unsigned int cy
        pr_debug("timer resolution: %lu\n", TICK_NSEC);
        kt = ktime_set(0, profiling_interval);
        hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-       timer.expires = kt;
+       hrtimer_set_expires(&timer, kt);
        timer.function = profile_spus;
  
        /* Allocate arrays for collecting SPU PC samples */
  
        spu_prof_running = 1;
        hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
 +      schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
  
        return 0;
  }
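
The timer.expires = kt to hrtimer_set_expires(&timer, kt) conversion seen here
(and again in kernel/sched.c below) exists because a range hrtimer no longer
has a single expiry field: it carries an earliest (soft) and a latest (hard)
expiry, and reads and writes go through accessors. An illustrative sketch; the
field names are assumptions:

    /* Illustrative: keep the soft and hard expiry in sync when a
     * caller sets a single point in time rather than a range. */
    static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
    {
            timer->_softexpires = time;     /* earliest acceptable firing */
            timer->_expires     = time;     /* hard deadline */
    }

hrtimer_start_expires(), used in the kernel/sched.c hunk below, similarly
re-arms a timer from its stored expiry pair instead of reading timer->expires
directly.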
diff --combined drivers/cpuidle/cpuidle.c
index bb6e3b338043e13e8e87bc8fceaad67e8f6a9dcc,2e3148499368b8b965e6845c7627caa6ce621be4..5bed73329ef825e2f423f04f4540d57330d30b73
@@@ -16,6 -16,7 +16,7 @@@
  #include <linux/cpu.h>
  #include <linux/cpuidle.h>
  #include <linux/ktime.h>
+ #include <linux/hrtimer.h>
  
  #include "cpuidle.h"
  
@@@ -56,14 -57,16 +57,20 @@@ static void cpuidle_idle_call(void
                if (pm_idle_old)
                        pm_idle_old();
                else
 +#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
 +                      default_idle();
 +#else
                        local_irq_enable();
 +#endif
                return;
        }
  
+       /*
+        * run any timers that can be run now, at this point
+        * before calculating the idle duration etc.
+        */
+       hrtimer_peek_ahead_timers();
        /* ask the governor for the next state */
        next_state = cpuidle_curr_governor->select(dev);
        if (need_resched())
        target_state = &dev->states[next_state];
  
        /* enter the state and update stats */
 -      dev->last_residency = target_state->enter(dev, target_state);
        dev->last_state = target_state;
 +      dev->last_residency = target_state->enter(dev, target_state);
 +      if (dev->last_state)
 +              target_state = dev->last_state;
 +
        target_state->time += (unsigned long long)dev->last_residency;
        target_state->usage++;
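
Besides the default_idle() fallback, two behavioural changes land here:
expired timers are run before the governor estimates the idle residency, and
dev->last_state is read back after ->enter() in case the driver demoted the
state. hrtimer_peek_ahead_timers() itself is not part of this hunk; the
following is an assumption-level sketch of what it plausibly does, based on
its name and placement:

    /*
     * Assumed sketch: behave as if the timer interrupt had just fired,
     * so anything already due runs before cpuidle computes how long
     * the CPU expects to sleep.
     */
    void hrtimer_peek_ahead_timers(void)
    {
            struct tick_device *td;
            unsigned long flags;

            if (!hrtimer_hres_active())     /* only meaningful in highres mode */
                    return;

            local_irq_save(flags);
            td = &__get_cpu_var(tick_cpu_device);
            if (td && td->evtdev)
                    hrtimer_interrupt(td->evtdev);
            local_irq_restore(flags);
    }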
  
diff --combined fs/compat.c
index cb36245f9fe0af7b7df70f54cd6f601ff10f9f51,3b58c32be526301998fe7d0698bbdf98ac84d3a2..fe3c9bf876089f1d71f7b45b92fb5110693eb599
@@@ -869,7 -869,7 +869,7 @@@ asmlinkage long compat_sys_old_readdir(
        buf.dirent = dirent;
  
        error = vfs_readdir(file, compat_fillonedir, &buf);
 -      if (error >= 0)
 +      if (buf.result)
                error = buf.result;
  
        fput(file);
@@@ -956,8 -956,9 +956,8 @@@ asmlinkage long compat_sys_getdents(uns
        buf.error = 0;
  
        error = vfs_readdir(file, compat_filldir, &buf);
 -      if (error < 0)
 -              goto out_putf;
 -      error = buf.error;
 +      if (error >= 0)
 +              error = buf.error;
        lastdirent = buf.previous;
        if (lastdirent) {
                if (put_user(file->f_pos, &lastdirent->d_off))
                else
                        error = count - buf.count;
        }
 -
 -out_putf:
        fput(file);
  out:
        return error;
@@@ -1044,16 -1047,19 +1044,16 @@@ asmlinkage long compat_sys_getdents64(u
        buf.error = 0;
  
        error = vfs_readdir(file, compat_filldir64, &buf);
 -      if (error < 0)
 -              goto out_putf;
 -      error = buf.error;
 +      if (error >= 0)
 +              error = buf.error;
        lastdirent = buf.previous;
        if (lastdirent) {
                typeof(lastdirent->d_off) d_off = file->f_pos;
 -              error = -EFAULT;
                if (__put_user_unaligned(d_off, &lastdirent->d_off))
 -                      goto out_putf;
 -              error = count - buf.count;
 +                      error = -EFAULT;
 +              else
 +                      error = count - buf.count;
        }
 -
 -out_putf:
        fput(file);
  out:
        return error;
@@@ -1469,6 -1475,57 +1469,57 @@@ out_ret
  
  #define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t))
  
+ static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
+                                     int timeval, int ret)
+ {
+       struct timespec ts;
+       if (!p)
+               return ret;
+       if (current->personality & STICKY_TIMEOUTS)
+               goto sticky;
+       /* No update for zero timeout */
+       if (!end_time->tv_sec && !end_time->tv_nsec)
+               return ret;
+       ktime_get_ts(&ts);
+       ts = timespec_sub(*end_time, ts);
+       if (ts.tv_sec < 0)
+               ts.tv_sec = ts.tv_nsec = 0;
+       if (timeval) {
+               struct compat_timeval rtv;
+               rtv.tv_sec = ts.tv_sec;
+               rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
+               if (!copy_to_user(p, &rtv, sizeof(rtv)))
+                       return ret;
+       } else {
+               struct compat_timespec rts;
+               rts.tv_sec = ts.tv_sec;
+               rts.tv_nsec = ts.tv_nsec;
+               if (!copy_to_user(p, &rts, sizeof(rts)))
+                       return ret;
+       }
+       /*
+        * If an application puts its timeval in read-only memory, we
+        * don't want the Linux-specific update to the timeval to
+        * cause a fault after the select has completed
+        * successfully. However, because we're not updating the
+        * timeval, we can't restart the system call.
+        */
+ sticky:
+       if (ret == -ERESTARTNOHAND)
+               ret = -EINTR;
+       return ret;
+ }
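
This new helper centralizes the copy-back and STICKY_TIMEOUTS handling that
compat_sys_select(), compat_sys_pselect7() and compat_sys_ppoll() each
open-coded before (see the removed blocks below); ret is threaded through so
that -ERESTARTNOHAND is downgraded to -EINTR exactly in the cases where the
user's timeout was left un-updated.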
  /*
   * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
   * 64-bit unsigned longs.
@@@ -1550,7 -1607,8 +1601,8 @@@ int compat_set_fd_set(unsigned long nr
        ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
  
  int compat_core_sys_select(int n, compat_ulong_t __user *inp,
-       compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout)
+       compat_ulong_t __user *outp, compat_ulong_t __user *exp,
+       struct timespec *end_time)
  {
        fd_set_bits fds;
        void *bits;
        zero_fd_set(n, fds.res_out);
        zero_fd_set(n, fds.res_ex);
  
-       ret = do_select(n, &fds, timeout);
+       ret = do_select(n, &fds, end_time);
  
        if (ret < 0)
                goto out;
@@@ -1623,7 -1681,7 +1675,7 @@@ asmlinkage long compat_sys_select(int n
        compat_ulong_t __user *outp, compat_ulong_t __user *exp,
        struct compat_timeval __user *tvp)
  {
-       s64 timeout = -1;
+       struct timespec end_time, *to = NULL;
        struct compat_timeval tv;
        int ret;
  
                if (copy_from_user(&tv, tvp, sizeof(tv)))
                        return -EFAULT;
  
-               if (tv.tv_sec < 0 || tv.tv_usec < 0)
+               to = &end_time;
+               if (poll_select_set_timeout(to, tv.tv_sec,
+                                           tv.tv_usec * NSEC_PER_USEC))
                        return -EINVAL;
-               /* Cast to u64 to make GCC stop complaining */
-               if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
-                       timeout = -1;   /* infinite */
-               else {
-                       timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ);
-                       timeout += tv.tv_sec * HZ;
-               }
        }
  
-       ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
-       if (tvp) {
-               struct compat_timeval rtv;
-               if (current->personality & STICKY_TIMEOUTS)
-                       goto sticky;
-               rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
-               rtv.tv_sec = timeout;
-               if (compat_timeval_compare(&rtv, &tv) >= 0)
-                       rtv = tv;
-               if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
- sticky:
-                       /*
-                        * If an application puts its timeval in read-only
-                        * memory, we don't want the Linux-specific update to
-                        * the timeval to cause a fault after the select has
-                        * completed successfully. However, because we're not
-                        * updating the timeval, we can't restart the system
-                        * call.
-                        */
-                       if (ret == -ERESTARTNOHAND)
-                               ret = -EINTR;
-               }
-       }
+       ret = compat_core_sys_select(n, inp, outp, exp, to);
+       ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
  
        return ret;
  }
@@@ -1680,15 -1709,16 +1703,16 @@@ asmlinkage long compat_sys_pselect7(in
  {
        compat_sigset_t ss32;
        sigset_t ksigmask, sigsaved;
-       s64 timeout = MAX_SCHEDULE_TIMEOUT;
        struct compat_timespec ts;
+       struct timespec end_time, *to = NULL;
        int ret;
  
        if (tsp) {
                if (copy_from_user(&ts, tsp, sizeof(ts)))
                        return -EFAULT;
  
-               if (ts.tv_sec < 0 || ts.tv_nsec < 0)
+               to = &end_time;
+               if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
                        return -EINVAL;
        }
  
                sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
        }
  
-       do {
-               if (tsp) {
-                       if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
-                               timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
-                               timeout += ts.tv_sec * (unsigned long)HZ;
-                               ts.tv_sec = 0;
-                               ts.tv_nsec = 0;
-                       } else {
-                               ts.tv_sec -= MAX_SELECT_SECONDS;
-                               timeout = MAX_SELECT_SECONDS * HZ;
-                       }
-               }
-               ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
-       } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
-       if (tsp) {
-               struct compat_timespec rts;
-               if (current->personality & STICKY_TIMEOUTS)
-                       goto sticky;
-               rts.tv_sec = timeout / HZ;
-               rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
-               if (rts.tv_nsec >= NSEC_PER_SEC) {
-                       rts.tv_sec++;
-                       rts.tv_nsec -= NSEC_PER_SEC;
-               }
-               if (compat_timespec_compare(&rts, &ts) >= 0)
-                       rts = ts;
-               if (copy_to_user(tsp, &rts, sizeof(rts))) {
- sticky:
-                       /*
-                        * If an application puts its timeval in read-only
-                        * memory, we don't want the Linux-specific update to
-                        * the timeval to cause a fault after the select has
-                        * completed successfully. However, because we're not
-                        * updating the timeval, we can't restart the system
-                        * call.
-                        */
-                       if (ret == -ERESTARTNOHAND)
-                               ret = -EINTR;
-               }
-       }
+       ret = compat_core_sys_select(n, inp, outp, exp, to);
+       ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
  
        if (ret == -ERESTARTNOHAND) {
                /*
@@@ -1792,18 -1779,16 +1773,16 @@@ asmlinkage long compat_sys_ppoll(struc
        compat_sigset_t ss32;
        sigset_t ksigmask, sigsaved;
        struct compat_timespec ts;
-       s64 timeout = -1;
+       struct timespec end_time, *to = NULL;
        int ret;
  
        if (tsp) {
                if (copy_from_user(&ts, tsp, sizeof(ts)))
                        return -EFAULT;
  
-               /* We assume that ts.tv_sec is always lower than
-                  the number of seconds that can be expressed in
-                  an s64. Otherwise the compiler bitches at us */
-               timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
-               timeout += ts.tv_sec * HZ;
+               to = &end_time;
+               if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+                       return -EINVAL;
        }
  
        if (sigmask) {
                sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
        }
  
-       ret = do_sys_poll(ufds, nfds, &timeout);
+       ret = do_sys_poll(ufds, nfds, to);
  
        /* We can restart this syscall, usually */
        if (ret == -EINTR) {
        } else if (sigmask)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);
  
-       if (tsp && timeout >= 0) {
-               struct compat_timespec rts;
-               if (current->personality & STICKY_TIMEOUTS)
-                       goto sticky;
-               /* Yes, we know it's actually an s64, but it's also positive. */
-               rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
-                                       1000;
-               rts.tv_sec = timeout;
-               if (compat_timespec_compare(&rts, &ts) >= 0)
-                       rts = ts;
-               if (copy_to_user(tsp, &rts, sizeof(rts))) {
- sticky:
-                       /*
-                        * If an application puts its timeval in read-only
-                        * memory, we don't want the Linux-specific update to
-                        * the timeval to cause a fault after the select has
-                        * completed successfully. However, because we're not
-                        * updating the timeval, we can't restart the system
-                        * call.
-                        */
-                       if (ret == -ERESTARTNOHAND && timeout >= 0)
-                               ret = -EINTR;
-               }
-       }
+       ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
  
        return ret;
  }
diff --combined include/linux/sched.h
index 10bff55b0824f2cf7874484a46d6749ad7e2381a,9ee3bed0ff065108a2f52404c2705b88277730f3..5ca620573d47c35c707b115cdb44ca2fb243d519
@@@ -287,6 -287,7 +287,6 @@@ extern void trap_init(void)
  extern void account_process_tick(struct task_struct *task, int user);
  extern void update_process_times(int user);
  extern void scheduler_tick(void);
 -extern void hrtick_resched(void);
  
  extern void sched_show_task(struct task_struct *p);
  
@@@ -1345,6 -1346,12 +1345,12 @@@ struct task_struct 
        int latency_record_count;
        struct latency_record latency_record[LT_SAVECOUNT];
  #endif
+       /*
+        * time slack values; these are used to round up poll() and
+        * select() etc timeout values. These are in nanoseconds.
+        */
+       unsigned long timer_slack_ns;
+       unsigned long default_timer_slack_ns;
  };
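
These two fields are what the futex(), nanosleep(), select() and poll() commits
in the list above consume. From userspace the slack is tunable per task via
prctl(); a small hedged example follows (PR_SET_TIMERSLACK and
PR_GET_TIMERSLACK were introduced alongside this series, so the fallback
defines cover older headers):

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_TIMERSLACK               /* new with this series */
    #define PR_SET_TIMERSLACK 29
    #define PR_GET_TIMERSLACK 30
    #endif

    int main(void)
    {
            /* Allow this task's poll()/select()/nanosleep() wakeups to
             * be deferred by up to 1 ms (the argument is nanoseconds). */
            if (prctl(PR_SET_TIMERSLACK, 1000000UL, 0, 0, 0) != 0)
                    perror("PR_SET_TIMERSLACK");

            /* The getter reports timer_slack_ns via the return value;
             * setting 0 would restore default_timer_slack_ns. */
            printf("timer slack: %ld ns\n",
                   (long)prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0));
            return 0;
    }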
  
  /*
@@@ -1664,7 -1671,6 +1670,7 @@@ extern unsigned int sysctl_sched_featur
  extern unsigned int sysctl_sched_migration_cost;
  extern unsigned int sysctl_sched_nr_migrate;
  extern unsigned int sysctl_sched_shares_ratelimit;
 +extern unsigned int sysctl_sched_shares_thresh;
  
  int sched_nr_latency_handler(struct ctl_table *table, int write,
                struct file *file, void __user *buffer, size_t *length,
diff --combined kernel/sched.c
index 945a97b9600ddcea35e267487a9737ff675d9c0e,bfa87918380ffa3005847c832a3fd86fdf1c54ce..1645c7211944b2c6fc23aeea9b890ddf85392289
@@@ -227,9 -227,8 +227,8 @@@ static void start_rt_bandwidth(struct r
  
                now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
                hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
-               hrtimer_start(&rt_b->rt_period_timer,
-                             rt_b->rt_period_timer.expires,
-                             HRTIMER_MODE_ABS);
+               hrtimer_start_expires(&rt_b->rt_period_timer,
+                               HRTIMER_MODE_ABS);
        }
        spin_unlock(&rt_b->rt_runtime_lock);
  }
@@@ -818,13 -817,6 +817,13 @@@ const_debug unsigned int sysctl_sched_n
   */
  unsigned int sysctl_sched_shares_ratelimit = 250000;
  
 +/*
 + * Inject some fuzzyness into changing the per-cpu group shares
 + * this avoids remote rq-locks at the expense of fairness.
 + * default: 4
 + */
 +unsigned int sysctl_sched_shares_thresh = 4;
 +
  /*
   * period over which we measure -rt task cpu usage in us.
   * default: 1s
@@@ -1071,7 -1063,7 +1070,7 @@@ static void hrtick_start(struct rq *rq
        struct hrtimer *timer = &rq->hrtick_timer;
        ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
  
-       timer->expires = time;
+       hrtimer_set_expires(timer, time);
  
        if (rq == this_rq()) {
                hrtimer_restart(timer);
@@@ -1461,8 -1453,8 +1460,8 @@@ static void __set_se_shares(struct sche
   * Calculate and set the cpu's group shares.
   */
  static void
 -__update_group_shares_cpu(struct task_group *tg, int cpu,
 -                        unsigned long sd_shares, unsigned long sd_rq_weight)
 +update_group_shares_cpu(struct task_group *tg, int cpu,
 +                      unsigned long sd_shares, unsigned long sd_rq_weight)
  {
        int boost = 0;
        unsigned long shares;
         *
         */
        shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
 +      shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
  
 -      /*
 -       * record the actual number of shares, not the boosted amount.
 -       */
 -      tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
 -      tg->cfs_rq[cpu]->rq_weight = rq_weight;
 +      if (abs(shares - tg->se[cpu]->load.weight) >
 +                      sysctl_sched_shares_thresh) {
 +              struct rq *rq = cpu_rq(cpu);
 +              unsigned long flags;
  
 -      if (shares < MIN_SHARES)
 -              shares = MIN_SHARES;
 -      else if (shares > MAX_SHARES)
 -              shares = MAX_SHARES;
 +              spin_lock_irqsave(&rq->lock, flags);
 +              /*
 +               * record the actual number of shares, not the boosted amount.
 +               */
 +              tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
 +              tg->cfs_rq[cpu]->rq_weight = rq_weight;
  
 -      __set_se_shares(tg->se[cpu], shares);
 +              __set_se_shares(tg->se[cpu], shares);
 +              spin_unlock_irqrestore(&rq->lock, flags);
 +      }
  }
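
The rename from __update_group_shares_cpu() marks that the locking moved
inside: the per-cpu rq->lock is now taken only when the recomputed share
differs from the current se load weight by more than
sysctl_sched_shares_thresh. With the default threshold of 4, for example, a
drift from 1024 to 1027 shares is simply left alone; that is the "fuzzyness"
the comment higher up trades against remote rq-lock traffic.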
  
  /*
@@@ -1538,8 -1526,14 +1537,8 @@@ static int tg_shares_up(struct task_gro
        if (!rq_weight)
                rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
  
 -      for_each_cpu_mask(i, sd->span) {
 -              struct rq *rq = cpu_rq(i);
 -              unsigned long flags;
 -
 -              spin_lock_irqsave(&rq->lock, flags);
 -              __update_group_shares_cpu(tg, i, shares, rq_weight);
 -              spin_unlock_irqrestore(&rq->lock, flags);
 -      }
 +      for_each_cpu_mask(i, sd->span)
 +              update_group_shares_cpu(tg, i, shares, rq_weight);
  
        return 0;
  }
@@@ -4448,8 -4442,12 +4447,8 @@@ need_resched_nonpreemptible
        if (sched_feat(HRTICK))
                hrtick_clear(rq);
  
 -      /*
 -       * Do the rq-clock update outside the rq lock:
 -       */
 -      local_irq_disable();
 +      spin_lock_irq(&rq->lock);
        update_rq_clock(rq);
 -      spin_lock(&rq->lock);
        clear_tsk_need_resched(prev);
  
        if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {