Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)
diff --combined include/linux/sched.h

index 5c38db536e07f8b7b0d6e5b0dc4f31a9e3cb07c1,4f59c8e8597d02f4961deb32e9df460e74d80a71..10bff55b0824f2cf7874484a46d6749ad7e2381a
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -287,7 -287,6 +287,6 @@@ extern void trap_init(void)
   extern void account_process_tick(struct task_struct *task, int user);
   extern void update_process_times(int user);
   extern void scheduler_tick(void);
- extern void hrtick_resched(void);
   
   extern void sched_show_task(struct task_struct *p);
   
@@@ -403,21 -402,12 +402,21 @@@ extern int get_dumpable(struct mm_struc
   #define MMF_DUMP_MAPPED_PRIVATE       4
   #define MMF_DUMP_MAPPED_SHARED        5
   #define MMF_DUMP_ELF_HEADERS  6
+ +#define MMF_DUMP_HUGETLB_PRIVATE 7
+ +#define MMF_DUMP_HUGETLB_SHARED  8
   #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
- -#define MMF_DUMP_FILTER_BITS  5
+ +#define MMF_DUMP_FILTER_BITS  7
   #define MMF_DUMP_FILTER_MASK \
         (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
   #define MMF_DUMP_FILTER_DEFAULT \
- -      ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED))
+ +      ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
+ +       (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
+ +
+ +#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
+ +# define MMF_DUMP_MASK_DEFAULT_ELF    (1 << MMF_DUMP_ELF_HEADERS)
+ +#else
+ +# define MMF_DUMP_MASK_DEFAULT_ELF    0
+ +#endif
   
   struct sighand_struct {
         atomic_t                count;
@@@ -434,39 -424,6 +433,39 @@@ struct pacct_struct 
         unsigned long           ac_minflt, ac_majflt;
   };
   
+ +/**
+ + * struct task_cputime - collected CPU time counts
+ + * @utime:            time spent in user mode, in &cputime_t units
+ + * @stime:            time spent in kernel mode, in &cputime_t units
+ + * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
+ + *
+ + * This structure groups together three kinds of CPU time that are
+ + * tracked for threads and thread groups.  Most things considering
+ + * CPU time want to group these counts together and treat all three
+ + * of them in parallel.
+ + */
+ +struct task_cputime {
+ +      cputime_t utime;
+ +      cputime_t stime;
+ +      unsigned long long sum_exec_runtime;
+ +};
+ +/* Alternate field names when used to cache expirations. */
+ +#define prof_exp      stime
+ +#define virt_exp      utime
+ +#define sched_exp     sum_exec_runtime
+ +
+ +/**
+ + * struct thread_group_cputime - thread group interval timer counts
+ + * @totals:           thread group interval timers; substructure for
+ + *                    uniprocessor kernel, per-cpu for SMP kernel.
+ + *
+ + * This structure contains the version of task_cputime, above, that is
+ + * used for thread group CPU clock calculations.
+ + */
+ +struct thread_group_cputime {
+ +      struct task_cputime *totals;
+ +};
+ +
   /*
    * NOTE! "signal_struct" does not have its own
    * locking, because a shared signal_struct always
@@@ -512,17 -469,6 +511,17 @@@ struct signal_struct 
         cputime_t it_prof_expires, it_virt_expires;
         cputime_t it_prof_incr, it_virt_incr;
   
+ +      /*
+ +       * Thread group totals for process CPU clocks.
+ +       * See thread_group_cputime(), et al, for details.
+ +       */
+ +      struct thread_group_cputime cputime;
+ +
+ +      /* Earliest-expiration cache. */
+ +      struct task_cputime cputime_expires;
+ +
+ +      struct list_head cpu_timers[3];
+ +
         /* job control IDs */
   
         /*
@@@ -553,7 -499,7 +552,7 @@@
          * Live threads maintain their own counters and add to these
          * in __exit_signal, except for the group leader.
          */
- -      cputime_t utime, stime, cutime, cstime;
+ +      cputime_t cutime, cstime;
         cputime_t gtime;
         cputime_t cgtime;
         unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@@ -561,6 -507,14 +560,6 @@@
         unsigned long inblock, oublock, cinblock, coublock;
         struct task_io_accounting ioac;
   
- -      /*
- -       * Cumulative ns of scheduled CPU time for dead threads in the
- -       * group, not including a zombie group leader.  (This only differs
- -       * from jiffies_to_ns(utime + stime) if sched_clock uses something
- -       * other than jiffies.)
- -       */
- -      unsigned long long sum_sched_runtime;
- -
         /*
          * We don't bother to synchronize most readers of this at all,
          * because there is no reader checking a limit that actually needs
@@@ -572,6 -526,8 +571,6 @@@
          */
         struct rlimit rlim[RLIM_NLIMITS];
   
- -      struct list_head cpu_timers[3];
- -
         /* keep the process-shared keyrings here so that they do the right
          * thing in threads created with CLONE_THREAD */
   #ifdef CONFIG_KEYS
@@@ -1180,7 -1136,8 +1179,7 @@@ struct task_struct 
   /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
         unsigned long min_flt, maj_flt;
   
- -      cputime_t it_prof_expires, it_virt_expires;
- -      unsigned long long it_sched_expires;
+ +      struct task_cputime cputime_expires;
         struct list_head cpu_timers[3];
   
   /* process credentials */
@@@ -1630,7 -1587,6 +1629,7 @@@ extern unsigned long long cpu_clock(in
   
   extern unsigned long long
   task_sched_runtime(struct task_struct *task);
+ +extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
   
   /* sched_exec is called by processes performing an exec */
   #ifdef CONFIG_SMP
@@@ -1665,6 -1621,7 +1664,7 @@@ extern unsigned int sysctl_sched_featur
   extern unsigned int sysctl_sched_migration_cost;
   extern unsigned int sysctl_sched_nr_migrate;
   extern unsigned int sysctl_sched_shares_ratelimit;
+ extern unsigned int sysctl_sched_shares_thresh;
   
   int sched_nr_latency_handler(struct ctl_table *table, int write,
                 struct file *file, void __user *buffer, size_t *length,
@@@ -2127,30 -2084,6 +2127,30 @@@ static inline int spin_needbreak(spinlo
   #endif
   }
   
+ +/*
+ + * Thread group CPU time accounting.
+ + */
+ +
+ +extern int thread_group_cputime_alloc(struct task_struct *);
+ +extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
+ +
+ +static inline void thread_group_cputime_init(struct signal_struct *sig)
+ +{
+ +      sig->cputime.totals = NULL;
+ +}
+ +
+ +static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
+ +{
+ +      if (curr->signal->cputime.totals)
+ +              return 0;
+ +      return thread_group_cputime_alloc(curr);
+ +}
+ +
+ +static inline void thread_group_cputime_free(struct signal_struct *sig)
+ +{
+ +      free_percpu(sig->cputime.totals);
+ +}
+ +
   /*
    * Reevaluate whether the task has signals pending delivery.
    * Wake the task if so.
diff --combined kernel/sched.c

index d906f72b42d23ae1d8c2355d9b605e5fd0761eaa,11ca39017835d2d60309e4b72ca724da9d0b38a1..945a97b9600ddcea35e267487a9737ff675d9c0e
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -71,7 -71,6 +71,7 @@@
   #include <linux/debugfs.h>
   #include <linux/ctype.h>
   #include <linux/ftrace.h>
+ +#include <trace/sched.h>
   
   #include <asm/tlb.h>
   #include <asm/irq_regs.h>
@@@ -818,6 -817,13 +818,13 @@@ const_debug unsigned int sysctl_sched_n
    */
   unsigned int sysctl_sched_shares_ratelimit = 250000;
   
+ /*
+  * Inject some fuzziness into changing the per-cpu group shares;
+  * this avoids remote rq-locks at the expense of fairness.
+  * default: 4
+  */
+ unsigned int sysctl_sched_shares_thresh = 4;
+ 
   /*
    * period over which we measure -rt task cpu usage in us.
    * default: 1s
@@@ -1454,8 -1460,8 +1461,8 @@@ static void __set_se_shares(struct sche
    * Calculate and set the cpu's group shares.
    */
   static void
- __update_group_shares_cpu(struct task_group *tg, int cpu,
-                         unsigned long sd_shares, unsigned long sd_rq_weight)
+ update_group_shares_cpu(struct task_group *tg, int cpu,
+                       unsigned long sd_shares, unsigned long sd_rq_weight)
   {
         int boost = 0;
         unsigned long shares;
@@@ -1486,19 -1492,23 +1493,23 @@@
          *
          */
         shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+       shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
   
-       /*
-        * record the actual number of shares, not the boosted amount.
-        */
-       tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-       tg->cfs_rq[cpu]->rq_weight = rq_weight;
+       if (abs(shares - tg->se[cpu]->load.weight) >
+                       sysctl_sched_shares_thresh) {
+               struct rq *rq = cpu_rq(cpu);
+               unsigned long flags;
   
-       if (shares < MIN_SHARES)
-               shares = MIN_SHARES;
-       else if (shares > MAX_SHARES)
-               shares = MAX_SHARES;
+               spin_lock_irqsave(&rq->lock, flags);
+               /*
+                * record the actual number of shares, not the boosted amount.
+                */
+               tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+               tg->cfs_rq[cpu]->rq_weight = rq_weight;
   
-       __set_se_shares(tg->se[cpu], shares);
+               __set_se_shares(tg->se[cpu], shares);
+               spin_unlock_irqrestore(&rq->lock, flags);
+       }
   }
   
   /*
@@@ -1527,14 -1537,8 +1538,8 @@@ static int tg_shares_up(struct task_gro
         if (!rq_weight)
                 rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
   
-       for_each_cpu_mask(i, sd->span) {
-               struct rq *rq = cpu_rq(i);
-               unsigned long flags;
- 
-               spin_lock_irqsave(&rq->lock, flags);
-               __update_group_shares_cpu(tg, i, shares, rq_weight);
-               spin_unlock_irqrestore(&rq->lock, flags);
-       }
+       for_each_cpu_mask(i, sd->span)
+               update_group_shares_cpu(tg, i, shares, rq_weight);
   
         return 0;
   }
@@@ -1937,7 -1941,6 +1942,7 @@@ unsigned long wait_task_inactive(struc
                  * just go back and repeat.
                  */
                 rq = task_rq_lock(p, &flags);
+ +              trace_sched_wait_task(rq, p);
                 running = task_running(rq, p);
                 on_rq = p->se.on_rq;
                 ncsw = 0;
@@@ -2299,7 -2302,9 +2304,7 @@@ out_activate
         success = 1;
   
   out_running:
- -      trace_mark(kernel_sched_wakeup,
- -              "pid %d state %ld ## rq %p task %p rq->curr %p",
- -              p->pid, p->state, rq, p, rq->curr);
+ +      trace_sched_wakeup(rq, p);
         check_preempt_curr(rq, p, sync);
   
         p->state = TASK_RUNNING;
@@@ -2432,7 -2437,9 +2437,7 @@@ void wake_up_new_task(struct task_struc
                 p->sched_class->task_new(rq, p);
                 inc_nr_running(rq);
         }
- -      trace_mark(kernel_sched_wakeup_new,
- -              "pid %d state %ld ## rq %p task %p rq->curr %p",
- -              p->pid, p->state, rq, p, rq->curr);
+ +      trace_sched_wakeup_new(rq, p);
         check_preempt_curr(rq, p, 0);
   #ifdef CONFIG_SMP
         if (p->sched_class->task_wake_up)
@@@ -2605,7 -2612,11 +2610,7 @@@ context_switch(struct rq *rq, struct ta
         struct mm_struct *mm, *oldmm;
   
         prepare_task_switch(rq, prev, next);
- -      trace_mark(kernel_sched_schedule,
- -              "prev_pid %d next_pid %d prev_state %ld "
- -              "## rq %p prev %p next %p",
- -              prev->pid, next->pid, prev->state,
- -              rq, prev, next);
+ +      trace_sched_switch(rq, prev, next);
         mm = next->mm;
         oldmm = prev->active_mm;
         /*
@@@ -2845,7 -2856,6 +2850,7 @@@ static void sched_migrate_task(struct t
             || unlikely(!cpu_active(dest_cpu)))
                 goto out;
   
+ +      trace_sched_migrate_task(rq, p, dest_cpu);
         /* force the process onto the specified CPU */
         if (migrate_task(p, dest_cpu, &req)) {
                 /* Need to wait for migration thread (might exit: take ref). */
@@@ -4047,26 -4057,23 +4052,26 @@@ DEFINE_PER_CPU(struct kernel_stat, ksta
   EXPORT_PER_CPU_SYMBOL(kstat);
   
   /*
- - * Return p->sum_exec_runtime plus any more ns on the sched_clock
- - * that have not yet been banked in case the task is currently running.
+ + * Return any ns on the sched_clock that have not yet been banked in
+ + * @p in case that task is currently running.
    */
- -unsigned long long task_sched_runtime(struct task_struct *p)
+ +unsigned long long task_delta_exec(struct task_struct *p)
   {
         unsigned long flags;
- -      u64 ns, delta_exec;
         struct rq *rq;
+ +      u64 ns = 0;
   
         rq = task_rq_lock(p, &flags);
- -      ns = p->se.sum_exec_runtime;
+ +
         if (task_current(rq, p)) {
+ +              u64 delta_exec;
+ +
                 update_rq_clock(rq);
                 delta_exec = rq->clock - p->se.exec_start;
                 if ((s64)delta_exec > 0)
- -                      ns += delta_exec;
+ +                      ns = delta_exec;
         }
+ +
         task_rq_unlock(rq, &flags);
   
         return ns;
@@@ -4083,7 -4090,6 +4088,7 @@@ void account_user_time(struct task_stru
         cputime64_t tmp;
   
         p->utime = cputime_add(p->utime, cputime);
+ +      account_group_user_time(p, cputime);
   
         /* Add user time to cpustat. */
         tmp = cputime_to_cputime64(cputime);
@@@ -4108,7 -4114,6 +4113,7 @@@ static void account_guest_time(struct t
         tmp = cputime_to_cputime64(cputime);
   
         p->utime = cputime_add(p->utime, cputime);
+ +      account_group_user_time(p, cputime);
         p->gtime = cputime_add(p->gtime, cputime);
   
         cpustat->user = cputime64_add(cpustat->user, tmp);
@@@ -4144,7 -4149,6 +4149,7 @@@ void account_system_time(struct task_st
         }
   
         p->stime = cputime_add(p->stime, cputime);
+ +      account_group_system_time(p, cputime);
   
         /* Add system time to cpustat. */
         tmp = cputime_to_cputime64(cputime);
@@@ -4186,7 -4190,6 +4191,7 @@@ void account_steal_time(struct task_str
   
         if (p == rq->idle) {
                 p->stime = cputime_add(p->stime, steal);
+ +              account_group_system_time(p, steal);
                 if (atomic_read(&rq->nr_iowait) > 0)
                         cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                 else
@@@ -4443,12 -4446,8 +4448,8 @@@ need_resched_nonpreemptible
         if (sched_feat(HRTICK))
                 hrtick_clear(rq);
   
-       /*
-        * Do the rq-clock update outside the rq lock:
-        */
-       local_irq_disable();
+       spin_lock_irq(&rq->lock);
         update_rq_clock(rq);
-       spin_lock(&rq->lock);
         clear_tsk_need_resched(prev);
   
         if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
diff --combined kernel/sched_fair.c

index f604dae71316264445e63b4d09f26a483d61113e,a0aa38b10fdd1d5c52bc3fd788c04e70a4ff98ef..9573c33688b89616d38346ed5a744af4324fc282
--- 1/kernel/sched_fair.c
--- 2/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@@ -73,6 -73,8 +73,8 @@@ unsigned int sysctl_sched_wakeup_granul
   
   const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
   
+ static const struct sched_class fair_sched_class;
+ 
   /**************************************************************
    * CFS operations on generic schedulable entities:
    */
@@@ -334,7 -336,7 +336,7 @@@ int sched_nr_latency_handler(struct ctl
   #endif
   
   /*
-  * delta *= w / rw
+  * delta *= P[w / rw]
    */
   static inline unsigned long
   calc_delta_weight(unsigned long delta, struct sched_entity *se)
@@@ -348,15 -350,13 +350,13 @@@
   }
   
   /*
-  * delta *= rw / w
+  * delta /= w
    */
   static inline unsigned long
   calc_delta_fair(unsigned long delta, struct sched_entity *se)
   {
-       for_each_sched_entity(se) {
-               delta = calc_delta_mine(delta,
-                               cfs_rq_of(se)->load.weight, &se->load);
-       }
+       if (unlikely(se->load.weight != NICE_0_LOAD))
+               delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
   
         return delta;
   }
@@@ -386,26 -386,26 +386,26 @@@ static u64 __sched_period(unsigned lon
    * We calculate the wall-time slice from the period by taking a part
    * proportional to the weight.
    *
-  * s = p*w/rw
+  * s = p*P[w/rw]
    */
   static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
   {
-       return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
+       unsigned long nr_running = cfs_rq->nr_running;
+ 
+       if (unlikely(!se->on_rq))
+               nr_running++;
+ 
+       return calc_delta_weight(__sched_period(nr_running), se);
   }
   
   /*
    * We calculate the vruntime slice of a to-be-inserted task
    *
-  * vs = s*rw/w = p
+  * vs = s/w
    */
- static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
   {
-       unsigned long nr_running = cfs_rq->nr_running;
- 
-       if (!se->on_rq)
-               nr_running++;
- 
-       return __sched_period(nr_running);
+       return calc_delta_fair(sched_slice(cfs_rq, se), se);
   }
   
   /*
@@@ -449,7 -449,6 +449,7 @@@ static void update_curr(struct cfs_rq *
                 struct task_struct *curtask = task_of(curr);
   
                 cpuacct_charge(curtask, delta_exec);
+ +              account_group_exec_runtime(curtask, delta_exec);
         }
   }
   
@@@ -628,7 -627,7 +628,7 @@@ place_entity(struct cfs_rq *cfs_rq, str
          * stays open at the end.
          */
         if (initial && sched_feat(START_DEBIT))
-               vruntime += sched_vslice_add(cfs_rq, se);
+               vruntime += sched_vslice(cfs_rq, se);
   
         if (!initial) {
                 /* sleeps up to a single latency don't count. */
@@@ -748,7 -747,7 +748,7 @@@ pick_next(struct cfs_rq *cfs_rq, struc
         struct rq *rq = rq_of(cfs_rq);
         u64 pair_slice = rq->clock - cfs_rq->pair_start;
   
-       if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) {
+       if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
                 cfs_rq->pair_start = rq->clock;
                 return se;
         }
@@@ -849,11 -848,31 +849,31 @@@ static void hrtick_start_fair(struct r
                 hrtick_start(rq, delta);
         }
   }
+ 
+ /*
+  * called from enqueue/dequeue and updates the hrtick when the
+  * current task is from our class and nr_running is low enough
+  * to matter.
+  */
+ static void hrtick_update(struct rq *rq)
+ {
+       struct task_struct *curr = rq->curr;
+ 
+       if (curr->sched_class != &fair_sched_class)
+               return;
+ 
+       if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
+               hrtick_start_fair(rq, curr);
+ }
   #else /* !CONFIG_SCHED_HRTICK */
   static inline void
   hrtick_start_fair(struct rq *rq, struct task_struct *p)
   {
   }
+ 
+ static inline void hrtick_update(struct rq *rq)
+ {
+ }
   #endif
   
   /*
@@@ -874,7 -893,7 +894,7 @@@ static void enqueue_task_fair(struct r
                 wakeup = 1;
         }
   
-       hrtick_start_fair(rq, rq->curr);
+       hrtick_update(rq);
   }
   
   /*
@@@ -896,7 -915,7 +916,7 @@@ static void dequeue_task_fair(struct r
                 sleep = 1;
         }
   
-       hrtick_start_fair(rq, rq->curr);
+       hrtick_update(rq);
   }
   
   /*
@@@ -1002,8 -1021,6 +1022,6 @@@ static inline int wake_idle(int cpu, st
   
   #ifdef CONFIG_SMP
   
- static const struct sched_class fair_sched_class;
- 
   #ifdef CONFIG_FAIR_GROUP_SCHED
   /*
    * effective_load() calculates the load change as seen from the root_task_group
diff --combined kernel/sched_stats.h

index b8c156979cf2ad600b0f9b59f9a935cec86aab85,67579253b53b5051f05338e4cf76dae1d09f4945..2df9d297d292d23fa46db931cdbd0cf9a41af2c0
--- 1/kernel/sched_stats.h
--- 2/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@@ -9,7 -9,7 +9,7 @@@
   static int show_schedstat(struct seq_file *seq, void *v)
   {
         int cpu;
-       int mask_len = NR_CPUS/32 * 9;
+       int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
         char *mask_str = kmalloc(mask_len, GFP_KERNEL);
   
         if (mask_str == NULL)
@@@ -270,89 -270,3 +270,89 @@@ sched_info_switch(struct task_struct *p
   #define sched_info_switch(t, next)            do { } while (0)
   #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
   
+ +/*
+ + * The following are functions that support scheduler-internal time accounting.
+ + * These functions are generally called at the timer tick.  None of this depends
+ + * on CONFIG_SCHEDSTATS.
+ + */
+ +
+ +/**
+ + * account_group_user_time - Maintain utime for a thread group.
+ + *
+ + * @tsk:      Pointer to task structure.
+ + * @cputime:  Time value by which to increment the utime field of the
+ + *            thread_group_cputime structure.
+ + *
+ + * If thread group time is being maintained, get the structure for the
+ + * running CPU and update the utime field there.
+ + */
+ +static inline void account_group_user_time(struct task_struct *tsk,
+ +                                         cputime_t cputime)
+ +{
+ +      struct signal_struct *sig;
+ +
+ +      sig = tsk->signal;
+ +      if (unlikely(!sig))
+ +              return;
+ +      if (sig->cputime.totals) {
+ +              struct task_cputime *times;
+ +
+ +              times = per_cpu_ptr(sig->cputime.totals, get_cpu());
+ +              times->utime = cputime_add(times->utime, cputime);
+ +              put_cpu_no_resched();
+ +      }
+ +}
+ +
+ +/**
+ + * account_group_system_time - Maintain stime for a thread group.
+ + *
+ + * @tsk:      Pointer to task structure.
+ + * @cputime:  Time value by which to increment the stime field of the
+ + *            thread_group_cputime structure.
+ + *
+ + * If thread group time is being maintained, get the structure for the
+ + * running CPU and update the stime field there.
+ + */
+ +static inline void account_group_system_time(struct task_struct *tsk,
+ +                                           cputime_t cputime)
+ +{
+ +      struct signal_struct *sig;
+ +
+ +      sig = tsk->signal;
+ +      if (unlikely(!sig))
+ +              return;
+ +      if (sig->cputime.totals) {
+ +              struct task_cputime *times;
+ +
+ +              times = per_cpu_ptr(sig->cputime.totals, get_cpu());
+ +              times->stime = cputime_add(times->stime, cputime);
+ +              put_cpu_no_resched();
+ +      }
+ +}
+ +
+ +/**
+ + * account_group_exec_runtime - Maintain exec runtime for a thread group.
+ + *
+ + * @tsk:      Pointer to task structure.
+ + * @ns:               Time value by which to increment the sum_exec_runtime field
+ + *            of the thread_group_cputime structure.
+ + *
+ + * If thread group time is being maintained, get the structure for the
+ + * running CPU and update the sum_exec_runtime field there.
+ + */
+ +static inline void account_group_exec_runtime(struct task_struct *tsk,
+ +                                            unsigned long long ns)
+ +{
+ +      struct signal_struct *sig;
+ +
+ +      sig = tsk->signal;
+ +      if (unlikely(!sig))
+ +              return;
+ +      if (sig->cputime.totals) {
+ +              struct task_cputime *times;
+ +
+ +              times = per_cpu_ptr(sig->cputime.totals, get_cpu());
+ +              times->sum_exec_runtime += ns;
+ +              put_cpu_no_resched();
+ +      }
+ +}
diff --combined kernel/sysctl.c

index b3cc73931d1f6411c6e99faf205124987d4a887e,3d804f41e649246b4aa477252d9626d916347034..a13bd4dfaeb1becfff933212893ea1c422ff0a04
--- 1/kernel/sysctl.c
--- 2/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@@ -274,6 -274,16 +274,16 @@@ static struct ctl_table kern_table[] = 
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec,
         },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "sched_shares_thresh",
+               .data           = &sysctl_sched_shares_thresh,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &zero,
+       },
         {
                 .ctl_name       = CTL_UNNUMBERED,
                 .procname       = "sched_child_runs_first",
@@@ -833,16 -843,6 +843,16 @@@
                 .proc_handler   = &proc_dointvec,
         },
   #endif
+ +#ifdef CONFIG_UNEVICTABLE_LRU
+ +      {
+ +              .ctl_name       = CTL_UNNUMBERED,
+ +              .procname       = "scan_unevictable_pages",
+ +              .data           = &scan_unevictable_pages,
+ +              .maxlen         = sizeof(scan_unevictable_pages),
+ +              .mode           = 0644,
+ +              .proc_handler   = &scan_unevictable_handler,
+ +      },
+ +#endif
   /*
    * NOTE: do not add new entries to this table unless you have read
    * Documentation/sysctl/ctl_unnumbered.txt
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)
		1	2
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched_fair.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched_stats.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sysctl.c	patch \|	diff1 \|	diff2 \|	blob \| history