www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branches 'core/debug', 'core/futexes', 'core/locking', 'core/rcu', 'core/signal...
author Ingo Molnar <mingo@elte.hu>
Mon, 24 Nov 2008 16:44:55 +0000 (17:44 +0100)
committer Ingo Molnar <mingo@elte.hu>
Mon, 24 Nov 2008 16:44:55 +0000 (17:44 +0100)
arch/x86/include/asm/uaccess_64.h
include/linux/kernel.h
kernel/exit.c
kernel/futex.c
kernel/lockdep.c
kernel/notifier.c
kernel/sched.c
kernel/softlockup.c
lib/Kconfig.debug

diff --combined arch/x86/include/asm/uaccess_64.h
index f8cfd00db450f2e0f948ce7128a2d44867aebf59,515d4dce96b598bc6e9d07dba21332a44924948c,515d4dce96b598bc6e9d07dba21332a44924948c,543ba883cc66200ff0e2206aec36b434b63d3695,664f15280f14354dc057e1d97954db6baab4b959,664f15280f14354dc057e1d97954db6baab4b959,f8cfd00db450f2e0f948ce7128a2d44867aebf59,c96c1f5d07a2c88e4f5ca547ea0f7154ac68583c..84210c479fca83524c6cef4c6bc069bcff76e272
@@@@@@@@@ -1,5 -1,5 -1,5 -1,5 -1,5 -1,5 -1,5 -1,5 +1,5 @@@@@@@@@
 --     #ifndef __X86_64_UACCESS_H
 --     #define __X86_64_UACCESS_H
       -#ifndef ASM_X86__UACCESS_64_H
       -#define ASM_X86__UACCESS_64_H
 ++    +#ifndef _ASM_X86_UACCESS_64_H
 ++    +#define _ASM_X86_UACCESS_64_H
        
        /*
         * User space memory access functions
@@@@@@@@@ -7,7 -7,6 -7,6 -7,7 -7,7 -7,7 -7,7 -7,7 +7,7 @@@@@@@@@
        #include <linux/compiler.h>
        #include <linux/errno.h>
        #include <linux/prefetch.h>
 ++     #include <linux/lockdep.h>
        #include <asm/page.h>
        
        /*
@@@@@@@@@ -29,6 -28,6 -28,6 -29,8 -29,6 -29,6 -29,6 -29,6 +29,8 @@@@@@@@@ static __always_inline __must_chec
        int __copy_from_user(void *dst, const void __user *src, unsigned size)
        {
                int ret = 0;
+++ ++++
+++ ++++        might_fault();
                if (!__builtin_constant_p(size))
                        return copy_user_generic(dst, (__force void *)src, size);
                switch (size) {
                        return ret;
                case 10:
                        __get_user_asm(*(u64 *)dst, (u64 __user *)src,
 ----- -                               ret, "q", "", "=r", 16);
 +++++ +                               ret, "q", "", "=r", 10);
                        if (unlikely(ret))
                                return ret;
                        __get_user_asm(*(u16 *)(8 + (char *)dst),
@@@@@@@@@ -71,6 -70,6 -70,6 -73,8 -71,6 -71,6 -71,6 -71,6 +73,8 @@@@@@@@@ static __always_inline __must_chec
        int __copy_to_user(void __user *dst, const void *src, unsigned size)
        {
                int ret = 0;
+++ ++++
+++ ++++        might_fault();
                if (!__builtin_constant_p(size))
                        return copy_user_generic((__force void *)dst, src, size);
                switch (size) {
@@@@@@@@@ -113,6 -112,6 -112,6 -117,8 -113,6 -113,6 -113,6 -113,6 +117,8 @@@@@@@@@ static __always_inline __must_chec
        int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
        {
                int ret = 0;
+++ ++++
+++ ++++        might_fault();
                if (!__builtin_constant_p(size))
                        return copy_user_generic((__force void *)dst,
                                                 (__force void *)src, size);
@@@@@@@@@ -199,4 -198,4 -198,4 -205,4 -199,4 -199,4 -199,4 -199,4 +205,4 @@@@@@@@@ static inline int __copy_from_user_inat
        unsigned long
        copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
        
 --     #endif /* __X86_64_UACCESS_H */
       -#endif /* ASM_X86__UACCESS_64_H */
 ++    +#endif /* _ASM_X86_UACCESS_64_H */
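
The hunks above add an unconditional might_fault() at the top of the __copy_*_user() helpers; the kernel.h section below supplies the fallback that maps it to might_sleep() when CONFIG_PROVE_LOCKING is off. The following is a minimal user-space sketch of that annotation pattern only, not the kernel implementation: a hook that is checked in debug builds and compiles away otherwise, with a plain flag standing in for the kernel's atomic-context tracking and DEBUG_ANNOTATIONS being a made-up switch.

        #include <assert.h>
        #include <string.h>

        static int in_atomic_context;            /* stand-in for preempt/irq state */

        #ifdef DEBUG_ANNOTATIONS                 /* hypothetical debug switch */
        # define might_fault() assert(!in_atomic_context)
        #else
        # define might_fault() do { } while (0)  /* compiles away otherwise */
        #endif

        /* Annotated copy helper: the check fires even on paths where the copy
         * never actually faults, which is the point of placing it up front. */
        static void copy_from_user_sketch(void *dst, const void *src, unsigned long size)
        {
                might_fault();
                memcpy(dst, src, size);
        }
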
diff --combined include/linux/kernel.h
index dc7e0d0a6474448aba71b4c32d2045afc44e240e,3f30557be2a3f2e209a2abd60c21ddef9086fc96,2651f805ba6d771b9ec1f26078609aebdb198853,69a9bfdf9c86d9de03919277d108fe242a4d1e4f,fba141d3ca0783303c661f39fb2c503ba418dc56,fba141d3ca0783303c661f39fb2c503ba418dc56,dc7e0d0a6474448aba71b4c32d2045afc44e240e,94d17ff64c5a3b48c0f6716d29b9832d6a896551..269df5a17b30af1b7349c131da05abec8aa95046
        #include <linux/log2.h>
        #include <linux/typecheck.h>
        #include <linux/ratelimit.h>
 ++     #include <linux/dynamic_printk.h>
        #include <asm/byteorder.h>
        #include <asm/bug.h>
        
@@@@@@@@@ -116,8 -115,6 -115,6 -116,8 -116,8 -116,8 -116,8 -116,6 +116,8 @@@@@@@@@ extern int _cond_resched(void)
        # define might_resched() do { } while (0)
        #endif
        
 ++    +#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 ++    +  void __might_sleep(char *file, int line);
        /**
         * might_sleep - annotation for functions that can sleep
         *
         * be bitten later when the calling function happens to sleep when it is not
         * supposed to.
         */
 --    -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 --    -  void __might_sleep(char *file, int line);
        # define might_sleep() \
                do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
        #else
                        (__x < 0) ? -__x : __x;         \
                })
        
+++ ++++#ifdef CONFIG_PROVE_LOCKING
+++ ++++void might_fault(void);
+++ ++++#else
+++ ++++static inline void might_fault(void)
+++ ++++{
+++ ++++        might_sleep();
+++ ++++}
+++ ++++#endif
+++ ++++
        extern struct atomic_notifier_head panic_notifier_list;
        extern long (*panic_blink)(long time);
        NORET_TYPE void panic(const char * fmt, ...)
@@@@@@@@@ -183,38 -182,16 -182,14 -192,38 -183,38 -183,38 -183,38 -183,14 +192,40 @@@@@@@@@ extern int vsscanf(const char *, const 
        
        extern int get_option(char **str, int *pint);
        extern char *get_options(const char *str, int nints, int *ints);
 --     extern unsigned long long memparse(char *ptr, char **retptr);
 ++     extern unsigned long long memparse(const char *ptr, char **retptr);
        
        extern int core_kernel_text(unsigned long addr);
        extern int __kernel_text_address(unsigned long addr);
        extern int kernel_text_address(unsigned long addr);
+ ++++++extern int func_ptr_is_kernel_text(void *ptr);
+ ++++++
        struct pid;
        extern struct pid *session_of_pgrp(struct pid *pgrp);
        
 ++    +/*
 ++    + * FW_BUG
 ++    + * Add this to a message where you are sure the firmware is buggy or behaves
 ++    + * really stupid or out of spec. Be aware that the responsible BIOS developer
 ++    + * should be able to fix this issue or at least get a concrete idea of the
 ++    + * problem by reading your message without the need of looking at the kernel
 ++    + * code.
 ++    + * 
 ++    + * Use it for definite and high priority BIOS bugs.
 ++    + *
 ++    + * FW_WARN
 ++    + * Use it for not that clear (e.g. could the kernel messed up things already?)
 ++    + * and medium priority BIOS bugs.
 ++    + *
 ++    + * FW_INFO
 ++    + * Use this one if you want to tell the user or vendor about something
 ++    + * suspicious, but generally harmless related to the firmware.
 ++    + *
 ++    + * Use it for information or very low priority BIOS bugs.
 ++    + */
 ++    +#define FW_BUG          "[Firmware Bug]: "
 ++    +#define FW_WARN         "[Firmware Warn]: "
 ++    +#define FW_INFO         "[Firmware Info]: "
 ++    +
        #ifdef CONFIG_PRINTK
        asmlinkage int vprintk(const char *fmt, va_list args)
                __attribute__ ((format (printf, 1, 0)));
@@@@@@@@@ -238,9 -215,6 -213,6 -247,9 -238,9 -238,9 -238,9 -214,9 +249,9 @@@@@@@@@ static inline bool printk_timed_ratelim
                        { return false; }
        #endif
        
 ++     extern int printk_needs_cpu(int cpu);
 ++     extern void printk_tick(void);
 ++     
        extern void asmlinkage __attribute__((format(printf, 1, 2)))
                early_printk(const char *fmt, ...);
        
@@@@@@@@@ -263,10 -237,9 -235,9 -272,10 -263,10 -263,10 -263,10 -239,10 +274,10 @@@@@@@@@ extern int oops_in_progress;          /* If set
        extern int panic_timeout;
        extern int panic_on_oops;
        extern int panic_on_unrecovered_nmi;
 --     extern int tainted;
        extern const char *print_tainted(void);
 --     extern void add_taint(unsigned);
 ++     extern void add_taint(unsigned flag);
 ++     extern int test_taint(unsigned flag);
 ++     extern unsigned long get_taint(void);
        extern int root_mountflags;
        
        /* Values used for system_state */
@@@@@@@@@ -279,17 -252,16 -250,16 -288,17 -279,17 -279,17 -279,17 -255,17 +290,17 @@@@@@@@@ extern enum system_states 
                SYSTEM_SUSPEND_DISK,
        } system_state;
        
 --     #define TAINT_PROPRIETARY_MODULE        (1<<0)
 --     #define TAINT_FORCED_MODULE             (1<<1)
 --     #define TAINT_UNSAFE_SMP                (1<<2)
 --     #define TAINT_FORCED_RMMOD              (1<<3)
 --     #define TAINT_MACHINE_CHECK             (1<<4)
 --     #define TAINT_BAD_PAGE                  (1<<5)
 --     #define TAINT_USER                      (1<<6)
 --     #define TAINT_DIE                       (1<<7)
 --     #define TAINT_OVERRIDDEN_ACPI_TABLE     (1<<8)
 --     #define TAINT_WARN                      (1<<9)
 ++     #define TAINT_PROPRIETARY_MODULE        0
 ++     #define TAINT_FORCED_MODULE             1
 ++     #define TAINT_UNSAFE_SMP                2
 ++     #define TAINT_FORCED_RMMOD              3
 ++     #define TAINT_MACHINE_CHECK             4
 ++     #define TAINT_BAD_PAGE                  5
 ++     #define TAINT_USER                      6
 ++     #define TAINT_DIE                       7
 ++     #define TAINT_OVERRIDDEN_ACPI_TABLE     8
 ++     #define TAINT_WARN                      9
 ++     #define TAINT_CRAP                      10
        
        extern void dump_stack(void) __cold;
        
@@@@@@@@@ -318,36 -290,28 -288,28 -327,32 -318,32 -318,32 -318,36 -294,32 +329,36 @@@@@@@@@ static inline char *pack_hex_byte(char 
                return buf;
        }
        
 ----- -#define pr_emerg(fmt, arg...) \
 ----- -        printk(KERN_EMERG fmt, ##arg)
 ----- -#define pr_alert(fmt, arg...) \
 ----- -        printk(KERN_ALERT fmt, ##arg)
 ----- -#define pr_crit(fmt, arg...) \
 ----- -        printk(KERN_CRIT fmt, ##arg)
 ----- -#define pr_err(fmt, arg...) \
 ----- -        printk(KERN_ERR fmt, ##arg)
 ----- -#define pr_warning(fmt, arg...) \
 ----- -        printk(KERN_WARNING fmt, ##arg)
 ----- -#define pr_notice(fmt, arg...) \
 ----- -        printk(KERN_NOTICE fmt, ##arg)
 ----- -#define pr_info(fmt, arg...) \
 ----- -        printk(KERN_INFO fmt, ##arg)
 --     
 --     #ifdef DEBUG
 +++++ +#ifndef pr_fmt
 +++++ +#define pr_fmt(fmt) fmt
 +++++ +#endif
 +++++ +
 +++++ +#define pr_emerg(fmt, ...) \
 +++++ +        printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_alert(fmt, ...) \
 +++++ +        printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_crit(fmt, ...) \
 +++++ +        printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_err(fmt, ...) \
 +++++ +        printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_warning(fmt, ...) \
 +++++ +        printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_notice(fmt, ...) \
 +++++ +        printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_info(fmt, ...) \
 +++++ +        printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 ++     
        /* If you are writing a driver, please use dev_dbg instead */
 --     #define pr_debug(fmt, arg...) \
 --             printk(KERN_DEBUG fmt, ##arg)
 ++     #if defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
 ++     #define pr_debug(fmt, ...) do { \
   --- -        dynamic_pr_debug(fmt, ##__VA_ARGS__); \
 +++++ +        dynamic_pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
 ++             } while (0)
 ++     #elif defined(DEBUG)
   --- -#define pr_debug(fmt, arg...) \
   --- -        printk(KERN_DEBUG fmt, ##arg)
 +++++ +#define pr_debug(fmt, ...) \
 +++++ +        printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
        #else
 ----- -#define pr_debug(fmt, arg...) \
 ----- -        ({ if (0) printk(KERN_DEBUG fmt, ##arg); 0; })
 +++++ +#define pr_debug(fmt, ...) \
 +++++ +        ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
        #endif
        
        /*
@@@@@@@@@ -524,9 -488,4 -486,4 -529,9 -520,9 -520,9 -524,9 -496,9 +535,9 @@@@@@@@@ struct sysinfo 
        #define NUMA_BUILD 0
        #endif
        
 ++     /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
 ++     #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 ++     # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
 ++     #endif
 ++     
        #endif
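
Among the kernel.h changes above, the pr_*() macros now expand their format string through pr_fmt(), with a default identity definition, so a single #define at the top of a .c file prefixes every message that file prints. A minimal user-space sketch of that convention, with printf standing in for printk and "mydrv: " as a made-up prefix:

        #include <stdio.h>

        /* Per-file prefix; as with the #ifndef default above, this must be
         * defined before the pr_*() macros are pulled in. */
        #define pr_fmt(fmt) "mydrv: " fmt

        #define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

        int main(void)
        {
                pr_info("found %d ports\n", 2);  /* prints "mydrv: found 2 ports" */
                return 0;
        }
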
diff --combined kernel/exit.c
index 2d8be7ebb0f73499f894a1828fd827f0217290f1,16395644a98ff8c060b2f5fd776fe7abadd61c6a,85a83c831856c193570e40a3b7d3e03ef8862d1c,ae2b92be5faec1efa73beefb63304a22d030fc16,80137a5d9467811ba4dab35c6e95790002a5f12a,b9c4d8bb72e5aedb49b9aab8ff96dd7ff8380f9e,2d8be7ebb0f73499f894a1828fd827f0217290f1,80137a5d9467811ba4dab35c6e95790002a5f12a..30fcdf16737a2bb013a78b62bc6b7f1eb6eb165b
        #include <linux/cn_proc.h>
        #include <linux/mutex.h>
        #include <linux/futex.h>
 ----- -#include <linux/compat.h>
        #include <linux/pipe_fs_i.h>
        #include <linux/audit.h> /* for audit_free() */
        #include <linux/resource.h>
        #include <linux/blkdev.h>
        #include <linux/task_io_accounting_ops.h>
        #include <linux/tracehook.h>
 ++     #include <trace/sched.h>
        
        #include <asm/uaccess.h>
        #include <asm/unistd.h>
@@@@@@@@@ -112,6 -112,8 -112,8 -113,6 -113,6 -113,6 -112,6 -113,6 +112,6 @@@@@@@@@ static void __exit_signal(struct task_s
                         * We won't ever get here for the group leader, since it
                         * will have been the last reference on the signal_struct.
                         */
 --                     sig->utime = cputime_add(sig->utime, task_utime(tsk));
 --                     sig->stime = cputime_add(sig->stime, task_stime(tsk));
                        sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
                        sig->min_flt += tsk->min_flt;
                        sig->maj_flt += tsk->maj_flt;
                        sig->inblock += task_io_get_inblock(tsk);
                        sig->oublock += task_io_get_oublock(tsk);
                        task_io_accounting_add(&sig->ioac, &tsk->ioac);
 --                     sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
                        sig = NULL; /* Marker for below. */
                }
        
                if (sig) {
                        flush_sigqueue(&sig->shared_pending);
                        taskstats_tgid_free(sig);
 ++ +  +                /*
 ++ +  +                 * Make sure ->signal can't go away under rq->lock,
 ++ +  +                 * see account_group_exec_runtime().
 ++ +  +                 */
 ++ +  +                task_rq_unlock_wait(tsk);
                        __cleanup_signal(sig);
                }
        }
        
        static void delayed_put_task_struct(struct rcu_head *rhp)
        {
 --             put_task_struct(container_of(rhp, struct task_struct, rcu));
 ++             struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 ++     
 ++             trace_sched_process_free(tsk);
 ++             put_task_struct(tsk);
        }
        
        
@@@@@@@@@ -588,6 -583,8 -583,6 -589,6 -584,6 -589,6 -588,6 -584,6 +588,6 @@@@@@@@@ mm_need_new_owner(struct mm_struct *mm
                 * If there are other users of the mm and the owner (us) is exiting
                 * we need to find a new owner to take on the responsibility.
                 */
 -              if (!mm)
 -                      return 0;
                if (atomic_read(&mm->mm_users) <= 1)
                        return 0;
                if (mm->owner != p)
                } while_each_thread(g, c);
        
                read_unlock(&tasklist_lock);
 +              /*
 +               * We found no owner yet mm_users > 1: this implies that we are
 +               * most likely racing with swapoff (try_to_unuse()) or /proc or
 +               * ptrace or page migration (get_task_mm()).  Mark owner as NULL,
 +               * so that subsystems can understand the callback and take action.
 +               */
 +              down_write(&mm->mmap_sem);
 +              cgroup_mm_owner_callbacks(mm->owner, NULL);
 +              mm->owner = NULL;
 +              up_write(&mm->mmap_sem);
                return;
        
        assign_new_owner:
                BUG_ON(c == p);
                get_task_struct(c);
 ++             read_unlock(&tasklist_lock);
 ++             down_write(&mm->mmap_sem);
                /*
                 * The task_lock protects c->mm from changing.
                 * We always want mm->owner->mm == mm
                 */
                task_lock(c);
 --             /*
 --              * Delay read_unlock() till we have the task_lock()
 --              * to ensure that c does not slip away underneath us
 --              */
 --             read_unlock(&tasklist_lock);
                if (c->mm != mm) {
                        task_unlock(c);
 ++                     up_write(&mm->mmap_sem);
                        put_task_struct(c);
                        goto retry;
                }
                cgroup_mm_owner_callbacks(mm->owner, c);
                mm->owner = c;
                task_unlock(c);
 ++             up_write(&mm->mmap_sem);
                put_task_struct(c);
        }
        #endif /* CONFIG_MM_OWNER */
@@@@@@@@@ -1058,6 -1046,14 -1054,14 -1059,14 -1054,14 -1059,14 -1058,6 -1054,14 +1058,6 @@@@@@@@@ NORET_TYPE void do_exit(long code
                        exit_itimers(tsk->signal);
                }
                acct_collect(code, group_dead);
 ----- -#ifdef CONFIG_FUTEX
 ----- -        if (unlikely(tsk->robust_list))
 ----- -                exit_robust_list(tsk);
 ----- -#ifdef CONFIG_COMPAT
 ----- -        if (unlikely(tsk->compat_robust_list))
 ----- -                compat_exit_robust_list(tsk);
 ----- -#endif
 ----- -#endif
                if (group_dead)
                        tty_audit_exit();
                if (unlikely(tsk->audit_context))
        
                if (group_dead)
                        acct_process();
 ++             trace_sched_process_exit(tsk);
 ++     
                exit_sem(tsk);
                exit_files(tsk);
                exit_fs(tsk);
@@@@@@@@@ -1300,7 -1294,6 -1302,6 -1309,7 -1304,7 -1309,7 -1300,7 -1304,7 +1300,7 @@@@@@@@@ static int wait_task_zombie(struct task
                if (likely(!traced)) {
                        struct signal_struct *psig;
                        struct signal_struct *sig;
 ++                     struct task_cputime cputime;
        
                        /*
                         * The resource counters for the group leader are in its
                         * need to protect the access to p->parent->signal fields,
                         * as other threads in the parent group can be right
                         * here reaping other children at the same time.
 ++                      *
 ++                      * We use thread_group_cputime() to get times for the thread
 ++                      * group, which consolidates times for all threads in the
 ++                      * group including the group leader.
                         */
+++++ ++                thread_group_cputime(p, &cputime);
                        spin_lock_irq(&p->parent->sighand->siglock);
                        psig = p->parent->signal;
                        sig = p->signal;
-  -- --                thread_group_cputime(p, &cputime);
                        psig->cutime =
                                cputime_add(psig->cutime,
 --                             cputime_add(p->utime,
 --                             cputime_add(sig->utime,
 --                                         sig->cutime)));
 ++                             cputime_add(cputime.utime,
 ++                                         sig->cutime));
                        psig->cstime =
                                cputime_add(psig->cstime,
 --                             cputime_add(p->stime,
 --                             cputime_add(sig->stime,
 --                                         sig->cstime)));
 ++                             cputime_add(cputime.stime,
 ++                                         sig->cstime));
                        psig->cgtime =
                                cputime_add(psig->cgtime,
                                cputime_add(p->gtime,
@@@@@@@@@ -1677,8 -1667,6 -1675,6 -1686,8 -1681,8 -1686,8 -1677,8 -1681,8 +1677,8 @@@@@@@@@ static long do_wait(enum pid_type type
                struct task_struct *tsk;
                int retval;
        
 ++             trace_sched_process_wait(pid);
 ++     
                add_wait_queue(&current->signal->wait_chldexit,&wait);
        repeat:
                /*
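
In the wait_task_zombie() hunk above, the parent's cumulative counters are now fed from a single thread_group_cputime() snapshot instead of adding the leader's utime/stime and the signal-struct totals separately. A self-contained sketch of that consolidation step, with plain integers standing in for cputime_t and all names chosen for the example:

        struct group_times_sketch { unsigned long long utime, stime; };

        /* Sum times over all threads of the reaped group once... */
        static struct group_times_sketch
        sum_group_times(const struct group_times_sketch *threads, int n)
        {
                struct group_times_sketch sum = { 0, 0 };
                int i;

                for (i = 0; i < n; i++) {
                        sum.utime += threads[i].utime;
                        sum.stime += threads[i].stime;
                }
                return sum;
        }

        /* ...then fold that snapshot, plus the group's own accumulated
         * dead-child times (sig->cutime/cstime in the hunk), into the
         * parent's cumulative counters, mirroring the psig->cutime update. */
        static void account_to_parent(struct group_times_sketch *parent_cumul,
                                      struct group_times_sketch group,
                                      struct group_times_sketch dead_children)
        {
                parent_cumul->utime += group.utime + dead_children.utime;
                parent_cumul->stime += group.stime + dead_children.stime;
        }
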
diff --combined kernel/futex.c
index 8af10027514bb1cc9cb2702051330e52bf43a533,7d1136e97c142d198b897dab1846acd99f1f655f,62cbd648e28a663fc5e165adf177d227bed38a33,8af10027514bb1cc9cb2702051330e52bf43a533,8af10027514bb1cc9cb2702051330e52bf43a533,8af10027514bb1cc9cb2702051330e52bf43a533,8af10027514bb1cc9cb2702051330e52bf43a533,7d1136e97c142d198b897dab1846acd99f1f655f..e10c5c8786a614619c943f5102189fdb428c5ac3
@@@@@@@@@ -122,24 -122,24 -122,6 -122,24 -122,24 -122,24 -122,24 -122,24 +122,6 @@@@@@@@@ struct futex_hash_bucket 
        
        static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
        
-- -----/*
-- ----- * Take mm->mmap_sem, when futex is shared
-- ----- */
-- -----static inline void futex_lock_mm(struct rw_semaphore *fshared)
-- -----{
-- -----        if (fshared)
-- -----                down_read(fshared);
-- -----}
-- -----
-- -----/*
-- ----- * Release mm->mmap_sem, when the futex is shared
-- ----- */
-- -----static inline void futex_unlock_mm(struct rw_semaphore *fshared)
-- -----{
-- -----        if (fshared)
-- -----                up_read(fshared);
-- -----}
-- -----
        /*
         * We hash on the keys returned from get_futex_key (see below).
         */
@@@@@@@@@ -161,6 -161,6 -143,45 -161,6 -161,6 -161,6 -161,6 -161,6 +143,45 @@@@@@@@@ static inline int match_futex(union fut
                        && key1->both.offset == key2->both.offset);
        }
        
++ +++++/*
++ +++++ * Take a reference to the resource addressed by a key.
++ +++++ * Can be called while holding spinlocks.
++ +++++ *
++ +++++ */
++ +++++static void get_futex_key_refs(union futex_key *key)
++ +++++{
++ +++++        if (!key->both.ptr)
++ +++++                return;
++ +++++
++ +++++        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
++ +++++        case FUT_OFF_INODE:
++ +++++                atomic_inc(&key->shared.inode->i_count);
++ +++++                break;
++ +++++        case FUT_OFF_MMSHARED:
++ +++++                atomic_inc(&key->private.mm->mm_count);
++ +++++                break;
++ +++++        }
++ +++++}
++ +++++
++ +++++/*
++ +++++ * Drop a reference to the resource addressed by a key.
++ +++++ * The hash bucket spinlock must not be held.
++ +++++ */
++ +++++static void drop_futex_key_refs(union futex_key *key)
++ +++++{
++ +++++        if (!key->both.ptr)
++ +++++                return;
++ +++++
++ +++++        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
++ +++++        case FUT_OFF_INODE:
++ +++++                iput(key->shared.inode);
++ +++++                break;
++ +++++        case FUT_OFF_MMSHARED:
++ +++++                mmdrop(key->private.mm);
++ +++++                break;
++ +++++        }
++ +++++}
++ +++++
        /**
         * get_futex_key - Get parameters which are the keys for a futex.
         * @uaddr: virtual address of the futex
         * For other futexes, it points to &current->mm->mmap_sem and
         * caller must have taken the reader lock. but NOT any spinlocks.
         */
-- -----static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
-- -----                         union futex_key *key)
++ +++++static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
        {
                unsigned long address = (unsigned long)uaddr;
                struct mm_struct *mm = current->mm;
-- -----        struct vm_area_struct *vma;
                struct page *page;
                int err;
        
                                return -EFAULT;
                        key->private.mm = mm;
                        key->private.address = address;
++ +++++                get_futex_key_refs(key);
                        return 0;
                }
-- -----        /*
-- -----         * The futex is hashed differently depending on whether
-- -----         * it's in a shared or private mapping.  So check vma first.
-- -----         */
-- -----        vma = find_extend_vma(mm, address);
-- -----        if (unlikely(!vma))
-- -----                return -EFAULT;
        
-- -----        /*
-- -----         * Permissions.
-- -----         */
-- -----        if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
-- -----                return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
++ +++++again:
++ +++++        err = get_user_pages_fast(address, 1, 0, &page);
++ +++++        if (err < 0)
++ +++++                return err;
++ +++++
++ +++++        lock_page(page);
++ +++++        if (!page->mapping) {
++ +++++                unlock_page(page);
++ +++++                put_page(page);
++ +++++                goto again;
++ +++++        }
        
                /*
                 * Private mappings are handled in a simple way.
                 *
                 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
                 * it's a read-only handle, it's expected that futexes attach to
-- -----         * the object not the particular process.  Therefore we use
-- -----         * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
-- -----         * mappings of _writable_ handles.
++ +++++         * the object not the particular process.
                 */
-- -----        if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
-- -----                key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
++ +++++        if (PageAnon(page)) {
++ +++++                key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                        key->private.mm = mm;
                        key->private.address = address;
 -     -                return 0;
 -     -        }
 -     -
 -     -        /*
 -     -         * Linear file mappings are also simple.
 -     -         */
 -     -        key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
 -     -        key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
 -     -        if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
 -     -                key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
 -     -                                     + vma->vm_pgoff);
-- -----                return 0;
++ +++++        } else {
++ +++++                key->both.offset |= FUT_OFF_INODE; /* inode-based key */
++ +++++                key->shared.inode = page->mapping->host;
++ +++++                key->shared.pgoff = page->index;
                }
        
-- -----        /*
-  ----          * Linear file mappings are also simple.
 -     -         * We could walk the page table to read the non-linear
 -     -         * pte, and get the page index without fetching the page
 -     -         * from swap.  But that's a lot of code to duplicate here
 -     -         * for a rare case, so we simply fetch the page.
-- -----         */
-  ----         key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
-  ----         key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
-  ----         if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
-  ----                 key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
-  ----                                      + vma->vm_pgoff);
 -     -        err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
 -     -        if (err >= 0) {
 -     -                key->shared.pgoff =
 -     -                        page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 -     -                put_page(page);
-- -----                return 0;
-- -----        }
 -     -        return err;
 -     -}
++ +++++        get_futex_key_refs(key);
        
-  ----         /*
-  ----          * We could walk the page table to read the non-linear
-  ----          * pte, and get the page index without fetching the page
-  ----          * from swap.  But that's a lot of code to duplicate here
-  ----          * for a rare case, so we simply fetch the page.
-  ----          */
-  ----         err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
-  ----         if (err >= 0) {
-  ----                 key->shared.pgoff =
-  ----                         page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-  ----                 put_page(page);
-  ----                 return 0;
-  ----         }
-  ----         return err;
-  ---- }
-  ---- 
-- -----/*
-- ----- * Take a reference to the resource addressed by a key.
-- ----- * Can be called while holding spinlocks.
-- ----- *
-- ----- */
-- -----static void get_futex_key_refs(union futex_key *key)
-- -----{
-- -----        if (key->both.ptr == NULL)
-- -----                return;
-- -----        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-- -----                case FUT_OFF_INODE:
-- -----                        atomic_inc(&key->shared.inode->i_count);
-- -----                        break;
-- -----                case FUT_OFF_MMSHARED:
-- -----                        atomic_inc(&key->private.mm->mm_count);
-- -----                        break;
-- -----        }
++ +++++        unlock_page(page);
++ +++++        put_page(page);
++ +++++        return 0;
        }
        
-- -----/*
-- ----- * Drop a reference to the resource addressed by a key.
-- ----- * The hash bucket spinlock must not be held.
-- ----- */
-- -----static void drop_futex_key_refs(union futex_key *key)
++ +++++static inline
++ +++++void put_futex_key(int fshared, union futex_key *key)
        {
-- -----        if (!key->both.ptr)
-- -----                return;
-- -----        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-- -----                case FUT_OFF_INODE:
-- -----                        iput(key->shared.inode);
-- -----                        break;
-- -----                case FUT_OFF_MMSHARED:
-- -----                        mmdrop(key->private.mm);
-- -----                        break;
-- -----        }
++ +++++        drop_futex_key_refs(key);
        }
        
        static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@@@@@@@@ -328,10 -328,10 -297,8 -328,10 -328,10 -328,10 -328,10 -328,10 +297,8 @@@@@@@@@ static int get_futex_value_locked(u32 *
        
        /*
         * Fault handling.
-- ----- * if fshared is non NULL, current->mm->mmap_sem is already held
         */
-- -----static int futex_handle_fault(unsigned long address,
-- -----                              struct rw_semaphore *fshared, int attempt)
++ +++++static int futex_handle_fault(unsigned long address, int attempt)
        {
                struct vm_area_struct * vma;
                struct mm_struct *mm = current->mm;
                if (attempt > 2)
                        return ret;
        
-- -----        if (!fshared)
-- -----                down_read(&mm->mmap_sem);
++ +++++        down_read(&mm->mmap_sem);
                vma = find_vma(mm, address);
                if (vma && address >= vma->vm_start &&
                    (vma->vm_flags & VM_WRITE)) {
                                        current->min_flt++;
                        }
                }
-- -----        if (!fshared)
-- -----                up_read(&mm->mmap_sem);
++ +++++        up_read(&mm->mmap_sem);
                return ret;
        }
        
@@@@@@@@@ -385,6 -385,6 -350,7 -385,6 -385,6 -385,6 -385,6 -385,6 +350,7 @@@@@@@@@ static int refill_pi_state_cache(void
                /* pi_mutex gets initialized later */
                pi_state->owner = NULL;
                atomic_set(&pi_state->refcount, 1);
++ +++++        pi_state->key = FUTEX_KEY_INIT;
        
                current->pi_state_cache = pi_state;
        
@@@@@@@@@ -462,7 -462,7 -428,7 -462,7 -462,7 -462,7 -462,7 -462,7 +428,7 @@@@@@@@@ void exit_pi_state_list(struct task_str
                struct list_head *next, *head = &curr->pi_state_list;
                struct futex_pi_state *pi_state;
                struct futex_hash_bucket *hb;
-- -----        union futex_key key;
++ +++++        union futex_key key = FUTEX_KEY_INIT;
        
                if (!futex_cmpxchg_enabled)
                        return;
@@@@@@@@@ -719,20 -719,20 -685,17 -719,20 -719,20 -719,20 -719,20 -719,20 +685,17 @@@@@@@@@ double_lock_hb(struct futex_hash_bucke
         * Wake up all waiters hashed on the physical page that is mapped
         * to this virtual address:
         */
-- -----static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
-- -----                      int nr_wake, u32 bitset)
++ +++++static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
        {
                struct futex_hash_bucket *hb;
                struct futex_q *this, *next;
                struct plist_head *head;
-- -----        union futex_key key;
++ +++++        union futex_key key = FUTEX_KEY_INIT;
                int ret;
        
                if (!bitset)
                        return -EINVAL;
        
-- -----        futex_lock_mm(fshared);
-- -----
                ret = get_futex_key(uaddr, fshared, &key);
                if (unlikely(ret != 0))
                        goto out;
        
                spin_unlock(&hb->lock);
        out:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &key);
                return ret;
        }
        
         * to this virtual address:
         */
        static int
-- -----futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
-- -----              u32 __user *uaddr2,
++ +++++futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
                      int nr_wake, int nr_wake2, int op)
        {
-- -----        union futex_key key1, key2;
++ +++++        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
                struct futex_hash_bucket *hb1, *hb2;
                struct plist_head *head;
                struct futex_q *this, *next;
                int ret, op_ret, attempt = 0;
        
        retryfull:
-- -----        futex_lock_mm(fshared);
-- -----
                ret = get_futex_key(uaddr1, fshared, &key1);
                if (unlikely(ret != 0))
                        goto out;
                         */
                        if (attempt++) {
                                ret = futex_handle_fault((unsigned long)uaddr2,
-- -----                                                 fshared, attempt);
++ +++++                                                 attempt);
                                if (ret)
                                        goto out;
                                goto retry;
                        }
        
-- -----                /*
-- -----                 * If we would have faulted, release mmap_sem,
-- -----                 * fault it in and start all over again.
-- -----                 */
-- -----                futex_unlock_mm(fshared);
-- -----
                        ret = get_user(dummy, uaddr2);
                        if (ret)
                                return ret;
                if (hb1 != hb2)
                        spin_unlock(&hb2->lock);
        out:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &key2);
++ +++++        put_futex_key(fshared, &key1);
        
                return ret;
        }
         * Requeue all waiters hashed on one physical page to another
         * physical page.
         */
-- -----static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
-- -----                         u32 __user *uaddr2,
++ +++++static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
                                 int nr_wake, int nr_requeue, u32 *cmpval)
        {
-- -----        union futex_key key1, key2;
++ +++++        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
                struct futex_hash_bucket *hb1, *hb2;
                struct plist_head *head1;
                struct futex_q *this, *next;
                int ret, drop_count = 0;
        
         retry:
-- -----        futex_lock_mm(fshared);
-- -----
                ret = get_futex_key(uaddr1, fshared, &key1);
                if (unlikely(ret != 0))
                        goto out;
                                if (hb1 != hb2)
                                        spin_unlock(&hb2->lock);
        
-- -----                        /*
-- -----                         * If we would have faulted, release mmap_sem, fault
-- -----                         * it in and start all over again.
-- -----                         */
-- -----                        futex_unlock_mm(fshared);
-- -----
                                ret = get_user(curval, uaddr1);
        
                                if (!ret)
@@@@@@@@@ -974,7 -974,7 -920,8 -974,7 -974,7 -974,7 -974,7 -974,7 +920,8 @@@@@@@@@ out_unlock
                        drop_futex_key_refs(&key1);
        
        out:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &key2);
++ +++++        put_futex_key(fshared, &key1);
                return ret;
        }
        
@@@@@@@@@ -1096,8 -1096,8 -1043,7 -1096,8 -1096,8 -1096,8 -1096,8 -1096,8 +1043,7 @@@@@@@@@ static void unqueue_me_pi(struct futex_
         * private futexes.
         */
        static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-- -----                                struct task_struct *newowner,
-- -----                                struct rw_semaphore *fshared)
++ +++++                                struct task_struct *newowner, int fshared)
        {
                u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
                struct futex_pi_state *pi_state = q->pi_state;
        handle_fault:
                spin_unlock(q->lock_ptr);
        
-- -----        ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
++ +++++        ret = futex_handle_fault((unsigned long)uaddr, attempt++);
        
                spin_lock(q->lock_ptr);
        
        
        static long futex_wait_restart(struct restart_block *restart);
        
-- -----static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
++ +++++static int futex_wait(u32 __user *uaddr, int fshared,
                              u32 val, ktime_t *abs_time, u32 bitset)
        {
                struct task_struct *curr = current;
                q.pi_state = NULL;
                q.bitset = bitset;
         retry:
-- -----        futex_lock_mm(fshared);
-- -----
++ +++++        q.key = FUTEX_KEY_INIT;
                ret = get_futex_key(uaddr, fshared, &q.key);
                if (unlikely(ret != 0))
                        goto out_release_sem;
                if (unlikely(ret)) {
                        queue_unlock(&q, hb);
        
-- -----                /*
-- -----                 * If we would have faulted, release mmap_sem, fault it in and
-- -----                 * start all over again.
-- -----                 */
-- -----                futex_unlock_mm(fshared);
-- -----
                        ret = get_user(uval, uaddr);
        
                        if (!ret)
                /* Only actually queue if *uaddr contained val.  */
                queue_me(&q, hb);
        
-- -----        /*
-- -----         * Now the futex is queued and we have checked the data, we
-- -----         * don't want to hold mmap_sem while we sleep.
-- -----         */
-- -----        futex_unlock_mm(fshared);
-- -----
                /*
                 * There might have been scheduling since the queue_me(), as we
                 * cannot hold a spinlock across the get_user() in case it
                        if (!abs_time)
                                schedule();
                        else {
 ++    +                        unsigned long slack;
 ++    +                        slack = current->timer_slack_ns;
 ++    +                        if (rt_task(current))
 ++    +                                slack = 0;
                                hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
                                                        HRTIMER_MODE_ABS);
                                hrtimer_init_sleeper(&t, current);
 --    -                        t.timer.expires = *abs_time;
 ++    +                        hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
        
 --    -                        hrtimer_start(&t.timer, t.timer.expires,
 --    -                                                HRTIMER_MODE_ABS);
 ++    +                        hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
                                if (!hrtimer_active(&t.timer))
                                        t.task = NULL;
        
                queue_unlock(&q, hb);
        
         out_release_sem:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &q.key);
                return ret;
        }
        
        static long futex_wait_restart(struct restart_block *restart)
        {
                u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
-- -----        struct rw_semaphore *fshared = NULL;
++ +++++        int fshared = 0;
                ktime_t t;
        
                t.tv64 = restart->futex.time;
                restart->fn = do_no_restart_syscall;
                if (restart->futex.flags & FLAGS_SHARED)
-- -----                fshared = &current->mm->mmap_sem;
++ +++++                fshared = 1;
                return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
                                        restart->futex.bitset);
        }
         * if there are waiters then it will block, it does PI, etc. (Due to
         * races the kernel might see a 0 value of the futex too.)
         */
-- -----static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
++ +++++static int futex_lock_pi(u32 __user *uaddr, int fshared,
                                 int detect, ktime_t *time, int trylock)
        {
                struct hrtimer_sleeper timeout, *to = NULL;
                        hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
                                              HRTIMER_MODE_ABS);
                        hrtimer_init_sleeper(to, current);
 --    -                to->timer.expires = *time;
 ++    +                hrtimer_set_expires(&to->timer, *time);
                }
        
                q.pi_state = NULL;
         retry:
-- -----        futex_lock_mm(fshared);
-- -----
++ +++++        q.key = FUTEX_KEY_INIT;
                ret = get_futex_key(uaddr, fshared, &q.key);
                if (unlikely(ret != 0))
                        goto out_release_sem;
                                 * exit to complete.
                                 */
                                queue_unlock(&q, hb);
-- -----                        futex_unlock_mm(fshared);
                                cond_resched();
                                goto retry;
        
                 */
                queue_me(&q, hb);
        
-- -----        /*
-- -----         * Now the futex is queued and we have checked the data, we
-- -----         * don't want to hold mmap_sem while we sleep.
-- -----         */
-- -----        futex_unlock_mm(fshared);
-- -----
                WARN_ON(!q.pi_state);
                /*
                 * Block on the PI mutex:
                        ret = ret ? 0 : -EWOULDBLOCK;
                }
        
-- -----        futex_lock_mm(fshared);
                spin_lock(q.lock_ptr);
        
                if (!ret) {
        
                /* Unqueue and drop the lock */
                unqueue_me_pi(&q);
-- -----        futex_unlock_mm(fshared);
        
                if (to)
                        destroy_hrtimer_on_stack(&to->timer);
                queue_unlock(&q, hb);
        
         out_release_sem:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &q.key);
                if (to)
                        destroy_hrtimer_on_stack(&to->timer);
                return ret;
                queue_unlock(&q, hb);
        
                if (attempt++) {
-- -----                ret = futex_handle_fault((unsigned long)uaddr, fshared,
-- -----                                         attempt);
++ +++++                ret = futex_handle_fault((unsigned long)uaddr, attempt);
                        if (ret)
                                goto out_release_sem;
                        goto retry_unlocked;
                }
        
-- -----        futex_unlock_mm(fshared);
-- -----
                ret = get_user(uval, uaddr);
                if (!ret && (uval != -EFAULT))
                        goto retry;
         * This is the in-kernel slowpath: we look up the PI state (if any),
         * and do the rt-mutex unlock.
         */
-- -----static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
++ +++++static int futex_unlock_pi(u32 __user *uaddr, int fshared)
        {
                struct futex_hash_bucket *hb;
                struct futex_q *this, *next;
                u32 uval;
                struct plist_head *head;
-- -----        union futex_key key;
++ +++++        union futex_key key = FUTEX_KEY_INIT;
                int ret, attempt = 0;
        
        retry:
                 */
                if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
                        return -EPERM;
-- -----        /*
-- -----         * First take all the futex related locks:
-- -----         */
-- -----        futex_lock_mm(fshared);
        
                ret = get_futex_key(uaddr, fshared, &key);
                if (unlikely(ret != 0))
@@@@@@@@@ -1747,7 -1744,7 -1660,7 -1747,7 -1747,7 -1747,7 -1747,7 -1744,7 +1663,7 @@@@@@@@@ retry_unlocked
        out_unlock:
                spin_unlock(&hb->lock);
        out:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &key);
        
                return ret;
        
                spin_unlock(&hb->lock);
        
                if (attempt++) {
-- -----                ret = futex_handle_fault((unsigned long)uaddr, fshared,
-- -----                                         attempt);
++ +++++                ret = futex_handle_fault((unsigned long)uaddr, attempt);
                        if (ret)
                                goto out;
                        uval = 0;
                        goto retry_unlocked;
                }
        
-- -----        futex_unlock_mm(fshared);
-- -----
                ret = get_user(uval, uaddr);
                if (!ret && (uval != -EFAULT))
                        goto retry;
                         * PI futexes happens in exit_pi_state():
                         */
                        if (!pi && (uval & FUTEX_WAITERS))
-- -----                        futex_wake(uaddr, &curr->mm->mmap_sem, 1,
-- -----                                   FUTEX_BITSET_MATCH_ANY);
++ +++++                        futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
                }
                return 0;
        }
@@@@@@@@@ -1995,10 -1992,10 -1904,10 -1995,10 -1995,10 -1995,10 -1995,10 -1992,10 +1907,10 @@@@@@@@@ long do_futex(u32 __user *uaddr, int op
        {
                int ret = -ENOSYS;
                int cmd = op & FUTEX_CMD_MASK;
-- -----        struct rw_semaphore *fshared = NULL;
++ +++++        int fshared = 0;
        
                if (!(op & FUTEX_PRIVATE_FLAG))
-- -----                fshared = &current->mm->mmap_sem;
++ +++++                fshared = 1;
        
                switch (cmd) {
                case FUTEX_WAIT:
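
The futex.c rework above drops the mmap_sem-based VMA walk and instead keys a futex off the page that backs it: anonymous (private) pages hash on (mm, address), file-backed (shared) pages on (inode, page index), with get/drop_futex_key_refs() pinning whichever object was chosen. A minimal sketch of just that keying decision, with an explicit page_is_anon flag standing in for PageAnon() and untyped pointers for mm/inode:

        struct futex_key_sketch {
                void          *mm_or_inode;      /* mm for private, inode for shared */
                unsigned long  address_or_pgoff;
                int            shared;           /* FUT_OFF_MMSHARED vs FUT_OFF_INODE */
        };

        static void make_key_sketch(struct futex_key_sketch *key, int page_is_anon,
                                    void *mm, unsigned long address,
                                    void *inode, unsigned long page_index)
        {
                if (page_is_anon) {              /* private: key on (mm, address) */
                        key->mm_or_inode = mm;
                        key->address_or_pgoff = address;
                        key->shared = 0;
                } else {                         /* shared: key on (inode, pgoff) */
                        key->mm_or_inode = inode;
                        key->address_or_pgoff = page_index;
                        key->shared = 1;
                }
        }
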
diff --combined kernel/lockdep.c
index 06e157119d2b24d2b254c9a38b5ace0935a05886,dbda475b13bd62876490fc5a523d373d72c931cd,dbda475b13bd62876490fc5a523d373d72c931cd,a4285830323352666b3c3e31b4ec04f9e7331d1b,06e157119d2b24d2b254c9a38b5ace0935a05886,06e157119d2b24d2b254c9a38b5ace0935a05886,46a404173db231a982baf3941c72e96911003906,dbda475b13bd62876490fc5a523d373d72c931cd..e4bdda8dcf0457364928fc0685ff3cff569d4f76
@@@@@@@@@ -136,16 -136,16 -136,16 -136,16 -136,16 -136,16 -136,16 -136,16 +136,16 @@@@@@@@@ static inline struct lock_class *hlock_
        #ifdef CONFIG_LOCK_STAT
        static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
        
--- ----static int lock_contention_point(struct lock_class *class, unsigned long ip)
+++ ++++static int lock_point(unsigned long points[], unsigned long ip)
        {
                int i;
        
--- ----        for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
--- ----                if (class->contention_point[i] == 0) {
--- ----                        class->contention_point[i] = ip;
+++ ++++        for (i = 0; i < LOCKSTAT_POINTS; i++) {
+++ ++++                if (points[i] == 0) {
+++ ++++                        points[i] = ip;
                                break;
                        }
--- ----                if (class->contention_point[i] == ip)
+++ ++++                if (points[i] == ip)
                                break;
                }
        
@@@@@@@@@ -185,6 -185,6 -185,6 -185,9 -185,6 -185,6 -185,6 -185,6 +185,9 @@@@@@@@@ struct lock_class_stats lock_stats(stru
                        for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
                                stats.contention_point[i] += pcs->contention_point[i];
        
+++ ++++                for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
+++ ++++                        stats.contending_point[i] += pcs->contending_point[i];
+++ ++++
                        lock_time_add(&pcs->read_waittime, &stats.read_waittime);
                        lock_time_add(&pcs->write_waittime, &stats.write_waittime);
        
@@@@@@@@@ -209,6 -209,6 -209,6 -212,7 -209,6 -209,6 -209,6 -209,6 +212,7 @@@@@@@@@ void clear_lock_stats(struct lock_clas
                        memset(cpu_stats, 0, sizeof(struct lock_class_stats));
                }
                memset(class->contention_point, 0, sizeof(class->contention_point));
+++ ++++        memset(class->contending_point, 0, sizeof(class->contending_point));
        }
        
        static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@@@@@@@@@ -2169,11 -2169,12 -2169,12 -2173,11 -2169,11 -2169,11 -2169,11 -2169,12 +2173,11 @@@@@@@@@ void early_boot_irqs_on(void
        /*
         * Hardirqs will be enabled:
         */
 --    -void trace_hardirqs_on_caller(unsigned long a0)
 ++    +void trace_hardirqs_on_caller(unsigned long ip)
        {
                struct task_struct *curr = current;
 --    -        unsigned long ip;
        
 --    -        time_hardirqs_on(CALLER_ADDR0, a0);
 ++    +        time_hardirqs_on(CALLER_ADDR0, ip);
        
                if (unlikely(!debug_locks || current->lockdep_recursion))
                        return;
                }
                /* we'll do an OFF -> ON transition: */
                curr->hardirqs_enabled = 1;
 --    -        ip = (unsigned long) __builtin_return_address(0);
        
                if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                        return;
@@@@@@@@@ -2222,11 -2224,11 -2224,11 -2226,11 -2222,11 -2222,11 -2222,11 -2224,11 +2226,11 @@@@@@@@@ EXPORT_SYMBOL(trace_hardirqs_on)
        /*
         * Hardirqs were disabled:
         */
 --    -void trace_hardirqs_off_caller(unsigned long a0)
 ++    +void trace_hardirqs_off_caller(unsigned long ip)
        {
                struct task_struct *curr = current;
        
 --    -        time_hardirqs_off(CALLER_ADDR0, a0);
 ++    +        time_hardirqs_off(CALLER_ADDR0, ip);
        
                if (unlikely(!debug_locks || current->lockdep_recursion))
                        return;
                         * We have done an ON -> OFF transition:
                         */
                        curr->hardirqs_enabled = 0;
 --    -                curr->hardirq_disable_ip = _RET_IP_;
 ++    +                curr->hardirq_disable_ip = ip;
                        curr->hardirq_disable_event = ++curr->irq_events;
                        debug_atomic_inc(&hardirqs_off_events);
                } else
@@@@@@@@@ -2999,7 -3001,7 -3001,7 -3003,7 -2999,7 -2999,7 -2999,7 -3001,7 +3003,7 @@@@@@@@@ __lock_contended(struct lockdep_map *lo
                struct held_lock *hlock, *prev_hlock;
                struct lock_class_stats *stats;
                unsigned int depth;
--- ----        int i, point;
+++ ++++        int i, contention_point, contending_point;
        
                depth = curr->lockdep_depth;
                if (DEBUG_LOCKS_WARN_ON(!depth))
        found_it:
                hlock->waittime_stamp = sched_clock();
        
--- ----        point = lock_contention_point(hlock_class(hlock), ip);
+++ ++++        contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
+++ ++++        contending_point = lock_point(hlock_class(hlock)->contending_point,
+++ ++++                                      lock->ip);
        
                stats = get_lock_stats(hlock_class(hlock));
--- ----        if (point < ARRAY_SIZE(stats->contention_point))
--- ----                stats->contention_point[point]++;
+++ ++++        if (contention_point < LOCKSTAT_POINTS)
+++ ++++                stats->contention_point[contention_point]++;
+++ ++++        if (contending_point < LOCKSTAT_POINTS)
+++ ++++                stats->contending_point[contending_point]++;
                if (lock->cpu != smp_processor_id())
                        stats->bounces[bounce_contended + !!hlock->read]++;
                put_lock_stats(stats);
        }
        
        static void
--- ----__lock_acquired(struct lockdep_map *lock)
+++ ++++__lock_acquired(struct lockdep_map *lock, unsigned long ip)
        {
                struct task_struct *curr = current;
                struct held_lock *hlock, *prev_hlock;
                put_lock_stats(stats);
        
                lock->cpu = cpu;
+++ ++++        lock->ip = ip;
        }
        
        void lock_contended(struct lockdep_map *lock, unsigned long ip)
        }
        EXPORT_SYMBOL_GPL(lock_contended);
        
--- ----void lock_acquired(struct lockdep_map *lock)
+++ ++++void lock_acquired(struct lockdep_map *lock, unsigned long ip)
        {
                unsigned long flags;
        
                raw_local_irq_save(flags);
                check_flags(flags);
                current->lockdep_recursion = 1;
--- ----        __lock_acquired(lock);
+++ ++++        __lock_acquired(lock, ip);
                current->lockdep_recursion = 0;
                raw_local_irq_restore(flags);
        }
@@@@@@@@@ -3276,10 -3278,10 -3278,10 -3285,10 -3276,10 -3276,10 -3276,10 -3278,10 +3285,10 @@@@@@@@@ void __init lockdep_info(void
        {
                printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
        
------ -        printk("... MAX_LOCKDEP_SUBCLASSES:    %lu\n", MAX_LOCKDEP_SUBCLASSES);
++++++ +        printk("... MAX_LOCKDEP_SUBCLASSES:  %lu\n", MAX_LOCKDEP_SUBCLASSES);
                printk("... MAX_LOCK_DEPTH:          %lu\n", MAX_LOCK_DEPTH);
                printk("... MAX_LOCKDEP_KEYS:        %lu\n", MAX_LOCKDEP_KEYS);
------ -        printk("... CLASSHASH_SIZE:           %lu\n", CLASSHASH_SIZE);
++++++ +        printk("... CLASSHASH_SIZE:          %lu\n", CLASSHASH_SIZE);
                printk("... MAX_LOCKDEP_ENTRIES:     %lu\n", MAX_LOCKDEP_ENTRIES);
                printk("... MAX_LOCKDEP_CHAINS:      %lu\n", MAX_LOCKDEP_CHAINS);
                printk("... CHAINHASH_SIZE:          %lu\n", CHAINHASH_SIZE);
                        }
                        printk(" ignoring it.\n");
                        unlock = 0;
 ++    +        } else {
 ++    +                if (count != 10)
 ++    +                        printk(KERN_CONT " locked it.\n");
                }
 --    -        if (count != 10)
 --    -                printk(" locked it.\n");
        
                do_each_thread(g, p) {
                        /*
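
lock_point() above generalizes the old lock_contention_point(): it records an instruction pointer in a small fixed array, reusing the slot that already holds the same IP, and is now applied to both the contention and the contending (acquiring) side. A stand-alone sketch of that slot lookup, with POINTS_SKETCH standing in for LOCKSTAT_POINTS:

        #define POINTS_SKETCH 4

        /* Return the slot index for ip: claim the first free slot (0 == free)
         * or reuse the slot already holding ip; POINTS_SKETCH means "table
         * full", which callers filter with "if (point < POINTS_SKETCH)". */
        static int lock_point_sketch(unsigned long points[POINTS_SKETCH], unsigned long ip)
        {
                int i;

                for (i = 0; i < POINTS_SKETCH; i++) {
                        if (points[i] == 0) {
                                points[i] = ip;
                                break;
                        }
                        if (points[i] == ip)
                                break;
                }
                return i;
        }
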
diff --combined kernel/notifier.c
index 4282c0a40a57ada651b86c7dcce2389abf489448,0f39e398ef609cbf11ba04ce4977e14d2f8518fa,823be11584efef8ef1d344f484cbf8c3d4f9617e,4282c0a40a57ada651b86c7dcce2389abf489448,4282c0a40a57ada651b86c7dcce2389abf489448,4282c0a40a57ada651b86c7dcce2389abf489448,4282c0a40a57ada651b86c7dcce2389abf489448,4282c0a40a57ada651b86c7dcce2389abf489448..61d5aa5eced3466393582e4f566b63c468ea7cc3
@@@@@@@@@ -82,6 -82,14 -82,6 -82,6 -82,6 -82,6 -82,6 -82,6 +82,14 @@@@@@@@@ static int __kprobes notifier_call_chai
        
                while (nb && nr_to_call) {
                        next_nb = rcu_dereference(nb->next);
+ ++++++
+ ++++++#ifdef CONFIG_DEBUG_NOTIFIERS
+ ++++++                if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
+ ++++++                        WARN(1, "Invalid notifier called!");
+ ++++++                        nb = next_nb;
+ ++++++                        continue;
+ ++++++                }
+ ++++++#endif
                        ret = nb->notifier_call(nb, val, v);
        
                        if (nr_calls)
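
The new CONFIG_DEBUG_NOTIFIERS check fires when a callback pointer no longer points into kernel or module text, which usually means a module was unloaded without unregistering its notifier_block. A minimal sketch of the register/unregister pairing the check is meant to enforce; the reboot chain and the callback names are illustrative only, not part of this commit:

        #include <linux/module.h>
        #include <linux/notifier.h>
        #include <linux/reboot.h>

        static int my_reboot_event(struct notifier_block *nb,
                                   unsigned long action, void *data)
        {
                /* ... react to the reboot event ... */
                return NOTIFY_DONE;
        }

        static struct notifier_block my_reboot_nb = {
                .notifier_call = my_reboot_event,
        };

        static int __init my_init(void)
        {
                return register_reboot_notifier(&my_reboot_nb);
        }

        static void __exit my_exit(void)
        {
                /*
                 * Unregister before the module text disappears; otherwise the
                 * func_ptr_is_kernel_text() check above warns about the stale
                 * callback pointer on the next notifier_call_chain() run.
                 */
                unregister_reboot_notifier(&my_reboot_nb);
        }

        module_init(my_init);
        module_exit(my_exit);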
@@@@@@@@@ -550,7 -558,7 -550,7 -550,7 -550,7 -550,7 -550,7 -550,7 +558,7 @@@@@@@@@ EXPORT_SYMBOL(unregister_reboot_notifie
        
        static ATOMIC_NOTIFIER_HEAD(die_chain);
        
 --     int notify_die(enum die_val val, const char *str,
 ++     int notrace notify_die(enum die_val val, const char *str,
                       struct pt_regs *regs, long err, int trap, int sig)
        {
                struct die_args args = {
diff --combined kernel/sched.c
index 9b1e79371c207b37c1617d3f7c0460709a3cc39b,cc1f81b50b82dddb19658dc4d10dd419087a59fc,13dd2db9fb2dc185a4a95a86aab9f89b4850ccaf,2a106b6b78b09006f75274defb2057b6e7a428e7,e8819bc6f462c18761c11861b4808fd28223b431,b388c9b243e94c71e15df02692ae3fb3658482da,9b1e79371c207b37c1617d3f7c0460709a3cc39b,d906f72b42d23ae1d8c2355d9b605e5fd0761eaa..558e5f284269bfd59a23008ad4906b51269047b2
        #include <linux/cpuset.h>
        #include <linux/percpu.h>
        #include <linux/kthread.h>
 ++    +#include <linux/proc_fs.h>
        #include <linux/seq_file.h>
        #include <linux/sysctl.h>
        #include <linux/syscalls.h>
        #include <linux/debugfs.h>
        #include <linux/ctype.h>
        #include <linux/ftrace.h>
 ++     #include <trace/sched.h>
        
        #include <asm/tlb.h>
        #include <asm/irq_regs.h>
@@@@@@@@@ -203,19 -201,14 -201,14 -203,19 -203,19 -203,19 -203,19 -202,19 +203,19 @@@@@@@@@ void init_rt_bandwidth(struct rt_bandwi
                hrtimer_init(&rt_b->rt_period_timer,
                                CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                rt_b->rt_period_timer.function = sched_rt_period_timer;
 --             rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 ++             rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 ++     }
 ++     
 ++     static inline int rt_bandwidth_enabled(void)
 ++     {
 ++             return sysctl_sched_rt_runtime >= 0;
        }
        
        static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
        {
                ktime_t now;
        
 --             if (rt_b->rt_runtime == RUNTIME_INF)
 ++             if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
                        return;
        
                if (hrtimer_active(&rt_b->rt_period_timer))
        
                        now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
                        hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
 --    -                hrtimer_start(&rt_b->rt_period_timer,
 --    -                              rt_b->rt_period_timer.expires,
 --    -                              HRTIMER_MODE_ABS);
 ++    +                hrtimer_start_expires(&rt_b->rt_period_timer,
 ++    +                                HRTIMER_MODE_ABS);
                }
                spin_unlock(&rt_b->rt_runtime_lock);
        }
@@@@@@@@@ -304,9 -298,9 -298,9 -304,9 -304,9 -304,9 -304,9 -304,9 +304,9 @@@@@@@@@ static DEFINE_PER_CPU(struct cfs_rq, in
        static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
        static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
        #endif /* CONFIG_RT_GROUP_SCHED */
 --     #else /* !CONFIG_FAIR_GROUP_SCHED */
 ++     #else /* !CONFIG_USER_SCHED */
        #define root_task_group init_task_group
 --     #endif /* CONFIG_FAIR_GROUP_SCHED */
 ++     #endif /* CONFIG_USER_SCHED */
        
        /* task_group_lock serializes add/remove of task groups and also changes to
         * a task group's cpu shares.
@@@@@@@@@ -386,6 -380,7 -380,7 -386,6 -386,6 -386,6 -386,6 -386,7 +386,6 @@@@@@@@@ struct cfs_rq 
        
                u64 exec_clock;
                u64 min_vruntime;
 --    -        u64 pair_start;
        
                struct rb_root tasks_timeline;
                struct rb_node *rb_leftmost;
                 * 'curr' points to currently running entity on this cfs_rq.
                 * It is set to NULL otherwise (i.e when none are currently running).
                 */
 -- -  -        struct sched_entity *curr, *next;
 ++ +  +        struct sched_entity *curr, *next, *last;
        
 -- -  -        unsigned long nr_spread_over;
 ++ +  +        unsigned int nr_spread_over;
        
        #ifdef CONFIG_FAIR_GROUP_SCHED
                struct rq *rq;  /* cpu runqueue to which this cfs_rq is attached */
@@@@@@@@@ -609,9 -604,9 -604,9 -609,9 -609,9 -609,9 -609,9 -610,9 +609,9 @@@@@@@@@ struct rq 
        
        static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
        
 --     static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
 ++     static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync)
        {
 --             rq->curr->sched_class->check_preempt_curr(rq, p);
 ++             rq->curr->sched_class->check_preempt_curr(rq, p, sync);
        }
        
        static inline int cpu_of(struct rq *rq)
@@@@@@@@@ -817,13 -812,6 -812,6 -817,13 -817,13 -817,13 -817,13 -818,6 +817,13 @@@@@@@@@ const_debug unsigned int sysctl_sched_n
         */
        unsigned int sysctl_sched_shares_ratelimit = 250000;
        
 ++    +/*
 ++    + * Inject some fuzziness into changing the per-cpu group shares;
 ++    + * this avoids remote rq-locks at the expense of fairness.
 ++    + * default: 4
 ++    + */
 ++    +unsigned int sysctl_sched_shares_thresh = 4;
 ++    +
        /*
         * period over which we measure -rt task cpu usage in us.
         * default: 1s
@@@@@@@@@ -969,14 -957,6 -957,6 -969,14 -969,6 -969,14 -969,14 -963,6 +969,14 @@@@@@@@@ static struct rq *task_rq_lock(struct t
                }
        }
        
 ++ +  +void task_rq_unlock_wait(struct task_struct *p)
 ++ +  +{
 ++ +  +        struct rq *rq = task_rq(p);
 ++ +  +
 ++ +  +        smp_mb(); /* spin-unlock-wait is not a full memory barrier */
 ++ +  +        spin_unlock_wait(&rq->lock);
 ++ +  +}
 ++ +  +
        static void __task_rq_unlock(struct rq *rq)
                __releases(rq->lock)
        {
@@@@@@@@@ -1078,7 -1058,7 -1058,7 -1078,7 -1070,7 -1078,7 -1078,7 -1064,7 +1078,7 @@@@@@@@@ static void hrtick_start(struct rq *rq
                struct hrtimer *timer = &rq->hrtick_timer;
                ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
        
 --    -        timer->expires = time;
 ++    +        hrtimer_set_expires(timer, time);
        
                if (rq == this_rq()) {
                        hrtimer_restart(timer);
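
The hrtick and rt-period timers stop writing timer->expires directly and go through the new accessor pair instead. A minimal sketch of the converted pattern, assuming the hrtimer API of this kernel generation; rearm_example() is a made-up name:

        #include <linux/hrtimer.h>
        #include <linux/ktime.h>

        /* illustrative only: re-arm an already initialised hrtimer 'delay' ns from now */
        static void rearm_example(struct hrtimer *timer, u64 delay)
        {
                ktime_t time = ktime_add_ns(timer->base->get_time(), delay);

                hrtimer_set_expires(timer, time);       /* replaces: timer->expires = time */
                hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
        }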
@@@@@@@@@ -1107,7 -1087,7 -1087,7 -1107,7 -1099,7 -1107,7 -1107,7 -1093,7 +1107,7 @@@@@@@@@ hotplug_hrtick(struct notifier_block *n
                return NOTIFY_DONE;
        }
        
 -      static void init_hrtick(void)
 +      static __init void init_hrtick(void)
        {
                hotcpu_notifier(hotplug_hrtick, 0);
        }
@@@@@@@@@ -1122,7 -1102,7 -1102,7 -1122,7 -1114,7 -1122,7 -1122,7 -1108,7 +1122,7 @@@@@@@@@ static void hrtick_start(struct rq *rq
                hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
        }
        
 --     static void init_hrtick(void)
 ++     static inline void init_hrtick(void)
        {
        }
        #endif /* CONFIG_SMP */
@@@@@@@@@ -1139,9 -1119,9 -1119,9 -1139,9 -1131,9 -1139,9 -1139,9 -1125,9 +1139,9 @@@@@@@@@ static void init_rq_hrtick(struct rq *r
        
                hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                rq->hrtick_timer.function = hrtick;
 --             rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 ++             rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
        }
 --     #else
 ++     #else   /* CONFIG_SCHED_HRTICK */
        static inline void hrtick_clear(struct rq *rq)
        {
        }
@@@@@@@@@ -1153,7 -1133,7 -1133,7 -1153,7 -1145,7 -1153,7 -1153,7 -1139,7 +1153,7 @@@@@@@@@ static inline void init_rq_hrtick(struc
        static inline void init_hrtick(void)
        {
        }
 --     #endif
 ++     #endif  /* CONFIG_SCHED_HRTICK */
        
        /*
         * resched_task - mark a task 'to be rescheduled now'.
@@@@@@@@@ -1400,24 -1380,38 -1380,38 -1400,24 -1392,24 -1400,24 -1400,24 -1386,24 +1400,24 @@@@@@@@@ static inline void dec_cpu_load(struct 
                update_load_sub(&rq->load, load);
        }
        
 --     #ifdef CONFIG_SMP
 --     static unsigned long source_load(int cpu, int type);
 --     static unsigned long target_load(int cpu, int type);
 --     static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 --     
 --     static unsigned long cpu_avg_load_per_task(int cpu)
 --     {
 --             struct rq *rq = cpu_rq(cpu);
 --     
 --             if (rq->nr_running)
 --                     rq->avg_load_per_task = rq->load.weight / rq->nr_running;
 --     
 --             return rq->avg_load_per_task;
 --     }
 --     
 --     #ifdef CONFIG_FAIR_GROUP_SCHED
 --     
 --     typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
 ++     #if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
 ++     typedef int (*tg_visitor)(struct task_group *, void *);
        
        /*
         * Iterate the full tree, calling @down when first entering a node and @up when
         * leaving it for the final time.
         */
 --     static void
 --     walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
 ++     static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
        {
                struct task_group *parent, *child;
 ++             int ret;
        
                rcu_read_lock();
                parent = &root_task_group;
        down:
 --             (*down)(parent, cpu, sd);
 ++             ret = (*down)(parent, data);
 ++             if (ret)
 ++                     goto out_unlock;
                list_for_each_entry_rcu(child, &parent->children, siblings) {
                        parent = child;
                        goto down;
        up:
                        continue;
                }
 --             (*up)(parent, cpu, sd);
 ++             ret = (*up)(parent, data);
 ++             if (ret)
 ++                     goto out_unlock;
        
                child = parent;
                parent = parent->parent;
                if (parent)
                        goto up;
 ++     out_unlock:
                rcu_read_unlock();
 ++     
 ++             return ret;
 ++     }
 ++     
 ++     static int tg_nop(struct task_group *tg, void *data)
 ++     {
 ++             return 0;
 ++     }
 ++     #endif
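
walk_tg_tree() now carries an opaque void *data cookie and its visitors return int, so a walk can be aborted as soon as a visitor returns non-zero. A minimal sketch of a visitor written against the new signature, as if it lived next to the helpers above; the counting visitor is purely illustrative and not part of this commit:

        /* count every task group in the hierarchy (illustrative only) */
        static int tg_count(struct task_group *tg, void *data)
        {
                (*(unsigned int *)data)++;
                return 0;                       /* 0 == keep walking */
        }

        static unsigned int count_task_groups(void)
        {
                unsigned int nr = 0;

                /* down = tg_count, up = tg_nop, data = &nr */
                walk_tg_tree(tg_count, tg_nop, &nr);
                return nr;
        }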
 ++     
 ++     #ifdef CONFIG_SMP
 ++     static unsigned long source_load(int cpu, int type);
 ++     static unsigned long target_load(int cpu, int type);
 ++     static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 ++     
 ++     static unsigned long cpu_avg_load_per_task(int cpu)
 ++     {
 ++             struct rq *rq = cpu_rq(cpu);
 ++     
 ++             if (rq->nr_running)
 ++                     rq->avg_load_per_task = rq->load.weight / rq->nr_running;
 ++++  +        else
 ++++  +                rq->avg_load_per_task = 0;
 ++     
 ++             return rq->avg_load_per_task;
        }
        
 ++     #ifdef CONFIG_FAIR_GROUP_SCHED
 ++     
        static void __set_se_shares(struct sched_entity *se, unsigned long shares);
        
        /*
         * Calculate and set the cpu's group shares.
         */
        static void
 --    -__update_group_shares_cpu(struct task_group *tg, int cpu,
 --    -                          unsigned long sd_shares, unsigned long sd_rq_weight)
 ++    +update_group_shares_cpu(struct task_group *tg, int cpu,
 ++    +                        unsigned long sd_shares, unsigned long sd_rq_weight)
        {
                int boost = 0;
                unsigned long shares;
                 *
                 */
                shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
 ++    +        shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
        
 --    -        /*
 --    -         * record the actual number of shares, not the boosted amount.
 --    -         */
 --    -        tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
 --    -        tg->cfs_rq[cpu]->rq_weight = rq_weight;
 ++    +        if (abs(shares - tg->se[cpu]->load.weight) >
 ++    +                        sysctl_sched_shares_thresh) {
 ++    +                struct rq *rq = cpu_rq(cpu);
 ++    +                unsigned long flags;
        
 --    -        if (shares < MIN_SHARES)
 --    -                shares = MIN_SHARES;
 --    -        else if (shares > MAX_SHARES)
 --    -                shares = MAX_SHARES;
 ++    +                spin_lock_irqsave(&rq->lock, flags);
 ++    +                /*
 ++    +                 * record the actual number of shares, not the boosted amount.
 ++    +                 */
 ++    +                tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
 ++    +                tg->cfs_rq[cpu]->rq_weight = rq_weight;
        
 --    -        __set_se_shares(tg->se[cpu], shares);
 ++    +                __set_se_shares(tg->se[cpu], shares);
 ++    +                spin_unlock_irqrestore(&rq->lock, flags);
 ++    +        }
        }
        
        /*
         * This needs to be done in a bottom-up fashion because the rq weight of a
         * parent group depends on the shares of its child groups.
         */
 --     static void
 --     tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
 ++     static int tg_shares_up(struct task_group *tg, void *data)
        {
                unsigned long rq_weight = 0;
                unsigned long shares = 0;
 ++             struct sched_domain *sd = data;
                int i;
        
                for_each_cpu_mask(i, sd->span) {
                if (!rq_weight)
                        rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
        
 --    -        for_each_cpu_mask(i, sd->span) {
 --    -                struct rq *rq = cpu_rq(i);
 --    -                unsigned long flags;
       -
       -                spin_lock_irqsave(&rq->lock, flags);
       -                __update_group_shares_cpu(tg, i, shares, rq_weight);
       -                spin_unlock_irqrestore(&rq->lock, flags);
       -        }
 ++    +        for_each_cpu_mask(i, sd->span)
 ++    +                update_group_shares_cpu(tg, i, shares, rq_weight);
        
 --                     spin_lock_irqsave(&rq->lock, flags);
 --                     __update_group_shares_cpu(tg, i, shares, rq_weight);
 --                     spin_unlock_irqrestore(&rq->lock, flags);
 --             }
 ++             return 0;
        }
        
        /*
         * This needs to be done in a top-down fashion because the load of a child
         * group is a fraction of its parents load.
         */
 --     static void
 --     tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
 ++     static int tg_load_down(struct task_group *tg, void *data)
        {
                unsigned long load;
 ++             long cpu = (long)data;
        
                if (!tg->parent) {
                        load = cpu_rq(cpu)->load.weight;
                }
        
                tg->cfs_rq[cpu]->h_load = load;
 --     }
        
 --     static void
 --     tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
 --     {
 ++             return 0;
        }
        
        static void update_shares(struct sched_domain *sd)
        
                if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
                        sd->last_update = now;
 --                     walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
 ++                     walk_tg_tree(tg_nop, tg_shares_up, sd);
                }
        }
        
@@@@@@@@@ -1594,9 -1561,9 -1561,9 -1592,9 -1584,9 -1594,9 -1594,9 -1580,9 +1594,9 @@@@@@@@@ static void update_shares_locked(struc
                spin_lock(&rq->lock);
        }
        
 --     static void update_h_load(int cpu)
 ++     static void update_h_load(long cpu)
        {
 --             walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
 ++             walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
        }
        
        #else
@@@@@@@@@ -1815,9 -1782,7 -1782,7 -1813,9 -1805,7 -1815,9 -1815,9 -1801,7 +1815,9 @@@@@@@@@ task_hot(struct task_struct *p, u64 now
                /*
                 * Buddy candidates are cache hot:
                 */
 -- -  -        if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next))
 ++ +  +        if (sched_feat(CACHE_HOT_BUDDY) &&
 ++ +  +                        (&p->se == cfs_rq_of(&p->se)->next ||
 ++ +  +                         &p->se == cfs_rq_of(&p->se)->last))
                        return 1;
        
                if (p->sched_class != &fair_sched_class)
@@@@@@@@@ -1953,12 -1918,14 -1918,14 -1951,12 -1941,12 -1953,12 -1953,12 -1937,12 +1953,12 @@@@@@@@@ unsigned long wait_task_inactive(struc
                         * just go back and repeat.
                         */
                        rq = task_rq_lock(p, &flags);
 ++                     trace_sched_wait_task(rq, p);
                        running = task_running(rq, p);
                        on_rq = p->se.on_rq;
                        ncsw = 0;
 --                     if (!match_state || p->state == match_state) {
 --                             ncsw = p->nivcsw + p->nvcsw;
 --                             if (unlikely(!ncsw))
 --                                     ncsw = 1;
 --                     }
 ++                     if (!match_state || p->state == match_state)
 ++                             ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
                        task_rq_unlock(rq, &flags);
        
                        /*
                success = 1;
        
        out_running:
 --             trace_mark(kernel_sched_wakeup,
 --                     "pid %d state %ld ## rq %p task %p rq->curr %p",
 --                     p->pid, p->state, rq, p, rq->curr);
 --             check_preempt_curr(rq, p);
 ++             trace_sched_wakeup(rq, p);
 ++             check_preempt_curr(rq, p, sync);
        
                p->state = TASK_RUNNING;
        #ifdef CONFIG_SMP
@@@@@@@@@ -2448,8 -2417,10 -2417,10 -2446,8 -2436,8 -2448,8 -2448,8 -2432,8 +2448,8 @@@@@@@@@ void wake_up_new_task(struct task_struc
                        p->sched_class->task_new(rq, p);
                        inc_nr_running(rq);
                }
 --             trace_mark(kernel_sched_wakeup_new,
 --                     "pid %d state %ld ## rq %p task %p rq->curr %p",
 --                     p->pid, p->state, rq, p, rq->curr);
 --             check_preempt_curr(rq, p);
 ++             trace_sched_wakeup_new(rq, p);
 ++             check_preempt_curr(rq, p, 0);
        #ifdef CONFIG_SMP
                if (p->sched_class->task_wake_up)
                        p->sched_class->task_wake_up(rq, p);
@@@@@@@@@ -2621,7 -2592,11 -2592,11 -2619,7 -2609,7 -2621,7 -2621,7 -2605,7 +2621,7 @@@@@@@@@ context_switch(struct rq *rq, struct ta
                struct mm_struct *mm, *oldmm;
        
                prepare_task_switch(rq, prev, next);
 --             trace_mark(kernel_sched_schedule,
 --                     "prev_pid %d next_pid %d prev_state %ld "
 --                     "## rq %p prev %p next %p",
 --                     prev->pid, next->pid, prev->state,
 --                     rq, prev, next);
 ++             trace_sched_switch(rq, prev, next);
                mm = next->mm;
                oldmm = prev->active_mm;
                /*
@@@@@@@@@ -2861,7 -2836,6 -2836,6 -2859,7 -2849,7 -2861,7 -2861,7 -2845,7 +2861,7 @@@@@@@@@ static void sched_migrate_task(struct t
                    || unlikely(!cpu_active(dest_cpu)))
                        goto out;
        
 ++             trace_sched_migrate_task(rq, p, dest_cpu);
                /* force the process onto the specified CPU */
                if (migrate_task(p, dest_cpu, &req)) {
                        /* Need to wait for migration thread (might exit: take ref). */
@@@@@@@@@ -2906,7 -2880,7 -2880,7 -2904,7 -2894,7 -2906,7 -2906,7 -2890,7 +2906,7 @@@@@@@@@ static void pull_task(struct rq *src_rq
                 * Note that idle threads have a prio of MAX_PRIO, for this test
                 * to be always true for them.
                 */
 --             check_preempt_curr(this_rq, p);
 ++             check_preempt_curr(this_rq, p, 0);
        }
        
        /*
@@@@@@@@@ -3355,7 -3329,7 -3329,7 -3353,7 -3343,7 -3355,7 -3355,7 -3339,7 +3355,7 @@@@@@@@@ small_imbalance
                        } else
                                this_load_per_task = cpu_avg_load_per_task(this_cpu);
        
 --    -                if (max_load - this_load + 2*busiest_load_per_task >=
 ++    +                if (max_load - this_load + busiest_load_per_task >=
                                                busiest_load_per_task * imbn) {
                                *imbalance = busiest_load_per_task;
                                return busiest;
@@@@@@@@@ -4063,26 -4037,23 -4037,23 -4061,26 -4051,26 -4063,26 -4063,26 -4047,26 +4063,26 @@@@@@@@@ DEFINE_PER_CPU(struct kernel_stat, ksta
        EXPORT_PER_CPU_SYMBOL(kstat);
        
        /*
 --      * Return p->sum_exec_runtime plus any more ns on the sched_clock
 --      * that have not yet been banked in case the task is currently running.
 ++      * Return any ns on the sched_clock that have not yet been banked in
 ++      * @p in case that task is currently running.
         */
 --     unsigned long long task_sched_runtime(struct task_struct *p)
 ++     unsigned long long task_delta_exec(struct task_struct *p)
        {
                unsigned long flags;
 --             u64 ns, delta_exec;
                struct rq *rq;
 ++             u64 ns = 0;
        
                rq = task_rq_lock(p, &flags);
 --             ns = p->se.sum_exec_runtime;
 ++     
                if (task_current(rq, p)) {
 ++                     u64 delta_exec;
 ++     
                        update_rq_clock(rq);
                        delta_exec = rq->clock - p->se.exec_start;
                        if ((s64)delta_exec > 0)
 --                             ns += delta_exec;
 ++                             ns = delta_exec;
                }
 ++     
                task_rq_unlock(rq, &flags);
        
                return ns;
@@@@@@@@@ -4099,7 -4070,6 -4070,6 -4097,7 -4087,7 -4099,7 -4099,7 -4083,7 +4099,7 @@@@@@@@@ void account_user_time(struct task_stru
                cputime64_t tmp;
        
                p->utime = cputime_add(p->utime, cputime);
 ++             account_group_user_time(p, cputime);
        
                /* Add user time to cpustat. */
                tmp = cputime_to_cputime64(cputime);
@@@@@@@@@ -4124,7 -4094,6 -4094,6 -4122,7 -4112,7 -4124,7 -4124,7 -4108,7 +4124,7 @@@@@@@@@ static void account_guest_time(struct t
                tmp = cputime_to_cputime64(cputime);
        
                p->utime = cputime_add(p->utime, cputime);
 ++             account_group_user_time(p, cputime);
                p->gtime = cputime_add(p->gtime, cputime);
        
                cpustat->user = cputime64_add(cpustat->user, tmp);
@@@@@@@@@ -4160,7 -4129,6 -4129,6 -4158,7 -4148,7 -4160,7 -4160,7 -4144,7 +4160,7 @@@@@@@@@ void account_system_time(struct task_st
                }
        
                p->stime = cputime_add(p->stime, cputime);
 ++             account_group_system_time(p, cputime);
        
                /* Add system time to cpustat. */
                tmp = cputime_to_cputime64(cputime);
@@@@@@@@@ -4202,7 -4170,6 -4170,6 -4200,7 -4190,7 -4202,6 -4202,7 -4186,7 +4202,6 @@@@@@@@@ void account_steal_time(struct task_str
        
                if (p == rq->idle) {
                        p->stime = cputime_add(p->stime, steal);
-  -- --                account_group_system_time(p, steal);
                        if (atomic_read(&rq->nr_iowait) > 0)
                                cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                        else
@@@@@@@@@ -4338,7 -4305,7 -4305,7 -4336,7 -4326,7 -4337,7 -4338,7 -4322,7 +4337,7 @@@@@@@@@ void __kprobes sub_preempt_count(int va
                /*
                 * Underflow?
                 */
--- ----        if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+++ ++++       if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
                        return;
                /*
                 * Is the spinlock portion underflowing?
@@@@@@@@@ -4459,8 -4426,12 -4426,12 -4457,8 -4447,8 -4458,8 -4459,8 -4443,12 +4458,8 @@@@@@@@@ need_resched_nonpreemptible
                if (sched_feat(HRTICK))
                        hrtick_clear(rq);
        
 --    -        /*
 --    -         * Do the rq-clock update outside the rq lock:
 --    -         */
 --    -        local_irq_disable();
 ++    +        spin_lock_irq(&rq->lock);
                update_rq_clock(rq);
 --    -        spin_lock(&rq->lock);
                clear_tsk_need_resched(prev);
        
                if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@@@@@@@@ -4656,15 -4627,6 -4627,6 -4654,15 -4644,15 -4655,15 -4656,15 -4644,15 +4655,15 @@@@@@@@@ __wake_up_sync(wait_queue_head_t *q, un
        }
        EXPORT_SYMBOL_GPL(__wake_up_sync);      /* For internal use only */
        
 ++     /**
 ++      * complete: - signals a single thread waiting on this completion
 ++      * @x:  holds the state of this particular completion
 ++      *
 ++      * This will wake up a single thread waiting on this completion. Threads will be
 ++      * awakened in the same order in which they were queued.
 ++      *
 ++      * See also complete_all(), wait_for_completion() and related routines.
 ++      */
        void complete(struct completion *x)
        {
                unsigned long flags;
        }
        EXPORT_SYMBOL(complete);
        
 ++     /**
 ++      * complete_all: - signals all threads waiting on this completion
 ++      * @x:  holds the state of this particular completion
 ++      *
 ++      * This will wake up all threads waiting on this particular completion event.
 ++      */
        void complete_all(struct completion *x)
        {
                unsigned long flags;
@@@@@@@@@ -4702,7 -4658,10 -4658,10 -4700,7 -4690,7 -4701,7 -4702,7 -4690,7 +4701,7 @@@@@@@@@ do_wait_for_common(struct completion *x
                        wait.flags |= WQ_FLAG_EXCLUSIVE;
                        __add_wait_queue_tail(&x->wait, &wait);
                        do {
 --                             if ((state == TASK_INTERRUPTIBLE &&
 --                                  signal_pending(current)) ||
 --                                 (state == TASK_KILLABLE &&
 --                                  fatal_signal_pending(current))) {
 ++                             if (signal_pending_state(state, current)) {
                                        timeout = -ERESTARTSYS;
                                        break;
                                }
@@@@@@@@@ -4730,31 -4689,12 -4689,12 -4728,31 -4718,31 -4729,31 -4730,31 -4718,31 +4729,31 @@@@@@@@@ wait_for_common(struct completion *x, l
                return timeout;
        }
        
 ++     /**
 ++      * wait_for_completion: - waits for completion of a task
 ++      * @x:  holds the state of this particular completion
 ++      *
 ++      * This waits to be signaled for completion of a specific task. It is NOT
 ++      * interruptible and there is no timeout.
 ++      *
 ++      * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
 ++      * and interrupt capability. Also see complete().
 ++      */
        void __sched wait_for_completion(struct completion *x)
        {
                wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
        }
        EXPORT_SYMBOL(wait_for_completion);
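
A minimal usage sketch of the complete()/wait_for_completion() pairing the new kernel-doc describes; setup_done and the thread function are hypothetical names, not part of this commit:

        #include <linux/completion.h>
        #include <linux/kthread.h>

        static DECLARE_COMPLETION(setup_done);

        static int my_setup_thread(void *unused)
        {
                /* ... perform the setup work ... */
                complete(&setup_done);          /* wakes exactly one waiter, in FIFO order */
                return 0;
        }

        static void wait_for_setup(void)
        {
                /* uninterruptible, no timeout; see the *_timeout/_interruptible variants */
                wait_for_completion(&setup_done);
        }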
        
 ++     /**
 ++      * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
 ++      * @x:  holds the state of this particular completion
 ++      * @timeout:  timeout value in jiffies
 ++      *
 ++      * This waits for either a completion of a specific task to be signaled or for a
 ++      * specified timeout to expire. The timeout is in jiffies. It is not
 ++      * interruptible.
 ++      */
        unsigned long __sched
        wait_for_completion_timeout(struct completion *x, unsigned long timeout)
        {
        }
        EXPORT_SYMBOL(wait_for_completion_timeout);
        
 ++     /**
 ++      * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
 ++      * @x:  holds the state of this particular completion
 ++      *
 ++      * This waits for completion of a specific task to be signaled. It is
 ++      * interruptible.
 ++      */
        int __sched wait_for_completion_interruptible(struct completion *x)
        {
                long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
        }
        EXPORT_SYMBOL(wait_for_completion_interruptible);
        
 ++     /**
 ++      * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
 ++      * @x:  holds the state of this particular completion
 ++      * @timeout:  timeout value in jiffies
 ++      *
 ++      * This waits for either a completion of a specific task to be signaled or for a
 ++      * specified timeout to expire. It is interruptible. The timeout is in jiffies.
 ++      */
        unsigned long __sched
        wait_for_completion_interruptible_timeout(struct completion *x,
                                                  unsigned long timeout)
        }
        EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
        
 ++     /**
 ++      * wait_for_completion_killable: - waits for completion of a task (killable)
 ++      * @x:  holds the state of this particular completion
 ++      *
 ++      * This waits to be signaled for completion of a specific task. It can be
 ++      * interrupted by a kill signal.
 ++      */
        int __sched wait_for_completion_killable(struct completion *x)
        {
                long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
                         * Do not allow realtime tasks into groups that have no runtime
                         * assigned.
                         */
 --                     if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
 ++                     if (rt_bandwidth_enabled() && rt_policy(policy) &&
 ++                                     task_group(p)->rt_bandwidth.rt_runtime == 0)
                                return -EPERM;
        #endif
        
@@@@@@@@@ -5870,8 -5787,6 -5787,6 -5868,6 -5858,6 -5869,8 -5870,8 -5858,6 +5869,8 @@@@@@@@@ void __cpuinit init_idle(struct task_st
                struct rq *rq = cpu_rq(cpu);
                unsigned long flags;
        
 ++++  +        spin_lock_irqsave(&rq->lock, flags);
 ++++  +
                __sched_fork(idle);
                idle->se.exec_start = sched_clock();
        
                idle->cpus_allowed = cpumask_of_cpu(cpu);
                __set_task_cpu(idle, cpu);
        
 ----  -        spin_lock_irqsave(&rq->lock, flags);
                rq->curr = rq->idle = idle;
        #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
                idle->oncpu = 1;
@@@@@@@@@ -6041,7 -5957,7 -5957,7 -6038,7 -6028,7 -6040,7 -6041,7 -6028,7 +6040,7 @@@@@@@@@ static int __migrate_task(struct task_s
                set_task_cpu(p, dest_cpu);
                if (on_rq) {
                        activate_task(rq_dest, p, 0);
 --                     check_preempt_curr(rq_dest, p);
 ++                     check_preempt_curr(rq_dest, p, 0);
                }
        done:
                ret = 1;
@@@@@@@@@ -6366,7 -6282,7 -6282,7 -6363,7 -6353,7 -6365,7 -6366,7 -6353,7 +6365,7 @@@@@@@@@ set_table_entry(struct ctl_table *entry
        static struct ctl_table *
        sd_alloc_ctl_domain_table(struct sched_domain *sd)
        {
 --             struct ctl_table *table = sd_alloc_ctl_entry(12);
 ++             struct ctl_table *table = sd_alloc_ctl_entry(13);
        
                if (table == NULL)
                        return NULL;
                        sizeof(int), 0644, proc_dointvec_minmax);
                set_table_entry(&table[10], "flags", &sd->flags,
                        sizeof(int), 0644, proc_dointvec_minmax);
 --             /* &table[11] is terminator */
 ++             set_table_entry(&table[11], "name", sd->name,
 ++                     CORENAME_MAX_SIZE, 0444, proc_dostring);
 ++             /* &table[12] is terminator */
        
                return table;
        }
@@@@@@@@@ -6888,17 -6802,15 -6802,15 -6885,17 -6875,15 -6887,17 -6888,17 -6875,15 +6887,17 @@@@@@@@@ cpu_attach_domain(struct sched_domain *
                struct sched_domain *tmp;
        
                /* Remove the sched domains which do not contribute to scheduling. */
 -- -  -        for (tmp = sd; tmp; tmp = tmp->parent) {
 ++ +  +        for (tmp = sd; tmp; ) {
                        struct sched_domain *parent = tmp->parent;
                        if (!parent)
                                break;
 ++ +  +
                        if (sd_parent_degenerate(tmp, parent)) {
                                tmp->parent = parent->parent;
                                if (parent->parent)
                                        parent->parent->child = tmp;
 -- -  -                }
 ++ +  +                } else
 ++ +  +                        tmp = tmp->parent;
                }
        
                if (sd && sd_degenerate(sd)) {
@@@@@@@@@ -7282,21 -7194,13 -7194,13 -7279,21 -7267,21 -7281,21 -7282,21 -7267,21 +7281,21 @@@@@@@@@ static void init_sched_groups_power(in
         * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
         */
        
 ++     #ifdef CONFIG_SCHED_DEBUG
 ++     # define SD_INIT_NAME(sd, type)         sd->name = #type
 ++     #else
 ++     # define SD_INIT_NAME(sd, type)         do { } while (0)
 ++     #endif
 ++     
        #define SD_INIT(sd, type)       sd_init_##type(sd)
 ++     
        #define SD_INIT_FUNC(type)      \
        static noinline void sd_init_##type(struct sched_domain *sd)    \
        {                                                               \
                memset(sd, 0, sizeof(*sd));                             \
                *sd = SD_##type##_INIT;                                 \
                sd->level = SD_LV_##type;                               \
 ++             SD_INIT_NAME(sd, type);                                 \
        }
        
        SD_INIT_FUNC(CPU)
@@@@@@@@@ -7687,7 -7591,6 -7591,6 -7684,7 -7672,6 -7686,7 -7687,7 -7672,6 +7686,7 @@@@@@@@@ static int __build_sched_domains(const 
        error:
                free_sched_groups(cpu_map, tmpmask);
                SCHED_CPUMASK_FREE((void *)allmasks);
 ++ +  +        kfree(rd);
                return -ENOMEM;
        #endif
        }
@@@@@@@@@ -7789,14 -7692,13 -7692,13 -7786,13 -7773,13 -7788,13 -7789,14 -7773,13 +7788,14 @@@@@@@@@ static int dattrs_equal(struct sched_do
         *
         * The passed in 'doms_new' should be kmalloc'd. This routine takes
         * ownership of it and will kfree it when done with it. If the caller
 ----- - * failed the kmalloc call, then it can pass in doms_new == NULL,
 ----- - * and partition_sched_domains() will fallback to the single partition
 ----- - * 'fallback_doms', it also forces the domains to be rebuilt.
 +++++ + * failed the kmalloc call, then it can pass in doms_new == NULL &&
 +++++ + * ndoms_new == 1, and partition_sched_domains() will fallback to
 +++++ + * the single partition 'fallback_doms', it also forces the domains
 +++++ + * to be rebuilt.
         *
 ----- - * If doms_new==NULL it will be replaced with cpu_online_map.
 ----- - * ndoms_new==0 is a special case for destroying existing domains.
 ----- - * It will not create the default domain.
 +++++ + * If doms_new == NULL it will be replaced with cpu_online_map.
 +++++ + * ndoms_new == 0 is a special case for destroying existing domains,
 +++++ + * and it will not create the default domain.
         *
         * Call with hotplug lock held
         */
@@@@@@@@@ -8340,25 -8242,20 -8242,20 -8336,25 -8323,25 -8338,25 -8340,25 -8323,25 +8339,25 @@@@@@@@@ void __might_sleep(char *file, int line
        #ifdef in_atomic
                static unsigned long prev_jiffy;        /* ratelimiting */
        
 --             if ((in_atomic() || irqs_disabled()) &&
 --                 system_state == SYSTEM_RUNNING && !oops_in_progress) {
 --                     if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
 --                             return;
 --                     prev_jiffy = jiffies;
 --                     printk(KERN_ERR "BUG: sleeping function called from invalid"
 --                                     " context at %s:%d\n", file, line);
 --                     printk("in_atomic():%d, irqs_disabled():%d\n",
 --                             in_atomic(), irqs_disabled());
 --                     debug_show_held_locks(current);
 --                     if (irqs_disabled())
 --                             print_irqtrace_events(current);
 --                     dump_stack();
 --             }
 ++             if ((!in_atomic() && !irqs_disabled()) ||
 ++                         system_state != SYSTEM_RUNNING || oops_in_progress)
 ++                     return;
 ++             if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
 ++                     return;
 ++             prev_jiffy = jiffies;
 ++     
 ++             printk(KERN_ERR
 ++                     "BUG: sleeping function called from invalid context at %s:%d\n",
 ++                             file, line);
 ++             printk(KERN_ERR
 ++                     "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
 ++                             in_atomic(), irqs_disabled(),
 ++                             current->pid, current->comm);
 ++     
 ++             debug_show_held_locks(current);
 ++             if (irqs_disabled())
 ++                     print_irqtrace_events(current);
 ++             dump_stack();
        #endif
        }
        EXPORT_SYMBOL(__might_sleep);
@@@@@@@@@ -8856,95 -8753,73 -8753,73 -8852,95 -8839,95 -8854,95 -8856,95 -8839,95 +8855,95 @@@@@@@@@ static DEFINE_MUTEX(rt_constraints_mute
        static unsigned long to_ratio(u64 period, u64 runtime)
        {
                if (runtime == RUNTIME_INF)
 --                     return 1ULL << 16;
 ++                     return 1ULL << 20;
        
 --             return div64_u64(runtime << 16, period);
 ++             return div64_u64(runtime << 20, period);
        }
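
to_ratio() now works in 20-bit instead of 16-bit fixed point, giving the schedulability checks below finer resolution. A quick worked example with the default global limits (period 1s, runtime 0.95s); the figures are an editor's calculation, not taken from the patch:

        /*
         * Worked example (illustrative): with sysctl_sched_rt_period_us = 1000000
         * and sysctl_sched_rt_runtime_us = 950000,
         *
         *     to_ratio(1000000000, 950000000) = (950000000 << 20) / 1000000000
         *                                     = 996147  of a full scale of 1 << 20 = 1048576
         *
         * i.e. ~95% in 20-bit fixed point; the old 16-bit scale would have
         * yielded 62259 of 65536, so per-group ratios are now compared with
         * 16x more resolution.
         */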
        
 --     #ifdef CONFIG_CGROUP_SCHED
 --     static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 ++     /* Must be called with tasklist_lock held */
 ++     static inline int tg_has_rt_tasks(struct task_group *tg)
        {
 --             struct task_group *tgi, *parent = tg->parent;
 --             unsigned long total = 0;
 ++             struct task_struct *g, *p;
        
 --             if (!parent) {
 --                     if (global_rt_period() < period)
 --                             return 0;
 ++             do_each_thread(g, p) {
 ++                     if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
 ++                             return 1;
 ++             } while_each_thread(g, p);
        
 --                     return to_ratio(period, runtime) <
 --                             to_ratio(global_rt_period(), global_rt_runtime());
 --             }
 ++             return 0;
 ++     }
        
 --             if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
 --                     return 0;
 ++     struct rt_schedulable_data {
 ++             struct task_group *tg;
 ++             u64 rt_period;
 ++             u64 rt_runtime;
 ++     };
        
 --             rcu_read_lock();
 --             list_for_each_entry_rcu(tgi, &parent->children, siblings) {
 --                     if (tgi == tg)
 --                             continue;
 ++     static int tg_schedulable(struct task_group *tg, void *data)
 ++     {
 ++             struct rt_schedulable_data *d = data;
 ++             struct task_group *child;
 ++             unsigned long total, sum = 0;
 ++             u64 period, runtime;
        
 --                     total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
 --                                     tgi->rt_bandwidth.rt_runtime);
 ++             period = ktime_to_ns(tg->rt_bandwidth.rt_period);
 ++             runtime = tg->rt_bandwidth.rt_runtime;
 ++     
 ++             if (tg == d->tg) {
 ++                     period = d->rt_period;
 ++                     runtime = d->rt_runtime;
                }
 --             rcu_read_unlock();
        
 --             return total + to_ratio(period, runtime) <=
 --                     to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
 --                                     parent->rt_bandwidth.rt_runtime);
 --     }
 --     #elif defined CONFIG_USER_SCHED
 --     static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 --     {
 --             struct task_group *tgi;
 --             unsigned long total = 0;
 --             unsigned long global_ratio =
 --                     to_ratio(global_rt_period(), global_rt_runtime());
 ++             /*
 ++              * Cannot have more runtime than the period.
 ++              */
 ++             if (runtime > period && runtime != RUNTIME_INF)
 ++                     return -EINVAL;
        
 --             rcu_read_lock();
 --             list_for_each_entry_rcu(tgi, &task_groups, list) {
 --                     if (tgi == tg)
 --                             continue;
 ++             /*
 ++              * Ensure we don't starve existing RT tasks.
 ++              */
 ++             if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
 ++                     return -EBUSY;
 ++     
 ++             total = to_ratio(period, runtime);
        
 --                     total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
 --                                     tgi->rt_bandwidth.rt_runtime);
 ++             /*
 ++              * Nobody can have more than the global setting allows.
 ++              */
 ++             if (total > to_ratio(global_rt_period(), global_rt_runtime()))
 ++                     return -EINVAL;
 ++     
 ++             /*
 ++              * The sum of our children's runtime should not exceed our own.
 ++              */
 ++             list_for_each_entry_rcu(child, &tg->children, siblings) {
 ++                     period = ktime_to_ns(child->rt_bandwidth.rt_period);
 ++                     runtime = child->rt_bandwidth.rt_runtime;
 ++     
 ++                     if (child == d->tg) {
 ++                             period = d->rt_period;
 ++                             runtime = d->rt_runtime;
 ++                     }
 ++     
 ++                     sum += to_ratio(period, runtime);
                }
 --             rcu_read_unlock();
        
 --             return total + to_ratio(period, runtime) < global_ratio;
 ++             if (sum > total)
 ++                     return -EINVAL;
 ++     
 ++             return 0;
        }
 --     #endif
        
 --     /* Must be called with tasklist_lock held */
 --     static inline int tg_has_rt_tasks(struct task_group *tg)
 ++     static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
        {
 --             struct task_struct *g, *p;
 --             do_each_thread(g, p) {
 --                     if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
 --                             return 1;
 --             } while_each_thread(g, p);
 --             return 0;
 ++             struct rt_schedulable_data data = {
 ++                     .tg = tg,
 ++                     .rt_period = period,
 ++                     .rt_runtime = runtime,
 ++             };
 ++     
 ++             return walk_tg_tree(tg_schedulable, tg_nop, &data);
        }
        
        static int tg_set_bandwidth(struct task_group *tg,
        
                mutex_lock(&rt_constraints_mutex);
                read_lock(&tasklist_lock);
 --             if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
 --                     err = -EBUSY;
 --                     goto unlock;
 --             }
 --             if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
 --                     err = -EINVAL;
 ++             err = __rt_schedulable(tg, rt_period, rt_runtime);
 ++             if (err)
                        goto unlock;
 --             }
        
                spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
                tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@@@@@@@@ -9025,25 -8905,16 -8905,19 -9021,25 -9008,25 -9023,25 -9025,25 -9008,25 +9024,25 @@@@@@@@@ long sched_group_rt_period(struct task_
        
        static int sched_rt_global_constraints(void)
        {
 --             struct task_group *tg = &root_task_group;
 --             u64 rt_runtime, rt_period;
 ++             u64 runtime, period;
                int ret = 0;
        
 -              rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
 -              rt_runtime = tg->rt_bandwidth.rt_runtime;
 +              if (sysctl_sched_rt_period <= 0)
 +                      return -EINVAL;
 +      
  -             rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
  -             rt_runtime = tg->rt_bandwidth.rt_runtime;
 ++             runtime = global_rt_runtime();
 ++             period = global_rt_period();
 ++     
 ++             /*
 ++              * Sanity check on the sysctl variables.
 ++              */
 ++             if (runtime > period && runtime != RUNTIME_INF)
 ++                     return -EINVAL;
        
                mutex_lock(&rt_constraints_mutex);
 --             if (!__rt_schedulable(tg, rt_period, rt_runtime))
 --                     ret = -EINVAL;
 ++             read_lock(&tasklist_lock);
 ++             ret = __rt_schedulable(NULL, 0, 0);
 ++             read_unlock(&tasklist_lock);
                mutex_unlock(&rt_constraints_mutex);
        
                return ret;
@@@@@@@@@ -9054,9 -8925,6 -8928,9 -9050,9 -9037,9 -9052,9 -9054,9 -9037,9 +9053,9 @@@@@@@@@ static int sched_rt_global_constraints(
                unsigned long flags;
                int i;
        
 +              if (sysctl_sched_rt_period <= 0)
 +                      return -EINVAL;
 +      
                spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
                for_each_possible_cpu(i) {
                        struct rt_rq *rt_rq = &cpu_rq(i)->rt;
@@@@@@@@@ -9117,6 -8985,7 -8991,7 -9113,6 -9100,6 -9115,6 -9117,6 -9100,6 +9116,6 @@@@@@@@@ cpu_cgroup_create(struct cgroup_subsys 
        
                if (!cgrp->parent) {
                        /* This is early initialization for the top cgroup */
 --                     init_task_group.css.cgroup = cgrp;
                        return &init_task_group.css;
                }
        
                if (IS_ERR(tg))
                        return ERR_PTR(-ENOMEM);
        
 --             /* Bind the cgroup to task_group object we just created */
 --             tg->css.cgroup = cgrp;
 --     
                return &tg->css;
        }
        
diff --combined kernel/softlockup.c
index 3953e4aed733d32284f48ecfffa1fbff05e234c6,b9a528f22736adcfa78d9cd99209ba286d585a54,cb838ee93a82000bc5b313271487836e9fb5804e,3953e4aed733d32284f48ecfffa1fbff05e234c6,3953e4aed733d32284f48ecfffa1fbff05e234c6,3953e4aed733d32284f48ecfffa1fbff05e234c6,3953e4aed733d32284f48ecfffa1fbff05e234c6,3953e4aed733d32284f48ecfffa1fbff05e234c6..884e6cd2769c348a846d103122b6e49e20acae85
@@@@@@@@@ -164,7 -164,7 -164,7 -164,7 -164,7 -164,7 -164,7 -164,7 +164,7 @@@@@@@@@ unsigned long __read_mostly sysctl_hung
        /*
         * Zero means infinite timeout - no checking done:
         */
- ------unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+ ++++++unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
        
        unsigned long __read_mostly sysctl_hung_task_warnings = 10;
        
@@@@@@@@@ -226,7 -226,7 -226,7 -226,7 -226,7 -226,7 -226,7 -226,7 +226,7 @@@@@@@@@ static void check_hung_uninterruptible_
                 * If the system crashed already then all bets are off,
                 * do not report extra hung tasks:
                 */
 --             if ((tainted & TAINT_DIE) || did_panic)
 ++             if (test_taint(TAINT_DIE) || did_panic)
                        return;
        
                read_lock(&tasklist_lock);
diff --combined lib/Kconfig.debug
index b0f239e443bc0fbb11a27ee98dbaf4e641d21971,4116e10ea14ace405520b27d41c12e2b751e63b6,0b504814e378067ff120b266e5b26c9fdd6fb90e,b0f239e443bc0fbb11a27ee98dbaf4e641d21971,b0f239e443bc0fbb11a27ee98dbaf4e641d21971,b0f239e443bc0fbb11a27ee98dbaf4e641d21971,b0f239e443bc0fbb11a27ee98dbaf4e641d21971,b0f239e443bc0fbb11a27ee98dbaf4e641d21971..1e3fd3e3436abf6e142c9f0b46cc6993cd19ec38
@@@@@@@@@ -495,15 -495,6 -495,6 -495,15 -495,15 -495,15 -495,15 -495,15 +495,15 @@@@@@@@@ config DEBUG_V
        
                  If unsure, say N.
        
 ++     config DEBUG_VIRTUAL
 ++             bool "Debug VM translations"
 ++             depends on DEBUG_KERNEL && X86
 ++             help
 ++               Enable some costly sanity checks in virtual to page code. This can
 ++               catch mistakes with virt_to_page() and friends.
 ++     
 ++               If unsure, say N.
 ++     
        config DEBUG_WRITECOUNT
                bool "Debug filesystem writers count"
                depends on DEBUG_KERNEL
@@@@@@@@@ -545,6 -536,16 -536,6 -545,6 -545,6 -545,6 -545,6 -545,6 +545,16 @@@@@@@@@ config DEBUG_S
        
                  If unsure, say N.
        
+ ++++++config DEBUG_NOTIFIERS
+ ++++++        bool "Debug notifier call chains"
+ ++++++        depends on DEBUG_KERNEL
+ ++++++        help
+ ++++++          Enable this to turn on sanity checking for notifier call chains.
+ ++++++          This is most useful for kernel developers to make sure that
+ ++++++          modules properly unregister themselves from notifier chains.
+ ++++++          This is a relatively cheap check but if you care about maximum
+ ++++++          performance, say N.
+ ++++++
        config FRAME_POINTER
                bool "Compile the kernel with frame pointers"
                depends on DEBUG_KERNEL && \
@@@@@@@@@ -606,19 -607,6 -597,6 -606,19 -606,19 -606,19 -606,19 -606,19 +616,19 @@@@@@@@@ config RCU_TORTURE_TEST_RUNNABL
                  Say N here if you want the RCU torture tests to start only
                  after being manually enabled via /proc.
        
 ++     config RCU_CPU_STALL_DETECTOR
 ++             bool "Check for stalled CPUs delaying RCU grace periods"
 ++             depends on CLASSIC_RCU
 ++             default n
 ++             help
 ++               This option causes RCU to printk information on which
 ++               CPUs are delaying the current grace period, but only when
 ++               the grace period extends for excessive time periods.
 ++     
 ++               Say Y if you want RCU to perform such checks.
 ++     
 ++               Say N if you are unsure.
 ++     
        config KPROBES_SANITY_TEST
                bool "Kprobes sanity tests"
                depends on DEBUG_KERNEL
@@@@@@@@@ -646,33 -634,6 -624,6 -646,33 -646,33 -646,33 -646,33 -646,33 +656,33 @@@@@@@@@ config BACKTRACE_SELF_TES
        
                  Say N if you are unsure.
        
 ++     config DEBUG_BLOCK_EXT_DEVT
 ++             bool "Force extended block device numbers and spread them"
 ++             depends on DEBUG_KERNEL
 ++             depends on BLOCK
 ++             default n
 ++             help
 ++               BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
 ++               SOME DISTRIBUTIONS.  DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
 ++               YOU ARE DOING.  Distros, please enable this and fix whatever
 ++               is broken.
 ++     
 ++               Conventionally, block device numbers are allocated from
 ++               predetermined contiguous area.  However, extended block area
 ++               may introduce non-contiguous block device numbers.  This
 ++               option forces most block device numbers to be allocated from
 ++               the extended space and spreads them to discover kernel or
 ++               userland code paths which assume predetermined contiguous
 ++               device number allocation.
 ++     
 ++               Note that turning on this debug option shuffles all the
 ++               device numbers for all IDE and SCSI devices including libata
 ++               ones, so root partition specified using device number
 ++               directly (via rdev or root=MAJ:MIN) won't work anymore.
 ++               Textual device names (root=/dev/sdXn) will continue to work.
 ++     
 ++               Say N if you are unsure.
 ++     
        config LKDTM
                tristate "Linux Kernel Dump Test Tool Module"
                depends on DEBUG_KERNEL
@@@@@@@@@ -710,21 -671,10 -661,10 -710,21 -710,21 -710,21 -710,21 -710,21 +720,21 @@@@@@@@@ config FAIL_PAGE_ALLO
        
        config FAIL_MAKE_REQUEST
                bool "Fault-injection capability for disk IO"
 --             depends on FAULT_INJECTION
 ++             depends on FAULT_INJECTION && BLOCK
                help
                  Provide fault-injection capability for disk IO.
        
 ++     config FAIL_IO_TIMEOUT
 ++             bool "Faul-injection capability for faking disk interrupts"
 ++             depends on FAULT_INJECTION && BLOCK
 ++             help
 ++               Provide fault-injection capability on end IO handling. This
 ++               will make the block layer "forget" an interrupt as configured,
 ++               thus exercising the error handling.
 ++     
 ++               Only works with drivers that use the generic timeout handling,
 ++               for others it won't do anything.
 ++     
        config FAULT_INJECTION_DEBUG_FS
                bool "Debugfs entries for fault-injection capabilities"
                depends on FAULT_INJECTION && SYSFS && DEBUG_FS
@@@@@@@@@ -812,61 -762,6 -752,6 -812,61 -812,61 -812,61 -812,61 -812,61 +822,61 @@@@@@@@@ menuconfig BUILD_DOCSR
        
                  Say N if you are unsure.
        
 ++     config DYNAMIC_PRINTK_DEBUG
 ++             bool "Enable dynamic printk() call support"
 ++             default n
 ++             depends on PRINTK
 ++             select PRINTK_DEBUG
 ++             help
 ++     
 ++               Compiles debug level messages into the kernel, which would not
 ++               otherwise be available at runtime. These messages can then be
 ++               enabled/disabled on a per module basis. This mechanism implicitly
 ++               enables all pr_debug() and dev_dbg() calls. The impact of this
 ++               compile option is a larger kernel text size of about 2%.
 ++     
 ++               Usage:
 ++     
 ++               Dynamic debugging is controlled by the debugfs file,
 ++               dynamic_printk/modules. This file contains a list of the modules that
 ++               can be enabled. The format of the file is the module name, followed
 ++               by a set of flags that can be enabled. The first flag is always the
 ++               'enabled' flag. For example:
 ++     
 ++                     <module_name> <enabled=0/1>
 ++                                     .
 ++                                     .
 ++                                     .
 ++     
 ++               <module_name> : Name of the module in which the debug call resides
 ++               <enabled=0/1> : whether the messages are enabled or not
 ++     
 ++               From a live system:
 ++     
 ++                     snd_hda_intel enabled=0
 ++                     fixup enabled=0
 ++                     driver enabled=0
 ++     
 ++               Enable a module:
 ++     
 ++                     $echo "set enabled=1 <module_name>" > dynamic_printk/modules
 ++     
 ++               Disable a module:
 ++     
 ++                     $echo "set enabled=0 <module_name>" > dynamic_printk/modules
 ++     
 ++               Enable all modules:
 ++     
 ++                     $echo "set enabled=1 all" > dynamic_printk/modules
 ++     
 ++               Disable all modules:
 ++     
 ++                     $echo "set enabled=0 all" > dynamic_printk/modules
 ++     
 ++               Finally, passing "dynamic_printk" at the command line enables
 ++               debugging for all modules. This mode can be turned off via the above
 ++               disable command.
 ++     
        source "samples/Kconfig"
        
        source "lib/Kconfig.kgdb"