www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branches 'core/debug', 'core/futexes', 'core/locking', 'core/rcu', 'core/signal...
author Ingo Molnar <mingo@elte.hu>
Mon, 24 Nov 2008 16:44:55 +0000 (17:44 +0100)
committer Ingo Molnar <mingo@elte.hu>
Mon, 24 Nov 2008 16:44:55 +0000 (17:44 +0100)
arch/x86/include/asm/uaccess_64.h
include/linux/kernel.h
kernel/exit.c
kernel/futex.c
kernel/lockdep.c
kernel/notifier.c
kernel/sched.c
kernel/softlockup.c
lib/Kconfig.debug

diff --combined arch/x86/include/asm/uaccess_64.h
index f8cfd00db450f2e0f948ce7128a2d44867aebf59,515d4dce96b598bc6e9d07dba21332a44924948c,515d4dce96b598bc6e9d07dba21332a44924948c,543ba883cc66200ff0e2206aec36b434b63d3695,664f15280f14354dc057e1d97954db6baab4b959,664f15280f14354dc057e1d97954db6baab4b959,f8cfd00db450f2e0f948ce7128a2d44867aebf59,c96c1f5d07a2c88e4f5ca547ea0f7154ac68583c..84210c479fca83524c6cef4c6bc069bcff76e272
@@@@@@@@@ -1,5 -1,5 -1,5 -1,5 -1,5 -1,5 -1,5 -1,5 +1,5 @@@@@@@@@
 --     #ifndef __X86_64_UACCESS_H
 --     #define __X86_64_UACCESS_H
       -#ifndef ASM_X86__UACCESS_64_H
       -#define ASM_X86__UACCESS_64_H
 ++    +#ifndef _ASM_X86_UACCESS_64_H
 ++    +#define _ASM_X86_UACCESS_64_H
        
        /*
         * User space memory access functions
@@@@@@@@@ -7,7 -7,6 -7,6 -7,7 -7,7 -7,7 -7,7 -7,7 +7,7 @@@@@@@@@
        #include <linux/compiler.h>
        #include <linux/errno.h>
        #include <linux/prefetch.h>
 ++     #include <linux/lockdep.h>
        #include <asm/page.h>
        
        /*
@@@@@@@@@ -29,6 -28,6 -28,6 -29,8 -29,6 -29,6 -29,6 -29,6 +29,8 @@@@@@@@@ static __always_inline __must_chec
        int __copy_from_user(void *dst, const void __user *src, unsigned size)
        {
                int ret = 0;
+++ ++++
+++ ++++        might_fault();
                if (!__builtin_constant_p(size))
                        return copy_user_generic(dst, (__force void *)src, size);
                switch (size) {
                        return ret;
                case 10:
                        __get_user_asm(*(u64 *)dst, (u64 __user *)src,
 ----- -                               ret, "q", "", "=r", 16);
 +++++ +                               ret, "q", "", "=r", 10);
                        if (unlikely(ret))
                                return ret;
                        __get_user_asm(*(u16 *)(8 + (char *)dst),
@@@@@@@@@ -71,6 -70,6 -70,6 -73,8 -71,6 -71,6 -71,6 -71,6 +73,8 @@@@@@@@@ static __always_inline __must_chec
        int __copy_to_user(void __user *dst, const void *src, unsigned size)
        {
                int ret = 0;
+++ ++++
+++ ++++        might_fault();
                if (!__builtin_constant_p(size))
                        return copy_user_generic((__force void *)dst, src, size);
                switch (size) {
@@@@@@@@@ -113,6 -112,6 -112,6 -117,8 -113,6 -113,6 -113,6 -113,6 +117,8 @@@@@@@@@ static __always_inline __must_chec
        int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
        {
                int ret = 0;
+++ ++++
+++ ++++        might_fault();
                if (!__builtin_constant_p(size))
                        return copy_user_generic((__force void *)dst,
                                                 (__force void *)src, size);
@@@@@@@@@ -199,4 -198,4 -198,4 -205,4 -199,4 -199,4 -199,4 -199,4 +205,4 @@@@@@@@@ static inline int __copy_from_user_inat
        unsigned long
        copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
        
 --     #endif /* __X86_64_UACCESS_H */
       -#endif /* ASM_X86__UACCESS_64_H */
 ++    +#endif /* _ASM_X86_UACCESS_64_H */
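
The hunks above add an unconditional might_fault() at the top of the __copy_*_user() helpers; the kernel.h section below supplies the fallback that maps it to might_sleep() when CONFIG_PROVE_LOCKING is off. The following is a minimal user-space sketch of that annotation pattern only, not the kernel implementation: a hook that is checked in debug builds and compiles away otherwise, with a plain flag standing in for the kernel's atomic-context tracking and DEBUG_ANNOTATIONS being a made-up switch.

        #include <assert.h>
        #include <string.h>

        static int in_atomic_context;            /* stand-in for preempt/irq state */

        #ifdef DEBUG_ANNOTATIONS                 /* hypothetical debug switch */
        # define might_fault() assert(!in_atomic_context)
        #else
        # define might_fault() do { } while (0)  /* compiles away otherwise */
        #endif

        /* Annotated copy helper: the check fires even on paths where the copy
         * never actually faults, which is the point of placing it up front. */
        static void copy_from_user_sketch(void *dst, const void *src, unsigned long size)
        {
                might_fault();
                memcpy(dst, src, size);
        }
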
diff --combined include/linux/kernel.h
index dc7e0d0a6474448aba71b4c32d2045afc44e240e,3f30557be2a3f2e209a2abd60c21ddef9086fc96,2651f805ba6d771b9ec1f26078609aebdb198853,69a9bfdf9c86d9de03919277d108fe242a4d1e4f,fba141d3ca0783303c661f39fb2c503ba418dc56,fba141d3ca0783303c661f39fb2c503ba418dc56,dc7e0d0a6474448aba71b4c32d2045afc44e240e,94d17ff64c5a3b48c0f6716d29b9832d6a896551..269df5a17b30af1b7349c131da05abec8aa95046
        #include <linux/log2.h>
        #include <linux/typecheck.h>
        #include <linux/ratelimit.h>
 ++     #include <linux/dynamic_printk.h>
        #include <asm/byteorder.h>
        #include <asm/bug.h>
        
@@@@@@@@@ -116,8 -115,6 -115,6 -116,8 -116,8 -116,8 -116,8 -116,6 +116,8 @@@@@@@@@ extern int _cond_resched(void)
        # define might_resched() do { } while (0)
        #endif
        
 ++    +#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 ++    +  void __might_sleep(char *file, int line);
        /**
         * might_sleep - annotation for functions that can sleep
         *
         * be bitten later when the calling function happens to sleep when it is not
         * supposed to.
         */
 --    -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 --    -  void __might_sleep(char *file, int line);
        # define might_sleep() \
                do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
        #else
                        (__x < 0) ? -__x : __x;         \
                })
        
+++ ++++#ifdef CONFIG_PROVE_LOCKING
+++ ++++void might_fault(void);
+++ ++++#else
+++ ++++static inline void might_fault(void)
+++ ++++{
+++ ++++        might_sleep();
+++ ++++}
+++ ++++#endif
+++ ++++
        extern struct atomic_notifier_head panic_notifier_list;
        extern long (*panic_blink)(long time);
        NORET_TYPE void panic(const char * fmt, ...)
@@@@@@@@@ -183,38 -182,16 -182,14 -192,38 -183,38 -183,38 -183,38 -183,14 +192,40 @@@@@@@@@ extern int vsscanf(const char *, const 
        
        extern int get_option(char **str, int *pint);
        extern char *get_options(const char *str, int nints, int *ints);
 --     extern unsigned long long memparse(char *ptr, char **retptr);
 ++     extern unsigned long long memparse(const char *ptr, char **retptr);
        
        extern int core_kernel_text(unsigned long addr);
        extern int __kernel_text_address(unsigned long addr);
        extern int kernel_text_address(unsigned long addr);
+ ++++++extern int func_ptr_is_kernel_text(void *ptr);
+ ++++++
        struct pid;
        extern struct pid *session_of_pgrp(struct pid *pgrp);
        
 ++    +/*
 ++    + * FW_BUG
 ++    + * Add this to a message where you are sure the firmware is buggy or behaves
 ++    + * really stupid or out of spec. Be aware that the responsible BIOS developer
 ++    + * should be able to fix this issue or at least get a concrete idea of the
 ++    + * problem by reading your message without the need of looking at the kernel
 ++    + * code.
 ++    + * 
 ++    + * Use it for definite and high priority BIOS bugs.
 ++    + *
 ++    + * FW_WARN
 ++    + * Use it for not that clear (e.g. could the kernel messed up things already?)
 ++    + * and medium priority BIOS bugs.
 ++    + *
 ++    + * FW_INFO
 ++    + * Use this one if you want to tell the user or vendor about something
 ++    + * suspicious, but generally harmless related to the firmware.
 ++    + *
 ++    + * Use it for information or very low priority BIOS bugs.
 ++    + */
 ++    +#define FW_BUG          "[Firmware Bug]: "
 ++    +#define FW_WARN         "[Firmware Warn]: "
 ++    +#define FW_INFO         "[Firmware Info]: "
 ++    +
        #ifdef CONFIG_PRINTK
        asmlinkage int vprintk(const char *fmt, va_list args)
                __attribute__ ((format (printf, 1, 0)));
@@@@@@@@@ -238,9 -215,6 -213,6 -247,9 -238,9 -238,9 -238,9 -214,9 +249,9 @@@@@@@@@ static inline bool printk_timed_ratelim
                        { return false; }
        #endif
        
 ++     extern int printk_needs_cpu(int cpu);
 ++     extern void printk_tick(void);
 ++     
        extern void asmlinkage __attribute__((format(printf, 1, 2)))
                early_printk(const char *fmt, ...);
        
@@@@@@@@@ -263,10 -237,9 -235,9 -272,10 -263,10 -263,10 -263,10 -239,10 +274,10 @@@@@@@@@ extern int oops_in_progress;          /* If set
        extern int panic_timeout;
        extern int panic_on_oops;
        extern int panic_on_unrecovered_nmi;
 --     extern int tainted;
        extern const char *print_tainted(void);
 --     extern void add_taint(unsigned);
 ++     extern void add_taint(unsigned flag);
 ++     extern int test_taint(unsigned flag);
 ++     extern unsigned long get_taint(void);
        extern int root_mountflags;
        
        /* Values used for system_state */
@@@@@@@@@ -279,17 -252,16 -250,16 -288,17 -279,17 -279,17 -279,17 -255,17 +290,17 @@@@@@@@@ extern enum system_states 
                SYSTEM_SUSPEND_DISK,
        } system_state;
        
 --     #define TAINT_PROPRIETARY_MODULE        (1<<0)
 --     #define TAINT_FORCED_MODULE             (1<<1)
 --     #define TAINT_UNSAFE_SMP                (1<<2)
 --     #define TAINT_FORCED_RMMOD              (1<<3)
 --     #define TAINT_MACHINE_CHECK             (1<<4)
 --     #define TAINT_BAD_PAGE                  (1<<5)
 --     #define TAINT_USER                      (1<<6)
 --     #define TAINT_DIE                       (1<<7)
 --     #define TAINT_OVERRIDDEN_ACPI_TABLE     (1<<8)
 --     #define TAINT_WARN                      (1<<9)
 ++     #define TAINT_PROPRIETARY_MODULE        0
 ++     #define TAINT_FORCED_MODULE             1
 ++     #define TAINT_UNSAFE_SMP                2
 ++     #define TAINT_FORCED_RMMOD              3
 ++     #define TAINT_MACHINE_CHECK             4
 ++     #define TAINT_BAD_PAGE                  5
 ++     #define TAINT_USER                      6
 ++     #define TAINT_DIE                       7
 ++     #define TAINT_OVERRIDDEN_ACPI_TABLE     8
 ++     #define TAINT_WARN                      9
 ++     #define TAINT_CRAP                      10
        
        extern void dump_stack(void) __cold;
        
@@@@@@@@@ -318,36 -290,28 -288,28 -327,32 -318,32 -318,32 -318,36 -294,32 +329,36 @@@@@@@@@ static inline char *pack_hex_byte(char 
                return buf;
        }
        
 ----- -#define pr_emerg(fmt, arg...) \
 ----- -        printk(KERN_EMERG fmt, ##arg)
 ----- -#define pr_alert(fmt, arg...) \
 ----- -        printk(KERN_ALERT fmt, ##arg)
 ----- -#define pr_crit(fmt, arg...) \
 ----- -        printk(KERN_CRIT fmt, ##arg)
 ----- -#define pr_err(fmt, arg...) \
 ----- -        printk(KERN_ERR fmt, ##arg)
 ----- -#define pr_warning(fmt, arg...) \
 ----- -        printk(KERN_WARNING fmt, ##arg)
 ----- -#define pr_notice(fmt, arg...) \
 ----- -        printk(KERN_NOTICE fmt, ##arg)
 ----- -#define pr_info(fmt, arg...) \
 ----- -        printk(KERN_INFO fmt, ##arg)
 --     
 --     #ifdef DEBUG
 +++++ +#ifndef pr_fmt
 +++++ +#define pr_fmt(fmt) fmt
 +++++ +#endif
 +++++ +
 +++++ +#define pr_emerg(fmt, ...) \
 +++++ +        printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_alert(fmt, ...) \
 +++++ +        printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_crit(fmt, ...) \
 +++++ +        printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_err(fmt, ...) \
 +++++ +        printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_warning(fmt, ...) \
 +++++ +        printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_notice(fmt, ...) \
 +++++ +        printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
 +++++ +#define pr_info(fmt, ...) \
 +++++ +        printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 ++     
        /* If you are writing a driver, please use dev_dbg instead */
 --     #define pr_debug(fmt, arg...) \
 --             printk(KERN_DEBUG fmt, ##arg)
 ++     #if defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
 ++     #define pr_debug(fmt, ...) do { \
   --- -        dynamic_pr_debug(fmt, ##__VA_ARGS__); \
 +++++ +        dynamic_pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
 ++             } while (0)
 ++     #elif defined(DEBUG)
   --- -#define pr_debug(fmt, arg...) \
   --- -        printk(KERN_DEBUG fmt, ##arg)
 +++++ +#define pr_debug(fmt, ...) \
 +++++ +        printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
        #else
 ----- -#define pr_debug(fmt, arg...) \
 ----- -        ({ if (0) printk(KERN_DEBUG fmt, ##arg); 0; })
 +++++ +#define pr_debug(fmt, ...) \
 +++++ +        ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
        #endif
        
        /*
@@@@@@@@@ -524,9 -488,4 -486,4 -529,9 -520,9 -520,9 -524,9 -496,9 +535,9 @@@@@@@@@ struct sysinfo 
        #define NUMA_BUILD 0
        #endif
        
 ++     /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
 ++     #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 ++     # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
 ++     #endif
 ++     
        #endif
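
Among the kernel.h changes above, the pr_*() macros now expand their format string through pr_fmt(), with a default identity definition, so a single #define at the top of a .c file prefixes every message that file prints. A minimal user-space sketch of that convention, with printf standing in for printk and "mydrv: " as a made-up prefix:

        #include <stdio.h>

        /* Per-file prefix; as with the #ifndef default above, this must be
         * defined before the pr_*() macros are pulled in. */
        #define pr_fmt(fmt) "mydrv: " fmt

        #define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

        int main(void)
        {
                pr_info("found %d ports\n", 2);  /* prints "mydrv: found 2 ports" */
                return 0;
        }
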
diff --combined kernel/exit.c
index 2d8be7ebb0f73499f894a1828fd827f0217290f1,16395644a98ff8c060b2f5fd776fe7abadd61c6a,85a83c831856c193570e40a3b7d3e03ef8862d1c,ae2b92be5faec1efa73beefb63304a22d030fc16,80137a5d9467811ba4dab35c6e95790002a5f12a,b9c4d8bb72e5aedb49b9aab8ff96dd7ff8380f9e,2d8be7ebb0f73499f894a1828fd827f0217290f1,80137a5d9467811ba4dab35c6e95790002a5f12a..30fcdf16737a2bb013a78b62bc6b7f1eb6eb165b
        #include <linux/cn_proc.h>
        #include <linux/mutex.h>
        #include <linux/futex.h>
 ----- -#include <linux/compat.h>
        #include <linux/pipe_fs_i.h>
        #include <linux/audit.h> /* for audit_free() */
        #include <linux/resource.h>
        #include <linux/blkdev.h>
        #include <linux/task_io_accounting_ops.h>
        #include <linux/tracehook.h>
 ++     #include <trace/sched.h>
        
        #include <asm/uaccess.h>
        #include <asm/unistd.h>
@@@@@@@@@ -112,6 -112,8 -112,8 -113,6 -113,6 -113,6 -112,6 -113,6 +112,6 @@@@@@@@@ static void __exit_signal(struct task_s
                         * We won't ever get here for the group leader, since it
                         * will have been the last reference on the signal_struct.
                         */
 --                     sig->utime = cputime_add(sig->utime, task_utime(tsk));
 --                     sig->stime = cputime_add(sig->stime, task_stime(tsk));
                        sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
                        sig->min_flt += tsk->min_flt;
                        sig->maj_flt += tsk->maj_flt;
                        sig->inblock += task_io_get_inblock(tsk);
                        sig->oublock += task_io_get_oublock(tsk);
                        task_io_accounting_add(&sig->ioac, &tsk->ioac);
 --                     sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
                        sig = NULL; /* Marker for below. */
                }
        
                if (sig) {
                        flush_sigqueue(&sig->shared_pending);
                        taskstats_tgid_free(sig);
 ++ +  +                /*
 ++ +  +                 * Make sure ->signal can't go away under rq->lock,
 ++ +  +                 * see account_group_exec_runtime().
 ++ +  +                 */
 ++ +  +                task_rq_unlock_wait(tsk);
                        __cleanup_signal(sig);
                }
        }
        
        static void delayed_put_task_struct(struct rcu_head *rhp)
        {
 --             put_task_struct(container_of(rhp, struct task_struct, rcu));
 ++             struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 ++     
 ++             trace_sched_process_free(tsk);
 ++             put_task_struct(tsk);
        }
        
        
@@@@@@@@@ -588,6 -583,8 -583,6 -589,6 -584,6 -589,6 -588,6 -584,6 +588,6 @@@@@@@@@ mm_need_new_owner(struct mm_struct *mm
                 * If there are other users of the mm and the owner (us) is exiting
                 * we need to find a new owner to take on the responsibility.
                 */
 -              if (!mm)
 -                      return 0;
                if (atomic_read(&mm->mm_users) <= 1)
                        return 0;
                if (mm->owner != p)
                } while_each_thread(g, c);
        
                read_unlock(&tasklist_lock);
 +              /*
 +               * We found no owner yet mm_users > 1: this implies that we are
 +               * most likely racing with swapoff (try_to_unuse()) or /proc or
 +               * ptrace or page migration (get_task_mm()).  Mark owner as NULL,
 +               * so that subsystems can understand the callback and take action.
 +               */
 +              down_write(&mm->mmap_sem);
 +              cgroup_mm_owner_callbacks(mm->owner, NULL);
 +              mm->owner = NULL;
 +              up_write(&mm->mmap_sem);
                return;
        
        assign_new_owner:
                BUG_ON(c == p);
                get_task_struct(c);
 ++             read_unlock(&tasklist_lock);
 ++             down_write(&mm->mmap_sem);
                /*
                 * The task_lock protects c->mm from changing.
                 * We always want mm->owner->mm == mm
                 */
                task_lock(c);
 --             /*
 --              * Delay read_unlock() till we have the task_lock()
 --              * to ensure that c does not slip away underneath us
 --              */
 --             read_unlock(&tasklist_lock);
                if (c->mm != mm) {
                        task_unlock(c);
 ++                     up_write(&mm->mmap_sem);
                        put_task_struct(c);
                        goto retry;
                }
                cgroup_mm_owner_callbacks(mm->owner, c);
                mm->owner = c;
                task_unlock(c);
 ++             up_write(&mm->mmap_sem);
                put_task_struct(c);
        }
        #endif /* CONFIG_MM_OWNER */
@@@@@@@@@ -1058,6 -1046,14 -1054,14 -1059,14 -1054,14 -1059,14 -1058,6 -1054,14 +1058,6 @@@@@@@@@ NORET_TYPE void do_exit(long code
                        exit_itimers(tsk->signal);
                }
                acct_collect(code, group_dead);
 ----- -#ifdef CONFIG_FUTEX
 ----- -        if (unlikely(tsk->robust_list))
 ----- -                exit_robust_list(tsk);
 ----- -#ifdef CONFIG_COMPAT
 ----- -        if (unlikely(tsk->compat_robust_list))
 ----- -                compat_exit_robust_list(tsk);
 ----- -#endif
 ----- -#endif
                if (group_dead)
                        tty_audit_exit();
                if (unlikely(tsk->audit_context))
        
                if (group_dead)
                        acct_process();
 ++             trace_sched_process_exit(tsk);
 ++     
                exit_sem(tsk);
                exit_files(tsk);
                exit_fs(tsk);
@@@@@@@@@ -1300,7 -1294,6 -1302,6 -1309,7 -1304,7 -1309,7 -1300,7 -1304,7 +1300,7 @@@@@@@@@ static int wait_task_zombie(struct task
                if (likely(!traced)) {
                        struct signal_struct *psig;
                        struct signal_struct *sig;
 ++                     struct task_cputime cputime;
        
                        /*
                         * The resource counters for the group leader are in its
                         * need to protect the access to p->parent->signal fields,
                         * as other threads in the parent group can be right
                         * here reaping other children at the same time.
 ++                      *
 ++                      * We use thread_group_cputime() to get times for the thread
 ++                      * group, which consolidates times for all threads in the
 ++                      * group including the group leader.
                         */
+++++ ++                thread_group_cputime(p, &cputime);
                        spin_lock_irq(&p->parent->sighand->siglock);
                        psig = p->parent->signal;
                        sig = p->signal;
-  -- --                thread_group_cputime(p, &cputime);
                        psig->cutime =
                                cputime_add(psig->cutime,
 --                             cputime_add(p->utime,
 --                             cputime_add(sig->utime,
 --                                         sig->cutime)));
 ++                             cputime_add(cputime.utime,
 ++                                         sig->cutime));
                        psig->cstime =
                                cputime_add(psig->cstime,
 --                             cputime_add(p->stime,
 --                             cputime_add(sig->stime,
 --                                         sig->cstime)));
 ++                             cputime_add(cputime.stime,
 ++                                         sig->cstime));
                        psig->cgtime =
                                cputime_add(psig->cgtime,
                                cputime_add(p->gtime,
@@@@@@@@@ -1677,8 -1667,6 -1675,6 -1686,8 -1681,8 -1686,8 -1677,8 -1681,8 +1677,8 @@@@@@@@@ static long do_wait(enum pid_type type
                struct task_struct *tsk;
                int retval;
        
 ++             trace_sched_process_wait(pid);
 ++     
                add_wait_queue(&current->signal->wait_chldexit,&wait);
        repeat:
                /*
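
In the wait_task_zombie() hunk above, the parent's cumulative counters are now fed from a single thread_group_cputime() snapshot instead of adding the leader's utime/stime and the signal-struct totals separately. A self-contained sketch of that consolidation step, with plain integers standing in for cputime_t and all names chosen for the example:

        struct group_times_sketch { unsigned long long utime, stime; };

        /* Sum times over all threads of the reaped group once... */
        static struct group_times_sketch
        sum_group_times(const struct group_times_sketch *threads, int n)
        {
                struct group_times_sketch sum = { 0, 0 };
                int i;

                for (i = 0; i < n; i++) {
                        sum.utime += threads[i].utime;
                        sum.stime += threads[i].stime;
                }
                return sum;
        }

        /* ...then fold that snapshot, plus the group's own accumulated
         * dead-child times (sig->cutime/cstime in the hunk), into the
         * parent's cumulative counters, mirroring the psig->cutime update. */
        static void account_to_parent(struct group_times_sketch *parent_cumul,
                                      struct group_times_sketch group,
                                      struct group_times_sketch dead_children)
        {
                parent_cumul->utime += group.utime + dead_children.utime;
                parent_cumul->stime += group.stime + dead_children.stime;
        }
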
diff --combined kernel/futex.c
index 8af10027514bb1cc9cb2702051330e52bf43a533,7d1136e97c142d198b897dab1846acd99f1f655f,62cbd648e28a663fc5e165adf177d227bed38a33,8af10027514bb1cc9cb2702051330e52bf43a533,8af10027514bb1cc9cb2702051330e52bf43a533,8af10027514bb1cc9cb2702051330e52bf43a533,8af10027514bb1cc9cb2702051330e52bf43a533,7d1136e97c142d198b897dab1846acd99f1f655f..e10c5c8786a614619c943f5102189fdb428c5ac3
@@@@@@@@@ -122,24 -122,24 -122,6 -122,24 -122,24 -122,24 -122,24 -122,24 +122,6 @@@@@@@@@ struct futex_hash_bucket 
        
        static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
        
-- -----/*
-- ----- * Take mm->mmap_sem, when futex is shared
-- ----- */
-- -----static inline void futex_lock_mm(struct rw_semaphore *fshared)
-- -----{
-- -----        if (fshared)
-- -----                down_read(fshared);
-- -----}
-- -----
-- -----/*
-- ----- * Release mm->mmap_sem, when the futex is shared
-- ----- */
-- -----static inline void futex_unlock_mm(struct rw_semaphore *fshared)
-- -----{
-- -----        if (fshared)
-- -----                up_read(fshared);
-- -----}
-- -----
        /*
         * We hash on the keys returned from get_futex_key (see below).
         */
@@@@@@@@@ -161,6 -161,6 -143,45 -161,6 -161,6 -161,6 -161,6 -161,6 +143,45 @@@@@@@@@ static inline int match_futex(union fut
                        && key1->both.offset == key2->both.offset);
        }
        
++ +++++/*
++ +++++ * Take a reference to the resource addressed by a key.
++ +++++ * Can be called while holding spinlocks.
++ +++++ *
++ +++++ */
++ +++++static void get_futex_key_refs(union futex_key *key)
++ +++++{
++ +++++        if (!key->both.ptr)
++ +++++                return;
++ +++++
++ +++++        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
++ +++++        case FUT_OFF_INODE:
++ +++++                atomic_inc(&key->shared.inode->i_count);
++ +++++                break;
++ +++++        case FUT_OFF_MMSHARED:
++ +++++                atomic_inc(&key->private.mm->mm_count);
++ +++++                break;
++ +++++        }
++ +++++}
++ +++++
++ +++++/*
++ +++++ * Drop a reference to the resource addressed by a key.
++ +++++ * The hash bucket spinlock must not be held.
++ +++++ */
++ +++++static void drop_futex_key_refs(union futex_key *key)
++ +++++{
++ +++++        if (!key->both.ptr)
++ +++++                return;
++ +++++
++ +++++        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
++ +++++        case FUT_OFF_INODE:
++ +++++                iput(key->shared.inode);
++ +++++                break;
++ +++++        case FUT_OFF_MMSHARED:
++ +++++                mmdrop(key->private.mm);
++ +++++                break;
++ +++++        }
++ +++++}
++ +++++
        /**
         * get_futex_key - Get parameters which are the keys for a futex.
         * @uaddr: virtual address of the futex
         * For other futexes, it points to &current->mm->mmap_sem and
         * caller must have taken the reader lock. but NOT any spinlocks.
         */
-- -----static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
-- -----                         union futex_key *key)
++ +++++static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
        {
                unsigned long address = (unsigned long)uaddr;
                struct mm_struct *mm = current->mm;
-- -----        struct vm_area_struct *vma;
                struct page *page;
                int err;
        
                                return -EFAULT;
                        key->private.mm = mm;
                        key->private.address = address;
++ +++++                get_futex_key_refs(key);
                        return 0;
                }
-- -----        /*
-- -----         * The futex is hashed differently depending on whether
-- -----         * it's in a shared or private mapping.  So check vma first.
-- -----         */
-- -----        vma = find_extend_vma(mm, address);
-- -----        if (unlikely(!vma))
-- -----                return -EFAULT;
        
-- -----        /*
-- -----         * Permissions.
-- -----         */
-- -----        if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
-- -----                return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
++ +++++again:
++ +++++        err = get_user_pages_fast(address, 1, 0, &page);
++ +++++        if (err < 0)
++ +++++                return err;
++ +++++
++ +++++        lock_page(page);
++ +++++        if (!page->mapping) {
++ +++++                unlock_page(page);
++ +++++                put_page(page);
++ +++++                goto again;
++ +++++        }
        
                /*
                 * Private mappings are handled in a simple way.
                 *
                 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
                 * it's a read-only handle, it's expected that futexes attach to
-- -----         * the object not the particular process.  Therefore we use
-- -----         * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
-- -----         * mappings of _writable_ handles.
++ +++++         * the object not the particular process.
                 */
-- -----        if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
-- -----                key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
++ +++++        if (PageAnon(page)) {
++ +++++                key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                        key->private.mm = mm;
                        key->private.address = address;
 -     -                return 0;
 -     -        }
 -     -
 -     -        /*
 -     -         * Linear file mappings are also simple.
 -     -         */
 -     -        key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
 -     -        key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
 -     -        if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
 -     -                key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
 -     -                                     + vma->vm_pgoff);
-- -----                return 0;
++ +++++        } else {
++ +++++                key->both.offset |= FUT_OFF_INODE; /* inode-based key */
++ +++++                key->shared.inode = page->mapping->host;
++ +++++                key->shared.pgoff = page->index;
                }
        
-- -----        /*
-  ----          * Linear file mappings are also simple.
 -     -         * We could walk the page table to read the non-linear
 -     -         * pte, and get the page index without fetching the page
 -     -         * from swap.  But that's a lot of code to duplicate here
 -     -         * for a rare case, so we simply fetch the page.
-- -----         */
-  ----         key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
-  ----         key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
-  ----         if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
-  ----                 key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
-  ----                                      + vma->vm_pgoff);
 -     -        err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
 -     -        if (err >= 0) {
 -     -                key->shared.pgoff =
 -     -                        page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 -     -                put_page(page);
-- -----                return 0;
-- -----        }
 -     -        return err;
 -     -}
++ +++++        get_futex_key_refs(key);
        
-  ----         /*
-  ----          * We could walk the page table to read the non-linear
-  ----          * pte, and get the page index without fetching the page
-  ----          * from swap.  But that's a lot of code to duplicate here
-  ----          * for a rare case, so we simply fetch the page.
-  ----          */
-  ----         err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
-  ----         if (err >= 0) {
-  ----                 key->shared.pgoff =
-  ----                         page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-  ----                 put_page(page);
-  ----                 return 0;
-  ----         }
-  ----         return err;
-  ---- }
-  ---- 
-- -----/*
-- ----- * Take a reference to the resource addressed by a key.
-- ----- * Can be called while holding spinlocks.
-- ----- *
-- ----- */
-- -----static void get_futex_key_refs(union futex_key *key)
-- -----{
-- -----        if (key->both.ptr == NULL)
-- -----                return;
-- -----        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-- -----                case FUT_OFF_INODE:
-- -----                        atomic_inc(&key->shared.inode->i_count);
-- -----                        break;
-- -----                case FUT_OFF_MMSHARED:
-- -----                        atomic_inc(&key->private.mm->mm_count);
-- -----                        break;
-- -----        }
++ +++++        unlock_page(page);
++ +++++        put_page(page);
++ +++++        return 0;
        }
        
-- -----/*
-- ----- * Drop a reference to the resource addressed by a key.
-- ----- * The hash bucket spinlock must not be held.
-- ----- */
-- -----static void drop_futex_key_refs(union futex_key *key)
++ +++++static inline
++ +++++void put_futex_key(int fshared, union futex_key *key)
        {
-- -----        if (!key->both.ptr)
-- -----                return;
-- -----        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-- -----                case FUT_OFF_INODE:
-- -----                        iput(key->shared.inode);
-- -----                        break;
-- -----                case FUT_OFF_MMSHARED:
-- -----                        mmdrop(key->private.mm);
-- -----                        break;
-- -----        }
++ +++++        drop_futex_key_refs(key);
        }
        
        static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@@@@@@@@ -328,10 -328,10 -297,8 -328,10 -328,10 -328,10 -328,10 -328,10 +297,8 @@@@@@@@@ static int get_futex_value_locked(u32 *
        
        /*
         * Fault handling.
-- ----- * if fshared is non NULL, current->mm->mmap_sem is already held
         */
-- -----static int futex_handle_fault(unsigned long address,
-- -----                              struct rw_semaphore *fshared, int attempt)
++ +++++static int futex_handle_fault(unsigned long address, int attempt)
        {
                struct vm_area_struct * vma;
                struct mm_struct *mm = current->mm;
                if (attempt > 2)
                        return ret;
        
-- -----        if (!fshared)
-- -----                down_read(&mm->mmap_sem);
++ +++++        down_read(&mm->mmap_sem);
                vma = find_vma(mm, address);
                if (vma && address >= vma->vm_start &&
                    (vma->vm_flags & VM_WRITE)) {
                                        current->min_flt++;
                        }
                }
-- -----        if (!fshared)
-- -----                up_read(&mm->mmap_sem);
++ +++++        up_read(&mm->mmap_sem);
                return ret;
        }
        
@@@@@@@@@ -385,6 -385,6 -350,7 -385,6 -385,6 -385,6 -385,6 -385,6 +350,7 @@@@@@@@@ static int refill_pi_state_cache(void
                /* pi_mutex gets initialized later */
                pi_state->owner = NULL;
                atomic_set(&pi_state->refcount, 1);
++ +++++        pi_state->key = FUTEX_KEY_INIT;
        
                current->pi_state_cache = pi_state;
        
@@@@@@@@@ -462,7 -462,7 -428,7 -462,7 -462,7 -462,7 -462,7 -462,7 +428,7 @@@@@@@@@ void exit_pi_state_list(struct task_str
                struct list_head *next, *head = &curr->pi_state_list;
                struct futex_pi_state *pi_state;
                struct futex_hash_bucket *hb;
-- -----        union futex_key key;
++ +++++        union futex_key key = FUTEX_KEY_INIT;
        
                if (!futex_cmpxchg_enabled)
                        return;
@@@@@@@@@ -719,20 -719,20 -685,17 -719,20 -719,20 -719,20 -719,20 -719,20 +685,17 @@@@@@@@@ double_lock_hb(struct futex_hash_bucke
         * Wake up all waiters hashed on the physical page that is mapped
         * to this virtual address:
         */
-- -----static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
-- -----                      int nr_wake, u32 bitset)
++ +++++static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
        {
                struct futex_hash_bucket *hb;
                struct futex_q *this, *next;
                struct plist_head *head;
-- -----        union futex_key key;
++ +++++        union futex_key key = FUTEX_KEY_INIT;
                int ret;
        
                if (!bitset)
                        return -EINVAL;
        
-- -----        futex_lock_mm(fshared);
-- -----
                ret = get_futex_key(uaddr, fshared, &key);
                if (unlikely(ret != 0))
                        goto out;
        
                spin_unlock(&hb->lock);
        out:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &key);
                return ret;
        }
        
         * to this virtual address:
         */
        static int
-- -----futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
-- -----              u32 __user *uaddr2,
++ +++++futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
                      int nr_wake, int nr_wake2, int op)
        {
-- -----        union futex_key key1, key2;
++ +++++        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
                struct futex_hash_bucket *hb1, *hb2;
                struct plist_head *head;
                struct futex_q *this, *next;
                int ret, op_ret, attempt = 0;
        
        retryfull:
-- -----        futex_lock_mm(fshared);
-- -----
                ret = get_futex_key(uaddr1, fshared, &key1);
                if (unlikely(ret != 0))
                        goto out;
                         */
                        if (attempt++) {
                                ret = futex_handle_fault((unsigned long)uaddr2,
-- -----                                                 fshared, attempt);
++ +++++                                                 attempt);
                                if (ret)
                                        goto out;
                                goto retry;
                        }
        
-- -----                /*
-- -----                 * If we would have faulted, release mmap_sem,
-- -----                 * fault it in and start all over again.
-- -----                 */
-- -----                futex_unlock_mm(fshared);
-- -----
                        ret = get_user(dummy, uaddr2);
                        if (ret)
                                return ret;
                if (hb1 != hb2)
                        spin_unlock(&hb2->lock);
        out:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &key2);
++ +++++        put_futex_key(fshared, &key1);
        
                return ret;
        }
         * Requeue all waiters hashed on one physical page to another
         * physical page.
         */
-- -----static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
-- -----                         u32 __user *uaddr2,
++ +++++static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
                                 int nr_wake, int nr_requeue, u32 *cmpval)
        {
-- -----        union futex_key key1, key2;
++ +++++        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
                struct futex_hash_bucket *hb1, *hb2;
                struct plist_head *head1;
                struct futex_q *this, *next;
                int ret, drop_count = 0;
        
         retry:
-- -----        futex_lock_mm(fshared);
-- -----
                ret = get_futex_key(uaddr1, fshared, &key1);
                if (unlikely(ret != 0))
                        goto out;
                                if (hb1 != hb2)
                                        spin_unlock(&hb2->lock);
        
-- -----                        /*
-- -----                         * If we would have faulted, release mmap_sem, fault
-- -----                         * it in and start all over again.
-- -----                         */
-- -----                        futex_unlock_mm(fshared);
-- -----
                                ret = get_user(curval, uaddr1);
        
                                if (!ret)
@@@@@@@@@ -974,7 -974,7 -920,8 -974,7 -974,7 -974,7 -974,7 -974,7 +920,8 @@@@@@@@@ out_unlock
                        drop_futex_key_refs(&key1);
        
        out:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &key2);
++ +++++        put_futex_key(fshared, &key1);
                return ret;
        }
        
@@@@@@@@@ -1096,8 -1096,8 -1043,7 -1096,8 -1096,8 -1096,8 -1096,8 -1096,8 +1043,7 @@@@@@@@@ static void unqueue_me_pi(struct futex_
         * private futexes.
         */
        static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-- -----                                struct task_struct *newowner,
-- -----                                struct rw_semaphore *fshared)
++ +++++                                struct task_struct *newowner, int fshared)
        {
                u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
                struct futex_pi_state *pi_state = q->pi_state;
        handle_fault:
                spin_unlock(q->lock_ptr);
        
-- -----        ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
++ +++++        ret = futex_handle_fault((unsigned long)uaddr, attempt++);
        
                spin_lock(q->lock_ptr);
        
        
        static long futex_wait_restart(struct restart_block *restart);
        
-- -----static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
++ +++++static int futex_wait(u32 __user *uaddr, int fshared,
                              u32 val, ktime_t *abs_time, u32 bitset)
        {
                struct task_struct *curr = current;
                q.pi_state = NULL;
                q.bitset = bitset;
         retry:
-- -----        futex_lock_mm(fshared);
-- -----
++ +++++        q.key = FUTEX_KEY_INIT;
                ret = get_futex_key(uaddr, fshared, &q.key);
                if (unlikely(ret != 0))
                        goto out_release_sem;
                if (unlikely(ret)) {
                        queue_unlock(&q, hb);
        
-- -----                /*
-- -----                 * If we would have faulted, release mmap_sem, fault it in and
-- -----                 * start all over again.
-- -----                 */
-- -----                futex_unlock_mm(fshared);
-- -----
                        ret = get_user(uval, uaddr);
        
                        if (!ret)
                /* Only actually queue if *uaddr contained val.  */
                queue_me(&q, hb);
        
-- -----        /*
-- -----         * Now the futex is queued and we have checked the data, we
-- -----         * don't want to hold mmap_sem while we sleep.
-- -----         */
-- -----        futex_unlock_mm(fshared);
-- -----
                /*
                 * There might have been scheduling since the queue_me(), as we
                 * cannot hold a spinlock across the get_user() in case it
                        if (!abs_time)
                                schedule();
                        else {
 ++    +                        unsigned long slack;
 ++    +                        slack = current->timer_slack_ns;
 ++    +                        if (rt_task(current))
 ++    +                                slack = 0;
                                hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
                                                        HRTIMER_MODE_ABS);
                                hrtimer_init_sleeper(&t, current);
 --    -                        t.timer.expires = *abs_time;
 ++    +                        hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
        
 --    -                        hrtimer_start(&t.timer, t.timer.expires,
 --    -                                                HRTIMER_MODE_ABS);
 ++    +                        hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
                                if (!hrtimer_active(&t.timer))
                                        t.task = NULL;
        
                queue_unlock(&q, hb);
        
         out_release_sem:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &q.key);
                return ret;
        }
        
        static long futex_wait_restart(struct restart_block *restart)
        {
                u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
-- -----        struct rw_semaphore *fshared = NULL;
++ +++++        int fshared = 0;
                ktime_t t;
        
                t.tv64 = restart->futex.time;
                restart->fn = do_no_restart_syscall;
                if (restart->futex.flags & FLAGS_SHARED)
-- -----                fshared = &current->mm->mmap_sem;
++ +++++                fshared = 1;
                return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
                                        restart->futex.bitset);
        }
         * if there are waiters then it will block, it does PI, etc. (Due to
         * races the kernel might see a 0 value of the futex too.)
         */
-- -----static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
++ +++++static int futex_lock_pi(u32 __user *uaddr, int fshared,
                                 int detect, ktime_t *time, int trylock)
        {
                struct hrtimer_sleeper timeout, *to = NULL;
                        hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
                                              HRTIMER_MODE_ABS);
                        hrtimer_init_sleeper(to, current);
 --    -                to->timer.expires = *time;
 ++    +                hrtimer_set_expires(&to->timer, *time);
                }
        
                q.pi_state = NULL;
         retry:
-- -----        futex_lock_mm(fshared);
-- -----
++ +++++        q.key = FUTEX_KEY_INIT;
                ret = get_futex_key(uaddr, fshared, &q.key);
                if (unlikely(ret != 0))
                        goto out_release_sem;
                                 * exit to complete.
                                 */
                                queue_unlock(&q, hb);
-- -----                        futex_unlock_mm(fshared);
                                cond_resched();
                                goto retry;
        
                 */
                queue_me(&q, hb);
        
-- -----        /*
-- -----         * Now the futex is queued and we have checked the data, we
-- -----         * don't want to hold mmap_sem while we sleep.
-- -----         */
-- -----        futex_unlock_mm(fshared);
-- -----
                WARN_ON(!q.pi_state);
                /*
                 * Block on the PI mutex:
                        ret = ret ? 0 : -EWOULDBLOCK;
                }
        
-- -----        futex_lock_mm(fshared);
                spin_lock(q.lock_ptr);
        
                if (!ret) {
        
                /* Unqueue and drop the lock */
                unqueue_me_pi(&q);
-- -----        futex_unlock_mm(fshared);
        
                if (to)
                        destroy_hrtimer_on_stack(&to->timer);
                queue_unlock(&q, hb);
        
         out_release_sem:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &q.key);
                if (to)
                        destroy_hrtimer_on_stack(&to->timer);
                return ret;
                queue_unlock(&q, hb);
        
                if (attempt++) {
-- -----                ret = futex_handle_fault((unsigned long)uaddr, fshared,
-- -----                                         attempt);
++ +++++                ret = futex_handle_fault((unsigned long)uaddr, attempt);
                        if (ret)
                                goto out_release_sem;
                        goto retry_unlocked;
                }
        
-- -----        futex_unlock_mm(fshared);
-- -----
                ret = get_user(uval, uaddr);
                if (!ret && (uval != -EFAULT))
                        goto retry;
         * This is the in-kernel slowpath: we look up the PI state (if any),
         * and do the rt-mutex unlock.
         */
-- -----static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
++ +++++static int futex_unlock_pi(u32 __user *uaddr, int fshared)
        {
                struct futex_hash_bucket *hb;
                struct futex_q *this, *next;
                u32 uval;
                struct plist_head *head;
-- -----        union futex_key key;
++ +++++        union futex_key key = FUTEX_KEY_INIT;
                int ret, attempt = 0;
        
        retry:
                 */
                if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
                        return -EPERM;
-- -----        /*
-- -----         * First take all the futex related locks:
-- -----         */
-- -----        futex_lock_mm(fshared);
        
                ret = get_futex_key(uaddr, fshared, &key);
                if (unlikely(ret != 0))
@@@@@@@@@ -1747,7 -1744,7 -1660,7 -1747,7 -1747,7 -1747,7 -1747,7 -1744,7 +1663,7 @@@@@@@@@ retry_unlocked
        out_unlock:
                spin_unlock(&hb->lock);
        out:
-- -----        futex_unlock_mm(fshared);
++ +++++        put_futex_key(fshared, &key);
        
                return ret;
        
                spin_unlock(&hb->lock);
        
                if (attempt++) {
-- -----                ret = futex_handle_fault((unsigned long)uaddr, fshared,
-- -----                                         attempt);
++ +++++                ret = futex_handle_fault((unsigned long)uaddr, attempt);
                        if (ret)
                                goto out;
                        uval = 0;
                        goto retry_unlocked;
                }
        
-- -----        futex_unlock_mm(fshared);
-- -----
                ret = get_user(uval, uaddr);
                if (!ret && (uval != -EFAULT))
                        goto retry;
                         * PI futexes happens in exit_pi_state():
                         */
                        if (!pi && (uval & FUTEX_WAITERS))
-- -----                        futex_wake(uaddr, &curr->mm->mmap_sem, 1,
-- -----                                   FUTEX_BITSET_MATCH_ANY);
++ +++++                        futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
                }
                return 0;
        }
@@@@@@@@@ -1995,10 -1992,10 -1904,10 -1995,10 -1995,10 -1995,10 -1995,10 -1992,10 +1907,10 @@@@@@@@@ long do_futex(u32 __user *uaddr, int op
        {
                int ret = -ENOSYS;
                int cmd = op & FUTEX_CMD_MASK;
-- -----        struct rw_semaphore *fshared = NULL;
++ +++++        int fshared = 0;
        
                if (!(op & FUTEX_PRIVATE_FLAG))
-- -----                fshared = &current->mm->mmap_sem;
++ +++++                fshared = 1;
        
                switch (cmd) {
                case FUTEX_WAIT:
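
The futex.c rework above drops the mmap_sem-based VMA walk and instead keys a futex off the page that backs it: anonymous (private) pages hash on (mm, address), file-backed (shared) pages on (inode, page index), with get/drop_futex_key_refs() pinning whichever object was chosen. A minimal sketch of just that keying decision, with an explicit page_is_anon flag standing in for PageAnon() and untyped pointers for mm/inode:

        struct futex_key_sketch {
                void          *mm_or_inode;      /* mm for private, inode for shared */
                unsigned long  address_or_pgoff;
                int            shared;           /* FUT_OFF_MMSHARED vs FUT_OFF_INODE */
        };

        static void make_key_sketch(struct futex_key_sketch *key, int page_is_anon,
                                    void *mm, unsigned long address,
                                    void *inode, unsigned long page_index)
        {
                if (page_is_anon) {              /* private: key on (mm, address) */
                        key->mm_or_inode = mm;
                        key->address_or_pgoff = address;
                        key->shared = 0;
                } else {                         /* shared: key on (inode, pgoff) */
                        key->mm_or_inode = inode;
                        key->address_or_pgoff = page_index;
                        key->shared = 1;
                }
        }
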
diff --combined kernel/lockdep.c
index 06e157119d2b24d2b254c9a38b5ace0935a05886,dbda475b13bd62876490fc5a523d373d72c931cd,dbda475b13bd62876490fc5a523d373d72c931cd,a4285830323352666b3c3e31b4ec04f9e7331d1b,06e157119d2b24d2b254c9a38b5ace0935a05886,06e157119d2b24d2b254c9a38b5ace0935a05886,46a404173db231a982baf3941c72e96911003906,dbda475b13bd62876490fc5a523d373d72c931cd..e4bdda8dcf0457364928fc0685ff3cff569d4f76
@@@@@@@@@ -136,16 -136,16 -136,16 -136,16 -136,16 -136,16 -136,16 -136,16 +136,16 @@@@@@@@@ static inline struct lock_class *hlock_
        #ifdef CONFIG_LOCK_STAT
        static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
        
--- ----static int lock_contention_point(struct lock_class *class, unsigned long ip)
+++ ++++static int lock_point(unsigned long points[], unsigned long ip)
        {
                int i;
        
--- ----        for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
--- ----                if (class->contention_point[i] == 0) {
--- ----                        class->contention_point[i] = ip;
+++ ++++        for (i = 0; i < LOCKSTAT_POINTS; i++) {
+++ ++++                if (points[i] == 0) {
+++ ++++                        points[i] = ip;
                                break;
                        }
--- ----                if (class->contention_point[i] == ip)
+++ ++++                if (points[i] == ip)
                                break;
                }
        
@@@@@@@@@ -185,6 -185,6 -185,6 -185,9 -185,6 -185,6 -185,6 -185,6 +185,9 @@@@@@@@@ struct lock_class_stats lock_stats(stru
                        for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
                                stats.contention_point[i] += pcs->contention_point[i];
        
+++ ++++                for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
+++ ++++                        stats.contending_point[i] += pcs->contending_point[i];
+++ ++++
                        lock_time_add(&pcs->read_waittime, &stats.read_waittime);
                        lock_time_add(&pcs->write_waittime, &stats.write_waittime);
        
@@@@@@@@@ -209,6 -209,6 -209,6 -212,7 -209,6 -209,6 -209,6 -209,6 +212,7 @@@@@@@@@ void clear_lock_stats(struct lock_clas
                        memset(cpu_stats, 0, sizeof(struct lock_class_stats));
                }
                memset(class->contention_point, 0, sizeof(class->contention_point));
+++ ++++        memset(class->contending_point, 0, sizeof(class->contending_point));
        }
        
        static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@@@@@@@@@ -2169,11 -2169,12 -2169,12 -2173,11 -2169,11 -2169,11 -2169,11 -2169,12 +2173,11 @@@@@@@@@ void early_boot_irqs_on(void
        /*
         * Hardirqs will be enabled:
         */
 --    -void trace_hardirqs_on_caller(unsigned long a0)
 ++    +void trace_hardirqs_on_caller(unsigned long ip)
        {
                struct task_struct *curr = current;
 --    -        unsigned long ip;
        
 --    -        time_hardirqs_on(CALLER_ADDR0, a0);
 ++    +        time_hardirqs_on(CALLER_ADDR0, ip);
        
                if (unlikely(!debug_locks || current->lockdep_recursion))
                        return;
                }
                /* we'll do an OFF -> ON transition: */
                curr->hardirqs_enabled = 1;
 --    -        ip = (unsigned long) __builtin_return_address(0);
        
                if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                        return;
@@@@@@@@@ -2222,11 -2224,11 -2224,11 -2226,11 -2222,11 -2222,11 -2222,11 -2224,11 +2226,11 @@@@@@@@@ EXPORT_SYMBOL(trace_hardirqs_on)
        /*
         * Hardirqs were disabled:
         */
 --    -void trace_hardirqs_off_caller(unsigned long a0)
 ++    +void trace_hardirqs_off_caller(unsigned long ip)
        {
                struct task_struct *curr = current;
        
 --    -        time_hardirqs_off(CALLER_ADDR0, a0);
 ++    +        time_hardirqs_off(CALLER_ADDR0, ip);
        
                if (unlikely(!debug_locks || current->lockdep_recursion))
                        return;
                         * We have done an ON -> OFF transition:
                         */
                        curr->hardirqs_enabled = 0;
 --    -                curr->hardirq_disable_ip = _RET_IP_;
 ++    +                curr->hardirq_disable_ip = ip;
                        curr->hardirq_disable_event = ++curr->irq_events;
                        debug_atomic_inc(&hardirqs_off_events);
                } else
@@@@@@@@@ -2999,7 -3001,7 -3001,7 -3003,7 -2999,7 -2999,7 -2999,7 -3001,7 +3003,7 @@@@@@@@@ __lock_contended(struct lockdep_map *lo
                struct held_lock *hlock, *prev_hlock;
                struct lock_class_stats *stats;
                unsigned int depth;
--- ----        int i, point;
+++ ++++        int i, contention_point, contending_point;
        
                depth = curr->lockdep_depth;
                if (DEBUG_LOCKS_WARN_ON(!depth))
        found_it:
                hlock->waittime_stamp = sched_clock();
        
--- ----        point = lock_contention_point(hlock_class(hlock), ip);
+++ ++++        contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
+++ ++++        contending_point = lock_point(hlock_class(hlock)->contending_point,
+++ ++++                                      lock->ip);
        
                stats = get_lock_stats(hlock_class(hlock));
--- ----        if (point < ARRAY_SIZE(stats->contention_point))
--- ----                stats->contention_point[point]++;
+++ ++++        if (contention_point < LOCKSTAT_POINTS)
+++ ++++                stats->contention_point[contention_point]++;
+++ ++++        if (contending_point < LOCKSTAT_POINTS)
+++ ++++                stats->contending_point[contending_point]++;
                if (lock->cpu != smp_processor_id())
                        stats->bounces[bounce_contended + !!hlock->read]++;
                put_lock_stats(stats);
        }
        
        static void
--- ----__lock_acquired(struct lockdep_map *lock)
+++ ++++__lock_acquired(struct lockdep_map *lock, unsigned long ip)
        {
                struct task_struct *curr = current;
                struct held_lock *hlock, *prev_hlock;
                put_lock_stats(stats);
        
                lock->cpu = cpu;
+++ ++++        lock->ip = ip;
        }
        
        void lock_contended(struct lockdep_map *lock, unsigned long ip)
        }
        EXPORT_SYMBOL_GPL(lock_contended);
        
--- ----void lock_acquired(struct lockdep_map *lock)
+++ ++++void lock_acquired(struct lockdep_map *lock, unsigned long ip)
        {
                unsigned long flags;
        
                raw_local_irq_save(flags);
                check_flags(flags);
                current->lockdep_recursion = 1;
--- ----        __lock_acquired(lock);
+++ ++++        __lock_acquired(lock, ip);
                current->lockdep_recursion = 0;
                raw_local_irq_restore(flags);
        }
@@@@@@@@@ -3276,10 -3278,10 -3278,10 -3285,10 -3276,10 -3276,10 -3276,10 -3278,10 +3285,10 @@@@@@@@@ void __init lockdep_info(void
        {
                printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
        
------ -        printk("... MAX_LOCKDEP_SUBCLASSES:    %lu\n", MAX_LOCKDEP_SUBCLASSES);
++++++ +        printk("... MAX_LOCKDEP_SUBCLASSES:  %lu\n", MAX_LOCKDEP_SUBCLASSES);
                printk("... MAX_LOCK_DEPTH:          %lu\n", MAX_LOCK_DEPTH);
                printk("... MAX_LOCKDEP_KEYS:        %lu\n", MAX_LOCKDEP_KEYS);
------ -        printk("... CLASSHASH_SIZE:           %lu\n", CLASSHASH_SIZE);
++++++ +        printk("... CLASSHASH_SIZE:          %lu\n", CLASSHASH_SIZE);
                printk("... MAX_LOCKDEP_ENTRIES:     %lu\n", MAX_LOCKDEP_ENTRIES);
                printk("... MAX_LOCKDEP_CHAINS:      %lu\n", MAX_LOCKDEP_CHAINS);
                printk("... CHAINHASH_SIZE:          %lu\n", CHAINHASH_SIZE);
                        }
                        printk(" ignoring it.\n");
                        unlock = 0;
 ++    +        } else {
 ++    +                if (count != 10)
 ++    +                        printk(KERN_CONT " locked it.\n");
                }
 --    -        if (count != 10)
 --    -                printk(" locked it.\n");
        
                do_each_thread(g, p) {
                        /*
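
lock_point() above generalizes the old lock_contention_point(): it records an instruction pointer in a small fixed array, reusing the slot that already holds the same IP, and is now applied to both the contention and the contending (acquiring) side. A stand-alone sketch of that slot lookup, with POINTS_SKETCH standing in for LOCKSTAT_POINTS:

        #define POINTS_SKETCH 4

        /* Return the slot index for ip: claim the first free slot (0 == free)
         * or reuse the slot already holding ip; POINTS_SKETCH means "table
         * full", which callers filter with "if (point < POINTS_SKETCH)". */
        static int lock_point_sketch(unsigned long points[POINTS_SKETCH], unsigned long ip)
        {
                int i;

                for (i = 0; i < POINTS_SKETCH; i++) {
                        if (points[i] == 0) {
                                points[i] = ip;
                                break;
                        }
                        if (points[i] == ip)
                                break;
                }
                return i;
        }
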
diff --combined kernel/notifier.c
index 4282c0a40a57ada651b86c7dcce2389abf489448,0f39e398ef609cbf11ba04ce4977e14d2f8518fa,823be11584efef8ef1d344f484cbf8c3d4f9617e,4282c0a40a57ada651b86c7dcce2389abf489448,4282c0a40a57ada651b86c7dcce2389abf489448,4282c0a40a57ada651b86c7dcce2389abf489448,4282c0a40a57ada651b86c7dcce2389abf489448,4282c0a40a57ada651b86c7dcce2389abf489448..61d5aa5eced3466393582e4f566b63c468ea7cc3
@@@@@@@@@ -82,6 -82,14 -82,6 -82,6 -82,6 -82,6 -82,6 -82,6 +82,14 @@@@@@@@@ static int __kprobes notifier_call_chai
        
                while (nb && nr_to_call) {
                        next_nb = rcu_dereference(nb->next);
+ ++++++
+ ++++++#ifdef CONFIG_DEBUG_NOTIFIERS
+ ++++++                if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
+ ++++++                        WARN(1, "Invalid notifier called!");
+ ++++++                        nb = next_nb;
+ ++++++                        continue;
+ ++++++                }
+ ++++++#endif
                        ret = nb->notifier_call(nb, val, v);
        
                        if (nr_calls)
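
The new CONFIG_DEBUG_NOTIFIERS check fires when a callback pointer no longer points into kernel or module text, which usually means a module was unloaded without unregistering its notifier_block. A minimal sketch of the register/unregister pairing the check is meant to enforce; the reboot chain and the callback names are illustrative only, not part of this commit:

        #include <linux/module.h>
        #include <linux/notifier.h>
        #include <linux/reboot.h>

        static int my_reboot_event(struct notifier_block *nb,
                                   unsigned long action, void *data)
        {
                /* ... react to the reboot event ... */
                return NOTIFY_DONE;
        }

        static struct notifier_block my_reboot_nb = {
                .notifier_call = my_reboot_event,
        };

        static int __init my_init(void)
        {
                return register_reboot_notifier(&my_reboot_nb);
        }

        static void __exit my_exit(void)
        {
                /*
                 * Unregister before the module text disappears; otherwise the
                 * func_ptr_is_kernel_text() check above warns about the stale
                 * callback pointer on the next notifier_call_chain() run.
                 */
                unregister_reboot_notifier(&my_reboot_nb);
        }

        module_init(my_init);
        module_exit(my_exit);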
@@@@@@@@@ -550,7 -558,7 -550,7 -550,7 -550,7 -550,7 -550,7 -550,7 +558,7 @@@@@@@@@ EXPORT_SYMBOL(unregister_reboot_notifie
        
        static ATOMIC_NOTIFIER_HEAD(die_chain);
        
 --     int notify_die(enum die_val val, const char *str,
 ++     int notrace notify_die(enum die_val val, const char *str,
                       struct pt_regs *regs, long err, int trap, int sig)
        {
                struct die_args args = {
diff --combined kernel/sched.c
index 9b1e79371c207b37c1617d3f7c0460709a3cc39b,cc1f81b50b82dddb19658dc4d10dd419087a59fc,13dd2db9fb2dc185a4a95a86aab9f89b4850ccaf,2a106b6b78b09006f75274defb2057b6e7a428e7,e8819bc6f462c18761c11861b4808fd28223b431,b388c9b243e94c71e15df02692ae3fb3658482da,9b1e79371c207b37c1617d3f7c0460709a3cc39b,d906f72b42d23ae1d8c2355d9b605e5fd0761eaa..558e5f284269bfd59a23008ad4906b51269047b2
        #include <linux/cpuset.h>
        #include <linux/percpu.h>
        #include <linux/kthread.h>
 ++    +#include <linux/proc_fs.h>
        #include <linux/seq_file.h>
        #include <linux/sysctl.h>
        #include <linux/syscalls.h>
        #include <linux/debugfs.h>
        #include <linux/ctype.h>
        #include <linux/ftrace.h>
 ++     #include <trace/sched.h>
        
        #include <asm/tlb.h>
        #include <asm/irq_regs.h>
@@@@@@@@@ -203,19 -201,14 -201,14 -203,19 -203,19 -203,19 -203,19 -202,19 +203,19 @@@@@@@@@ void init_rt_bandwidth(struct rt_bandwi
                hrtimer_init(&rt_b->rt_period_timer,
                                CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                rt_b->rt_period_timer.function = sched_rt_period_timer;
 --             rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 ++             rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 ++     }
 ++     
 ++     static inline int rt_bandwidth_enabled(void)
 ++     {
 ++             return sysctl_sched_rt_runtime >= 0;
        }
        
        static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
        {
                ktime_t now;
        
 --             if (rt_b->rt_runtime == RUNTIME_INF)
 ++             if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
                        return;
        
                if (hrtimer_active(&rt_b->rt_period_timer))
        
                        now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
                        hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
 --    -                hrtimer_start(&rt_b->rt_period_timer,
 --    -                              rt_b->rt_period_timer.expires,
 --    -                              HRTIMER_MODE_ABS);
 ++    +                hrtimer_start_expires(&rt_b->rt_period_timer,
 ++    +                                HRTIMER_MODE_ABS);
                }
                spin_unlock(&rt_b->rt_runtime_lock);
        }
@@@@@@@@@ -304,9 -298,9 -298,9 -304,9 -304,9 -304,9 -304,9 -304,9 +304,9 @@@@@@@@@ static DEFINE_PER_CPU(struct cfs_rq, in
        static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
        static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
        #endif /* CONFIG_RT_GROUP_SCHED */
 --     #else /* !CONFIG_FAIR_GROUP_SCHED */
 ++     #else /* !CONFIG_USER_SCHED */
        #define root_task_group init_task_group
 --     #endif /* CONFIG_FAIR_GROUP_SCHED */
 ++     #endif /* CONFIG_USER_SCHED */
        
        /* task_group_lock serializes add/remove of task groups and also changes to
         * a task group's cpu shares.
@@@@@@@@@ -386,6 -380,7 -380,7 -386,6 -386,6 -386,6 -386,6 -386,7 +386,6 @@@@@@@@@ struct cfs_rq 
        
                u64 exec_clock;
                u64 min_vruntime;
 --    -        u64 pair_start;
        
                struct rb_root tasks_timeline;
                struct rb_node *rb_leftmost;
                 * 'curr' points to currently running entity on this cfs_rq.
                 * It is set to NULL otherwise (i.e when none are currently running).
                 */
 -- -  -        struct sched_entity *curr, *next;
 ++ +  +        struct sched_entity *curr, *next, *last;
        
 -- -  -        unsigned long nr_spread_over;
 ++ +  +        unsigned int nr_spread_over;
        
        #ifdef CONFIG_FAIR_GROUP_SCHED
                struct rq *rq;  /* cpu runqueue to which this cfs_rq is attached */
@@@@@@@@@ -609,9 -604,9 -604,9 -609,9 -609,9 -609,9 -609,9 -610,9 +609,9 @@@@@@@@@ struct rq 
        
        static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
        
 --     static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
 ++     static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync)
        {
 --             rq->curr->sched_class->check_preempt_curr(rq, p);
 ++             rq->curr->sched_class->check_preempt_curr(rq, p, sync);
        }
        
        static inline int cpu_of(struct rq *rq)
@@@@@@@@@ -817,13 -812,6 -812,6 -817,13 -817,13 -817,13 -817,13 -818,6 +817,13 @@@@@@@@@ const_debug unsigned int sysctl_sched_n
         */
        unsigned int sysctl_sched_shares_ratelimit = 250000;
        
 ++    +/*
 ++    + * Inject some fuzziness into changing the per-cpu group shares;
 ++    + * this avoids remote rq-locks at the expense of fairness.
 ++    + * default: 4
 ++    + */
 ++    +unsigned int sysctl_sched_shares_thresh = 4;
 ++    +
        /*
         * period over which we measure -rt task cpu usage in us.
         * default: 1s
@@@@@@@@@ -969,14 -957,6 -957,6 -969,14 -969,6 -969,14 -969,14 -963,6 +969,14 @@@@@@@@@ static struct rq *task_rq_lock(struct t
                }
        }
        
 ++ +  +void task_rq_unlock_wait(struct task_struct *p)
 ++ +  +{
 ++ +  +        struct rq *rq = task_rq(p);
 ++ +  +
 ++ +  +        smp_mb(); /* spin-unlock-wait is not a full memory barrier */
 ++ +  +        spin_unlock_wait(&rq->lock);
 ++ +  +}
 ++ +  +
        static void __task_rq_unlock(struct rq *rq)
                __releases(rq->lock)
        {
@@@@@@@@@ -1078,7 -1058,7 -1058,7 -1078,7 -1070,7 -1078,7 -1078,7 -1064,7 +1078,7 @@@@@@@@@ static void hrtick_start(struct rq *rq
                struct hrtimer *timer = &rq->hrtick_timer;
                ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
        
 --    -        timer->expires = time;
 ++    +        hrtimer_set_expires(timer, time);
        
                if (rq == this_rq()) {
                        hrtimer_restart(timer);
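
The hrtick and rt-period timers stop writing timer->expires directly and go through the new accessor pair instead. A minimal sketch of the converted pattern, assuming the hrtimer API of this kernel generation; rearm_example() is a made-up name:

        #include <linux/hrtimer.h>
        #include <linux/ktime.h>

        /* illustrative only: re-arm an already initialised hrtimer 'delay' ns from now */
        static void rearm_example(struct hrtimer *timer, u64 delay)
        {
                ktime_t time = ktime_add_ns(timer->base->get_time(), delay);

                hrtimer_set_expires(timer, time);       /* replaces: timer->expires = time */
                hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
        }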
@@@@@@@@@ -1107,7 -1087,7 -1087,7 -1107,7 -1099,7 -1107,7 -1107,7 -1093,7 +1107,7 @@@@@@@@@ hotplug_hrtick(struct notifier_block *n
                return NOTIFY_DONE;
        }
        
 -      static void init_hrtick(void)
 +      static __init void init_hrtick(void)
        {
                hotcpu_notifier(hotplug_hrtick, 0);
        }
@@@@@@@@@ -1122,7 -1102,7 -1102,7 -1122,7 -1114,7 -1122,7 -1122,7 -1108,7 +1122,7 @@@@@@@@@ static void hrtick_start(struct rq *rq
                hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
        }
        
 --     static void init_hrtick(void)
 ++     static inline void init_hrtick(void)
        {
        }
        #endif /* CONFIG_SMP */
@@@@@@@@@ -1139,9 -1119,9 -1119,9 -1139,9 -1131,9 -1139,9 -1139,9 -1125,9 +1139,9 @@@@@@@@@ static void init_rq_hrtick(struct rq *r
        
                hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                rq->hrtick_timer.function = hrtick;
 --             rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 ++             rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
        }
 --     #else
 ++     #else   /* CONFIG_SCHED_HRTICK */
        static inline void hrtick_clear(struct rq *rq)
        {
        }
@@@@@@@@@ -1153,7 -1133,7 -1133,7 -1153,7 -1145,7 -1153,7 -1153,7 -1139,7 +1153,7 @@@@@@@@@ static inline void init_rq_hrtick(struc
        static inline void init_hrtick(void)
        {
        }
 --     #endif
 ++     #endif  /* CONFIG_SCHED_HRTICK */
        
        /*
         * resched_task - mark a task 'to be rescheduled now'.
@@@@@@@@@ -1400,24 -1380,38 -1380,38 -1400,24 -1392,24 -1400,24 -1400,24 -1386,24 +1400,24 @@@@@@@@@ static inline void dec_cpu_load(struct 
                update_load_sub(&rq->load, load);
        }
        
 --     #ifdef CONFIG_SMP
 --     static unsigned long source_load(int cpu, int type);
 --     static unsigned long target_load(int cpu, int type);
 --     static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 --     
 --     static unsigned long cpu_avg_load_per_task(int cpu)
 --     {
 --             struct rq *rq = cpu_rq(cpu);
 --     
 --             if (rq->nr_running)
 --                     rq->avg_load_per_task = rq->load.weight / rq->nr_running;
 --     
 --             return rq->avg_load_per_task;
 --     }
 --     
 --     #ifdef CONFIG_FAIR_GROUP_SCHED
 --     
 --     typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
 ++     #if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
 ++     typedef int (*tg_visitor)(struct task_group *, void *);
        
        /*
         * Iterate the full tree, calling @down when first entering a node and @up when
         * leaving it for the final time.
         */
 --     static void
 --     walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
 ++     static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
        {
                struct task_group *parent, *child;
 ++             int ret;
        
                rcu_read_lock();
                parent = &root_task_group;
        down:
 --             (*down)(parent, cpu, sd);
 ++             ret = (*down)(parent, data);
 ++             if (ret)
 ++                     goto out_unlock;
                list_for_each_entry_rcu(child, &parent->children, siblings) {
                        parent = child;
                        goto down;
        up:
                        continue;
                }
 --             (*up)(parent, cpu, sd);
 ++             ret = (*up)(parent, data);
 ++             if (ret)
 ++                     goto out_unlock;
        
                child = parent;
                parent = parent->parent;
                if (parent)
                        goto up;
 ++     out_unlock:
                rcu_read_unlock();
 ++     
 ++             return ret;
 ++     }
 ++     
 ++     static int tg_nop(struct task_group *tg, void *data)
 ++     {
 ++             return 0;
 ++     }
 ++     #endif
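
walk_tg_tree() now carries an opaque void *data cookie and its visitors return int, so a walk can be aborted as soon as a visitor returns non-zero. A minimal sketch of a visitor written against the new signature, as if it lived next to the helpers above; the counting visitor is purely illustrative and not part of this commit:

        /* count every task group in the hierarchy (illustrative only) */
        static int tg_count(struct task_group *tg, void *data)
        {
                (*(unsigned int *)data)++;
                return 0;                       /* 0 == keep walking */
        }

        static unsigned int count_task_groups(void)
        {
                unsigned int nr = 0;

                /* down = tg_count, up = tg_nop, data = &nr */
                walk_tg_tree(tg_count, tg_nop, &nr);
                return nr;
        }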
 ++     
 ++     #ifdef CONFIG_SMP
 ++     static unsigned long source_load(int cpu, int type);
 ++     static unsigned long target_load(int cpu, int type);
 ++     static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 ++     
 ++     static unsigned long cpu_avg_load_per_task(int cpu)
 ++     {
 ++             struct rq *rq = cpu_rq(cpu);
 ++     
 ++             if (rq->nr_running)
 ++                     rq->avg_load_per_task = rq->load.weight / rq->nr_running;
 ++++  +        else
 ++++  +                rq->avg_load_per_task = 0;
 ++     
 ++             return rq->avg_load_per_task;
        }
        
 ++     #ifdef CONFIG_FAIR_GROUP_SCHED
 ++     
        static void __set_se_shares(struct sched_entity *se, unsigned long shares);
        
        /*
         * Calculate and set the cpu's group shares.
         */
        static void
 --    -__update_group_shares_cpu(struct task_group *tg, int cpu,
 --    -                          unsigned long sd_shares, unsigned long sd_rq_weight)
 ++    +update_group_shares_cpu(struct task_group *tg, int cpu,
 ++    +                        unsigned long sd_shares, unsigned long sd_rq_weight)
        {
                int boost = 0;
                unsigned long shares;
                 *
                 */
                shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
 ++    +        shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
        
 --    -        /*
 --    -         * record the actual number of shares, not the boosted amount.
 --    -         */
 --    -        tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
 --    -        tg->cfs_rq[cpu]->rq_weight = rq_weight;
 ++    +        if (abs(shares - tg->se[cpu]->load.weight) >
 ++    +                        sysctl_sched_shares_thresh) {
 ++    +                struct rq *rq = cpu_rq(cpu);
 ++    +                unsigned long flags;
        
 --    -        if (shares < MIN_SHARES)
 --    -                shares = MIN_SHARES;
 --    -        else if (shares > MAX_SHARES)
 --    -                shares = MAX_SHARES;
 ++    +                spin_lock_irqsave(&rq->lock, flags);
 ++    +                /*
 ++    +                 * record the actual number of shares, not the boosted amount.
 ++    +                 */
 ++    +                tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
 ++    +                tg->cfs_rq[cpu]->rq_weight = rq_weight;
        
 --    -        __set_se_shares(tg->se[cpu], shares);
 ++    +                __set_se_shares(tg->se[cpu], shares);
 ++    +                spin_unlock_irqrestore(&rq->lock, flags);
 ++    +        }
        }
        
        /*
         * This needs to be done in a bottom-up fashion because the rq weight of a
         * parent group depends on the shares of its child groups.
         */
 --     static void
 --     tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
 ++     static int tg_shares_up(struct task_group *tg, void *data)
        {
                unsigned long rq_weight = 0;
                unsigned long shares = 0;
 ++             struct sched_domain *sd = data;
                int i;
        
                for_each_cpu_mask(i, sd->span) {
                if (!rq_weight)
                        rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
        
 --    -        for_each_cpu_mask(i, sd->span) {
 --    -                struct rq *rq = cpu_rq(i);
 --    -                unsigned long flags;
       -
       -                spin_lock_irqsave(&rq->lock, flags);
       -                __update_group_shares_cpu(tg, i, shares, rq_weight);
       -                spin_unlock_irqrestore(&rq->lock, flags);
       -        }
 ++    +        for_each_cpu_mask(i, sd->span)
 ++    +                update_group_shares_cpu(tg, i, shares, rq_weight);
        
 --                     spin_lock_irqsave(&rq->lock, flags);
 --                     __update_group_shares_cpu(tg, i, shares, rq_weight);
 --                     spin_unlock_irqrestore(&rq->lock, flags);
 --             }
 ++             return 0;
        }
        
        /*
         * This needs to be done in a top-down fashion because the load of a child
         * group is a fraction of its parents load.
         */
 --     static void
 --     tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
 ++     static int tg_load_down(struct task_group *tg, void *data)
        {
                unsigned long load;
 ++             long cpu = (long)data;
        
                if (!tg->parent) {
                        load = cpu_rq(cpu)->load.weight;
                }
        
                tg->cfs_rq[cpu]->h_load = load;
 --     }
        
 --     static void
 --     tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
 --     {
 ++             return 0;
        }
        
        static void update_shares(struct sched_domain *sd)
        
                if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
                        sd->last_update = now;
 --                     walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
 ++                     walk_tg_tree(tg_nop, tg_shares_up, sd);
                }
        }
        
@@@@@@@@@ -1594,9 -1561,9 -1561,9 -1592,9 -1584,9 -1594,9 -1594,9 -1580,9 +1594,9 @@@@@@@@@ static void update_shares_locked(struc
                spin_lock(&rq->lock);
        }
        
 --     static void update_h_load(int cpu)
 ++     static void update_h_load(long cpu)
        {
 --             walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
 ++             walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
        }
        
        #else
@@@@@@@@@ -1815,9 -1782,7 -1782,7 -1813,9 -1805,7 -1815,9 -1815,9 -1801,7 +1815,9 @@@@@@@@@ task_hot(struct task_struct *p, u64 now
                /*
                 * Buddy candidates are cache hot:
                 */
 -- -  -        if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next))
 ++ +  +        if (sched_feat(CACHE_HOT_BUDDY) &&
 ++ +  +                        (&p->se == cfs_rq_of(&p->se)->next ||
 ++ +  +                         &p->se == cfs_rq_of(&p->se)->last))
                        return 1;
        
                if (p->sched_class != &fair_sched_class)
@@@@@@@@@ -1953,12 -1918,14 -1918,14 -1951,12 -1941,12 -1953,12 -1953,12 -1937,12 +1953,12 @@@@@@@@@ unsigned long wait_task_inactive(struc
                         * just go back and repeat.
                         */
                        rq = task_rq_lock(p, &flags);
 ++                     trace_sched_wait_task(rq, p);
                        running = task_running(rq, p);
                        on_rq = p->se.on_rq;
                        ncsw = 0;
 --                     if (!match_state || p->state == match_state) {
 --                             ncsw = p->nivcsw + p->nvcsw;
 --                             if (unlikely(!ncsw))
 --                                     ncsw = 1;
 --                     }
 ++                     if (!match_state || p->state == match_state)
 ++                             ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
                        task_rq_unlock(rq, &flags);
        
                        /*
                success = 1;
        
        out_running:
 --             trace_mark(kernel_sched_wakeup,
 --                     "pid %d state %ld ## rq %p task %p rq->curr %p",
 --                     p->pid, p->state, rq, p, rq->curr);
 --             check_preempt_curr(rq, p);
 ++             trace_sched_wakeup(rq, p);
 ++             check_preempt_curr(rq, p, sync);
        
                p->state = TASK_RUNNING;
        #ifdef CONFIG_SMP
@@@@@@@@@ -2448,8 -2417,10 -2417,10 -2446,8 -2436,8 -2448,8 -2448,8 -2432,8 +2448,8 @@@@@@@@@ void wake_up_new_task(struct task_struc
                        p->sched_class->task_new(rq, p);
                        inc_nr_running(rq);
                }
 --             trace_mark(kernel_sched_wakeup_new,
 --                     "pid %d state %ld ## rq %p task %p rq->curr %p",
 --                     p->pid, p->state, rq, p, rq->curr);
 --             check_preempt_curr(rq, p);
 ++             trace_sched_wakeup_new(rq, p);
 ++             check_preempt_curr(rq, p, 0);
        #ifdef CONFIG_SMP
                if (p->sched_class->task_wake_up)
                        p->sched_class->task_wake_up(rq, p);
@@@@@@@@@ -2621,7 -2592,11 -2592,11 -2619,7 -2609,7 -2621,7 -2621,7 -2605,7 +2621,7 @@@@@@@@@ context_switch(struct rq *rq, struct ta
                struct mm_struct *mm, *oldmm;
        
                prepare_task_switch(rq, prev, next);
 --             trace_mark(kernel_sched_schedule,
 --                     "prev_pid %d next_pid %d prev_state %ld "
 --                     "## rq %p prev %p next %p",
 --                     prev->pid, next->pid, prev->state,
 --                     rq, prev, next);
 ++             trace_sched_switch(rq, prev, next);
                mm = next->mm;
                oldmm = prev->active_mm;
                /*
@@@@@@@@@ -2861,7 -2836,6 -2836,6 -2859,7 -2849,7 -2861,7 -2861,7 -2845,7 +2861,7 @@@@@@@@@ static void sched_migrate_task(struct t
                    || unlikely(!cpu_active(dest_cpu)))
                        goto out;
        
 ++             trace_sched_migrate_task(rq, p, dest_cpu);
                /* force the process onto the specified CPU */
                if (migrate_task(p, dest_cpu, &req)) {
                        /* Need to wait for migration thread (might exit: take ref). */
@@@@@@@@@ -2906,7 -2880,7 -2880,7 -2904,7 -2894,7 -2906,7 -2906,7 -2890,7 +2906,7 @@@@@@@@@ static void pull_task(struct rq *src_rq
                 * Note that idle threads have a prio of MAX_PRIO, for this test
                 * to be always true for them.
                 */
 --             check_preempt_curr(this_rq, p);
 ++             check_preempt_curr(this_rq, p, 0);
        }
        
        /*
@@@@@@@@@ -3355,7 -3329,7 -3329,7 -3353,7 -3343,7 -3355,7 -3355,7 -3339,7 +3355,7 @@@@@@@@@ small_imbalance
                        } else
                                this_load_per_task = cpu_avg_load_per_task(this_cpu);
        
 --    -                if (max_load - this_load + 2*busiest_load_per_task >=
 ++    +                if (max_load - this_load + busiest_load_per_task >=
                                                busiest_load_per_task * imbn) {
                                *imbalance = busiest_load_per_task;
                                return busiest;
@@@@@@@@@ -4063,26 -4037,23 -4037,23 -4061,26 -4051,26 -4063,26 -4063,26 -4047,26 +4063,26 @@@@@@@@@ DEFINE_PER_CPU(struct kernel_stat, ksta
        EXPORT_PER_CPU_SYMBOL(kstat);
        
        /*
 --      * Return p->sum_exec_runtime plus any more ns on the sched_clock
 --      * that have not yet been banked in case the task is currently running.
 ++      * Return any ns on the sched_clock that have not yet been banked in
 ++      * @p in case that task is currently running.
         */
 --     unsigned long long task_sched_runtime(struct task_struct *p)
 ++     unsigned long long task_delta_exec(struct task_struct *p)
        {
                unsigned long flags;
 --             u64 ns, delta_exec;
                struct rq *rq;
 ++             u64 ns = 0;
        
                rq = task_rq_lock(p, &flags);
 --             ns = p->se.sum_exec_runtime;
 ++     
                if (task_current(rq, p)) {
 ++                     u64 delta_exec;
 ++     
                        update_rq_clock(rq);
                        delta_exec = rq->clock - p->se.exec_start;
                        if ((s64)delta_exec > 0)
 --                             ns += delta_exec;
 ++                             ns = delta_exec;
                }
 ++     
                task_rq_unlock(rq, &flags);
        
                return ns;
@@@@@@@@@ -4099,7 -4070,6 -4070,6 -4097,7 -4087,7 -4099,7 -4099,7 -4083,7 +4099,7 @@@@@@@@@ void account_user_time(struct task_stru
                cputime64_t tmp;
        
                p->utime = cputime_add(p->utime, cputime);
 ++             account_group_user_time(p, cputime);
        
                /* Add user time to cpustat. */
                tmp = cputime_to_cputime64(cputime);
@@@@@@@@@ -4124,7 -4094,6 -4094,6 -4122,7 -4112,7 -4124,7 -4124,7 -4108,7 +4124,7 @@@@@@@@@ static void account_guest_time(struct t
                tmp = cputime_to_cputime64(cputime);
        
                p->utime = cputime_add(p->utime, cputime);
 ++             account_group_user_time(p, cputime);
                p->gtime = cputime_add(p->gtime, cputime);
        
                cpustat->user = cputime64_add(cpustat->user, tmp);
@@@@@@@@@ -4160,7 -4129,6 -4129,6 -4158,7 -4148,7 -4160,7 -4160,7 -4144,7 +4160,7 @@@@@@@@@ void account_system_time(struct task_st
                }
        
                p->stime = cputime_add(p->stime, cputime);
 ++             account_group_system_time(p, cputime);
        
                /* Add system time to cpustat. */
                tmp = cputime_to_cputime64(cputime);
@@@@@@@@@ -4202,7 -4170,6 -4170,6 -4200,7 -4190,7 -4202,6 -4202,7 -4186,7 +4202,6 @@@@@@@@@ void account_steal_time(struct task_str
        
                if (p == rq->idle) {
                        p->stime = cputime_add(p->stime, steal);
-  -- --                account_group_system_time(p, steal);
                        if (atomic_read(&rq->nr_iowait) > 0)
                                cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                        else
@@@@@@@@@ -4338,7 -4305,7 -4305,7 -4336,7 -4326,7 -4337,7 -4338,7 -4322,7 +4337,7 @@@@@@@@@ void __kprobes sub_preempt_count(int va
                /*
                 * Underflow?
                 */
--- ----        if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+++ ++++       if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
                        return;
                /*
                 * Is the spinlock portion underflowing?
@@@@@@@@@ -4459,8 -4426,12 -4426,12 -4457,8 -4447,8 -4458,8 -4459,8 -4443,12 +4458,8 @@@@@@@@@ need_resched_nonpreemptible
                if (sched_feat(HRTICK))
                        hrtick_clear(rq);
        
 --    -        /*
 --    -         * Do the rq-clock update outside the rq lock:
 --    -         */
 --    -        local_irq_disable();
 ++    +        spin_lock_irq(&rq->lock);
                update_rq_clock(rq);
 --    -        spin_lock(&rq->lock);
                clear_tsk_need_resched(prev);
        
                if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@@@@@@@@ -4656,15 -4627,6 -4627,6 -4654,15 -4644,15 -4655,15 -4656,15 -4644,15 +4655,15 @@@@@@@@@ __wake_up_sync(wait_queue_head_t *q, un
        }
        EXPORT_SYMBOL_GPL(__wake_up_sync);      /* For internal use only */
        
 ++     /**
 ++      * complete: - signals a single thread waiting on this completion
 ++      * @x:  holds the state of this particular completion
 ++      *
 ++      * This will wake up a single thread waiting on this completion. Threads will be
 ++      * awakened in the same order in which they were queued.
 ++      *
 ++      * See also complete_all(), wait_for_completion() and related routines.
 ++      */
        void complete(struct completion *x)
        {
                unsigned long flags;
        }
        EXPORT_SYMBOL(complete);
        
 ++     /**
 ++      * complete_all: - signals all threads waiting on this completion
 ++      * @x:  holds the state of this particular completion
 ++      *
 ++      * This will wake up all threads waiting on this particular completion event.
 ++      */
        void complete_all(struct completion *x)
        {
                unsigned long flags;
@@@@@@@@@ -4702,7 -4658,10 -4658,10 -4700,7 -4690,7 -4701,7 -4702,7 -4690,7 +4701,7 @@@@@@@@@ do_wait_for_common(struct completion *x
                        wait.flags |= WQ_FLAG_EXCLUSIVE;
                        __add_wait_queue_tail(&x->wait, &wait);
                        do {
 --                             if ((state == TASK_INTERRUPTIBLE &&
 --                                  signal_pending(current)) ||
 --                                 (state == TASK_KILLABLE &&
 --                                  fatal_signal_pending(current))) {
 ++                             if (signal_pending_state(state, current)) {
                                        timeout = -ERESTARTSYS;
                                        break;
                                }
@@@@@@@@@ -4730,31 -4689,12 -4689,12 -4728,31 -4718,31 -4729,31 -4730,31 -4718,31 +4729,31 @@@@@@@@@ wait_for_common(struct completion *x, l
                return timeout;
        }
        
 ++     /**
 ++      * wait_for_completion: - waits for completion of a task
 ++      * @x:  holds the state of this particular completion
 ++      *
 ++      * This waits to be signaled for completion of a specific task. It is NOT
 ++      * interruptible and there is no timeout.
 ++      *
 ++      * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
 ++      * and interrupt capability. Also see complete().
 ++      */
        void __sched wait_for_completion(struct completion *x)
        {
                wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
        }
        EXPORT_SYMBOL(wait_for_completion);
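
A minimal usage sketch of the complete()/wait_for_completion() pairing the new kernel-doc describes; setup_done and the thread function are hypothetical names, not part of this commit:

        #include <linux/completion.h>
        #include <linux/kthread.h>

        static DECLARE_COMPLETION(setup_done);

        static int my_setup_thread(void *unused)
        {
                /* ... perform the setup work ... */
                complete(&setup_done);          /* wakes exactly one waiter, in FIFO order */
                return 0;
        }

        static void wait_for_setup(void)
        {
                /* uninterruptible, no timeout; see the *_timeout/_interruptible variants */
                wait_for_completion(&setup_done);
        }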
        
 ++     /**
 ++      * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
 ++      * @x:  holds the state of this particular completion
 ++      * @timeout:  timeout value in jiffies
 ++      *
 ++      * This waits for either a completion of a specific task to be signaled or for a
 ++      * specified timeout to expire. The timeout is in jiffies. It is not
 ++      * interruptible.
 ++      */
        unsigned long __sched
        wait_for_completion_timeout(struct completion *x, unsigned long timeout)
        {
        }
        EXPORT_SYMBOL(wait_for_completion_timeout);
        
 ++     /**
 ++      * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
 ++      * @x:  holds the state of this particular completion
 ++      *
 ++      * This waits for completion of a specific task to be signaled. It is
 ++      * interruptible.
 ++      */
        int __sched wait_for_completion_interruptible(struct completion *x)
        {
                long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
        }
        EXPORT_SYMBOL(wait_for_completion_interruptible);
        
 ++     /**
 ++      * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
 ++      * @x:  holds the state of this particular completion
 ++      * @timeout:  timeout value in jiffies
 ++      *
 ++      * This waits for either a completion of a specific task to be signaled or for a
 ++      * specified timeout to expire. It is interruptible. The timeout is in jiffies.
 ++      */
        unsigned long __sched
        wait_for_completion_interruptible_timeout(struct completion *x,
                                                  unsigned long timeout)
        }
        EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
        
 ++     /**
 ++      * wait_for_completion_killable: - waits for completion of a task (killable)
 ++      * @x:  holds the state of this particular completion
 ++      *
 ++      * This waits to be signaled for completion of a specific task. It can be
 ++      * interrupted by a kill signal.
 ++      */
        int __sched wait_for_completion_killable(struct completion *x)
        {
                long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
                         * Do not allow realtime tasks into groups that have no runtime
                         * assigned.
                         */
 --                     if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
 ++                     if (rt_bandwidth_enabled() && rt_policy(policy) &&
 ++                                     task_group(p)->rt_bandwidth.rt_runtime == 0)
                                return -EPERM;
        #endif
        
@@@@@@@@@ -5870,8 -5787,6 -5787,6 -5868,6 -5858,6 -5869,8 -5870,8 -5858,6 +5869,8 @@@@@@@@@ void __cpuinit init_idle(struct task_st
                struct rq *rq = cpu_rq(cpu);
                unsigned long flags;
        
 ++++  +        spin_lock_irqsave(&rq->lock, flags);
 ++++  +
                __sched_fork(idle);
                idle->se.exec_start = sched_clock();
        
                idle->cpus_allowed = cpumask_of_cpu(cpu);
                __set_task_cpu(idle, cpu);
        
 ----  -        spin_lock_irqsave(&rq->lock, flags);
                rq->curr = rq->idle = idle;
        #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
                idle->oncpu = 1;
@@@@@@@@@ -6041,7 -5957,7 -5957,7 -6038,7 -6028,7 -6040,7 -6041,7 -6028,7 +6040,7 @@@@@@@@@ static int __migrate_task(struct task_s
                set_task_cpu(p, dest_cpu);
                if (on_rq) {
                        activate_task(rq_dest, p, 0);
 --                     check_preempt_curr(rq_dest, p);
 ++                     check_preempt_curr(rq_dest, p, 0);
                }
        done:
                ret = 1;
@@@@@@@@@ -6366,7 -6282,7 -6282,7 -6363,7 -6353,7 -6365,7 -6366,7 -6353,7 +6365,7 @@@@@@@@@ set_table_entry(struct ctl_table *entry
        static struct ctl_table *
        sd_alloc_ctl_domain_table(struct sched_domain *sd)
        {
 --             struct ctl_table *table = sd_alloc_ctl_entry(12);
 ++             struct ctl_table *table = sd_alloc_ctl_entry(13);
        
                if (table == NULL)
                        return NULL;
                        sizeof(int), 0644, proc_dointvec_minmax);
                set_table_entry(&table[10], "flags", &sd->flags,
                        sizeof(int), 0644, proc_dointvec_minmax);
 --             /* &table[11] is terminator */
 ++             set_table_entry(&table[11], "name", sd->name,
 ++                     CORENAME_MAX_SIZE, 0444, proc_dostring);
 ++             /* &table[12] is terminator */
        
                return table;
        }
@@@@@@@@@ -6888,17 -6802,15 -6802,15 -6885,17 -6875,15 -6887,17 -6888,17 -6875,15 +6887,17 @@@@@@@@@ cpu_attach_domain(struct sched_domain *
                struct sched_domain *tmp;
        
                /* Remove the sched domains which do not contribute to scheduling. */
 -- -  -        for (tmp = sd; tmp; tmp = tmp->parent) {
 ++ +  +        for (tmp = sd; tmp; ) {
                        struct sched_domain *parent = tmp->parent;
                        if (!parent)
                                break;
 ++ +  +
                        if (sd_parent_degenerate(tmp, parent)) {
                                tmp->parent = parent->parent;
                                if (parent->parent)
                                        parent->parent->child = tmp;
 -- -  -                }
 ++ +  +                } else
 ++ +  +                        tmp = tmp->parent;
                }
        
                if (sd && sd_degenerate(sd)) {
@@@@@@@@@ -7282,21 -7194,13 -7194,13 -7279,21 -7267,21 -7281,21 -7282,21 -7267,21 +7281,21 @@@@@@@@@ static void init_sched_groups_power(in
         * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
         */
        
 ++     #ifdef CONFIG_SCHED_DEBUG
 ++     # define SD_INIT_NAME(sd, type)         sd->name = #type
 ++     #else
 ++     # define SD_INIT_NAME(sd, type)         do { } while (0)
 ++     #endif
 ++     
        #define SD_INIT(sd, type)       sd_init_##type(sd)
 ++     
        #define SD_INIT_FUNC(type)      \
        static noinline void sd_init_##type(struct sched_domain *sd)    \
        {                                                               \
                memset(sd, 0, sizeof(*sd));                             \
                *sd = SD_##type##_INIT;                                 \
                sd->level = SD_LV_##type;                               \
 ++             SD_INIT_NAME(sd, type);                                 \
        }
        
        SD_INIT_FUNC(CPU)
@@@@@@@@@ -7687,7 -7591,6 -7591,6 -7684,7 -7672,6 -7686,7 -7687,7 -7672,6 +7686,7 @@@@@@@@@ static int __build_sched_domains(const 
        error:
                free_sched_groups(cpu_map, tmpmask);
                SCHED_CPUMASK_FREE((void *)allmasks);
 ++ +  +        kfree(rd);
                return -ENOMEM;
        #endif
        }
@@@@@@@@@ -7789,14 -7692,13 -7692,13 -7786,13 -7773,13 -7788,13 -7789,14 -7773,13 +7788,14 @@@@@@@@@ static int dattrs_equal(struct sched_do
         *
         * The passed in 'doms_new' should be kmalloc'd. This routine takes
         * ownership of it and will kfree it when done with it. If the caller
 ----- - * failed the kmalloc call, then it can pass in doms_new == NULL,
 ----- - * and partition_sched_domains() will fallback to the single partition
 ----- - * 'fallback_doms', it also forces the domains to be rebuilt.
 +++++ + * failed the kmalloc call, then it can pass in doms_new == NULL &&
 +++++ + * ndoms_new == 1, and partition_sched_domains() will fallback to
 +++++ + * the single partition 'fallback_doms', it also forces the domains
 +++++ + * to be rebuilt.
         *
 ----- - * If doms_new==NULL it will be replaced with cpu_online_map.
 ----- - * ndoms_new==0 is a special case for destroying existing domains.
 ----- - * It will not create the default domain.
 +++++ + * If doms_new == NULL it will be replaced with cpu_online_map.
 +++++ + * ndoms_new == 0 is a special case for destroying existing domains,
 +++++ + * and it will not create the default domain.
         *
         * Call with hotplug lock held
         */
@@@@@@@@@ -8340,25 -8242,20 -8242,20 -8336,25 -8323,25 -8338,25 -8340,25 -8323,25 +8339,25 @@@@@@@@@ void __might_sleep(char *file, int line
        #ifdef in_atomic
                static unsigned long prev_jiffy;        /* ratelimiting */
        
 --             if ((in_atomic() || irqs_disabled()) &&
 --                 system_state == SYSTEM_RUNNING && !oops_in_progress) {
 --                     if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
 --                             return;
 --                     prev_jiffy = jiffies;
 --                     printk(KERN_ERR "BUG: sleeping function called from invalid"
 --                                     " context at %s:%d\n", file, line);
 --                     printk("in_atomic():%d, irqs_disabled():%d\n",
 --                             in_atomic(), irqs_disabled());
 --                     debug_show_held_locks(current);
 --                     if (irqs_disabled())
 --                             print_irqtrace_events(current);
 --                     dump_stack();
 --             }
 ++             if ((!in_atomic() && !irqs_disabled()) ||
 ++                         system_state != SYSTEM_RUNNING || oops_in_progress)
 ++                     return;
 ++             if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
 ++                     return;
 ++             prev_jiffy = jiffies;
 ++     
 ++             printk(KERN_ERR
 ++                     "BUG: sleeping function called from invalid context at %s:%d\n",
 ++                             file, line);
 ++             printk(KERN_ERR
 ++                     "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
 ++                             in_atomic(), irqs_disabled(),
 ++                             current->pid, current->comm);
 ++     
 ++             debug_show_held_locks(current);
 ++             if (irqs_disabled())
 ++                     print_irqtrace_events(current);
 ++             dump_stack();
        #endif
        }
        EXPORT_SYMBOL(__might_sleep);
@@@@@@@@@ -8856,95 -8753,73 -8753,73 -8852,95 -8839,95 -8854,95 -8856,95 -8839,95 +8855,95 @@@@@@@@@ static DEFINE_MUTEX(rt_constraints_mute
        static unsigned long to_ratio(u64 period, u64 runtime)
        {
                if (runtime == RUNTIME_INF)
 --                     return 1ULL << 16;
 ++                     return 1ULL << 20;
        
 --             return div64_u64(runtime << 16, period);
 ++             return div64_u64(runtime << 20, period);
        }
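
to_ratio() now works in 20-bit instead of 16-bit fixed point, giving the schedulability checks below finer resolution. A quick worked example with the default global limits (period 1s, runtime 0.95s); the figures are an editor's calculation, not taken from the patch:

        /*
         * Worked example (illustrative): with sysctl_sched_rt_period_us = 1000000
         * and sysctl_sched_rt_runtime_us = 950000,
         *
         *     to_ratio(1000000000, 950000000) = (950000000 << 20) / 1000000000
         *                                     = 996147  of a full scale of 1 << 20 = 1048576
         *
         * i.e. ~95% in 20-bit fixed point; the old 16-bit scale would have
         * yielded 62259 of 65536, so per-group ratios are now compared with
         * 16x more resolution.
         */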
        
 --     #ifdef CONFIG_CGROUP_SCHED
 --     static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 ++     /* Must be called with tasklist_lock held */
 ++     static inline int tg_has_rt_tasks(struct task_group *tg)
        {
 --             struct task_group *tgi, *parent = tg->parent;
 --             unsigned long total = 0;
 ++             struct task_struct *g, *p;
        
 --             if (!parent) {
 --                     if (global_rt_period() < period)
 --                             return 0;
 ++             do_each_thread(g, p) {
 ++                     if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
 ++                             return 1;
 ++             } while_each_thread(g, p);
        
 --                     return to_ratio(period, runtime) <
 --                             to_ratio(global_rt_period(), global_rt_runtime());
 --             }
 ++             return 0;
 ++     }
        
 --             if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
 --                     return 0;
 ++     struct rt_schedulable_data {
 ++             struct task_group *tg;
 ++             u64 rt_period;
 ++             u64 rt_runtime;
 ++     };
        
 --             rcu_read_lock();
 --             list_for_each_entry_rcu(tgi, &parent->children, siblings) {
 --                     if (tgi == tg)
 --                             continue;
 ++     static int tg_schedulable(struct task_group *tg, void *data)
 ++     {
 ++             struct rt_schedulable_data *d = data;
 ++             struct task_group *child;
 ++             unsigned long total, sum = 0;
 ++             u64 period, runtime;
        
 --                     total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
 --                                     tgi->rt_bandwidth.rt_runtime);
 ++             period = ktime_to_ns(tg->rt_bandwidth.rt_period);
 ++             runtime = tg->rt_bandwidth.rt_runtime;
 ++     
 ++             if (tg == d->tg) {
 ++                     period = d->rt_period;
 ++                     runtime = d->rt_runtime;
                }
 --             rcu_read_unlock();
        
 --             return total + to_ratio(period, runtime) <=
 --                     to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
 --                                     parent->rt_bandwidth.rt_runtime);
 --     }
 --     #elif defined CONFIG_USER_SCHED
 --     static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 --     {
 --             struct task_group *tgi;
 --             unsigned long total = 0;
 --             unsigned long global_ratio =
 --                     to_ratio(global_rt_period(), global_rt_runtime());
 ++             /*
 ++              * Cannot have more runtime than the period.
 ++              */
 ++             if (runtime > period && runtime != RUNTIME_INF)
 ++                     return -EINVAL;
        
 --             rcu_read_lock();
 --             list_for_each_entry_rcu(tgi, &task_groups, list) {
 --                     if (tgi == tg)
 --                             continue;
 ++             /*
 ++              * Ensure we don't starve existing RT tasks.
 ++              */
 ++             if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
 ++                     return -EBUSY;
 ++     
 ++             total = to_ratio(period, runtime);
        
 --                     total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
 --                                     tgi->rt_bandwidth.rt_runtime);
 ++             /*
 ++              * Nobody can have more than the global setting allows.
 ++              */
 ++             if (total > to_ratio(global_rt_period(), global_rt_runtime()))
 ++                     return -EINVAL;
 ++     
 ++             /*
 ++              * The sum of our children's runtime should not exceed our own.
 ++              */
 ++             list_for_each_entry_rcu(child, &tg->children, siblings) {
 ++                     period = ktime_to_ns(child->rt_bandwidth.rt_period);
 ++                     runtime = child->rt_bandwidth.rt_runtime;
 ++     
 ++                     if (child == d->tg) {
 ++                             period = d->rt_period;
 ++                             runtime = d->rt_runtime;
 ++                     }
 ++     
 ++                     sum += to_ratio(period, runtime);
                }
 --             rcu_read_unlock();
        
 --             return total + to_ratio(period, runtime) < global_ratio;
 ++             if (sum > total)
 ++                     return -EINVAL;
 ++     
 ++             return 0;
        }
 --     #endif
        
 --     /* Must be called with tasklist_lock held */
 --     static inline int tg_has_rt_tasks(struct task_group *tg)
 ++     static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
        {
 --             struct task_struct *g, *p;
 --             do_each_thread(g, p) {
 --                     if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
 --                             return 1;
 --             } while_each_thread(g, p);
 --             return 0;
 ++             struct rt_schedulable_data data = {
 ++                     .tg = tg,
 ++                     .rt_period = period,
 ++                     .rt_runtime = runtime,
 ++             };
 ++     
 ++             return walk_tg_tree(tg_schedulable, tg_nop, &data);
        }
        
        static int tg_set_bandwidth(struct task_group *tg,
        
                mutex_lock(&rt_constraints_mutex);
                read_lock(&tasklist_lock);
 --             if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
 --                     err = -EBUSY;
 --                     goto unlock;
 --             }
 --             if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
 --                     err = -EINVAL;
 ++             err = __rt_schedulable(tg, rt_period, rt_runtime);
 ++             if (err)
                        goto unlock;
 --             }
        
                spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
                tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@@@@@@@@ -9025,25 -8905,16 -8905,19 -9021,25 -9008,25 -9023,25 -9025,25 -9008,25 +9024,25 @@@@@@@@@ long sched_group_rt_period(struct task_
        
        static int sched_rt_global_constraints(void)
        {
 --             struct task_group *tg = &root_task_group;
 --             u64 rt_runtime, rt_period;
 ++             u64 runtime, period;
                int ret = 0;
        
 -              rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
 -              rt_runtime = tg->rt_bandwidth.rt_runtime;
 +              if (sysctl_sched_rt_period <= 0)
 +                      return -EINVAL;
 +      
  -             rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
  -             rt_runtime = tg->rt_bandwidth.rt_runtime;
 ++             runtime = global_rt_runtime();
 ++             period = global_rt_period();
 ++     
 ++             /*
 ++              * Sanity check on the sysctl variables.
 ++              */
 ++             if (runtime > period && runtime != RUNTIME_INF)
 ++                     return -EINVAL;
        
                mutex_lock(&rt_constraints_mutex);
 --             if (!__rt_schedulable(tg, rt_period, rt_runtime))
 --                     ret = -EINVAL;
 ++             read_lock(&tasklist_lock);
 ++             ret = __rt_schedulable(NULL, 0, 0);
 ++             read_unlock(&tasklist_lock);
                mutex_unlock(&rt_constraints_mutex);
        
                return ret;
@@@@@@@@@ -9054,9 -8925,6 -8928,9 -9050,9 -9037,9 -9052,9 -9054,9 -9037,9 +9053,9 @@@@@@@@@ static int sched_rt_global_constraints(
                unsigned long flags;
                int i;
        
 +              if (sysctl_sched_rt_period <= 0)
 +                      return -EINVAL;
 +      
                spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
                for_each_possible_cpu(i) {
                        struct rt_rq *rt_rq = &cpu_rq(i)->rt;
@@@@@@@@@ -9117,6 -8985,7 -8991,7 -9113,6 -9100,6 -9115,6 -9117,6 -9100,6 +9116,6 @@@@@@@@@ cpu_cgroup_create(struct cgroup_subsys 
        
                if (!cgrp->parent) {
                        /* This is early initialization for the top cgroup */
 --                     init_task_group.css.cgroup = cgrp;
                        return &init_task_group.css;
                }
        
                if (IS_ERR(tg))
                        return ERR_PTR(-ENOMEM);
        
 --             /* Bind the cgroup to task_group object we just created */
 --             tg->css.cgroup = cgrp;
 --     
                return &tg->css;
        }
        
diff --combined kernel/softlockup.c
index 3953e4aed733d32284f48ecfffa1fbff05e234c6,b9a528f22736adcfa78d9cd99209ba286d585a54,cb838ee93a82000bc5b313271487836e9fb5804e,3953e4aed733d32284f48ecfffa1fbff05e234c6,3953e4aed733d32284f48ecfffa1fbff05e234c6,3953e4aed733d32284f48ecfffa1fbff05e234c6,3953e4aed733d32284f48ecfffa1fbff05e234c6,3953e4aed733d32284f48ecfffa1fbff05e234c6..884e6cd2769c348a846d103122b6e49e20acae85
@@@@@@@@@ -164,7 -164,7 -164,7 -164,7 -164,7 -164,7 -164,7 -164,7 +164,7 @@@@@@@@@ unsigned long __read_mostly sysctl_hung
        /*
         * Zero means infinite timeout - no checking done:
         */
- ------unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+ ++++++unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
        
        unsigned long __read_mostly sysctl_hung_task_warnings = 10;
        
@@@@@@@@@ -226,7 -226,7 -226,7 -226,7 -226,7 -226,7 -226,7 -226,7 +226,7 @@@@@@@@@ static void check_hung_uninterruptible_
                 * If the system crashed already then all bets are off,
                 * do not report extra hung tasks:
                 */
 --             if ((tainted & TAINT_DIE) || did_panic)
 ++             if (test_taint(TAINT_DIE) || did_panic)
                        return;
        
                read_lock(&tasklist_lock);
diff --combined lib/Kconfig.debug
index b0f239e443bc0fbb11a27ee98dbaf4e641d21971,4116e10ea14ace405520b27d41c12e2b751e63b6,0b504814e378067ff120b266e5b26c9fdd6fb90e,b0f239e443bc0fbb11a27ee98dbaf4e641d21971,b0f239e443bc0fbb11a27ee98dbaf4e641d21971,b0f239e443bc0fbb11a27ee98dbaf4e641d21971,b0f239e443bc0fbb11a27ee98dbaf4e641d21971,b0f239e443bc0fbb11a27ee98dbaf4e641d21971..1e3fd3e3436abf6e142c9f0b46cc6993cd19ec38
@@@@@@@@@ -495,15 -495,6 -495,6 -495,15 -495,15 -495,15 -495,15 -495,15 +495,15 @@@@@@@@@ config DEBUG_V
        
                  If unsure, say N.
        
 ++     config DEBUG_VIRTUAL
 ++             bool "Debug VM translations"
 ++             depends on DEBUG_KERNEL && X86
 ++             help
 ++               Enable some costly sanity checks in virtual to page code. This can
 ++               catch mistakes with virt_to_page() and friends.
 ++     
 ++               If unsure, say N.
 ++     
        config DEBUG_WRITECOUNT
                bool "Debug filesystem writers count"
                depends on DEBUG_KERNEL
@@@@@@@@@ -545,6 -536,16 -536,6 -545,6 -545,6 -545,6 -545,6 -545,6 +545,16 @@@@@@@@@ config DEBUG_S
        
                  If unsure, say N.
        
+ ++++++config DEBUG_NOTIFIERS
+ ++++++        bool "Debug notifier call chains"
+ ++++++        depends on DEBUG_KERNEL
+ ++++++        help
+ ++++++          Enable this to turn on sanity checking for notifier call chains.
+ ++++++          This is most useful for kernel developers to make sure that
+ ++++++          modules properly unregister themselves from notifier chains.
+ ++++++          This is a relatively cheap check but if you care about maximum
+ ++++++          performance, say N.
+ ++++++
        config FRAME_POINTER
                bool "Compile the kernel with frame pointers"
                depends on DEBUG_KERNEL && \
@@@@@@@@@ -606,19 -607,6 -597,6 -606,19 -606,19 -606,19 -606,19 -606,19 +616,19 @@@@@@@@@ config RCU_TORTURE_TEST_RUNNABL
                  Say N here if you want the RCU torture tests to start only
                  after being manually enabled via /proc.
        
 ++     config RCU_CPU_STALL_DETECTOR
 ++             bool "Check for stalled CPUs delaying RCU grace periods"
 ++             depends on CLASSIC_RCU
 ++             default n
 ++             help
 ++               This option causes RCU to printk information on which
 ++               CPUs are delaying the current grace period, but only when
 ++               the grace period extends for excessive time periods.
 ++     
 ++               Say Y if you want RCU to perform such checks.
 ++     
 ++               Say N if you are unsure.
 ++     
        config KPROBES_SANITY_TEST
                bool "Kprobes sanity tests"
                depends on DEBUG_KERNEL
@@@@@@@@@ -646,33 -634,6 -624,6 -646,33 -646,33 -646,33 -646,33 -646,33 +656,33 @@@@@@@@@ config BACKTRACE_SELF_TES
        
                  Say N if you are unsure.
        
 ++     config DEBUG_BLOCK_EXT_DEVT
 ++             bool "Force extended block device numbers and spread them"
 ++             depends on DEBUG_KERNEL
 ++             depends on BLOCK
 ++             default n
 ++             help
 ++               BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
 ++               SOME DISTRIBUTIONS.  DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
 ++               YOU ARE DOING.  Distros, please enable this and fix whatever
 ++               is broken.
 ++     
 ++               Conventionally, block device numbers are allocated from
 ++               predetermined contiguous area.  However, extended block area
 ++               may introduce non-contiguous block device numbers.  This
 ++               option forces most block device numbers to be allocated from
 ++               the extended space and spreads them to discover kernel or
 ++               userland code paths which assume predetermined contiguous
 ++               device number allocation.
 ++     
 ++               Note that turning on this debug option shuffles all the
 ++               device numbers for all IDE and SCSI devices including libata
 ++               ones, so root partition specified using device number
 ++               directly (via rdev or root=MAJ:MIN) won't work anymore.
 ++               Textual device names (root=/dev/sdXn) will continue to work.
 ++     
 ++               Say N if you are unsure.
 ++     
        config LKDTM
                tristate "Linux Kernel Dump Test Tool Module"
                depends on DEBUG_KERNEL
@@@@@@@@@ -710,21 -671,10 -661,10 -710,21 -710,21 -710,21 -710,21 -710,21 +720,21 @@@@@@@@@ config FAIL_PAGE_ALLO
        
        config FAIL_MAKE_REQUEST
                bool "Fault-injection capability for disk IO"
 --             depends on FAULT_INJECTION
 ++             depends on FAULT_INJECTION && BLOCK
                help
                  Provide fault-injection capability for disk IO.
        
 ++     config FAIL_IO_TIMEOUT
 ++             bool "Faul-injection capability for faking disk interrupts"
 ++             depends on FAULT_INJECTION && BLOCK
 ++             help
 ++               Provide fault-injection capability on end IO handling. This
 ++               will make the block layer "forget" an interrupt as configured,
 ++               thus exercising the error handling.
 ++     
 ++               Only works with drivers that use the generic timeout handling,
 ++               for others it won't do anything.
 ++     
        config FAULT_INJECTION_DEBUG_FS
                bool "Debugfs entries for fault-injection capabilities"
                depends on FAULT_INJECTION && SYSFS && DEBUG_FS
@@@@@@@@@ -812,61 -762,6 -752,6 -812,61 -812,61 -812,61 -812,61 -812,61 +822,61 @@@@@@@@@ menuconfig BUILD_DOCSR
        
                  Say N if you are unsure.
        
 ++     config DYNAMIC_PRINTK_DEBUG
 ++             bool "Enable dynamic printk() call support"
 ++             default n
 ++             depends on PRINTK
 ++             select PRINTK_DEBUG
 ++             help
 ++     
 ++               Compiles debug level messages into the kernel, which would not
 ++               otherwise be available at runtime. These messages can then be
 ++               enabled/disabled on a per module basis. This mechanism implicitly
 ++               enables all pr_debug() and dev_dbg() calls. The impact of this
 ++               compile option is a larger kernel text size of about 2%.
 ++     
 ++               Usage:
 ++     
 ++               Dynamic debugging is controlled by the debugfs file,
 ++               dynamic_printk/modules. This file contains a list of the modules that
 ++               can be enabled. The format of the file is the module name, followed
 ++               by a set of flags that can be enabled. The first flag is always the
 ++               'enabled' flag. For example:
 ++     
 ++                     <module_name> <enabled=0/1>
 ++                                     .
 ++                                     .
 ++                                     .
 ++     
 ++               <module_name> : Name of the module in which the debug call resides
 ++               <enabled=0/1> : whether the messages are enabled or not
 ++     
 ++               From a live system:
 ++     
 ++                     snd_hda_intel enabled=0
 ++                     fixup enabled=0
 ++                     driver enabled=0
 ++     
 ++               Enable a module:
 ++     
 ++                     $echo "set enabled=1 <module_name>" > dynamic_printk/modules
 ++     
 ++               Disable a module:
 ++     
 ++                     $echo "set enabled=0 <module_name>" > dynamic_printk/modules
 ++     
 ++               Enable all modules:
 ++     
 ++                     $echo "set enabled=1 all" > dynamic_printk/modules
 ++     
 ++               Disable all modules:
 ++     
 ++                     $echo "set enabled=0 all" > dynamic_printk/modules
 ++     
 ++               Finally, passing "dynamic_printk" at the command line enables
 ++               debugging for all modules. This mode can be turned off via the above
 ++               disable command.
 ++     
        source "samples/Kconfig"
        
        source "lib/Kconfig.kgdb"