From: Ingo Molnar
Date: Mon, 24 Nov 2008 16:44:55 +0000 (+0100)
Subject: Merge branches 'core/debug', 'core/futexes', 'core/locking', 'core/rcu', 'core/signal...
X-Git-Tag: v2.6.29-rc1~572^2~4
X-Git-Url: http://www.pilppa.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=b19b3c74c7bbec45a848631b8f970ac110665a01;hp=-c;p=linux-2.6-omap-h63xx.git

Merge branches 'core/debug', 'core/futexes', 'core/locking', 'core/rcu', 'core/signal', 'core/urgent' and 'core/xen' into core/core
---

b19b3c74c7bbec45a848631b8f970ac110665a01
diff --combined arch/x86/include/asm/uaccess_64.h
index f8cfd00db45,515d4dce96b,515d4dce96b,543ba883cc6,664f15280f1,664f15280f1,f8cfd00db45,c96c1f5d07a..84210c479fc
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@@@@@@@@ -1,5 -1,5 -1,5 -1,5 -1,5 -1,5 -1,5 -1,5 +1,5 @@@@@@@@@
-- #ifndef __X86_64_UACCESS_H
-- #define __X86_64_UACCESS_H
 -#ifndef ASM_X86__UACCESS_64_H
 -#define ASM_X86__UACCESS_64_H
++ +#ifndef _ASM_X86_UACCESS_64_H
++ +#define _ASM_X86_UACCESS_64_H
 /*
  * User space memory access functions
@@@@@@@@@ -7,7 -7,6 -7,6 -7,7 -7,7 -7,7 -7,7 -7,7 +7,7 @@@@@@@@@
 #include
 #include
 #include
++ #include
 #include
 /*
@@@@@@@@@ -29,6 -28,6 -28,6 -29,8 -29,6 -29,6 -29,6 -29,6 +29,8 @@@@@@@@@ static __always_inline __must_chec
 int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
+++ ++++
+++ ++++	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic(dst, (__force void *)src, size);
 	switch (size) {
@@@@@@@@@ -46,7 -45,7 -45,7 -48,7 -46,7 -46,7 -46,7 -46,7 +48,7 @@@@@@@@@
 		return ret;
 	case 10:
 		__get_user_asm(*(u64 *)dst, (u64 __user *)src,
----- -			       ret, "q", "", "=r", 16);
+++++ +			       ret, "q", "", "=r", 10);
 		if (unlikely(ret))
 			return ret;
 		__get_user_asm(*(u16 *)(8 + (char *)dst),
@@@@@@@@@ -71,6 -70,6 -70,6 -73,8 -71,6 -71,6 -71,6 -71,6 +73,8 @@@@@@@@@ static __always_inline __must_chec
 int __copy_to_user(void __user *dst, const void *src, unsigned size)
 {
 	int ret = 0;
+++ ++++
+++ ++++	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst, src, size);
 	switch (size) {
@@@@@@@@@ -113,6 -112,6 -112,6 -117,8 -113,6 -113,6 -113,6 -113,6 +117,8 @@@@@@@@@ static __always_inline __must_chec
 int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
+++ ++++
+++ ++++	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst,
 					 (__force void *)src, size);
@@@@@@@@@ -199,4 -198,4 -198,4 -205,4 -199,4 -199,4 -199,4 -199,4 +205,4 @@@@@@@@@ static inline int __copy_from_user_inat
 unsigned long
 copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
-- #endif /* __X86_64_UACCESS_H */
 -#endif /* ASM_X86__UACCESS_64_H */
++ +#endif /* _ASM_X86_UACCESS_64_H */
diff --combined include/linux/kernel.h
index dc7e0d0a647,3f30557be2a,2651f805ba6,69a9bfdf9c8,fba141d3ca0,fba141d3ca0,dc7e0d0a647,94d17ff64c5..269df5a17b3
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@@@@@@@@ -16,7 -16,6 -16,6 -16,7 -16,7 -16,7 -16,7 -16,7 +16,7 @@@@@@@@@
 #include
 #include
 #include
++ #include
 #include
 #include
@@@@@@@@@ -116,8 -115,6 -115,6 -116,8 -116,8 -116,8 -116,8 -116,6 +116,8 @@@@@@@@@ extern int _cond_resched(void)
 # define might_resched() do { } while (0)
 #endif
++ +#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
++ + void __might_sleep(char *file, int line);
 /**
  * might_sleep - annotation for functions that can sleep
  *
@@@@@@@@@ -128,6 -125,8 -125,8 -128,6 -128,6 -128,6 -128,6 -126,8 +128,6 @@@@@@@@@
  * be bitten later when
the calling function happens to sleep when it is not * supposed to. */ -- -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP -- - void __might_sleep(char *file, int line); # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) #else @@@@@@@@@ -141,6 -140,6 -140,6 -141,15 -141,6 -141,6 -141,6 -141,6 +141,15 @@@@@@@@@ (__x < 0) ? -__x : __x; \ }) +++ ++++#ifdef CONFIG_PROVE_LOCKING +++ ++++void might_fault(void); +++ ++++#else +++ ++++static inline void might_fault(void) +++ ++++{ +++ ++++ might_sleep(); +++ ++++} +++ ++++#endif +++ ++++ extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(long time); NORET_TYPE void panic(const char * fmt, ...) @@@@@@@@@ -183,38 -182,16 -182,14 -192,38 -183,38 -183,38 -183,38 -183,14 +192,40 @@@@@@@@@ extern int vsscanf(const char *, const extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); -- extern unsigned long long memparse(char *ptr, char **retptr); ++ extern unsigned long long memparse(const char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); + ++++++extern int func_ptr_is_kernel_text(void *ptr); + ++++++ struct pid; extern struct pid *session_of_pgrp(struct pid *pgrp); ++ +/* ++ + * FW_BUG ++ + * Add this to a message where you are sure the firmware is buggy or behaves ++ + * really stupid or out of spec. Be aware that the responsible BIOS developer ++ + * should be able to fix this issue or at least get a concrete idea of the ++ + * problem by reading your message without the need of looking at the kernel ++ + * code. ++ + * ++ + * Use it for definite and high priority BIOS bugs. ++ + * ++ + * FW_WARN ++ + * Use it for not that clear (e.g. could the kernel messed up things already?) ++ + * and medium priority BIOS bugs. ++ + * ++ + * FW_INFO ++ + * Use this one if you want to tell the user or vendor about something ++ + * suspicious, but generally harmless related to the firmware. ++ + * ++ + * Use it for information or very low priority BIOS bugs. 
++ + */ ++ +#define FW_BUG "[Firmware Bug]: " ++ +#define FW_WARN "[Firmware Warn]: " ++ +#define FW_INFO "[Firmware Info]: " ++ + #ifdef CONFIG_PRINTK asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); @@@@@@@@@ -238,9 -215,6 -213,6 -247,9 -238,9 -238,9 -238,9 -214,9 +249,9 @@@@@@@@@ static inline bool printk_timed_ratelim { return false; } #endif ++ extern int printk_needs_cpu(int cpu); ++ extern void printk_tick(void); ++ extern void asmlinkage __attribute__((format(printf, 1, 2))) early_printk(const char *fmt, ...); @@@@@@@@@ -263,10 -237,9 -235,9 -272,10 -263,10 -263,10 -263,10 -239,10 +274,10 @@@@@@@@@ extern int oops_in_progress; /* If set extern int panic_timeout; extern int panic_on_oops; extern int panic_on_unrecovered_nmi; -- extern int tainted; extern const char *print_tainted(void); -- extern void add_taint(unsigned); ++ extern void add_taint(unsigned flag); ++ extern int test_taint(unsigned flag); ++ extern unsigned long get_taint(void); extern int root_mountflags; /* Values used for system_state */ @@@@@@@@@ -279,17 -252,16 -250,16 -288,17 -279,17 -279,17 -279,17 -255,17 +290,17 @@@@@@@@@ extern enum system_states SYSTEM_SUSPEND_DISK, } system_state; -- #define TAINT_PROPRIETARY_MODULE (1<<0) -- #define TAINT_FORCED_MODULE (1<<1) -- #define TAINT_UNSAFE_SMP (1<<2) -- #define TAINT_FORCED_RMMOD (1<<3) -- #define TAINT_MACHINE_CHECK (1<<4) -- #define TAINT_BAD_PAGE (1<<5) -- #define TAINT_USER (1<<6) -- #define TAINT_DIE (1<<7) -- #define TAINT_OVERRIDDEN_ACPI_TABLE (1<<8) -- #define TAINT_WARN (1<<9) ++ #define TAINT_PROPRIETARY_MODULE 0 ++ #define TAINT_FORCED_MODULE 1 ++ #define TAINT_UNSAFE_SMP 2 ++ #define TAINT_FORCED_RMMOD 3 ++ #define TAINT_MACHINE_CHECK 4 ++ #define TAINT_BAD_PAGE 5 ++ #define TAINT_USER 6 ++ #define TAINT_DIE 7 ++ #define TAINT_OVERRIDDEN_ACPI_TABLE 8 ++ #define TAINT_WARN 9 ++ #define TAINT_CRAP 10 extern void dump_stack(void) __cold; @@@@@@@@@ -318,36 -290,28 -288,28 -327,32 -318,32 -318,32 -318,36 -294,32 +329,36 @@@@@@@@@ static inline char *pack_hex_byte(char return buf; } ----- -#define pr_emerg(fmt, arg...) \ ----- - printk(KERN_EMERG fmt, ##arg) ----- -#define pr_alert(fmt, arg...) \ ----- - printk(KERN_ALERT fmt, ##arg) ----- -#define pr_crit(fmt, arg...) \ ----- - printk(KERN_CRIT fmt, ##arg) ----- -#define pr_err(fmt, arg...) \ ----- - printk(KERN_ERR fmt, ##arg) ----- -#define pr_warning(fmt, arg...) \ ----- - printk(KERN_WARNING fmt, ##arg) ----- -#define pr_notice(fmt, arg...) \ ----- - printk(KERN_NOTICE fmt, ##arg) ----- -#define pr_info(fmt, arg...) \ ----- - printk(KERN_INFO fmt, ##arg) -- -- #ifdef DEBUG +++++ +#ifndef pr_fmt +++++ +#define pr_fmt(fmt) fmt +++++ +#endif +++++ + +++++ +#define pr_emerg(fmt, ...) \ +++++ + printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +++++ +#define pr_alert(fmt, ...) \ +++++ + printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +++++ +#define pr_crit(fmt, ...) \ +++++ + printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +++++ +#define pr_err(fmt, ...) \ +++++ + printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +++++ +#define pr_warning(fmt, ...) \ +++++ + printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +++++ +#define pr_notice(fmt, ...) \ +++++ + printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +++++ +#define pr_info(fmt, ...) \ +++++ + printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) ++ /* If you are writing a driver, please use dev_dbg instead */ -- #define pr_debug(fmt, arg...) 
\ -- printk(KERN_DEBUG fmt, ##arg) ++ #if defined(CONFIG_DYNAMIC_PRINTK_DEBUG) ++ #define pr_debug(fmt, ...) do { \ --- - dynamic_pr_debug(fmt, ##__VA_ARGS__); \ +++++ + dynamic_pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ ++ } while (0) ++ #elif defined(DEBUG) --- -#define pr_debug(fmt, arg...) \ --- - printk(KERN_DEBUG fmt, ##arg) +++++ +#define pr_debug(fmt, ...) \ +++++ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #else ----- -#define pr_debug(fmt, arg...) \ ----- - ({ if (0) printk(KERN_DEBUG fmt, ##arg); 0; }) +++++ +#define pr_debug(fmt, ...) \ +++++ + ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) #endif /* @@@@@@@@@ -524,9 -488,4 -486,4 -529,9 -520,9 -520,9 -524,9 -496,9 +535,9 @@@@@@@@@ struct sysinfo #define NUMA_BUILD 0 #endif ++ /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ ++ #ifdef CONFIG_FTRACE_MCOUNT_RECORD ++ # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD ++ #endif ++ #endif diff --combined kernel/exit.c index 2d8be7ebb0f,16395644a98,85a83c83185,ae2b92be5fa,80137a5d946,b9c4d8bb72e,2d8be7ebb0f,80137a5d946..30fcdf16737 --- a/kernel/exit.c +++ b/kernel/exit.c @@@@@@@@@ -40,13 -40,13 -40,13 -40,14 -40,14 -40,14 -40,13 -40,14 +40,13 @@@@@@@@@ #include #include #include ----- -#include #include #include /* for audit_free() */ #include #include #include #include ++ #include #include #include @@@@@@@@@ -112,6 -112,8 -112,8 -113,6 -113,6 -113,6 -112,6 -113,6 +112,6 @@@@@@@@@ static void __exit_signal(struct task_s * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ -- sig->utime = cputime_add(sig->utime, task_utime(tsk)); -- sig->stime = cputime_add(sig->stime, task_stime(tsk)); sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; @@@@@@@@@ -120,6 -122,7 -122,7 -121,6 -121,6 -121,6 -120,6 -121,6 +120,6 @@@@@@@@@ sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig = NULL; /* Marker for below. */ } @@@@@@@@@ -140,21 -143,13 -143,13 -141,21 -141,16 -141,21 -140,21 -141,16 +140,21 @@@@@@@@@ if (sig) { flush_sigqueue(&sig->shared_pending); taskstats_tgid_free(sig); ++ + + /* ++ + + * Make sure ->signal can't go away under rq->lock, ++ + + * see account_group_exec_runtime(). ++ + + */ ++ + + task_rq_unlock_wait(tsk); __cleanup_signal(sig); } } static void delayed_put_task_struct(struct rcu_head *rhp) { -- put_task_struct(container_of(rhp, struct task_struct, rcu)); ++ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); ++ ++ trace_sched_process_free(tsk); ++ put_task_struct(tsk); } @@@@@@@@@ -588,6 -583,8 -583,6 -589,6 -584,6 -589,6 -588,6 -584,6 +588,6 @@@@@@@@@ mm_need_new_owner(struct mm_struct *mm * If there are other users of the mm and the owner (us) is exiting * we need to find a new owner to take on the responsibility. */ - if (!mm) - return 0; if (atomic_read(&mm->mm_users) <= 1) return 0; if (mm->owner != p) @@@@@@@@@ -630,38 -627,29 -625,39 -631,38 -626,38 -631,38 -630,38 -626,38 +630,38 @@@@@@@@@ retry } while_each_thread(g, c); read_unlock(&tasklist_lock); + /* + * We found no owner yet mm_users > 1: this implies that we are + * most likely racing with swapoff (try_to_unuse()) or /proc or + * ptrace or page migration (get_task_mm()). Mark owner as NULL, + * so that subsystems can understand the callback and take action. 
+ */ + down_write(&mm->mmap_sem); + cgroup_mm_owner_callbacks(mm->owner, NULL); + mm->owner = NULL; + up_write(&mm->mmap_sem); return; assign_new_owner: BUG_ON(c == p); get_task_struct(c); ++ read_unlock(&tasklist_lock); ++ down_write(&mm->mmap_sem); /* * The task_lock protects c->mm from changing. * We always want mm->owner->mm == mm */ task_lock(c); -- /* -- * Delay read_unlock() till we have the task_lock() -- * to ensure that c does not slip away underneath us -- */ -- read_unlock(&tasklist_lock); if (c->mm != mm) { task_unlock(c); ++ up_write(&mm->mmap_sem); put_task_struct(c); goto retry; } cgroup_mm_owner_callbacks(mm->owner, c); mm->owner = c; task_unlock(c); ++ up_write(&mm->mmap_sem); put_task_struct(c); } #endif /* CONFIG_MM_OWNER */ @@@@@@@@@ -1058,6 -1046,14 -1054,14 -1059,14 -1054,14 -1059,14 -1058,6 -1054,14 +1058,6 @@@@@@@@@ NORET_TYPE void do_exit(long code exit_itimers(tsk->signal); } acct_collect(code, group_dead); ----- -#ifdef CONFIG_FUTEX ----- - if (unlikely(tsk->robust_list)) ----- - exit_robust_list(tsk); ----- -#ifdef CONFIG_COMPAT ----- - if (unlikely(tsk->compat_robust_list)) ----- - compat_exit_robust_list(tsk); ----- -#endif ----- -#endif if (group_dead) tty_audit_exit(); if (unlikely(tsk->audit_context)) @@@@@@@@@ -1070,8 -1066,6 -1074,6 -1079,8 -1074,8 -1079,8 -1070,8 -1074,8 +1070,8 @@@@@@@@@ if (group_dead) acct_process(); ++ trace_sched_process_exit(tsk); ++ exit_sem(tsk); exit_files(tsk); exit_fs(tsk); @@@@@@@@@ -1300,7 -1294,6 -1302,6 -1309,7 -1304,7 -1309,7 -1300,7 -1304,7 +1300,7 @@@@@@@@@ static int wait_task_zombie(struct task if (likely(!traced)) { struct signal_struct *psig; struct signal_struct *sig; ++ struct task_cputime cputime; /* * The resource counters for the group leader are in its @@@@@@@@@ -1316,23 -1309,20 -1317,20 -1325,23 -1320,23 -1325,23 -1316,23 -1320,23 +1316,23 @@@@@@@@@ * need to protect the access to p->parent->signal fields, * as other threads in the parent group can be right * here reaping other children at the same time. ++ * ++ * We use thread_group_cputime() to get times for the thread ++ * group, which consolidates times for all threads in the ++ * group including the group leader. 
*/ +++++ ++ thread_group_cputime(p, &cputime); spin_lock_irq(&p->parent->sighand->siglock); psig = p->parent->signal; sig = p->signal; - -- -- thread_group_cputime(p, &cputime); psig->cutime = cputime_add(psig->cutime, -- cputime_add(p->utime, -- cputime_add(sig->utime, -- sig->cutime))); ++ cputime_add(cputime.utime, ++ sig->cutime)); psig->cstime = cputime_add(psig->cstime, -- cputime_add(p->stime, -- cputime_add(sig->stime, -- sig->cstime))); ++ cputime_add(cputime.stime, ++ sig->cstime)); psig->cgtime = cputime_add(psig->cgtime, cputime_add(p->gtime, @@@@@@@@@ -1677,8 -1667,6 -1675,6 -1686,8 -1681,8 -1686,8 -1677,8 -1681,8 +1677,8 @@@@@@@@@ static long do_wait(enum pid_type type struct task_struct *tsk; int retval; ++ trace_sched_process_wait(pid); ++ add_wait_queue(¤t->signal->wait_chldexit,&wait); repeat: /* diff --combined kernel/futex.c index 8af10027514,7d1136e97c1,62cbd648e28,8af10027514,8af10027514,8af10027514,8af10027514,7d1136e97c1..e10c5c8786a --- a/kernel/futex.c +++ b/kernel/futex.c @@@@@@@@@ -122,24 -122,24 -122,6 -122,24 -122,24 -122,24 -122,24 -122,24 +122,6 @@@@@@@@@ struct futex_hash_bucket static struct futex_hash_bucket futex_queues[1<mmap_sem, when futex is shared -- ----- */ -- -----static inline void futex_lock_mm(struct rw_semaphore *fshared) -- -----{ -- ----- if (fshared) -- ----- down_read(fshared); -- -----} -- ----- -- -----/* -- ----- * Release mm->mmap_sem, when the futex is shared -- ----- */ -- -----static inline void futex_unlock_mm(struct rw_semaphore *fshared) -- -----{ -- ----- if (fshared) -- ----- up_read(fshared); -- -----} -- ----- /* * We hash on the keys returned from get_futex_key (see below). */ @@@@@@@@@ -161,6 -161,6 -143,45 -161,6 -161,6 -161,6 -161,6 -161,6 +143,45 @@@@@@@@@ static inline int match_futex(union fut && key1->both.offset == key2->both.offset); } ++ +++++/* ++ +++++ * Take a reference to the resource addressed by a key. ++ +++++ * Can be called while holding spinlocks. ++ +++++ * ++ +++++ */ ++ +++++static void get_futex_key_refs(union futex_key *key) ++ +++++{ ++ +++++ if (!key->both.ptr) ++ +++++ return; ++ +++++ ++ +++++ switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { ++ +++++ case FUT_OFF_INODE: ++ +++++ atomic_inc(&key->shared.inode->i_count); ++ +++++ break; ++ +++++ case FUT_OFF_MMSHARED: ++ +++++ atomic_inc(&key->private.mm->mm_count); ++ +++++ break; ++ +++++ } ++ +++++} ++ +++++ ++ +++++/* ++ +++++ * Drop a reference to the resource addressed by a key. ++ +++++ * The hash bucket spinlock must not be held. ++ +++++ */ ++ +++++static void drop_futex_key_refs(union futex_key *key) ++ +++++{ ++ +++++ if (!key->both.ptr) ++ +++++ return; ++ +++++ ++ +++++ switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { ++ +++++ case FUT_OFF_INODE: ++ +++++ iput(key->shared.inode); ++ +++++ break; ++ +++++ case FUT_OFF_MMSHARED: ++ +++++ mmdrop(key->private.mm); ++ +++++ break; ++ +++++ } ++ +++++} ++ +++++ /** * get_futex_key - Get parameters which are the keys for a futex. * @uaddr: virtual address of the futex @@@@@@@@@ -179,12 -179,12 -200,10 -179,12 -179,12 -179,12 -179,12 -179,12 +200,10 @@@@@@@@@ * For other futexes, it points to ¤t->mm->mmap_sem and * caller must have taken the reader lock. but NOT any spinlocks. 
*/ -- -----static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, -- ----- union futex_key *key) ++ +++++static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; -- ----- struct vm_area_struct *vma; struct page *page; int err; @@@@@@@@@ -208,100 -208,100 -227,50 -208,100 -208,100 -208,100 -208,100 -208,100 +227,50 @@@@@@@@@ return -EFAULT; key->private.mm = mm; key->private.address = address; ++ +++++ get_futex_key_refs(key); return 0; } -- ----- /* -- ----- * The futex is hashed differently depending on whether -- ----- * it's in a shared or private mapping. So check vma first. -- ----- */ -- ----- vma = find_extend_vma(mm, address); -- ----- if (unlikely(!vma)) -- ----- return -EFAULT; -- ----- /* -- ----- * Permissions. -- ----- */ -- ----- if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) -- ----- return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; ++ +++++again: ++ +++++ err = get_user_pages_fast(address, 1, 0, &page); ++ +++++ if (err < 0) ++ +++++ return err; ++ +++++ ++ +++++ lock_page(page); ++ +++++ if (!page->mapping) { ++ +++++ unlock_page(page); ++ +++++ put_page(page); ++ +++++ goto again; ++ +++++ } /* * Private mappings are handled in a simple way. * * NOTE: When userspace waits on a MAP_SHARED mapping, even if * it's a read-only handle, it's expected that futexes attach to -- ----- * the object not the particular process. Therefore we use -- ----- * VM_MAYSHARE here, not VM_SHARED which is restricted to shared -- ----- * mappings of _writable_ handles. ++ +++++ * the object not the particular process. */ -- ----- if (likely(!(vma->vm_flags & VM_MAYSHARE))) { -- ----- key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ ++ +++++ if (PageAnon(page)) { ++ +++++ key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; - - return 0; - - } - - - - /* - - * Linear file mappings are also simple. - - */ - - key->shared.inode = vma->vm_file->f_path.dentry->d_inode; - - key->both.offset |= FUT_OFF_INODE; /* inode-based key. */ - - if (likely(!(vma->vm_flags & VM_NONLINEAR))) { - - key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) - - + vma->vm_pgoff); -- ----- return 0; ++ +++++ } else { ++ +++++ key->both.offset |= FUT_OFF_INODE; /* inode-based key */ ++ +++++ key->shared.inode = page->mapping->host; ++ +++++ key->shared.pgoff = page->index; } -- ----- /* - ---- * Linear file mappings are also simple. - - * We could walk the page table to read the non-linear - - * pte, and get the page index without fetching the page - - * from swap. But that's a lot of code to duplicate here - - * for a rare case, so we simply fetch the page. -- ----- */ - ---- key->shared.inode = vma->vm_file->f_path.dentry->d_inode; - ---- key->both.offset |= FUT_OFF_INODE; /* inode-based key. */ - ---- if (likely(!(vma->vm_flags & VM_NONLINEAR))) { - ---- key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) - ---- + vma->vm_pgoff); - - err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL); - - if (err >= 0) { - - key->shared.pgoff = - - page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - - put_page(page); -- ----- return 0; -- ----- } - - return err; - -} ++ +++++ get_futex_key_refs(key); - ---- /* - ---- * We could walk the page table to read the non-linear - ---- * pte, and get the page index without fetching the page - ---- * from swap. 
But that's a lot of code to duplicate here - ---- * for a rare case, so we simply fetch the page. - ---- */ - ---- err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL); - ---- if (err >= 0) { - ---- key->shared.pgoff = - ---- page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - ---- put_page(page); - ---- return 0; - ---- } - ---- return err; - ---- } - ---- -- -----/* -- ----- * Take a reference to the resource addressed by a key. -- ----- * Can be called while holding spinlocks. -- ----- * -- ----- */ -- -----static void get_futex_key_refs(union futex_key *key) -- -----{ -- ----- if (key->both.ptr == NULL) -- ----- return; -- ----- switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { -- ----- case FUT_OFF_INODE: -- ----- atomic_inc(&key->shared.inode->i_count); -- ----- break; -- ----- case FUT_OFF_MMSHARED: -- ----- atomic_inc(&key->private.mm->mm_count); -- ----- break; -- ----- } ++ +++++ unlock_page(page); ++ +++++ put_page(page); ++ +++++ return 0; } -- -----/* -- ----- * Drop a reference to the resource addressed by a key. -- ----- * The hash bucket spinlock must not be held. -- ----- */ -- -----static void drop_futex_key_refs(union futex_key *key) ++ +++++static inline ++ +++++void put_futex_key(int fshared, union futex_key *key) { -- ----- if (!key->both.ptr) -- ----- return; -- ----- switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { -- ----- case FUT_OFF_INODE: -- ----- iput(key->shared.inode); -- ----- break; -- ----- case FUT_OFF_MMSHARED: -- ----- mmdrop(key->private.mm); -- ----- break; -- ----- } ++ +++++ drop_futex_key_refs(key); } static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) @@@@@@@@@ -328,10 -328,10 -297,8 -328,10 -328,10 -328,10 -328,10 -328,10 +297,8 @@@@@@@@@ static int get_futex_value_locked(u32 * /* * Fault handling. 
-- ----- * if fshared is non NULL, current->mm->mmap_sem is already held */ -- -----static int futex_handle_fault(unsigned long address, -- ----- struct rw_semaphore *fshared, int attempt) ++ +++++static int futex_handle_fault(unsigned long address, int attempt) { struct vm_area_struct * vma; struct mm_struct *mm = current->mm; @@@@@@@@@ -340,8 -340,8 -307,7 -340,8 -340,8 -340,8 -340,8 -340,8 +307,7 @@@@@@@@@ if (attempt > 2) return ret; -- ----- if (!fshared) -- ----- down_read(&mm->mmap_sem); ++ +++++ down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (vma && address >= vma->vm_start && (vma->vm_flags & VM_WRITE)) { @@@@@@@@@ -361,8 -361,8 -327,7 -361,8 -361,8 -361,8 -361,8 -361,8 +327,7 @@@@@@@@@ current->min_flt++; } } -- ----- if (!fshared) -- ----- up_read(&mm->mmap_sem); ++ +++++ up_read(&mm->mmap_sem); return ret; } @@@@@@@@@ -385,6 -385,6 -350,7 -385,6 -385,6 -385,6 -385,6 -385,6 +350,7 @@@@@@@@@ static int refill_pi_state_cache(void /* pi_mutex gets initialized later */ pi_state->owner = NULL; atomic_set(&pi_state->refcount, 1); ++ +++++ pi_state->key = FUTEX_KEY_INIT; current->pi_state_cache = pi_state; @@@@@@@@@ -462,7 -462,7 -428,7 -462,7 -462,7 -462,7 -462,7 -462,7 +428,7 @@@@@@@@@ void exit_pi_state_list(struct task_str struct list_head *next, *head = &curr->pi_state_list; struct futex_pi_state *pi_state; struct futex_hash_bucket *hb; -- ----- union futex_key key; ++ +++++ union futex_key key = FUTEX_KEY_INIT; if (!futex_cmpxchg_enabled) return; @@@@@@@@@ -719,20 -719,20 -685,17 -719,20 -719,20 -719,20 -719,20 -719,20 +685,17 @@@@@@@@@ double_lock_hb(struct futex_hash_bucke * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: */ -- -----static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, -- ----- int nr_wake, u32 bitset) ++ +++++static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) { struct futex_hash_bucket *hb; struct futex_q *this, *next; struct plist_head *head; -- ----- union futex_key key; ++ +++++ union futex_key key = FUTEX_KEY_INIT; int ret; if (!bitset) return -EINVAL; -- ----- futex_lock_mm(fshared); -- ----- ret = get_futex_key(uaddr, fshared, &key); if (unlikely(ret != 0)) goto out; @@@@@@@@@ -760,7 -760,7 -723,7 -760,7 -760,7 -760,7 -760,7 -760,7 +723,7 @@@@@@@@@ spin_unlock(&hb->lock); out: -- ----- futex_unlock_mm(fshared); ++ +++++ put_futex_key(fshared, &key); return ret; } @@@@@@@@@ -769,19 -769,19 -732,16 -769,19 -769,19 -769,19 -769,19 -769,19 +732,16 @@@@@@@@@ * to this virtual address: */ static int -- -----futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, -- ----- u32 __user *uaddr2, ++ +++++futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, int nr_wake, int nr_wake2, int op) { -- ----- union futex_key key1, key2; ++ +++++ union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head; struct futex_q *this, *next; int ret, op_ret, attempt = 0; retryfull: -- ----- futex_lock_mm(fshared); -- ----- ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; @@@@@@@@@ -826,18 -826,18 -786,12 -826,18 -826,18 -826,18 -826,18 -826,18 +786,12 @@@@@@@@@ retry */ if (attempt++) { ret = futex_handle_fault((unsigned long)uaddr2, -- ----- fshared, attempt); ++ +++++ attempt); if (ret) goto out; goto retry; } -- ----- /* -- ----- * If we would have faulted, release mmap_sem, -- ----- * fault it in and start all over again. 
-- ----- */ -- ----- futex_unlock_mm(fshared); -- ----- ret = get_user(dummy, uaddr2); if (ret) return ret; @@@@@@@@@ -873,7 -873,7 -827,8 -873,7 -873,7 -873,7 -873,7 -873,7 +827,8 @@@@@@@@@ if (hb1 != hb2) spin_unlock(&hb2->lock); out: -- ----- futex_unlock_mm(fshared); ++ +++++ put_futex_key(fshared, &key2); ++ +++++ put_futex_key(fshared, &key1); return ret; } @@@@@@@@@ -882,19 -882,19 -837,16 -882,19 -882,19 -882,19 -882,19 -882,19 +837,16 @@@@@@@@@ * Requeue all waiters hashed on one physical page to another * physical page. */ -- -----static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, -- ----- u32 __user *uaddr2, ++ +++++static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval) { -- ----- union futex_key key1, key2; ++ +++++ union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; int ret, drop_count = 0; retry: -- ----- futex_lock_mm(fshared); -- ----- ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; @@@@@@@@@ -917,12 -917,12 -869,6 -917,12 -917,12 -917,12 -917,12 -917,12 +869,6 @@@@@@@@@ if (hb1 != hb2) spin_unlock(&hb2->lock); -- ----- /* -- ----- * If we would have faulted, release mmap_sem, fault -- ----- * it in and start all over again. -- ----- */ -- ----- futex_unlock_mm(fshared); -- ----- ret = get_user(curval, uaddr1); if (!ret) @@@@@@@@@ -974,7 -974,7 -920,8 -974,7 -974,7 -974,7 -974,7 -974,7 +920,8 @@@@@@@@@ out_unlock drop_futex_key_refs(&key1); out: -- ----- futex_unlock_mm(fshared); ++ +++++ put_futex_key(fshared, &key2); ++ +++++ put_futex_key(fshared, &key1); return ret; } @@@@@@@@@ -1096,8 -1096,8 -1043,7 -1096,8 -1096,8 -1096,8 -1096,8 -1096,8 +1043,7 @@@@@@@@@ static void unqueue_me_pi(struct futex_ * private futexes. */ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, -- ----- struct task_struct *newowner, -- ----- struct rw_semaphore *fshared) ++ +++++ struct task_struct *newowner, int fshared) { u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; @@@@@@@@@ -1176,7 -1176,7 -1122,7 -1176,7 -1176,7 -1176,7 -1176,7 -1176,7 +1122,7 @@@@@@@@@ retry handle_fault: spin_unlock(q->lock_ptr); -- ----- ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++); ++ +++++ ret = futex_handle_fault((unsigned long)uaddr, attempt++); spin_lock(q->lock_ptr); @@@@@@@@@ -1200,7 -1200,7 -1146,7 -1200,7 -1200,7 -1200,7 -1200,7 -1200,7 +1146,7 @@@@@@@@@ static long futex_wait_restart(struct restart_block *restart); -- -----static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, ++ +++++static int futex_wait(u32 __user *uaddr, int fshared, u32 val, ktime_t *abs_time, u32 bitset) { struct task_struct *curr = current; @@@@@@@@@ -1218,8 -1218,8 -1164,7 -1218,8 -1218,8 -1218,8 -1218,8 -1218,8 +1164,7 @@@@@@@@@ q.pi_state = NULL; q.bitset = bitset; retry: -- ----- futex_lock_mm(fshared); -- ----- ++ +++++ q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) goto out_release_sem; @@@@@@@@@ -1251,12 -1251,12 -1196,6 -1251,12 -1251,12 -1251,12 -1251,12 -1251,12 +1196,6 @@@@@@@@@ if (unlikely(ret)) { queue_unlock(&q, hb); -- ----- /* -- ----- * If we would have faulted, release mmap_sem, fault it in and -- ----- * start all over again. 
-- ----- */ -- ----- futex_unlock_mm(fshared); -- ----- ret = get_user(uval, uaddr); if (!ret) @@@@@@@@@ -1270,12 -1270,12 -1209,6 -1270,12 -1270,12 -1270,12 -1270,12 -1270,12 +1209,6 @@@@@@@@@ /* Only actually queue if *uaddr contained val. */ queue_me(&q, hb); -- ----- /* -- ----- * Now the futex is queued and we have checked the data, we -- ----- * don't want to hold mmap_sem while we sleep. -- ----- */ -- ----- futex_unlock_mm(fshared); -- ----- /* * There might have been scheduling since the queue_me(), as we * cannot hold a spinlock across the get_user() in case it @@@@@@@@@ -1296,16 -1296,13 -1229,13 -1296,16 -1296,16 -1296,16 -1296,16 -1296,13 +1229,16 @@@@@@@@@ if (!abs_time) schedule(); else { ++ + unsigned long slack; ++ + slack = current->timer_slack_ns; ++ + if (rt_task(current)) ++ + slack = 0; hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init_sleeper(&t, current); -- - t.timer.expires = *abs_time; ++ + hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack); -- - hrtimer_start(&t.timer, t.timer.expires, -- - HRTIMER_MODE_ABS); ++ + hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); if (!hrtimer_active(&t.timer)) t.task = NULL; @@@@@@@@@ -1363,7 -1360,7 -1293,7 -1363,7 -1363,7 -1363,7 -1363,7 -1360,7 +1296,7 @@@@@@@@@ queue_unlock(&q, hb); out_release_sem: -- ----- futex_unlock_mm(fshared); ++ +++++ put_futex_key(fshared, &q.key); return ret; } @@@@@@@@@ -1371,13 -1368,13 -1301,13 -1371,13 -1371,13 -1371,13 -1371,13 -1368,13 +1304,13 @@@@@@@@@ static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; -- ----- struct rw_semaphore *fshared = NULL; ++ +++++ int fshared = 0; ktime_t t; t.tv64 = restart->futex.time; restart->fn = do_no_restart_syscall; if (restart->futex.flags & FLAGS_SHARED) -- ----- fshared = ¤t->mm->mmap_sem; ++ +++++ fshared = 1; return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, restart->futex.bitset); } @@@@@@@@@ -1389,7 -1386,7 -1319,7 -1389,7 -1389,7 -1389,7 -1389,7 -1386,7 +1322,7 @@@@@@@@@ * if there are waiters then it will block, it does PI, etc. (Due to * races the kernel might see a 0 value of the futex too.) */ -- -----static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, ++ +++++static int futex_lock_pi(u32 __user *uaddr, int fshared, int detect, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; @@@@@@@@@ -1407,13 -1404,13 -1337,12 -1407,13 -1407,13 -1407,13 -1407,13 -1404,13 +1340,12 @@@@@@@@@ hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); -- - to->timer.expires = *time; ++ + hrtimer_set_expires(&to->timer, *time); } q.pi_state = NULL; retry: -- ----- futex_lock_mm(fshared); -- ----- ++ +++++ q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) goto out_release_sem; @@@@@@@@@ -1502,7 -1499,7 -1431,6 -1502,7 -1502,7 -1502,7 -1502,7 -1499,7 +1434,6 @@@@@@@@@ * exit to complete. */ queue_unlock(&q, hb); -- ----- futex_unlock_mm(fshared); cond_resched(); goto retry; @@@@@@@@@ -1534,12 -1531,12 -1462,6 -1534,12 -1534,12 -1534,12 -1534,12 -1531,12 +1465,6 @@@@@@@@@ */ queue_me(&q, hb); -- ----- /* -- ----- * Now the futex is queued and we have checked the data, we -- ----- * don't want to hold mmap_sem while we sleep. 
-- ----- */ -- ----- futex_unlock_mm(fshared); -- ----- WARN_ON(!q.pi_state); /* * Block on the PI mutex: @@@@@@@@@ -1552,7 -1549,7 -1474,6 -1552,7 -1552,7 -1552,7 -1552,7 -1549,7 +1477,6 @@@@@@@@@ ret = ret ? 0 : -EWOULDBLOCK; } -- ----- futex_lock_mm(fshared); spin_lock(q.lock_ptr); if (!ret) { @@@@@@@@@ -1618,7 -1615,7 -1539,6 -1618,7 -1618,7 -1618,7 -1618,7 -1615,7 +1542,6 @@@@@@@@@ /* Unqueue and drop the lock */ unqueue_me_pi(&q); -- ----- futex_unlock_mm(fshared); if (to) destroy_hrtimer_on_stack(&to->timer); @@@@@@@@@ -1628,7 -1625,7 -1548,7 -1628,7 -1628,7 -1628,7 -1628,7 -1625,7 +1551,7 @@@@@@@@@ queue_unlock(&q, hb); out_release_sem: -- ----- futex_unlock_mm(fshared); ++ +++++ put_futex_key(fshared, &q.key); if (to) destroy_hrtimer_on_stack(&to->timer); return ret; @@@@@@@@@ -1645,15 -1642,15 -1565,12 -1645,15 -1645,15 -1645,15 -1645,15 -1642,15 +1568,12 @@@@@@@@@ queue_unlock(&q, hb); if (attempt++) { -- ----- ret = futex_handle_fault((unsigned long)uaddr, fshared, -- ----- attempt); ++ +++++ ret = futex_handle_fault((unsigned long)uaddr, attempt); if (ret) goto out_release_sem; goto retry_unlocked; } -- ----- futex_unlock_mm(fshared); -- ----- ret = get_user(uval, uaddr); if (!ret && (uval != -EFAULT)) goto retry; @@@@@@@@@ -1668,13 -1665,13 -1585,13 -1668,13 -1668,13 -1668,13 -1668,13 -1665,13 +1588,13 @@@@@@@@@ * This is the in-kernel slowpath: we look up the PI state (if any), * and do the rt-mutex unlock. */ -- -----static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) ++ +++++static int futex_unlock_pi(u32 __user *uaddr, int fshared) { struct futex_hash_bucket *hb; struct futex_q *this, *next; u32 uval; struct plist_head *head; -- ----- union futex_key key; ++ +++++ union futex_key key = FUTEX_KEY_INIT; int ret, attempt = 0; retry: @@@@@@@@@ -1685,10 -1682,10 -1602,6 -1685,10 -1685,10 -1685,10 -1685,10 -1682,10 +1605,6 @@@@@@@@@ */ if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) return -EPERM; -- ----- /* -- ----- * First take all the futex related locks: -- ----- */ -- ----- futex_lock_mm(fshared); ret = get_futex_key(uaddr, fshared, &key); if (unlikely(ret != 0)) @@@@@@@@@ -1747,7 -1744,7 -1660,7 -1747,7 -1747,7 -1747,7 -1747,7 -1744,7 +1663,7 @@@@@@@@@ retry_unlocked out_unlock: spin_unlock(&hb->lock); out: -- ----- futex_unlock_mm(fshared); ++ +++++ put_futex_key(fshared, &key); return ret; @@@@@@@@@ -1763,16 -1760,16 -1676,13 -1763,16 -1763,16 -1763,16 -1763,16 -1760,16 +1679,13 @@@@@@@@@ pi_faulted spin_unlock(&hb->lock); if (attempt++) { -- ----- ret = futex_handle_fault((unsigned long)uaddr, fshared, -- ----- attempt); ++ +++++ ret = futex_handle_fault((unsigned long)uaddr, attempt); if (ret) goto out; uval = 0; goto retry_unlocked; } -- ----- futex_unlock_mm(fshared); -- ----- ret = get_user(uval, uaddr); if (!ret && (uval != -EFAULT)) goto retry; @@@@@@@@@ -1898,8 -1895,8 -1808,7 -1898,8 -1898,8 -1898,8 -1898,8 -1895,8 +1811,7 @@@@@@@@@ retry * PI futexes happens in exit_pi_state(): */ if (!pi && (uval & FUTEX_WAITERS)) -- ----- futex_wake(uaddr, &curr->mm->mmap_sem, 1, -- ----- FUTEX_BITSET_MATCH_ANY); ++ +++++ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); } return 0; } @@@@@@@@@ -1995,10 -1992,10 -1904,10 -1995,10 -1995,10 -1995,10 -1995,10 -1992,10 +1907,10 @@@@@@@@@ long do_futex(u32 __user *uaddr, int op { int ret = -ENOSYS; int cmd = op & FUTEX_CMD_MASK; -- ----- struct rw_semaphore *fshared = NULL; ++ +++++ int fshared = 0; if (!(op & FUTEX_PRIVATE_FLAG)) -- ----- fshared = ¤t->mm->mmap_sem; ++ +++++ fshared = 1; switch 
(cmd) { case FUTEX_WAIT: diff --combined kernel/lockdep.c index 06e157119d2,dbda475b13b,dbda475b13b,a4285830323,06e157119d2,06e157119d2,46a404173db,dbda475b13b..e4bdda8dcf0 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@@@@@@@@ -136,16 -136,16 -136,16 -136,16 -136,16 -136,16 -136,16 -136,16 +136,16 @@@@@@@@@ static inline struct lock_class *hlock_ #ifdef CONFIG_LOCK_STAT static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); --- ----static int lock_contention_point(struct lock_class *class, unsigned long ip) +++ ++++static int lock_point(unsigned long points[], unsigned long ip) { int i; --- ---- for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { --- ---- if (class->contention_point[i] == 0) { --- ---- class->contention_point[i] = ip; +++ ++++ for (i = 0; i < LOCKSTAT_POINTS; i++) { +++ ++++ if (points[i] == 0) { +++ ++++ points[i] = ip; break; } --- ---- if (class->contention_point[i] == ip) +++ ++++ if (points[i] == ip) break; } @@@@@@@@@ -185,6 -185,6 -185,6 -185,9 -185,6 -185,6 -185,6 -185,6 +185,9 @@@@@@@@@ struct lock_class_stats lock_stats(stru for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) stats.contention_point[i] += pcs->contention_point[i]; +++ ++++ for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) +++ ++++ stats.contending_point[i] += pcs->contending_point[i]; +++ ++++ lock_time_add(&pcs->read_waittime, &stats.read_waittime); lock_time_add(&pcs->write_waittime, &stats.write_waittime); @@@@@@@@@ -209,6 -209,6 -209,6 -212,7 -209,6 -209,6 -209,6 -209,6 +212,7 @@@@@@@@@ void clear_lock_stats(struct lock_clas memset(cpu_stats, 0, sizeof(struct lock_class_stats)); } memset(class->contention_point, 0, sizeof(class->contention_point)); +++ ++++ memset(class->contending_point, 0, sizeof(class->contending_point)); } static struct lock_class_stats *get_lock_stats(struct lock_class *class) @@@@@@@@@ -2169,11 -2169,12 -2169,12 -2173,11 -2169,11 -2169,11 -2169,11 -2169,12 +2173,11 @@@@@@@@@ void early_boot_irqs_on(void /* * Hardirqs will be enabled: */ -- -void trace_hardirqs_on_caller(unsigned long a0) ++ +void trace_hardirqs_on_caller(unsigned long ip) { struct task_struct *curr = current; -- - unsigned long ip; -- - time_hardirqs_on(CALLER_ADDR0, a0); ++ + time_hardirqs_on(CALLER_ADDR0, ip); if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@@@@@@@@ -2187,6 -2188,7 -2188,7 -2191,6 -2187,6 -2187,6 -2187,6 -2188,7 +2191,6 @@@@@@@@@ } /* we'll do an OFF -> ON transition: */ curr->hardirqs_enabled = 1; -- - ip = (unsigned long) __builtin_return_address(0); if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; @@@@@@@@@ -2222,11 -2224,11 -2224,11 -2226,11 -2222,11 -2222,11 -2222,11 -2224,11 +2226,11 @@@@@@@@@ EXPORT_SYMBOL(trace_hardirqs_on) /* * Hardirqs were disabled: */ -- -void trace_hardirqs_off_caller(unsigned long a0) ++ +void trace_hardirqs_off_caller(unsigned long ip) { struct task_struct *curr = current; -- - time_hardirqs_off(CALLER_ADDR0, a0); ++ + time_hardirqs_off(CALLER_ADDR0, ip); if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@@@@@@@@ -2239,7 -2241,7 -2241,7 -2243,7 -2239,7 -2239,7 -2239,7 -2241,7 +2243,7 @@@@@@@@@ * We have done an ON -> OFF transition: */ curr->hardirqs_enabled = 0; -- - curr->hardirq_disable_ip = _RET_IP_; ++ + curr->hardirq_disable_ip = ip; curr->hardirq_disable_event = ++curr->irq_events; debug_atomic_inc(&hardirqs_off_events); } else @@@@@@@@@ -2999,7 -3001,7 -3001,7 -3003,7 -2999,7 -2999,7 -2999,7 -3001,7 +3003,7 @@@@@@@@@ __lock_contended(struct lockdep_map *lo struct 
held_lock *hlock, *prev_hlock; struct lock_class_stats *stats; unsigned int depth; --- ---- int i, point; +++ ++++ int i, contention_point, contending_point; depth = curr->lockdep_depth; if (DEBUG_LOCKS_WARN_ON(!depth)) @@@@@@@@@ -3023,18 -3025,18 -3025,18 -3027,22 -3023,18 -3023,18 -3023,18 -3025,18 +3027,22 @@@@@@@@@ found_it: hlock->waittime_stamp = sched_clock(); --- ---- point = lock_contention_point(hlock_class(hlock), ip); +++ ++++ contention_point = lock_point(hlock_class(hlock)->contention_point, ip); +++ ++++ contending_point = lock_point(hlock_class(hlock)->contending_point, +++ ++++ lock->ip); stats = get_lock_stats(hlock_class(hlock)); --- ---- if (point < ARRAY_SIZE(stats->contention_point)) --- ---- stats->contention_point[point]++; +++ ++++ if (contention_point < LOCKSTAT_POINTS) +++ ++++ stats->contention_point[contention_point]++; +++ ++++ if (contending_point < LOCKSTAT_POINTS) +++ ++++ stats->contending_point[contending_point]++; if (lock->cpu != smp_processor_id()) stats->bounces[bounce_contended + !!hlock->read]++; put_lock_stats(stats); } static void --- ----__lock_acquired(struct lockdep_map *lock) +++ ++++__lock_acquired(struct lockdep_map *lock, unsigned long ip) { struct task_struct *curr = current; struct held_lock *hlock, *prev_hlock; @@@@@@@@@ -3083,6 -3085,6 -3085,6 -3091,7 -3083,6 -3083,6 -3083,6 -3085,6 +3091,7 @@@@@@@@@ found_it put_lock_stats(stats); lock->cpu = cpu; +++ ++++ lock->ip = ip; } void lock_contended(struct lockdep_map *lock, unsigned long ip) @@@@@@@@@ -3104,7 -3106,7 -3106,7 -3113,7 -3104,7 -3104,7 -3104,7 -3106,7 +3113,7 @@@@@@@@@ } EXPORT_SYMBOL_GPL(lock_contended); --- ----void lock_acquired(struct lockdep_map *lock) +++ ++++void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; @@@@@@@@@ -3117,7 -3119,7 -3119,7 -3126,7 -3117,7 -3117,7 -3117,7 -3119,7 +3126,7 @@@@@@@@@ raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion = 1; --- ---- __lock_acquired(lock); +++ ++++ __lock_acquired(lock, ip); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } @@@@@@@@@ -3276,10 -3278,10 -3278,10 -3285,10 -3276,10 -3276,10 -3276,10 -3278,10 +3285,10 @@@@@@@@@ void __init lockdep_info(void { printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); ------ - printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); ++++++ + printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); ------ - printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); ++++++ + printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); printk("... 
CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); @@@@@@@@@ -3415,10 -3417,9 -3417,9 -3424,10 -3415,10 -3415,10 -3415,10 -3417,9 +3424,10 @@@@@@@@@ retry } printk(" ignoring it.\n"); unlock = 0; ++ + } else { ++ + if (count != 10) ++ + printk(KERN_CONT " locked it.\n"); } -- - if (count != 10) -- - printk(" locked it.\n"); do_each_thread(g, p) { /* diff --combined kernel/notifier.c index 4282c0a40a5,0f39e398ef6,823be11584e,4282c0a40a5,4282c0a40a5,4282c0a40a5,4282c0a40a5,4282c0a40a5..61d5aa5eced --- a/kernel/notifier.c +++ b/kernel/notifier.c @@@@@@@@@ -82,6 -82,14 -82,6 -82,6 -82,6 -82,6 -82,6 -82,6 +82,14 @@@@@@@@@ static int __kprobes notifier_call_chai while (nb && nr_to_call) { next_nb = rcu_dereference(nb->next); + ++++++ + ++++++#ifdef CONFIG_DEBUG_NOTIFIERS + ++++++ if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) { + ++++++ WARN(1, "Invalid notifier called!"); + ++++++ nb = next_nb; + ++++++ continue; + ++++++ } + ++++++#endif ret = nb->notifier_call(nb, val, v); if (nr_calls) @@@@@@@@@ -550,7 -558,7 -550,7 -550,7 -550,7 -550,7 -550,7 -550,7 +558,7 @@@@@@@@@ EXPORT_SYMBOL(unregister_reboot_notifie static ATOMIC_NOTIFIER_HEAD(die_chain); -- int notify_die(enum die_val val, const char *str, ++ int notrace notify_die(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) { struct die_args args = { diff --combined kernel/sched.c index 9b1e79371c2,cc1f81b50b8,13dd2db9fb2,2a106b6b78b,e8819bc6f46,b388c9b243e,9b1e79371c2,d906f72b42d..558e5f28426 --- a/kernel/sched.c +++ b/kernel/sched.c @@@@@@@@@ -55,7 -55,6 -55,6 -55,7 -55,7 -55,7 -55,7 -55,6 +55,7 @@@@@@@@@ #include #include #include ++ +#include #include #include #include @@@@@@@@@ -72,7 -71,6 -71,6 -72,7 -72,7 -72,7 -72,7 -71,7 +72,7 @@@@@@@@@ #include #include #include ++ #include #include #include @@@@@@@@@ -203,19 -201,14 -201,14 -203,19 -203,19 -203,19 -203,19 -202,19 +203,19 @@@@@@@@@ void init_rt_bandwidth(struct rt_bandwi hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rt_b->rt_period_timer.function = sched_rt_period_timer; -- rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; ++ rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; ++ } ++ ++ static inline int rt_bandwidth_enabled(void) ++ { ++ return sysctl_sched_rt_runtime >= 0; } static void start_rt_bandwidth(struct rt_bandwidth *rt_b) { ktime_t now; -- if (rt_b->rt_runtime == RUNTIME_INF) ++ if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) return; if (hrtimer_active(&rt_b->rt_period_timer)) @@@@@@@@@ -228,8 -221,9 -221,9 -228,8 -228,8 -228,8 -228,8 -227,9 +228,8 @@@@@@@@@ now = hrtimer_cb_get_time(&rt_b->rt_period_timer); hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); -- - hrtimer_start(&rt_b->rt_period_timer, -- - rt_b->rt_period_timer.expires, -- - HRTIMER_MODE_ABS); ++ + hrtimer_start_expires(&rt_b->rt_period_timer, ++ + HRTIMER_MODE_ABS); } spin_unlock(&rt_b->rt_runtime_lock); } @@@@@@@@@ -304,9 -298,9 -298,9 -304,9 -304,9 -304,9 -304,9 -304,9 +304,9 @@@@@@@@@ static DEFINE_PER_CPU(struct cfs_rq, in static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; #endif /* CONFIG_RT_GROUP_SCHED */ -- #else /* !CONFIG_FAIR_GROUP_SCHED */ ++ #else /* !CONFIG_USER_SCHED */ #define root_task_group init_task_group -- #endif /* CONFIG_FAIR_GROUP_SCHED */ ++ #endif /* CONFIG_USER_SCHED */ /* task_group_lock serializes add/remove of task groups and also changes to * a task group's cpu shares. 
@@@@@@@@@ -386,6 -380,7 -380,7 -386,6 -386,6 -386,6 -386,6 -386,7 +386,6 @@@@@@@@@ struct cfs_rq u64 exec_clock; u64 min_vruntime; -- - u64 pair_start; struct rb_root tasks_timeline; struct rb_node *rb_leftmost; @@@@@@@@@ -397,9 -392,9 -392,9 -397,9 -397,9 -397,9 -397,9 -398,9 +397,9 @@@@@@@@@ * 'curr' points to currently running entity on this cfs_rq. * It is set to NULL otherwise (i.e when none are currently running). */ -- - - struct sched_entity *curr, *next; ++ + + struct sched_entity *curr, *next, *last; -- - - unsigned long nr_spread_over; ++ + + unsigned int nr_spread_over; #ifdef CONFIG_FAIR_GROUP_SCHED struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ @@@@@@@@@ -609,9 -604,9 -604,9 -609,9 -609,9 -609,9 -609,9 -610,9 +609,9 @@@@@@@@@ struct rq static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -- static inline void check_preempt_curr(struct rq *rq, struct task_struct *p) ++ static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync) { -- rq->curr->sched_class->check_preempt_curr(rq, p); ++ rq->curr->sched_class->check_preempt_curr(rq, p, sync); } static inline int cpu_of(struct rq *rq) @@@@@@@@@ -817,13 -812,6 -812,6 -817,13 -817,13 -817,13 -817,13 -818,6 +817,13 @@@@@@@@@ const_debug unsigned int sysctl_sched_n */ unsigned int sysctl_sched_shares_ratelimit = 250000; ++ +/* ++ + * Inject some fuzzyness into changing the per-cpu group shares ++ + * this avoids remote rq-locks at the expense of fairness. ++ + * default: 4 ++ + */ ++ +unsigned int sysctl_sched_shares_thresh = 4; ++ + /* * period over which we measure -rt task cpu usage in us. * default: 1s @@@@@@@@@ -969,14 -957,6 -957,6 -969,14 -969,6 -969,14 -969,14 -963,6 +969,14 @@@@@@@@@ static struct rq *task_rq_lock(struct t } } ++ + +void task_rq_unlock_wait(struct task_struct *p) ++ + +{ ++ + + struct rq *rq = task_rq(p); ++ + + ++ + + smp_mb(); /* spin-unlock-wait is not a full memory barrier */ ++ + + spin_unlock_wait(&rq->lock); ++ + +} ++ + + static void __task_rq_unlock(struct rq *rq) __releases(rq->lock) { @@@@@@@@@ -1078,7 -1058,7 -1058,7 -1078,7 -1070,7 -1078,7 -1078,7 -1064,7 +1078,7 @@@@@@@@@ static void hrtick_start(struct rq *rq struct hrtimer *timer = &rq->hrtick_timer; ktime_t time = ktime_add_ns(timer->base->get_time(), delay); -- - timer->expires = time; ++ + hrtimer_set_expires(timer, time); if (rq == this_rq()) { hrtimer_restart(timer); @@@@@@@@@ -1107,7 -1087,7 -1087,7 -1107,7 -1099,7 -1107,7 -1107,7 -1093,7 +1107,7 @@@@@@@@@ hotplug_hrtick(struct notifier_block *n return NOTIFY_DONE; } - static void init_hrtick(void) + static __init void init_hrtick(void) { hotcpu_notifier(hotplug_hrtick, 0); } @@@@@@@@@ -1122,7 -1102,7 -1102,7 -1122,7 -1114,7 -1122,7 -1122,7 -1108,7 +1122,7 @@@@@@@@@ static void hrtick_start(struct rq *rq hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL); } -- static void init_hrtick(void) ++ static inline void init_hrtick(void) { } #endif /* CONFIG_SMP */ @@@@@@@@@ -1139,9 -1119,9 -1119,9 -1139,9 -1131,9 -1139,9 -1139,9 -1125,9 +1139,9 @@@@@@@@@ static void init_rq_hrtick(struct rq *r hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rq->hrtick_timer.function = hrtick; -- rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; ++ rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; } -- #else ++ #else /* CONFIG_SCHED_HRTICK */ static inline void hrtick_clear(struct rq *rq) { } @@@@@@@@@ -1153,7 -1133,7 -1133,7 -1153,7 -1145,7 -1153,7 -1153,7 -1139,7 +1153,7 @@@@@@@@@ static inline void 
init_rq_hrtick(struc static inline void init_hrtick(void) { } -- #endif ++ #endif /* CONFIG_SCHED_HRTICK */ /* * resched_task - mark a task 'to be rescheduled now'. @@@@@@@@@ -1400,24 -1380,38 -1380,38 -1400,24 -1392,24 -1400,24 -1400,24 -1386,24 +1400,24 @@@@@@@@@ static inline void dec_cpu_load(struct update_load_sub(&rq->load, load); } -- #ifdef CONFIG_SMP -- static unsigned long source_load(int cpu, int type); -- static unsigned long target_load(int cpu, int type); -- static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); -- -- static unsigned long cpu_avg_load_per_task(int cpu) -- { -- struct rq *rq = cpu_rq(cpu); -- -- if (rq->nr_running) -- rq->avg_load_per_task = rq->load.weight / rq->nr_running; -- -- return rq->avg_load_per_task; -- } -- -- #ifdef CONFIG_FAIR_GROUP_SCHED -- -- typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *); ++ #if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED) ++ typedef int (*tg_visitor)(struct task_group *, void *); /* * Iterate the full tree, calling @down when first entering a node and @up when * leaving it for the final time. */ -- static void -- walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd) ++ static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) { struct task_group *parent, *child; ++ int ret; rcu_read_lock(); parent = &root_task_group; down: -- (*down)(parent, cpu, sd); ++ ret = (*down)(parent, data); ++ if (ret) ++ goto out_unlock; list_for_each_entry_rcu(child, &parent->children, siblings) { parent = child; goto down; @@@@@@@@@ -1425,53 -1419,23 -1419,23 -1425,51 -1417,51 -1425,53 -1425,53 -1411,51 +1425,53 @@@@@@@@@ up: continue; } -- (*up)(parent, cpu, sd); ++ ret = (*up)(parent, data); ++ if (ret) ++ goto out_unlock; child = parent; parent = parent->parent; if (parent) goto up; ++ out_unlock: rcu_read_unlock(); ++ ++ return ret; ++ } ++ ++ static int tg_nop(struct task_group *tg, void *data) ++ { ++ return 0; ++ } ++ #endif ++ ++ #ifdef CONFIG_SMP ++ static unsigned long source_load(int cpu, int type); ++ static unsigned long target_load(int cpu, int type); ++ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); ++ ++ static unsigned long cpu_avg_load_per_task(int cpu) ++ { ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (rq->nr_running) ++ rq->avg_load_per_task = rq->load.weight / rq->nr_running; ++++ + else ++++ + rq->avg_load_per_task = 0; ++ ++ return rq->avg_load_per_task; } ++ #ifdef CONFIG_FAIR_GROUP_SCHED ++ static void __set_se_shares(struct sched_entity *se, unsigned long shares); /* * Calculate and set the cpu's group shares. */ static void -- -__update_group_shares_cpu(struct task_group *tg, int cpu, -- - unsigned long sd_shares, unsigned long sd_rq_weight) ++ +update_group_shares_cpu(struct task_group *tg, int cpu, ++ + unsigned long sd_shares, unsigned long sd_rq_weight) { int boost = 0; unsigned long shares; @@@@@@@@@ -1502,23 -1466,19 -1466,19 -1500,23 -1492,23 -1502,23 -1502,23 -1486,19 +1502,23 @@@@@@@@@ * */ shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); ++ + shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); -- - /* -- - * record the actual number of shares, not the boosted amount. -- - */ -- - tg->cfs_rq[cpu]->shares = boost ? 
0 : shares; -- - tg->cfs_rq[cpu]->rq_weight = rq_weight; ++ + if (abs(shares - tg->se[cpu]->load.weight) > ++ + sysctl_sched_shares_thresh) { ++ + struct rq *rq = cpu_rq(cpu); ++ + unsigned long flags; -- - if (shares < MIN_SHARES) -- - shares = MIN_SHARES; -- - else if (shares > MAX_SHARES) -- - shares = MAX_SHARES; ++ + spin_lock_irqsave(&rq->lock, flags); ++ + /* ++ + * record the actual number of shares, not the boosted amount. ++ + */ ++ + tg->cfs_rq[cpu]->shares = boost ? 0 : shares; ++ + tg->cfs_rq[cpu]->rq_weight = rq_weight; -- - __set_se_shares(tg->se[cpu], shares); ++ + __set_se_shares(tg->se[cpu], shares); ++ + spin_unlock_irqrestore(&rq->lock, flags); ++ + } } /* @@@@@@@@@ -1526,11 -1486,11 -1486,11 -1524,11 -1516,11 -1526,11 -1526,11 -1506,11 +1526,11 @@@@@@@@@ * This needs to be done in a bottom-up fashion because the rq weight of a * parent group depends on the shares of its child groups. */ -- static void -- tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd) ++ static int tg_shares_up(struct task_group *tg, void *data) { unsigned long rq_weight = 0; unsigned long shares = 0; ++ struct sched_domain *sd = data; int i; for_each_cpu_mask(i, sd->span) { @@@@@@@@@ -1547,10 -1507,14 -1507,14 -1545,10 -1537,10 -1547,10 -1547,10 -1527,16 +1547,10 @@@@@@@@@ if (!rq_weight) rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; -- - for_each_cpu_mask(i, sd->span) { -- - struct rq *rq = cpu_rq(i); -- - unsigned long flags; - - spin_lock_irqsave(&rq->lock, flags); - __update_group_shares_cpu(tg, i, shares, rq_weight); - spin_unlock_irqrestore(&rq->lock, flags); - } ++ + for_each_cpu_mask(i, sd->span) ++ + update_group_shares_cpu(tg, i, shares, rq_weight); -- spin_lock_irqsave(&rq->lock, flags); -- __update_group_shares_cpu(tg, i, shares, rq_weight); -- spin_unlock_irqrestore(&rq->lock, flags); -- } ++ return 0; } /* @@@@@@@@@ -1558,10 -1522,10 -1522,10 -1556,10 -1548,10 -1558,10 -1558,10 -1544,10 +1558,10 @@@@@@@@@ * This needs to be done in a top-down fashion because the load of a child * group is a fraction of its parents load. 
*/ -- static void -- tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd) ++ static int tg_load_down(struct task_group *tg, void *data) { unsigned long load; ++ long cpu = (long)data; if (!tg->parent) { load = cpu_rq(cpu)->load.weight; @@@@@@@@@ -1572,8 -1536,11 -1536,11 -1570,8 -1562,8 -1572,8 -1572,8 -1558,8 +1572,8 @@@@@@@@@ } tg->cfs_rq[cpu]->h_load = load; -- } -- static void -- tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd) -- { ++ return 0; } static void update_shares(struct sched_domain *sd) @@@@@@@@@ -1583,7 -1550,7 -1550,7 -1581,7 -1573,7 -1583,7 -1583,7 -1569,7 +1583,7 @@@@@@@@@ if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { sd->last_update = now; -- walk_tg_tree(tg_nop, tg_shares_up, 0, sd); ++ walk_tg_tree(tg_nop, tg_shares_up, sd); } } @@@@@@@@@ -1594,9 -1561,9 -1561,9 -1592,9 -1584,9 -1594,9 -1594,9 -1580,9 +1594,9 @@@@@@@@@ static void update_shares_locked(struc spin_lock(&rq->lock); } -- static void update_h_load(int cpu) ++ static void update_h_load(long cpu) { -- walk_tg_tree(tg_load_down, tg_nop, cpu, NULL); ++ walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); } #else @@@@@@@@@ -1815,9 -1782,7 -1782,7 -1813,9 -1805,7 -1815,9 -1815,9 -1801,7 +1815,9 @@@@@@@@@ task_hot(struct task_struct *p, u64 now /* * Buddy candidates are cache hot: */ -- - - if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next)) ++ + + if (sched_feat(CACHE_HOT_BUDDY) && ++ + + (&p->se == cfs_rq_of(&p->se)->next || ++ + + &p->se == cfs_rq_of(&p->se)->last)) return 1; if (p->sched_class != &fair_sched_class) @@@@@@@@@ -1953,12 -1918,14 -1918,14 -1951,12 -1941,12 -1953,12 -1953,12 -1937,12 +1953,12 @@@@@@@@@ unsigned long wait_task_inactive(struc * just go back and repeat. */ rq = task_rq_lock(p, &flags); ++ trace_sched_wait_task(rq, p); running = task_running(rq, p); on_rq = p->se.on_rq; ncsw = 0; -- if (!match_state || p->state == match_state) { -- ncsw = p->nivcsw + p->nvcsw; -- if (unlikely(!ncsw)) -- ncsw = 1; -- } ++ if (!match_state || p->state == match_state) ++ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ task_rq_unlock(rq, &flags); /* @@@@@@@@@ -2315,8 -2282,10 -2282,10 -2313,8 -2303,8 -2315,8 -2315,8 -2299,8 +2315,8 @@@@@@@@@ out_activate success = 1; out_running: -- trace_mark(kernel_sched_wakeup, -- "pid %d state %ld ## rq %p task %p rq->curr %p", -- p->pid, p->state, rq, p, rq->curr); -- check_preempt_curr(rq, p); ++ trace_sched_wakeup(rq, p); ++ check_preempt_curr(rq, p, sync); p->state = TASK_RUNNING; #ifdef CONFIG_SMP @@@@@@@@@ -2448,8 -2417,10 -2417,10 -2446,8 -2436,8 -2448,8 -2448,8 -2432,8 +2448,8 @@@@@@@@@ void wake_up_new_task(struct task_struc p->sched_class->task_new(rq, p); inc_nr_running(rq); } -- trace_mark(kernel_sched_wakeup_new, -- "pid %d state %ld ## rq %p task %p rq->curr %p", -- p->pid, p->state, rq, p, rq->curr); -- check_preempt_curr(rq, p); ++ trace_sched_wakeup_new(rq, p); ++ check_preempt_curr(rq, p, 0); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) p->sched_class->task_wake_up(rq, p); @@@@@@@@@ -2621,7 -2592,11 -2592,11 -2619,7 -2609,7 -2621,7 -2621,7 -2605,7 +2621,7 @@@@@@@@@ context_switch(struct rq *rq, struct ta struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); -- trace_mark(kernel_sched_schedule, -- "prev_pid %d next_pid %d prev_state %ld " -- "## rq %p prev %p next %p", -- prev->pid, next->pid, prev->state, -- rq, prev, next); ++ trace_sched_switch(rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* @@@@@@@@@ -2861,7 -2836,6 -2836,6 -2859,7 -2849,7 -2861,7 -2861,7 
-2845,7 +2861,7 @@@@@@@@@ static void sched_migrate_task(struct t || unlikely(!cpu_active(dest_cpu))) goto out; ++ trace_sched_migrate_task(rq, p, dest_cpu); /* force the process onto the specified CPU */ if (migrate_task(p, dest_cpu, &req)) { /* Need to wait for migration thread (might exit: take ref). */ @@@@@@@@@ -2906,7 -2880,7 -2880,7 -2904,7 -2894,7 -2906,7 -2906,7 -2890,7 +2906,7 @@@@@@@@@ static void pull_task(struct rq *src_rq * Note that idle threads have a prio of MAX_PRIO, for this test * to be always true for them. */ -- check_preempt_curr(this_rq, p); ++ check_preempt_curr(this_rq, p, 0); } /* @@@@@@@@@ -3355,7 -3329,7 -3329,7 -3353,7 -3343,7 -3355,7 -3355,7 -3339,7 +3355,7 @@@@@@@@@ small_imbalance } else this_load_per_task = cpu_avg_load_per_task(this_cpu); -- - if (max_load - this_load + 2*busiest_load_per_task >= ++ + if (max_load - this_load + busiest_load_per_task >= busiest_load_per_task * imbn) { *imbalance = busiest_load_per_task; return busiest; @@@@@@@@@ -4063,26 -4037,23 -4037,23 -4061,26 -4051,26 -4063,26 -4063,26 -4047,26 +4063,26 @@@@@@@@@ DEFINE_PER_CPU(struct kernel_stat, ksta EXPORT_PER_CPU_SYMBOL(kstat); /* -- * Return p->sum_exec_runtime plus any more ns on the sched_clock -- * that have not yet been banked in case the task is currently running. ++ * Return any ns on the sched_clock that have not yet been banked in ++ * @p in case that task is currently running. */ -- unsigned long long task_sched_runtime(struct task_struct *p) ++ unsigned long long task_delta_exec(struct task_struct *p) { unsigned long flags; -- u64 ns, delta_exec; struct rq *rq; ++ u64 ns = 0; rq = task_rq_lock(p, &flags); -- ns = p->se.sum_exec_runtime; ++ if (task_current(rq, p)) { ++ u64 delta_exec; ++ update_rq_clock(rq); delta_exec = rq->clock - p->se.exec_start; if ((s64)delta_exec > 0) -- ns += delta_exec; ++ ns = delta_exec; } ++ task_rq_unlock(rq, &flags); return ns; @@@@@@@@@ -4099,7 -4070,6 -4070,6 -4097,7 -4087,7 -4099,7 -4099,7 -4083,7 +4099,7 @@@@@@@@@ void account_user_time(struct task_stru cputime64_t tmp; p->utime = cputime_add(p->utime, cputime); ++ account_group_user_time(p, cputime); /* Add user time to cpustat. */ tmp = cputime_to_cputime64(cputime); @@@@@@@@@ -4124,7 -4094,6 -4094,6 -4122,7 -4112,7 -4124,7 -4124,7 -4108,7 +4124,7 @@@@@@@@@ static void account_guest_time(struct t tmp = cputime_to_cputime64(cputime); p->utime = cputime_add(p->utime, cputime); ++ account_group_user_time(p, cputime); p->gtime = cputime_add(p->gtime, cputime); cpustat->user = cputime64_add(cpustat->user, tmp); @@@@@@@@@ -4160,7 -4129,6 -4129,6 -4158,7 -4148,7 -4160,7 -4160,7 -4144,7 +4160,7 @@@@@@@@@ void account_system_time(struct task_st } p->stime = cputime_add(p->stime, cputime); ++ account_group_system_time(p, cputime); /* Add system time to cpustat. */ tmp = cputime_to_cputime64(cputime); @@@@@@@@@ -4202,7 -4170,6 -4170,6 -4200,7 -4190,7 -4202,6 -4202,7 -4186,7 +4202,6 @@@@@@@@@ void account_steal_time(struct task_str if (p == rq->idle) { p->stime = cputime_add(p->stime, steal); - -- -- account_group_system_time(p, steal); if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else @@@@@@@@@ -4338,7 -4305,7 -4305,7 -4336,7 -4326,7 -4337,7 -4338,7 -4322,7 +4337,7 @@@@@@@@@ void __kprobes sub_preempt_count(int va /* * Underflow? */ --- ---- if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) +++ ++++ if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) return; /* * Is the spinlock portion underflowing? 
@@@@@@@@@ -4459,8 -4426,12 -4426,12 -4457,8 -4447,8 -4458,8 -4459,8 -4443,12 +4458,8 @@@@@@@@@ need_resched_nonpreemptible if (sched_feat(HRTICK)) hrtick_clear(rq); -- - /* -- - * Do the rq-clock update outside the rq lock: -- - */ -- - local_irq_disable(); ++ + spin_lock_irq(&rq->lock); update_rq_clock(rq); -- - spin_lock(&rq->lock); clear_tsk_need_resched(prev); if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { @@@@@@@@@ -4656,15 -4627,6 -4627,6 -4654,15 -4644,15 -4655,15 -4656,15 -4644,15 +4655,15 @@@@@@@@@ __wake_up_sync(wait_queue_head_t *q, un } EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ ++ /** ++ * complete: - signals a single thread waiting on this completion ++ * @x: holds the state of this particular completion ++ * ++ * This will wake up a single thread waiting on this completion. Threads will be ++ * awakened in the same order in which they were queued. ++ * ++ * See also complete_all(), wait_for_completion() and related routines. ++ */ void complete(struct completion *x) { unsigned long flags; @@@@@@@@@ -4676,12 -4638,6 -4638,6 -4674,12 -4664,12 -4675,12 -4676,12 -4664,12 +4675,12 @@@@@@@@@ } EXPORT_SYMBOL(complete); ++ /** ++ * complete_all: - signals all threads waiting on this completion ++ * @x: holds the state of this particular completion ++ * ++ * This will wake up all threads waiting on this particular completion event. ++ */ void complete_all(struct completion *x) { unsigned long flags; @@@@@@@@@ -4702,7 -4658,10 -4658,10 -4700,7 -4690,7 -4701,7 -4702,7 -4690,7 +4701,7 @@@@@@@@@ do_wait_for_common(struct completion *x wait.flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue_tail(&x->wait, &wait); do { -- if ((state == TASK_INTERRUPTIBLE && -- signal_pending(current)) || -- (state == TASK_KILLABLE && -- fatal_signal_pending(current))) { ++ if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; break; } @@@@@@@@@ -4730,31 -4689,12 -4689,12 -4728,31 -4718,31 -4729,31 -4730,31 -4718,31 +4729,31 @@@@@@@@@ wait_for_common(struct completion *x, l return timeout; } ++ /** ++ * wait_for_completion: - waits for completion of a task ++ * @x: holds the state of this particular completion ++ * ++ * This waits to be signaled for completion of a specific task. It is NOT ++ * interruptible and there is no timeout. ++ * ++ * See also similar routines (i.e. wait_for_completion_timeout()) with timeout ++ * and interrupt capability. Also see complete(). ++ */ void __sched wait_for_completion(struct completion *x) { wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(wait_for_completion); ++ /** ++ * wait_for_completion_timeout: - waits for completion of a task (w/timeout) ++ * @x: holds the state of this particular completion ++ * @timeout: timeout value in jiffies ++ * ++ * This waits for either a completion of a specific task to be signaled or for a ++ * specified timeout to expire. The timeout is in jiffies. It is not ++ * interruptible. ++ */ unsigned long __sched wait_for_completion_timeout(struct completion *x, unsigned long timeout) { @@@@@@@@@ -4762,13 -4702,6 -4702,6 -4760,13 -4750,13 -4761,13 -4762,13 -4750,13 +4761,13 @@@@@@@@@ } EXPORT_SYMBOL(wait_for_completion_timeout); ++ /** ++ * wait_for_completion_interruptible: - waits for completion of a task (w/intr) ++ * @x: holds the state of this particular completion ++ * ++ * This waits for completion of a specific task to be signaled. It is ++ * interruptible. 
++ */ int __sched wait_for_completion_interruptible(struct completion *x) { long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE); @@@@@@@@@ -4778,14 -4711,6 -4711,6 -4776,14 -4766,14 -4777,14 -4778,14 -4766,14 +4777,14 @@@@@@@@@ } EXPORT_SYMBOL(wait_for_completion_interruptible); ++ /** ++ * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr)) ++ * @x: holds the state of this particular completion ++ * @timeout: timeout value in jiffies ++ * ++ * This waits for either a completion of a specific task to be signaled or for a ++ * specified timeout to expire. It is interruptible. The timeout is in jiffies. ++ */ unsigned long __sched wait_for_completion_interruptible_timeout(struct completion *x, unsigned long timeout) @@@@@@@@@ -4794,13 -4719,6 -4719,6 -4792,13 -4782,13 -4793,13 -4794,13 -4782,13 +4793,13 @@@@@@@@@ } EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); ++ /** ++ * wait_for_completion_killable: - waits for completion of a task (killable) ++ * @x: holds the state of this particular completion ++ * ++ * This waits to be signaled for completion of a specific task. It can be ++ * interrupted by a kill signal. ++ */ int __sched wait_for_completion_killable(struct completion *x) { long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE); @@@@@@@@@ -5203,8 -5121,7 -5121,7 -5201,8 -5191,8 -5202,8 -5203,8 -5191,8 +5202,8 @@@@@@@@@ recheck * Do not allow realtime tasks into groups that have no runtime * assigned. */ -- if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) ++ if (rt_bandwidth_enabled() && rt_policy(policy) && ++ task_group(p)->rt_bandwidth.rt_runtime == 0) return -EPERM; #endif @@@@@@@@@ -5870,8 -5787,6 -5787,6 -5868,6 -5858,6 -5869,8 -5870,8 -5858,6 +5869,8 @@@@@@@@@ void __cpuinit init_idle(struct task_st struct rq *rq = cpu_rq(cpu); unsigned long flags; ++++ + spin_lock_irqsave(&rq->lock, flags); ++++ + __sched_fork(idle); idle->se.exec_start = sched_clock(); @@@@@@@@@ -5879,6 -5794,7 -5794,7 -5875,7 -5865,7 -5878,6 -5879,6 -5865,7 +5878,6 @@@@@@@@@ idle->cpus_allowed = cpumask_of_cpu(cpu); __set_task_cpu(idle, cpu); ---- - spin_lock_irqsave(&rq->lock, flags); rq->curr = rq->idle = idle; #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) idle->oncpu = 1; @@@@@@@@@ -6041,7 -5957,7 -5957,7 -6038,7 -6028,7 -6040,7 -6041,7 -6028,7 +6040,7 @@@@@@@@@ static int __migrate_task(struct task_s set_task_cpu(p, dest_cpu); if (on_rq) { activate_task(rq_dest, p, 0); -- check_preempt_curr(rq_dest, p); ++ check_preempt_curr(rq_dest, p, 0); } done: ret = 1; @@@@@@@@@ -6366,7 -6282,7 -6282,7 -6363,7 -6353,7 -6365,7 -6366,7 -6353,7 +6365,7 @@@@@@@@@ set_table_entry(struct ctl_table *entry static struct ctl_table * sd_alloc_ctl_domain_table(struct sched_domain *sd) { -- struct ctl_table *table = sd_alloc_ctl_entry(12); ++ struct ctl_table *table = sd_alloc_ctl_entry(13); if (table == NULL) return NULL; @@@@@@@@@ -6394,9 -6310,7 -6310,7 -6391,9 -6381,9 -6393,9 -6394,9 -6381,9 +6393,9 @@@@@@@@@ sizeof(int), 0644, proc_dointvec_minmax); set_table_entry(&table[10], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); -- /* &table[11] is terminator */ ++ set_table_entry(&table[11], "name", sd->name, ++ CORENAME_MAX_SIZE, 0444, proc_dostring); ++ /* &table[12] is terminator */ return table; } @@@@@@@@@ -6888,17 -6802,15 -6802,15 -6885,17 -6875,15 -6887,17 -6888,17 -6875,15 +6887,17 @@@@@@@@@ cpu_attach_domain(struct sched_domain * struct sched_domain *tmp; /* Remove the sched domains which do not 
contribute to scheduling. */ -- - - for (tmp = sd; tmp; tmp = tmp->parent) { ++ + + for (tmp = sd; tmp; ) { struct sched_domain *parent = tmp->parent; if (!parent) break; ++ + + if (sd_parent_degenerate(tmp, parent)) { tmp->parent = parent->parent; if (parent->parent) parent->parent->child = tmp; -- - - } ++ + + } else ++ + + tmp = tmp->parent; } if (sd && sd_degenerate(sd)) { @@@@@@@@@ -7282,21 -7194,13 -7194,13 -7279,21 -7267,21 -7281,21 -7282,21 -7267,21 +7281,21 @@@@@@@@@ static void init_sched_groups_power(in * Non-inlined to reduce accumulated stack pressure in build_sched_domains() */ ++ #ifdef CONFIG_SCHED_DEBUG ++ # define SD_INIT_NAME(sd, type) sd->name = #type ++ #else ++ # define SD_INIT_NAME(sd, type) do { } while (0) ++ #endif ++ #define SD_INIT(sd, type) sd_init_##type(sd) ++ #define SD_INIT_FUNC(type) \ static noinline void sd_init_##type(struct sched_domain *sd) \ { \ memset(sd, 0, sizeof(*sd)); \ *sd = SD_##type##_INIT; \ sd->level = SD_LV_##type; \ ++ SD_INIT_NAME(sd, type); \ } SD_INIT_FUNC(CPU) @@@@@@@@@ -7687,7 -7591,6 -7591,6 -7684,7 -7672,6 -7686,7 -7687,7 -7672,6 +7686,7 @@@@@@@@@ static int __build_sched_domains(const error: free_sched_groups(cpu_map, tmpmask); SCHED_CPUMASK_FREE((void *)allmasks); ++ + + kfree(rd); return -ENOMEM; #endif } @@@@@@@@@ -7789,14 -7692,13 -7692,13 -7786,13 -7773,13 -7788,13 -7789,14 -7773,13 +7788,14 @@@@@@@@@ static int dattrs_equal(struct sched_do * * The passed in 'doms_new' should be kmalloc'd. This routine takes * ownership of it and will kfree it when done with it. If the caller ----- - * failed the kmalloc call, then it can pass in doms_new == NULL, ----- - * and partition_sched_domains() will fallback to the single partition ----- - * 'fallback_doms', it also forces the domains to be rebuilt. +++++ + * failed the kmalloc call, then it can pass in doms_new == NULL && +++++ + * ndoms_new == 1, and partition_sched_domains() will fallback to +++++ + * the single partition 'fallback_doms', it also forces the domains +++++ + * to be rebuilt. * ----- - * If doms_new==NULL it will be replaced with cpu_online_map. ----- - * ndoms_new==0 is a special case for destroying existing domains. ----- - * It will not create the default domain. +++++ + * If doms_new == NULL it will be replaced with cpu_online_map. +++++ + * ndoms_new == 0 is a special case for destroying existing domains, +++++ + * and it will not create the default domain. 
* * Call with hotplug lock held */ @@@@@@@@@ -8340,25 -8242,20 -8242,20 -8336,25 -8323,25 -8338,25 -8340,25 -8323,25 +8339,25 @@@@@@@@@ void __might_sleep(char *file, int line #ifdef in_atomic static unsigned long prev_jiffy; /* ratelimiting */ -- if ((in_atomic() || irqs_disabled()) && -- system_state == SYSTEM_RUNNING && !oops_in_progress) { -- if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -- return; -- prev_jiffy = jiffies; -- printk(KERN_ERR "BUG: sleeping function called from invalid" -- " context at %s:%d\n", file, line); -- printk("in_atomic():%d, irqs_disabled():%d\n", -- in_atomic(), irqs_disabled()); -- debug_show_held_locks(current); -- if (irqs_disabled()) -- print_irqtrace_events(current); -- dump_stack(); -- } ++ if ((!in_atomic() && !irqs_disabled()) || ++ system_state != SYSTEM_RUNNING || oops_in_progress) ++ return; ++ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) ++ return; ++ prev_jiffy = jiffies; ++ ++ printk(KERN_ERR ++ "BUG: sleeping function called from invalid context at %s:%d\n", ++ file, line); ++ printk(KERN_ERR ++ "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), ++ current->pid, current->comm); ++ ++ debug_show_held_locks(current); ++ if (irqs_disabled()) ++ print_irqtrace_events(current); ++ dump_stack(); #endif } EXPORT_SYMBOL(__might_sleep); @@@@@@@@@ -8856,95 -8753,73 -8753,73 -8852,95 -8839,95 -8854,95 -8856,95 -8839,95 +8855,95 @@@@@@@@@ static DEFINE_MUTEX(rt_constraints_mute static unsigned long to_ratio(u64 period, u64 runtime) { if (runtime == RUNTIME_INF) -- return 1ULL << 16; ++ return 1ULL << 20; -- return div64_u64(runtime << 16, period); ++ return div64_u64(runtime << 20, period); } -- #ifdef CONFIG_CGROUP_SCHED -- static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) ++ /* Must be called with tasklist_lock held */ ++ static inline int tg_has_rt_tasks(struct task_group *tg) { -- struct task_group *tgi, *parent = tg->parent; -- unsigned long total = 0; ++ struct task_struct *g, *p; -- if (!parent) { -- if (global_rt_period() < period) -- return 0; ++ do_each_thread(g, p) { ++ if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) ++ return 1; ++ } while_each_thread(g, p); -- return to_ratio(period, runtime) < -- to_ratio(global_rt_period(), global_rt_runtime()); -- } ++ return 0; ++ } -- if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period) -- return 0; ++ struct rt_schedulable_data { ++ struct task_group *tg; ++ u64 rt_period; ++ u64 rt_runtime; ++ }; -- rcu_read_lock(); -- list_for_each_entry_rcu(tgi, &parent->children, siblings) { -- if (tgi == tg) -- continue; ++ static int tg_schedulable(struct task_group *tg, void *data) ++ { ++ struct rt_schedulable_data *d = data; ++ struct task_group *child; ++ unsigned long total, sum = 0; ++ u64 period, runtime; -- total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period), -- tgi->rt_bandwidth.rt_runtime); ++ period = ktime_to_ns(tg->rt_bandwidth.rt_period); ++ runtime = tg->rt_bandwidth.rt_runtime; ++ ++ if (tg == d->tg) { ++ period = d->rt_period; ++ runtime = d->rt_runtime; } -- rcu_read_unlock(); -- return total + to_ratio(period, runtime) <= -- to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), -- parent->rt_bandwidth.rt_runtime); -- } -- #elif defined CONFIG_USER_SCHED -- static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) -- { -- struct task_group *tgi; -- unsigned long total = 0; -- unsigned long global_ratio = -- to_ratio(global_rt_period(), global_rt_runtime()); ++ /* ++ * Cannot 
have more runtime than the period. ++ */ ++ if (runtime > period && runtime != RUNTIME_INF) ++ return -EINVAL; -- rcu_read_lock(); -- list_for_each_entry_rcu(tgi, &task_groups, list) { -- if (tgi == tg) -- continue; ++ /* ++ * Ensure we don't starve existing RT tasks. ++ */ ++ if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg)) ++ return -EBUSY; ++ ++ total = to_ratio(period, runtime); -- total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period), -- tgi->rt_bandwidth.rt_runtime); ++ /* ++ * Nobody can have more than the global setting allows. ++ */ ++ if (total > to_ratio(global_rt_period(), global_rt_runtime())) ++ return -EINVAL; ++ ++ /* ++ * The sum of our children's runtime should not exceed our own. ++ */ ++ list_for_each_entry_rcu(child, &tg->children, siblings) { ++ period = ktime_to_ns(child->rt_bandwidth.rt_period); ++ runtime = child->rt_bandwidth.rt_runtime; ++ ++ if (child == d->tg) { ++ period = d->rt_period; ++ runtime = d->rt_runtime; ++ } ++ ++ sum += to_ratio(period, runtime); } -- rcu_read_unlock(); -- return total + to_ratio(period, runtime) < global_ratio; ++ if (sum > total) ++ return -EINVAL; ++ ++ return 0; } -- #endif -- /* Must be called with tasklist_lock held */ -- static inline int tg_has_rt_tasks(struct task_group *tg) ++ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) { -- struct task_struct *g, *p; -- do_each_thread(g, p) { -- if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) -- return 1; -- } while_each_thread(g, p); -- return 0; ++ struct rt_schedulable_data data = { ++ .tg = tg, ++ .rt_period = period, ++ .rt_runtime = runtime, ++ }; ++ ++ return walk_tg_tree(tg_schedulable, tg_nop, &data); } static int tg_set_bandwidth(struct task_group *tg, @@@@@@@@@ -8954,9 -8829,14 -8829,14 -8950,9 -8937,9 -8952,9 -8954,9 -8937,9 +8953,9 @@@@@@@@@ mutex_lock(&rt_constraints_mutex); read_lock(&tasklist_lock); -- if (rt_runtime == 0 && tg_has_rt_tasks(tg)) { -- err = -EBUSY; -- goto unlock; -- } -- if (!__rt_schedulable(tg, rt_period, rt_runtime)) { -- err = -EINVAL; ++ err = __rt_schedulable(tg, rt_period, rt_runtime); ++ if (err) goto unlock; -- } spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); @@@@@@@@@ -9025,25 -8905,16 -8905,19 -9021,25 -9008,25 -9023,25 -9025,25 -9008,25 +9024,25 @@@@@@@@@ long sched_group_rt_period(struct task_ static int sched_rt_global_constraints(void) { -- struct task_group *tg = &root_task_group; -- u64 rt_runtime, rt_period; ++ u64 runtime, period; int ret = 0; - rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period); - rt_runtime = tg->rt_bandwidth.rt_runtime; + if (sysctl_sched_rt_period <= 0) + return -EINVAL; + - rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period); - rt_runtime = tg->rt_bandwidth.rt_runtime; ++ runtime = global_rt_runtime(); ++ period = global_rt_period(); ++ ++ /* ++ * Sanity check on the sysctl variables. 
++ */ ++ if (runtime > period && runtime != RUNTIME_INF) ++ return -EINVAL; mutex_lock(&rt_constraints_mutex); -- if (!__rt_schedulable(tg, rt_period, rt_runtime)) -- ret = -EINVAL; ++ read_lock(&tasklist_lock); ++ ret = __rt_schedulable(NULL, 0, 0); ++ read_unlock(&tasklist_lock); mutex_unlock(&rt_constraints_mutex); return ret; @@@@@@@@@ -9054,9 -8925,6 -8928,9 -9050,9 -9037,9 -9052,9 -9054,9 -9037,9 +9053,9 @@@@@@@@@ static int sched_rt_global_constraints( unsigned long flags; int i; + if (sysctl_sched_rt_period <= 0) + return -EINVAL; + spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); for_each_possible_cpu(i) { struct rt_rq *rt_rq = &cpu_rq(i)->rt; @@@@@@@@@ -9117,6 -8985,7 -8991,7 -9113,6 -9100,6 -9115,6 -9117,6 -9100,6 +9116,6 @@@@@@@@@ cpu_cgroup_create(struct cgroup_subsys if (!cgrp->parent) { /* This is early initialization for the top cgroup */ -- init_task_group.css.cgroup = cgrp; return &init_task_group.css; } @@@@@@@@@ -9125,6 -8994,9 -9000,9 -9121,6 -9108,6 -9123,6 -9125,6 -9108,6 +9124,6 @@@@@@@@@ if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); -- /* Bind the cgroup to task_group object we just created */ -- tg->css.cgroup = cgrp; -- return &tg->css; } diff --combined kernel/softlockup.c index 3953e4aed73,b9a528f2273,cb838ee93a8,3953e4aed73,3953e4aed73,3953e4aed73,3953e4aed73,3953e4aed73..884e6cd2769 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@@@@@@@@ -164,7 -164,7 -164,7 -164,7 -164,7 -164,7 -164,7 -164,7 +164,7 @@@@@@@@@ unsigned long __read_mostly sysctl_hung /* * Zero means infinite timeout - no checking done: */ - ------unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; + ++++++unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; unsigned long __read_mostly sysctl_hung_task_warnings = 10; @@@@@@@@@ -226,7 -226,7 -226,7 -226,7 -226,7 -226,7 -226,7 -226,7 +226,7 @@@@@@@@@ static void check_hung_uninterruptible_ * If the system crashed already then all bets are off, * do not report extra hung tasks: */ -- if ((tainted & TAINT_DIE) || did_panic) ++ if (test_taint(TAINT_DIE) || did_panic) return; read_lock(&tasklist_lock); diff --combined lib/Kconfig.debug index b0f239e443b,4116e10ea14,0b504814e37,b0f239e443b,b0f239e443b,b0f239e443b,b0f239e443b,b0f239e443b..1e3fd3e3436 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@@@@@@@@ -495,15 -495,6 -495,6 -495,15 -495,15 -495,15 -495,15 -495,15 +495,15 @@@@@@@@@ config DEBUG_V If unsure, say N. ++ config DEBUG_VIRTUAL ++ bool "Debug VM translations" ++ depends on DEBUG_KERNEL && X86 ++ help ++ Enable some costly sanity checks in virtual to page code. This can ++ catch mistakes with virt_to_page() and friends. ++ ++ If unsure, say N. ++ config DEBUG_WRITECOUNT bool "Debug filesystem writers count" depends on DEBUG_KERNEL @@@@@@@@@ -545,6 -536,16 -536,6 -545,6 -545,6 -545,6 -545,6 -545,6 +545,16 @@@@@@@@@ config DEBUG_S If unsure, say N. + ++++++config DEBUG_NOTIFIERS + ++++++ bool "Debug notifier call chains" + ++++++ depends on DEBUG_KERNEL + ++++++ help + ++++++ Enable this to turn on sanity checking for notifier call chains. + ++++++ This is most useful for kernel developers to make sure that + ++++++ modules properly unregister themselves from notifier chains. + ++++++ This is a relatively cheap check but if you care about maximum + ++++++ performance, say N. 
+ ++++++ config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && \ @@@@@@@@@ -606,19 -607,6 -597,6 -606,19 -606,19 -606,19 -606,19 -606,19 +616,19 @@@@@@@@@ config RCU_TORTURE_TEST_RUNNABL Say N here if you want the RCU torture tests to start only after being manually enabled via /proc. ++ config RCU_CPU_STALL_DETECTOR ++ bool "Check for stalled CPUs delaying RCU grace periods" ++ depends on CLASSIC_RCU ++ default n ++ help ++ This option causes RCU to printk information on which ++ CPUs are delaying the current grace period, but only when ++ the grace period extends for excessive time periods. ++ ++ Say Y if you want RCU to perform such checks. ++ ++ Say N if you are unsure. ++ config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL @@@@@@@@@ -646,33 -634,6 -624,6 -646,33 -646,33 -646,33 -646,33 -646,33 +656,33 @@@@@@@@@ config BACKTRACE_SELF_TES Say N if you are unsure. ++ config DEBUG_BLOCK_EXT_DEVT ++ bool "Force extended block device numbers and spread them" ++ depends on DEBUG_KERNEL ++ depends on BLOCK ++ default n ++ help ++ BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON ++ SOME DISTRIBUTIONS. DO NOT ENABLE THIS UNLESS YOU KNOW WHAT ++ YOU ARE DOING. Distros, please enable this and fix whatever ++ is broken. ++ ++ Conventionally, block device numbers are allocated from ++ predetermined contiguous area. However, extended block area ++ may introduce non-contiguous block device numbers. This ++ option forces most block device numbers to be allocated from ++ the extended space and spreads them to discover kernel or ++ userland code paths which assume predetermined contiguous ++ device number allocation. ++ ++ Note that turning on this debug option shuffles all the ++ device numbers for all IDE and SCSI devices including libata ++ ones, so root partition specified using device number ++ directly (via rdev or root=MAJ:MIN) won't work anymore. ++ Textual device names (root=/dev/sdXn) will continue to work. ++ ++ Say N if you are unsure. ++ config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_KERNEL @@@@@@@@@ -710,21 -671,10 -661,10 -710,21 -710,21 -710,21 -710,21 -710,21 +720,21 @@@@@@@@@ config FAIL_PAGE_ALLO config FAIL_MAKE_REQUEST bool "Fault-injection capability for disk IO" -- depends on FAULT_INJECTION ++ depends on FAULT_INJECTION && BLOCK help Provide fault-injection capability for disk IO. ++ config FAIL_IO_TIMEOUT ++ bool "Faul-injection capability for faking disk interrupts" ++ depends on FAULT_INJECTION && BLOCK ++ help ++ Provide fault-injection capability on end IO handling. This ++ will make the block layer "forget" an interrupt as configured, ++ thus exercising the error handling. ++ ++ Only works with drivers that use the generic timeout handling, ++ for others it wont do anything. ++ config FAULT_INJECTION_DEBUG_FS bool "Debugfs entries for fault-injection capabilities" depends on FAULT_INJECTION && SYSFS && DEBUG_FS @@@@@@@@@ -812,61 -762,6 -752,6 -812,61 -812,61 -812,61 -812,61 -812,61 +822,61 @@@@@@@@@ menuconfig BUILD_DOCSR Say N if you are unsure. ++ config DYNAMIC_PRINTK_DEBUG ++ bool "Enable dynamic printk() call support" ++ default n ++ depends on PRINTK ++ select PRINTK_DEBUG ++ help ++ ++ Compiles debug level messages into the kernel, which would not ++ otherwise be available at runtime. These messages can then be ++ enabled/disabled on a per module basis. This mechanism implicitly ++ enables all pr_debug() and dev_dbg() calls. 
The impact of this ++ compile option is a larger kernel text size of about 2%. ++ ++ Usage: ++ ++ Dynamic debugging is controlled by the debugfs file, ++ dynamic_printk/modules. This file contains a list of the modules that ++ can be enabled. The format of the file is the module name, followed ++ by a set of flags that can be enabled. The first flag is always the ++ 'enabled' flag. For example: ++ ++ ++ . ++ . ++ . ++ ++ : Name of the module in which the debug call resides ++ : whether the messages are enabled or not ++ ++ From a live system: ++ ++ snd_hda_intel enabled=0 ++ fixup enabled=0 ++ driver enabled=0 ++ ++ Enable a module: ++ ++ $echo "set enabled=1 " > dynamic_printk/modules ++ ++ Disable a module: ++ ++ $echo "set enabled=0 " > dynamic_printk/modules ++ ++ Enable all modules: ++ ++ $echo "set enabled=1 all" > dynamic_printk/modules ++ ++ Disable all modules: ++ ++ $echo "set enabled=0 all" > dynamic_printk/modules ++ ++ Finally, passing "dynamic_printk" at the command line enables ++ debugging for all modules. This mode can be turned off via the above ++ disable command. ++ source "samples/Kconfig" source "lib/Kconfig.kgdb"
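
A quick illustration of the walk_tg_tree() change folded in above: the scheduler hunks replace the old per-domain tg_shares_up()/tg_load_down() signatures with int-returning callbacks of the form fn(struct task_group *tg, void *data), driven by walk_tg_tree(down, up, data). What follows is a stand-alone user-space sketch of that down/up walk, not kernel code; the struct, the toy tree and the callbacks are invented purely to show the shape of the API, and only the control flow (call "down" on the way towards the leaves, "up" on the way back, abort as soon as a callback returns non-zero) mirrors the patch.

/*
 * Sketch of a callback-driven group-tree walk in the style of the
 * walk_tg_tree() API above.  All types, names and callbacks here are
 * made up for illustration only.
 */
#include <stdio.h>

struct group {
	const char *name;
	int weight;
	struct group *children[4];
	int nr_children;
};

typedef int (*tg_visit_t)(struct group *g, void *data);

/*
 * Call "down" before descending and "up" once all children are done;
 * propagate the first non-zero return value and stop the walk there.
 */
static int walk_group_tree(struct group *g, tg_visit_t down, tg_visit_t up,
			   void *data)
{
	int ret, i;

	ret = down(g, data);		/* top-down pass, cf. tg_load_down() */
	if (ret)
		return ret;

	for (i = 0; i < g->nr_children; i++) {
		ret = walk_group_tree(g->children[i], down, up, data);
		if (ret)
			return ret;
	}

	return up(g, data);		/* bottom-up pass, cf. tg_shares_up() */
}

static int nop(struct group *g, void *data)
{
	(void)g;
	(void)data;
	return 0;			/* plays the role of tg_nop() */
}

static int sum_weight(struct group *g, void *data)
{
	*(int *)data += g->weight;	/* toy bottom-up aggregation */
	return 0;
}

int main(void)
{
	struct group leaf1 = { "leaf1",  512, { NULL }, 0 };
	struct group leaf2 = { "leaf2", 1024, { NULL }, 0 };
	struct group root  = { "root",  1024, { &leaf1, &leaf2 }, 2 };
	int total = 0;

	walk_group_tree(&root, nop, sum_weight, &total);
	printf("aggregated weight: %d\n", total);	/* prints 2560 */
	return 0;
}

The abort-on-non-zero behaviour is what lets the new __rt_schedulable() in the hunks above simply return walk_tg_tree(tg_schedulable, tg_nop, &data) and have an -EBUSY or -EINVAL from tg_schedulable() fall straight out of the walk.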