Merge branch 'linus' into core/softlockup

author Ingo Molnar <mingo@elte.hu>

Thu, 12 Feb 2009 12:08:57 +0000 (13:08 +0100)

committer Ingo Molnar <mingo@elte.hu>

Thu, 12 Feb 2009 12:08:57 +0000 (13:08 +0100)
author Ingo Molnar <mingo@elte.hu>
Thu, 12 Feb 2009 12:08:57 +0000 (13:08 +0100)
committer Ingo Molnar <mingo@elte.hu>
Thu, 12 Feb 2009 12:08:57 +0000 (13:08 +0100)
diff --combined include/linux/sched.h

index e0d723fea9f506f3878a5d250691cf96981aba2c,8981e52c714f05f19ad5727031058509e06bf6a8..d05e2b3ae41af174282e1d5702e20df334408562
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -297,11 -297,17 +297,11 @@@ extern int proc_dosoftlockup_thresh(str
                                     struct file *filp, void __user *buffer,
                                     size_t *lenp, loff_t *ppos);
   extern unsigned int  softlockup_panic;
- -extern unsigned long sysctl_hung_task_check_count;
- -extern unsigned long sysctl_hung_task_timeout_secs;
- -extern unsigned long sysctl_hung_task_warnings;
   extern int softlockup_thresh;
   #else
   static inline void softlockup_tick(void)
   {
   }
- -static inline void spawn_softlockup_task(void)
- -{
- -}
   static inline void touch_softlockup_watchdog(void)
   {
   }
@@@ -310,15 -316,6 +310,15 @@@ static inline void touch_all_softlockup
   }
   #endif
   
+ +#ifdef CONFIG_DETECT_HUNG_TASK
+ +extern unsigned int  sysctl_hung_task_panic;
+ +extern unsigned long sysctl_hung_task_check_count;
+ +extern unsigned long sysctl_hung_task_timeout_secs;
+ +extern unsigned long sysctl_hung_task_warnings;
+ +extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+ +                                       struct file *filp, void __user *buffer,
+ +                                       size_t *lenp, loff_t *ppos);
+ +#endif
   
   /* Attach to any functions which should be ignored in wchan output. */
   #define __sched               __attribute__((__section__(".sched.text")))
@@@ -462,16 -459,27 +462,27 @@@ struct task_cputime 
   #define virt_exp      utime
   #define sched_exp     sum_exec_runtime
   
+ #define INIT_CPUTIME  \
+       (struct task_cputime) {                                 \
+               .utime = cputime_zero,                          \
+               .stime = cputime_zero,                          \
+               .sum_exec_runtime = 0,                          \
+       }
+ 
   /**
-  * struct thread_group_cputime - thread group interval timer counts
-  * @totals:           thread group interval timers; substructure for
-  *                    uniprocessor kernel, per-cpu for SMP kernel.
+  * struct thread_group_cputimer - thread group interval timer counts
+  * @cputime:          thread group interval timers.
+  * @running:          non-zero when there are timers running and
+  *                    @cputime receives updates.
+  * @lock:             lock for fields in this struct.
    *
    * This structure contains the version of task_cputime, above, that is
-  * used for thread group CPU clock calculations.
+  * used for thread group CPU timer calculations.
    */
- struct thread_group_cputime {
-       struct task_cputime *totals;
+ struct thread_group_cputimer {
+       struct task_cputime cputime;
+       int running;
+       spinlock_t lock;
   };
   
   /*
@@@ -520,10 -528,10 +531,10 @@@ struct signal_struct 
         cputime_t it_prof_incr, it_virt_incr;
   
         /*
-        * Thread group totals for process CPU clocks.
-        * See thread_group_cputime(), et al, for details.
+        * Thread group totals for process CPU timers.
+        * See thread_group_cputimer(), et al, for details.
          */
-       struct thread_group_cputime cputime;
+       struct thread_group_cputimer cputimer;
   
         /* Earliest-expiration cache. */
         struct task_cputime cputime_expires;
@@@ -560,7 -568,7 +571,7 @@@
          * Live threads maintain their own counters and add to these
          * in __exit_signal, except for the group leader.
          */
-       cputime_t cutime, cstime;
+       cputime_t utime, stime, cutime, cstime;
         cputime_t gtime;
         cputime_t cgtime;
         unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@@ -568,6 -576,14 +579,14 @@@
         unsigned long inblock, oublock, cinblock, coublock;
         struct task_io_accounting ioac;
   
+       /*
+        * Cumulative ns of scheduled CPU time for dead threads in the
+        * group, not including a zombie group leader. (This only differs
+        * from jiffies_to_ns(utime + stime) if sched_clock uses something
+        * other than jiffies.)
+        */
+       unsigned long long sum_sched_runtime;
+ 
         /*
          * We don't bother to synchronize most readers of this at all,
          * because there is no reader checking a limit that actually needs
@@@ -632,7 -648,6 +651,6 @@@ struct user_struct 
         atomic_t inotify_devs;  /* How many inotify devs does this user have opened? */
   #endif
   #ifdef CONFIG_EPOLL
-       atomic_t epoll_devs;    /* The number of epoll descriptors currently open */
         atomic_t epoll_watches; /* The number of file descriptors currently watched */
   #endif
   #ifdef CONFIG_POSIX_MQUEUE
@@@ -1239,8 -1254,9 +1257,8 @@@ struct task_struct 
   /* ipc stuff */
         struct sysv_sem sysvsem;
   #endif
- -#ifdef CONFIG_DETECT_SOFTLOCKUP
+ +#ifdef CONFIG_DETECT_HUNG_TASK
   /* hung task detection */
- -      unsigned long last_switch_timestamp;
         unsigned long last_switch_count;
   #endif
   /* CPU-specific state of this task */
@@@ -2184,25 -2200,18 +2202,18 @@@ static inline int spin_needbreak(spinlo
   /*
    * Thread group CPU time accounting.
    */
- 
- extern int thread_group_cputime_alloc(struct task_struct *);
- extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
+ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
+ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
   
   static inline void thread_group_cputime_init(struct signal_struct *sig)
   {
-       sig->cputime.totals = NULL;
- }
- 
- static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
- {
-       if (curr->signal->cputime.totals)
-               return 0;
-       return thread_group_cputime_alloc(curr);
+       sig->cputimer.cputime = INIT_CPUTIME;
+       spin_lock_init(&sig->cputimer.lock);
+       sig->cputimer.running = 0;
   }
   
   static inline void thread_group_cputime_free(struct signal_struct *sig)
   {
-       free_percpu(sig->cputime.totals);
   }
   
   /*
diff --combined kernel/Makefile

index 979745f1b4bc67ad8ff90c5664f1804e66f426d0,170a9213c1b68d6cee652fb7758fef556ecf127f..ae6565b3dced1c8bd646210d55cc7a48af93ec22
--- 1/kernel/Makefile
--- 2/kernel/Makefile
+++ b/kernel/Makefile
@@@ -40,9 -40,8 +40,8 @@@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.
   obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
   obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
   obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
- ifeq ($(CONFIG_USE_GENERIC_SMP_HELPERS),y)
- obj-y += smp.o
- else
+ obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
+ ifneq ($(CONFIG_SMP),y)
   obj-y += up.o
   endif
   obj-$(CONFIG_SMP) += spinlock.o
@@@ -74,7 -73,6 +73,7 @@@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.
   obj-$(CONFIG_KPROBES) += kprobes.o
   obj-$(CONFIG_KGDB) += kgdb.o
   obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+ +obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
   obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
   obj-$(CONFIG_SECCOMP) += seccomp.o
   obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --combined kernel/fork.c

index bf582f75014b5d1ba7f42c85aff366177a8ce37f,a66fbde20715bb2d93180b7b3d03541d31d9c068..3b5dcf9a66aab7e29e8a69e5b1695bb972f2217c
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -639,9 -639,6 +639,9 @@@ static int copy_mm(unsigned long clone_
   
         tsk->min_flt = tsk->maj_flt = 0;
         tsk->nvcsw = tsk->nivcsw = 0;
+ +#ifdef CONFIG_DETECT_HUNG_TASK
+ +      tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+ +#endif
   
         tsk->mm = NULL;
         tsk->active_mm = NULL;
@@@ -820,17 -817,17 +820,17 @@@ static void posix_cpu_timers_init_group
   static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
   {
         struct signal_struct *sig;
-       int ret;
   
         if (clone_flags & CLONE_THREAD) {
-               ret = thread_group_cputime_clone_thread(current);
-               if (likely(!ret)) {
-                       atomic_inc(&current->signal->count);
-                       atomic_inc(&current->signal->live);
-               }
-               return ret;
+               atomic_inc(&current->signal->count);
+               atomic_inc(&current->signal->live);
+               return 0;
         }
         sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+ 
+       if (sig)
+               posix_cpu_timers_init_group(sig);
+ 
         tsk->signal = sig;
         if (!sig)
                 return -ENOMEM;
@@@ -854,21 -851,20 +854,20 @@@
         sig->tty_old_pgrp = NULL;
         sig->tty = NULL;
   
-       sig->cutime = sig->cstime = cputime_zero;
+       sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
         sig->gtime = cputime_zero;
         sig->cgtime = cputime_zero;
         sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
         sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
         sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
         task_io_accounting_init(&sig->ioac);
+       sig->sum_sched_runtime = 0;
         taskstats_tgid_init(sig);
   
         task_lock(current->group_leader);
         memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
         task_unlock(current->group_leader);
   
-       posix_cpu_timers_init_group(sig);
- 
         acct_init_pacct(&sig->pacct);
   
         tty_audit_fork(sig);
@@@ -904,7 -900,7 +903,7 @@@ static void copy_flags(unsigned long cl
         clear_freeze_flag(p);
   }
   
- asmlinkage long sys_set_tid_address(int __user *tidptr)
+ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
   {
         current->clear_child_tid = tidptr;
   
@@@ -1010,6 -1006,7 +1009,7 @@@ static struct task_struct *copy_process
          * triggers too late. This doesn't hurt, the check is only there
          * to stop root fork bombs.
          */
+       retval = -EAGAIN;
         if (nr_threads >= max_threads)
                 goto bad_fork_cleanup_count;
   
@@@ -1044,6 -1041,11 +1044,6 @@@
   
         p->default_timer_slack_ns = current->timer_slack_ns;
   
- -#ifdef CONFIG_DETECT_SOFTLOCKUP
- -      p->last_switch_count = 0;
- -      p->last_switch_timestamp = 0;
- -#endif
- -
         task_io_accounting_init(&p->ioac);
         acct_clear_integrals(p);
   
@@@ -1093,7 -1095,7 +1093,7 @@@
   #ifdef CONFIG_DEBUG_MUTEXES
         p->blocked_on = NULL; /* not blocked yet */
   #endif
-       if (unlikely(ptrace_reparented(current)))
+       if (unlikely(current->ptrace))
                 ptrace_fork(p, clone_flags);
   
         /* Perform scheduler related setup. Assign this task to a CPU. */
@@@ -1601,7 -1603,7 +1601,7 @@@ static int unshare_fd(unsigned long uns
    * constructed. Here we are modifying the current, active,
    * task_struct.
    */
- asmlinkage long sys_unshare(unsigned long unshare_flags)
+ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
   {
         int err = 0;
         struct fs_struct *fs, *new_fs = NULL;
diff --combined kernel/sysctl.c

index 2481ed30d2b5097e63b5c34aea381f1722726106,c5ef44ff850f5af111943d76a5484318d6a5bc35..3b6b54c8ac0dce3accc5e3b013d359932db5420b
--- 1/kernel/sysctl.c
--- 2/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@@ -101,6 -101,7 +101,7 @@@ static int two = 2
   
   static int zero;
   static int one = 1;
+ static unsigned long one_ul = 1;
   static int one_hundred = 100;
   
   /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@@ -144,6 -145,7 +145,7 @@@ extern int acct_parm[]
   
   #ifdef CONFIG_IA64
   extern int no_unaligned_warning;
+ extern int unaligned_dump_stack;
   #endif
   
   #ifdef CONFIG_RT_MUTEXES
@@@ -781,6 -783,14 +783,14 @@@ static struct ctl_table kern_table[] = 
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec,
         },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "unaligned-dump-stack",
+               .data           = &unaligned_dump_stack,
+               .maxlen         = sizeof (int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
   #endif
   #ifdef CONFIG_DETECT_SOFTLOCKUP
         {
@@@ -805,19 -815,6 +815,19 @@@
                 .extra1         = &neg_one,
                 .extra2         = &sixty,
         },
+ +#endif
+ +#ifdef CONFIG_DETECT_HUNG_TASK
+ +      {
+ +              .ctl_name       = CTL_UNNUMBERED,
+ +              .procname       = "hung_task_panic",
+ +              .data           = &sysctl_hung_task_panic,
+ +              .maxlen         = sizeof(int),
+ +              .mode           = 0644,
+ +              .proc_handler   = &proc_dointvec_minmax,
+ +              .strategy       = &sysctl_intvec,
+ +              .extra1         = &zero,
+ +              .extra2         = &one,
+ +      },
         {
                 .ctl_name       = CTL_UNNUMBERED,
                 .procname       = "hung_task_check_count",
@@@ -833,7 -830,7 +843,7 @@@
                 .data           = &sysctl_hung_task_timeout_secs,
                 .maxlen         = sizeof(unsigned long),
                 .mode           = 0644,
- -              .proc_handler   = &proc_doulongvec_minmax,
+ +              .proc_handler   = &proc_dohung_task_timeout_secs,
                 .strategy       = &sysctl_intvec,
         },
         {
@@@ -978,7 -975,7 +988,7 @@@ static struct ctl_table vm_table[] = 
                 .mode           = 0644,
                 .proc_handler   = &dirty_background_bytes_handler,
                 .strategy       = &sysctl_intvec,
-               .extra1         = &one,
+               .extra1         = &one_ul,
         },
         {
                 .ctl_name       = VM_DIRTY_RATIO,
@@@ -999,7 -996,7 +1009,7 @@@
                 .mode           = 0644,
                 .proc_handler   = &dirty_bytes_handler,
                 .strategy       = &sysctl_intvec,
-               .extra1         = &one,
+               .extra1         = &one_ul,
         },
         {
                 .procname       = "dirty_writeback_centisecs",
@@@ -1701,7 -1698,7 +1711,7 @@@ int do_sysctl(int __user *name, int nle
         return error;
   }
   
- asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
+ SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
   {
         struct __sysctl_args tmp;
         int error;
@@@ -3002,7 -2999,7 +3012,7 @@@ int sysctl_ms_jiffies(struct ctl_table 
   #else /* CONFIG_SYSCTL_SYSCALL */
   
   
- asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
+ SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
   {
         struct __sysctl_args tmp;
         int error;
diff --combined lib/Kconfig.debug

index 883ecea22f37510bd0f3b135953f1872d8df85f8,29044f500269c3b9ad9b834adc21a77557ac897b..4934eaa21e1e13b2bad4fa30e27a6fc91621aa69
--- 1/lib/Kconfig.debug
--- 2/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@@ -186,44 -186,6 +186,44 @@@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALU
         default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
         default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
   
+ +config DETECT_HUNG_TASK
+ +      bool "Detect Hung Tasks"
+ +      depends on DEBUG_KERNEL
+ +      default y
+ +      help
+ +        Say Y here to enable the kernel to detect "hung tasks",
+ +        which are bugs that cause the task to be stuck in
+ +        uninterruptible "D" state indefinitely.
+ +
+ +        When a hung task is detected, the kernel will print the
+ +        current stack trace (which you should report), but the
+ +        task will stay in uninterruptible state. If lockdep is
+ +        enabled then all held locks will also be reported. This
+ +        feature has negligible overhead.
+ +
+ +config BOOTPARAM_HUNG_TASK_PANIC
+ +      bool "Panic (Reboot) On Hung Tasks"
+ +      depends on DETECT_HUNG_TASK
+ +      help
+ +        Say Y here to enable the kernel to panic on "hung tasks",
+ +        which are bugs that cause the kernel to leave a task stuck
+ +        in uninterruptible "D" state.
+ +
+ +        The panic can be used in combination with panic_timeout,
+ +        to cause the system to reboot automatically after a
+ +        hung task has been detected. This feature is useful for
+ +        high-availability systems that have uptime guarantees and
+ +        where hung tasks must be resolved ASAP.
+ +
+ +        Say N if unsure.
+ +
+ +config BOOTPARAM_HUNG_TASK_PANIC_VALUE
+ +      int
+ +      depends on DETECT_HUNG_TASK
+ +      range 0 1
+ +      default 0 if !BOOTPARAM_HUNG_TASK_PANIC
+ +      default 1 if BOOTPARAM_HUNG_TASK_PANIC
+ +
   config SCHED_DEBUG
         bool "Collect scheduler debugging info"
         depends on DEBUG_KERNEL && PROC_FS
@@@ -608,6 -570,15 +608,15 @@@ config DEBUG_NOTIFIER
           This is a relatively cheap check but if you care about maximum
           performance, say N.
   
+ #
+ # Select this config option from the architecture Kconfig, if
+ # it is preferred to always offer frame pointers as a config
+ # option on the architecture (regardless of DEBUG_KERNEL):
+ #
+ config ARCH_WANT_FRAME_POINTERS
+       bool
+       help
+ 
   config FRAME_POINTER
         bool "Compile the kernel with frame pointers"
         depends on DEBUG_KERNEL && \
@@@ -669,19 -640,6 +678,6 @@@ config RCU_TORTURE_TEST_RUNNABL
           Say N here if you want the RCU torture tests to start only
           after being manually enabled via /proc.
   
- config RCU_CPU_STALL_DETECTOR
-       bool "Check for stalled CPUs delaying RCU grace periods"
-       depends on CLASSIC_RCU
-       default n
-       help
-         This option causes RCU to printk information on which
-         CPUs are delaying the current grace period, but only when
-         the grace period extends for excessive time periods.
- 
-         Say Y if you want RCU to perform such checks.
- 
-         Say N if you are unsure.
- 
   config RCU_CPU_STALL_DETECTOR
         bool "Check for stalled CPUs delaying RCU grace periods"
         depends on CLASSIC_RCU || TREE_RCU
author	Ingo Molnar <mingo@elte.hu>
	Thu, 12 Feb 2009 12:08:57 +0000 (13:08 +0100)
committer	Ingo Molnar <mingo@elte.hu>
	Thu, 12 Feb 2009 12:08:57 +0000 (13:08 +0100)
		1	2
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sysctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/Kconfig.debug	patch \|	diff1 \|	diff2 \|	blob \| history