Merge branches 'core/debug', 'core/futexes', 'core/locking', 'core/rcu', 'core/signal...

[linux-2.6-omap-h63xx.git] / kernel / exit.c
diff --git a/kernel/exit.c b/kernel/exit.c

index 16395644a98ff8c060b2f5fd776fe7abadd61c6a..30fcdf16737a2bb013a78b62bc6b7f1eb6eb165b 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -40,13 +40,13 @@
  #include <linux/cn_proc.h>
  #include <linux/mutex.h>
  #include <linux/futex.h>
-#include <linux/compat.h>
  #include <linux/pipe_fs_i.h>
  #include <linux/audit.h> /* for audit_free() */
  #include <linux/resource.h>
  #include <linux/blkdev.h>
  #include <linux/task_io_accounting_ops.h>
  #include <linux/tracehook.h>
+#include <trace/sched.h>
  
  #include <asm/uaccess.h>
  #include <asm/unistd.h>
@@ -112,8 +112,6 @@ static void __exit_signal(struct task_struct *tsk)
                  * We won't ever get here for the group leader, since it
                  * will have been the last reference on the signal_struct.
                  */
-               sig->utime = cputime_add(sig->utime, task_utime(tsk));
-               sig->stime = cputime_add(sig->stime, task_stime(tsk));
                 sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
                 sig->min_flt += tsk->min_flt;
                 sig->maj_flt += tsk->maj_flt;
@@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk)
                 sig->inblock += task_io_get_inblock(tsk);
                 sig->oublock += task_io_get_oublock(tsk);
                 task_io_accounting_add(&sig->ioac, &tsk->ioac);
-               sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
                 sig = NULL; /* Marker for below. */
         }
  
@@ -143,13 +140,21 @@ static void __exit_signal(struct task_struct *tsk)
         if (sig) {
                 flush_sigqueue(&sig->shared_pending);
                 taskstats_tgid_free(sig);
+               /*
+                * Make sure ->signal can't go away under rq->lock,
+                * see account_group_exec_runtime().
+                */
+               task_rq_unlock_wait(tsk);
                 __cleanup_signal(sig);
         }
  }
  
  static void delayed_put_task_struct(struct rcu_head *rhp)
  {
-       put_task_struct(container_of(rhp, struct task_struct, rcu));
+       struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+       trace_sched_process_free(tsk);
+       put_task_struct(tsk);
  }
  
  
@@ -583,8 +588,6 @@ mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
          * If there are other users of the mm and the owner (us) is exiting
          * we need to find a new owner to take on the responsibility.
          */
-       if (!mm)
-               return 0;
         if (atomic_read(&mm->mm_users) <= 1)
                 return 0;
         if (mm->owner != p)
@@ -627,29 +630,38 @@ retry:
         } while_each_thread(g, c);
  
         read_unlock(&tasklist_lock);
+       /*
+        * We found no owner yet mm_users > 1: this implies that we are
+        * most likely racing with swapoff (try_to_unuse()) or /proc or
+        * ptrace or page migration (get_task_mm()).  Mark owner as NULL,
+        * so that subsystems can understand the callback and take action.
+        */
+       down_write(&mm->mmap_sem);
+       cgroup_mm_owner_callbacks(mm->owner, NULL);
+       mm->owner = NULL;
+       up_write(&mm->mmap_sem);
         return;
  
  assign_new_owner:
         BUG_ON(c == p);
         get_task_struct(c);
+       read_unlock(&tasklist_lock);
+       down_write(&mm->mmap_sem);
         /*
          * The task_lock protects c->mm from changing.
          * We always want mm->owner->mm == mm
          */
         task_lock(c);
-       /*
-        * Delay read_unlock() till we have the task_lock()
-        * to ensure that c does not slip away underneath us
-        */
-       read_unlock(&tasklist_lock);
         if (c->mm != mm) {
                 task_unlock(c);
+               up_write(&mm->mmap_sem);
                 put_task_struct(c);
                 goto retry;
         }
         cgroup_mm_owner_callbacks(mm->owner, c);
         mm->owner = c;
         task_unlock(c);
+       up_write(&mm->mmap_sem);
         put_task_struct(c);
  }
  #endif /* CONFIG_MM_OWNER */
@@ -1046,14 +1058,6 @@ NORET_TYPE void do_exit(long code)
                 exit_itimers(tsk->signal);
         }
         acct_collect(code, group_dead);
-#ifdef CONFIG_FUTEX
-       if (unlikely(tsk->robust_list))
-               exit_robust_list(tsk);
-#ifdef CONFIG_COMPAT
-       if (unlikely(tsk->compat_robust_list))
-               compat_exit_robust_list(tsk);
-#endif
-#endif
         if (group_dead)
                 tty_audit_exit();
         if (unlikely(tsk->audit_context))
@@ -1066,6 +1070,8 @@ NORET_TYPE void do_exit(long code)
  
         if (group_dead)
                 acct_process();
+       trace_sched_process_exit(tsk);
+
         exit_sem(tsk);
         exit_files(tsk);
         exit_fs(tsk);
@@ -1294,6 +1300,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
         if (likely(!traced)) {
                 struct signal_struct *psig;
                 struct signal_struct *sig;
+               struct task_cputime cputime;
  
                 /*
                  * The resource counters for the group leader are in its
@@ -1309,20 +1316,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
                  * need to protect the access to p->parent->signal fields,
                  * as other threads in the parent group can be right
                  * here reaping other children at the same time.
+                *
+                * We use thread_group_cputime() to get times for the thread
+                * group, which consolidates times for all threads in the
+                * group including the group leader.
                  */
+               thread_group_cputime(p, &cputime);
                 spin_lock_irq(&p->parent->sighand->siglock);
                 psig = p->parent->signal;
                 sig = p->signal;
                 psig->cutime =
                         cputime_add(psig->cutime,
-                       cputime_add(p->utime,
-                       cputime_add(sig->utime,
-                                   sig->cutime)));
+                       cputime_add(cputime.utime,
+                                   sig->cutime));
                 psig->cstime =
                         cputime_add(psig->cstime,
-                       cputime_add(p->stime,
-                       cputime_add(sig->stime,
-                                   sig->cstime)));
+                       cputime_add(cputime.stime,
+                                   sig->cstime));
                 psig->cgtime =
                         cputime_add(psig->cgtime,
                         cputime_add(p->gtime,
@@ -1667,6 +1677,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
         struct task_struct *tsk;
         int retval;
  
+       trace_sched_process_wait(pid);
+
         add_wait_queue(&current->signal->wait_chldexit,&wait);
  repeat:
         /*