Format: <area>[,<node>]
                        See also Documentation/networking/decnet.txt.
 
+       delayacct       [KNL] Enable per-task delay accounting
+
        dhash_entries=  [KNL]
                        Set number of hash buckets for dentry cache.
 
 
--- /dev/null
+/* delayacct.h - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_DELAYACCT_H
+#define _LINUX_DELAYACCT_H
+
+#include <linux/sched.h>
+
+#ifdef CONFIG_TASK_DELAY_ACCT
+
+extern int delayacct_on;       /* Delay accounting turned on/off */
+extern kmem_cache_t *delayacct_cache;
+extern void delayacct_init(void);
+extern void __delayacct_tsk_init(struct task_struct *);
+extern void __delayacct_tsk_exit(struct task_struct *);
+
+static inline void delayacct_set_flag(int flag)
+{
+       if (current->delays)
+               current->delays->flags |= flag;
+}
+
+static inline void delayacct_clear_flag(int flag)
+{
+       if (current->delays)
+               current->delays->flags &= ~flag;
+}
+
+static inline void delayacct_tsk_init(struct task_struct *tsk)
+{
+       /* reinitialize in case parent's non-null pointer was dup'ed*/
+       tsk->delays = NULL;
+       if (unlikely(delayacct_on))
+               __delayacct_tsk_init(tsk);
+}
+
+static inline void delayacct_tsk_exit(struct task_struct *tsk)
+{
+       if (tsk->delays)
+               __delayacct_tsk_exit(tsk);
+}
+
+#else
+static inline void delayacct_set_flag(int flag)
+{}
+static inline void delayacct_clear_flag(int flag)
+{}
+static inline void delayacct_init(void)
+{}
+static inline void delayacct_tsk_init(struct task_struct *tsk)
+{}
+static inline void delayacct_tsk_exit(struct task_struct *tsk)
+{}
+#endif /* CONFIG_TASK_DELAY_ACCT */
+
+#endif
 
 extern struct file_operations proc_schedstat_operations;
 #endif
 
+#ifdef CONFIG_TASK_DELAY_ACCT
+struct task_delay_info {
+       spinlock_t      lock;
+       unsigned int    flags;  /* Private per-task flags */
+
+       /* For each stat XXX, add following, aligned appropriately
+        *
+        * struct timespec XXX_start, XXX_end;
+        * u64 XXX_delay;
+        * u32 XXX_count;
+        *
+        * Atomicity of updates to XXX_delay, XXX_count protected by
+        * single lock above (split into XXX_lock if contention is an issue).
+        */
+};
+#endif
+
 enum idle_type
 {
        SCHED_IDLE,
         * cache last used pipe for splice
         */
        struct pipe_inode_info *splice_pipe;
+#ifdef CONFIG_TASK_DELAY_ACCT
+       struct task_delay_info *delays;
+#endif
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
 
 
 extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
 
+/*
+ * sub = lhs - rhs, in normalized form
+ */
+static inline struct timespec timespec_sub(struct timespec lhs,
+                                               struct timespec rhs)
+{
+       struct timespec ts_delta;
+       set_normalized_timespec(&ts_delta, lhs.tv_sec - rhs.tv_sec,
+                               lhs.tv_nsec - rhs.tv_nsec);
+       return ts_delta;
+}
+
 /*
  * Returns true if the timespec is norm, false if denorm:
  */
 
          for processing it. A preliminary version of these tools is available
          at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>.
 
+config TASK_DELAY_ACCT
+       bool "Enable per-task delay accounting (EXPERIMENTAL)"
+       help
+         Collect information on time spent by a task waiting for system
+         resources like cpu, synchronous block I/O completion and swapping
+         in pages. Such statistics can help in setting a task's priorities
+         relative to other tasks for cpu, io, rss limits etc.
+
+         Say N if unsure.
+
 config SYSCTL
        bool "Sysctl support" if EMBEDDED
        default y
 
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/efi.h>
+#include <linux/delayacct.h>
 #include <linux/unistd.h>
 #include <linux/rmap.h>
 #include <linux/mempolicy.h>
        proc_root_init();
 #endif
        cpuset_init();
+       delayacct_init();
 
        check_bugs();
 
 
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 
--- /dev/null
+/* delayacct.c - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/sysctl.h>
+#include <linux/delayacct.h>
+
+int delayacct_on __read_mostly;        /* Delay accounting turned on/off */
+kmem_cache_t *delayacct_cache;
+
+static int __init delayacct_setup_enable(char *str)
+{
+       delayacct_on = 1;
+       return 1;
+}
+__setup("delayacct", delayacct_setup_enable);
+
+void delayacct_init(void)
+{
+       delayacct_cache = kmem_cache_create("delayacct_cache",
+                                       sizeof(struct task_delay_info),
+                                       0,
+                                       SLAB_PANIC,
+                                       NULL, NULL);
+       delayacct_tsk_init(&init_task);
+}
+
+void __delayacct_tsk_init(struct task_struct *tsk)
+{
+       tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
+       if (tsk->delays)
+               spin_lock_init(&tsk->delays->lock);
+}
+
+void __delayacct_tsk_exit(struct task_struct *tsk)
+{
+       kmem_cache_free(delayacct_cache, tsk->delays);
+       tsk->delays = NULL;
+}
+
+/*
+ * Start accounting for a delay statistic using
+ * its starting timestamp (@start)
+ */
+
+static inline void delayacct_start(struct timespec *start)
+{
+       do_posix_clock_monotonic_gettime(start);
+}
+
+/*
+ * Finish delay accounting for a statistic using
+ * its timestamps (@start, @end), accumalator (@total) and @count
+ */
+
+static void delayacct_end(struct timespec *start, struct timespec *end,
+                               u64 *total, u32 *count)
+{
+       struct timespec ts;
+       s64 ns;
+
+       do_posix_clock_monotonic_gettime(end);
+       ts = timespec_sub(*end, *start);
+       ns = timespec_to_ns(&ts);
+       if (ns < 0)
+               return;
+
+       spin_lock(¤t->delays->lock);
+       *total += ns;
+       (*count)++;
+       spin_unlock(¤t->delays->lock);
+}
+
 
 #include <linux/mount.h>
 #include <linux/proc_fs.h>
 #include <linux/mempolicy.h>
+#include <linux/delayacct.h>
 #include <linux/cpuset.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
 #endif
        if (unlikely(tsk->audit_context))
                audit_free(tsk);
+       delayacct_tsk_exit(tsk);
        exit_mm(tsk);
 
        if (group_dead)
 
 #include <linux/rmap.h>
 #include <linux/acct.h>
 #include <linux/cn_proc.h>
+#include <linux/delayacct.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
                goto bad_fork_cleanup_put_domain;
 
        p->did_exec = 0;
+       delayacct_tsk_init(p);  /* Must remain after dup_task_struct() */
        copy_flags(clone_flags, p);
        p->pid = pid;
        retval = -EFAULT;