cgroup: remove unused variable

[linux-2.6-omap-h63xx.git] / kernel / cgroup.c
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index a0123d75ec9a7624f897bc2984ca4009b61abfbc..35eebd5510c2165c6f8daa2cef1fecab9187d6d0 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
         struct cg_cgroup_link *link;
         struct cg_cgroup_link *saved_link;
  
-       write_lock(&css_set_lock);
         hlist_del(&cg->hlist);
         css_set_count--;
  
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
                 list_del(&link->cgrp_link_list);
                 kfree(link);
         }
-
-       write_unlock(&css_set_lock);
  }
  
-static void __release_css_set(struct kref *k, int taskexit)
+static void __put_css_set(struct css_set *cg, int taskexit)
  {
         int i;
-       struct css_set *cg = container_of(k, struct css_set, ref);
-
+       /*
+        * Ensure that the refcount doesn't hit zero while any readers
+        * can see it. Similar to atomic_dec_and_lock(), but for an
+        * rwlock
+        */
+       if (atomic_add_unless(&cg->refcount, -1, 1))
+               return;
+       write_lock(&css_set_lock);
+       if (!atomic_dec_and_test(&cg->refcount)) {
+               write_unlock(&css_set_lock);
+               return;
+       }
         unlink_css_set(cg);
+       write_unlock(&css_set_lock);
  
         rcu_read_lock();
         for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
         kfree(cg);
  }
  
-static void release_css_set(struct kref *k)
-{
-       __release_css_set(k, 0);
-}
-
-static void release_css_set_taskexit(struct kref *k)
-{
-       __release_css_set(k, 1);
-}
-
  /*
   * refcounted get/put for css_set objects
   */
  static inline void get_css_set(struct css_set *cg)
  {
-       kref_get(&cg->ref);
+       atomic_inc(&cg->refcount);
  }
  
  static inline void put_css_set(struct css_set *cg)
  {
-       kref_put(&cg->ref, release_css_set);
+       __put_css_set(cg, 0);
  }
  
  static inline void put_css_set_taskexit(struct css_set *cg)
  {
-       kref_put(&cg->ref, release_css_set_taskexit);
+       __put_css_set(cg, 1);
  }
  
  /*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
                 return NULL;
         }
  
-       kref_init(&res->ref);
+       atomic_set(&res->refcount, 1);
         INIT_LIST_HEAD(&res->cg_links);
         INIT_LIST_HEAD(&res->tasks);
         INIT_HLIST_NODE(&res->hlist);
@@ -870,6 +868,14 @@ static struct super_operations cgroup_ops = {
         .remount_fs = cgroup_remount,
  };
  
+static void init_cgroup_housekeeping(struct cgroup *cgrp)
+{
+       INIT_LIST_HEAD(&cgrp->sibling);
+       INIT_LIST_HEAD(&cgrp->children);
+       INIT_LIST_HEAD(&cgrp->css_sets);
+       INIT_LIST_HEAD(&cgrp->release_list);
+       init_rwsem(&cgrp->pids_mutex);
+}
  static void init_cgroup_root(struct cgroupfs_root *root)
  {
         struct cgroup *cgrp = &root->top_cgroup;
@@ -878,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
         root->number_of_cgroups = 1;
         cgrp->root = root;
         cgrp->top_cgroup = cgrp;
-       INIT_LIST_HEAD(&cgrp->sibling);
-       INIT_LIST_HEAD(&cgrp->children);
-       INIT_LIST_HEAD(&cgrp->css_sets);
-       INIT_LIST_HEAD(&cgrp->release_list);
+       init_cgroup_housekeeping(cgrp);
  }
  
  static int cgroup_test_super(struct super_block *sb, void *data)
@@ -1728,7 +1731,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
  
         read_lock(&css_set_lock);
         list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
-               count += atomic_read(&link->cg->ref.refcount);
+               count += atomic_read(&link->cg->refcount);
         }
         read_unlock(&css_set_lock);
         return count;
@@ -1997,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
   * but we cannot guarantee that the information we produce is correct
   * unless we produce it entirely atomically.
   *
- * Upon tasks file open(), a struct ctr_struct is allocated, that
- * will have a pointer to an array (also allocated here).  The struct
- * ctr_struct * is stored in file->private_data.  Its resources will
- * be freed by release() when the file is closed.  The array is used
- * to sprintf the PIDs and then used by read().
   */
-struct ctr_struct {
-       char *buf;
-       int bufsz;
-};
  
  /*
   * Load into 'pidarray' up to 'npids' of the tasks using cgroup
@@ -2088,42 +2082,132 @@ static int cmppid(const void *a, const void *b)
         return *(pid_t *)a - *(pid_t *)b;
  }
  
+
  /*
- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
- * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
- * count 'cnt' of how many chars would be written if buf were large enough.
+ * seq_file methods for the "tasks" file. The seq_file position is the
+ * next pid to display; the seq_file iterator is a pointer to the pid
+ * in the cgroup->tasks_pids array.
   */
-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+
+static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
  {
-       int cnt = 0;
-       int i;
+       /*
+        * Initially we receive a position value that corresponds to
+        * one more than the last pid shown (or 0 on the first call or
+        * after a seek to the start). Use a binary-search to find the
+        * next pid to display, if any
+        */
+       struct cgroup *cgrp = s->private;
+       int index = 0, pid = *pos;
+       int *iter;
+
+       down_read(&cgrp->pids_mutex);
+       if (pid) {
+               int end = cgrp->pids_length;
+
+               while (index < end) {
+                       int mid = (index + end) / 2;
+                       if (cgrp->tasks_pids[mid] == pid) {
+                               index = mid;
+                               break;
+                       } else if (cgrp->tasks_pids[mid] <= pid)
+                               index = mid + 1;
+                       else
+                               end = mid;
+               }
+       }
+       /* If we're off the end of the array, we're done */
+       if (index >= cgrp->pids_length)
+               return NULL;
+       /* Update the abstract position to be the actual pid that we found */
+       iter = cgrp->tasks_pids + index;
+       *pos = *iter;
+       return iter;
+}
+
+static void cgroup_tasks_stop(struct seq_file *s, void *v)
+{
+       struct cgroup *cgrp = s->private;
+       up_read(&cgrp->pids_mutex);
+}
+
+static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       struct cgroup *cgrp = s->private;
+       int *p = v;
+       int *end = cgrp->tasks_pids + cgrp->pids_length;
+
+       /*
+        * Advance to the next pid in the array. If this goes off the
+        * end, we're done
+        */
+       p++;
+       if (p >= end) {
+               return NULL;
+       } else {
+               *pos = *p;
+               return p;
+       }
+}
+
+static int cgroup_tasks_show(struct seq_file *s, void *v)
+{
+       return seq_printf(s, "%d\n", *(int *)v);
+}
+
+static struct seq_operations cgroup_tasks_seq_operations = {
+       .start = cgroup_tasks_start,
+       .stop = cgroup_tasks_stop,
+       .next = cgroup_tasks_next,
+       .show = cgroup_tasks_show,
+};
+
+static void release_cgroup_pid_array(struct cgroup *cgrp)
+{
+       down_write(&cgrp->pids_mutex);
+       BUG_ON(!cgrp->pids_use_count);
+       if (!--cgrp->pids_use_count) {
+               kfree(cgrp->tasks_pids);
+               cgrp->tasks_pids = NULL;
+               cgrp->pids_length = 0;
+       }
+       up_write(&cgrp->pids_mutex);
+}
+
+static int cgroup_tasks_release(struct inode *inode, struct file *file)
+{
+       struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
  
-       for (i = 0; i < npids; i++)
-               cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
-       return cnt;
+       if (!(file->f_mode & FMODE_READ))
+               return 0;
+
+       release_cgroup_pid_array(cgrp);
+       return seq_release(inode, file);
  }
  
+static struct file_operations cgroup_tasks_operations = {
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .write = cgroup_file_write,
+       .release = cgroup_tasks_release,
+};
+
  /*
- * Handle an open on 'tasks' file.  Prepare a buffer listing the
+ * Handle an open on 'tasks' file.  Prepare an array containing the
   * process id's of tasks currently attached to the cgroup being opened.
- *
- * Does not require any specific cgroup mutexes, and does not take any.
   */
+
  static int cgroup_tasks_open(struct inode *unused, struct file *file)
  {
         struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
-       struct ctr_struct *ctr;
         pid_t *pidarray;
         int npids;
-       char c;
+       int retval;
  
+       /* Nothing to do for write-only files */
         if (!(file->f_mode & FMODE_READ))
                 return 0;
  
-       ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
-       if (!ctr)
-               goto err0;
-
         /*
          * If cgroup gets more users after we read count, we won't have
          * enough space - tough.  This race is indistinguishable to the
@@ -2131,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
          * show up until sometime later on.
          */
         npids = cgroup_task_count(cgrp);
-       if (npids) {
-               pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
-               if (!pidarray)
-                       goto err1;
-
-               npids = pid_array_load(pidarray, npids, cgrp);
-               sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
-               /* Call pid_array_to_buf() twice, first just to get bufsz */
-               ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
-               ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
-               if (!ctr->buf)
-                       goto err2;
-               ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
-
-               kfree(pidarray);
-       } else {
-               ctr->buf = NULL;
-               ctr->bufsz = 0;
-       }
-       file->private_data = ctr;
-       return 0;
-
-err2:
-       kfree(pidarray);
-err1:
-       kfree(ctr);
-err0:
-       return -ENOMEM;
-}
-
-static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
-                                   struct cftype *cft,
-                                   struct file *file, char __user *buf,
-                                   size_t nbytes, loff_t *ppos)
-{
-       struct ctr_struct *ctr = file->private_data;
+       pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+       if (!pidarray)
+               return -ENOMEM;
+       npids = pid_array_load(pidarray, npids, cgrp);
+       sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
  
-       return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
-}
+       /*
+        * Store the array in the cgroup, freeing the old
+        * array if necessary
+        */
+       down_write(&cgrp->pids_mutex);
+       kfree(cgrp->tasks_pids);
+       cgrp->tasks_pids = pidarray;
+       cgrp->pids_length = npids;
+       cgrp->pids_use_count++;
+       up_write(&cgrp->pids_mutex);
  
-static int cgroup_tasks_release(struct inode *unused_inode,
-                                       struct file *file)
-{
-       struct ctr_struct *ctr;
+       file->f_op = &cgroup_tasks_operations;
  
-       if (file->f_mode & FMODE_READ) {
-               ctr = file->private_data;
-               kfree(ctr->buf);
-               kfree(ctr);
+       retval = seq_open(file, &cgroup_tasks_seq_operations);
+       if (retval) {
+               release_cgroup_pid_array(cgrp);
+               return retval;
         }
+       ((struct seq_file *)file->private_data)->private = cgrp;
         return 0;
  }
  
@@ -2210,7 +2268,6 @@ static struct cftype files[] = {
         {
                 .name = "tasks",
                 .open = cgroup_tasks_open,
-               .read = cgroup_tasks_read,
                 .write_u64 = cgroup_tasks_write,
                 .release = cgroup_tasks_release,
                 .private = FILE_TASKLIST,
@@ -2300,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
  
         mutex_lock(&cgroup_mutex);
  
-       INIT_LIST_HEAD(&cgrp->sibling);
-       INIT_LIST_HEAD(&cgrp->children);
-       INIT_LIST_HEAD(&cgrp->css_sets);
-       INIT_LIST_HEAD(&cgrp->release_list);
+       init_cgroup_housekeeping(cgrp);
  
         cgrp->parent = parent;
         cgrp->root = parent->root;
@@ -2495,8 +2549,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
  int __init cgroup_init_early(void)
  {
         int i;
-       kref_init(&init_css_set.ref);
-       kref_get(&init_css_set.ref);
+       atomic_set(&init_css_set.refcount, 1);
         INIT_LIST_HEAD(&init_css_set.cg_links);
         INIT_LIST_HEAD(&init_css_set.tasks);
         INIT_HLIST_NODE(&init_css_set.hlist);
@@ -2735,6 +2788,8 @@ void cgroup_fork_callbacks(struct task_struct *child)
   * Called on every change to mm->owner. mm_init_owner() does not
   * invoke this routine, since it assigns the mm->owner the first time
   * and does not change it.
+ *
+ * The callbacks are invoked with mmap_sem held in read mode.
   */
  void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
  {
@@ -2750,7 +2805,7 @@ void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
                         if (oldcgrp == newcgrp)
                                 continue;
                         if (ss->mm_owner_changed)
-                               ss->mm_owner_changed(ss, oldcgrp, newcgrp);
+                               ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
                 }
         }
  }