Merge branch 'for-linus' of git://git390.osdl.marist.edu/pub/scm/linux-2.6

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 11 Oct 2008 15:50:01 +0000 (08:50 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 11 Oct 2008 15:50:01 +0000 (08:50 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 11 Oct 2008 15:50:01 +0000 (08:50 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 11 Oct 2008 15:50:01 +0000 (08:50 -0700)
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl

index f5696ba9ae96dbccb3c02c44febacef6e9b87c68..9d0058e788e53430f1b33bf8ff40d3ff4f6f67b1 100644 (file)
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -283,6 +283,7 @@ X!Earch/x86/kernel/mca_32.c
    <chapter id="security">
       <title>Security Framework</title>
  !Isecurity/security.c
+!Esecurity/inode.c
    </chapter>
  
    <chapter id="audit">
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt

index cf5562cbe35642834f28d6fca3409dffebb5c0c4..6e253407b3dc1f83d85076dae39d405a29ec6716 100644 (file)
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -210,7 +210,7 @@ over a rather long period of time, but improvements are always welcome!
                 number of updates per grace period.
  
  9.     All RCU list-traversal primitives, which include
-       rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(),
+       rcu_dereference(), list_for_each_entry_rcu(),
         list_for_each_continue_rcu(), and list_for_each_safe_rcu(),
         must be either within an RCU read-side critical section or
         must be protected by appropriate update-side locks.  RCU
diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt

index 451de2ad8329022e3164f26cfa138f9a05dc1d33..4202ad0931300fe776697a081494a433b54e3b61 100644 (file)
--- a/Documentation/RCU/rcuref.txt
+++ b/Documentation/RCU/rcuref.txt
@@ -29,9 +29,9 @@ release_referenced()                  delete()
                                         }
  
  If this list/array is made lock free using RCU as in changing the
-write_lock() in add() and delete() to spin_lock and changing read_lock
-in search_and_reference to rcu_read_lock(), the atomic_get in
-search_and_reference could potentially hold reference to an element which
+write_lock() in add() and delete() to spin_lock() and changing read_lock()
+in search_and_reference() to rcu_read_lock(), the atomic_inc() in
+search_and_reference() could potentially hold reference to an element which
  has already been deleted from the list/array.  Use atomic_inc_not_zero()
  in this scenario as follows:
  
@@ -40,20 +40,20 @@ add()                                       search_and_reference()
  {                                      {
      alloc_object                           rcu_read_lock();
      ...                                            search_for_element
-    atomic_set(&el->rc, 1);                if (atomic_inc_not_zero(&el->rc)) {
-    write_lock(&list_lock);                    rcu_read_unlock();
+    atomic_set(&el->rc, 1);                if (!atomic_inc_not_zero(&el->rc)) {
+    spin_lock(&list_lock);                     rcu_read_unlock();
                                                 return FAIL;
      add_element                                    }
      ...                                            ...
-    write_unlock(&list_lock);              rcu_read_unlock();
+    spin_unlock(&list_lock);               rcu_read_unlock();
  }                                      }
  3.                                     4.
  release_referenced()                   delete()
  {                                      {
-    ...                                            write_lock(&list_lock);
+    ...                                            spin_lock(&list_lock);
      if (atomic_dec_and_test(&el->rc))       ...
          call_rcu(&el->head, el_free);       delete_element
-    ...                                     write_unlock(&list_lock);
+    ...                                     spin_unlock(&list_lock);
  }                                          ...
                                             if (atomic_dec_and_test(&el->rc))
                                                 call_rcu(&el->head, el_free);
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt

index e04d643a9f57a802e59057165f719f36b5ebf5ab..96170824a717059962b85c2400491f58883f3bde 100644 (file)
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -786,8 +786,6 @@ RCU pointer/list traversal:
         list_for_each_entry_rcu
         hlist_for_each_entry_rcu
  
-       list_for_each_rcu               (to be deprecated in favor of
-                                        list_for_each_entry_rcu)
         list_for_each_continue_rcu      (to be deprecated in favor of new
                                          list_for_each_entry_continue_rcu)
  
diff --git a/Documentation/SELinux.txt b/Documentation/SELinux.txt

new file mode 100644 (file)

index 0000000..07eae00
--- /dev/null
+++ b/Documentation/SELinux.txt
@@ -0,0 +1,27 @@
+If you want to use SELinux, chances are you will want
+to use the distro-provided policies, or install the
+latest reference policy release from
+       http://oss.tresys.com/projects/refpolicy
+
+However, if you want to install a dummy policy for
+testing, you can do so using 'mdp' provided under
+scripts/selinux.  Note that this requires the selinux
+userspace to be installed - in particular you will
+need checkpolicy to compile a kernel, and setfiles and
+fixfiles to label the filesystem.
+
+       1. Compile the kernel with selinux enabled.
+       2. Type 'make' to compile mdp.
+       3. Make sure that you are not running with
+          SELinux enabled and a real policy.  If
+          you are, reboot with selinux disabled
+          before continuing.
+       4. Run install_policy.sh:
+               cd scripts/selinux
+               sh install_policy.sh
+
+Step 4 will create a new dummy policy valid for your
+kernel, with a single selinux user, role, and type.
+It will compile the policy, will set your SELINUXTYPE to
+dummy in /etc/selinux/config, install the compiled policy
+as 'dummy', and relabel your filesystem.
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt

index 0bd32748a467be8e7436f4ff3ec684b784755663..c6841eee9598ddd82932db9e4e4a451f9f33ad9d 100644 (file)
--- a/Documentation/kernel-doc-nano-HOWTO.txt
+++ b/Documentation/kernel-doc-nano-HOWTO.txt
@@ -168,10 +168,10 @@ if ($#ARGV < 0) {
  mkdir $ARGV[0],0777;
  $state = 0;
  while (<STDIN>) {
-    if (/^\.TH \"[^\"]*\" 4 \"([^\"]*)\"/) {
+    if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) {
         if ($state == 1) { close OUT }
         $state = 1;
-       $fn = "$ARGV[0]/$1.4";
+       $fn = "$ARGV[0]/$1.9";
         print STDERR "Creating $fn\n";
         open OUT, ">$fn" or die "can't open $fn: $!\n";
         print OUT $_;
diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt

index 88bcb87673354302737be5bbaac5b8c049941d86..9d8eb553884c130cc16f5c50eaeb4e6ca73dbab6 100644 (file)
--- a/Documentation/scheduler/sched-design-CFS.txt
+++ b/Documentation/scheduler/sched-design-CFS.txt
@@ -1,151 +1,242 @@
+                      =============
+                      CFS Scheduler
+                      =============
  
-This is the CFS scheduler.
-
-80% of CFS's design can be summed up in a single sentence: CFS basically
-models an "ideal, precise multi-tasking CPU" on real hardware.
-
-"Ideal multi-tasking CPU" is a (non-existent  :-))  CPU that has 100%
-physical power and which can run each task at precise equal speed, in
-parallel, each at 1/nr_running speed. For example: if there are 2 tasks
-running then it runs each at 50% physical power - totally in parallel.
-
-On real hardware, we can run only a single task at once, so while that
-one task runs, the other tasks that are waiting for the CPU are at a
-disadvantage - the current task gets an unfair amount of CPU time. In
-CFS this fairness imbalance is expressed and tracked via the per-task
-p->wait_runtime (nanosec-unit) value. "wait_runtime" is the amount of
-time the task should now run on the CPU for it to become completely fair
-and balanced.
-
-( small detail: on 'ideal' hardware, the p->wait_runtime value would
-  always be zero - no task would ever get 'out of balance' from the
-  'ideal' share of CPU time. )
-
-CFS's task picking logic is based on this p->wait_runtime value and it
-is thus very simple: it always tries to run the task with the largest
-p->wait_runtime value. In other words, CFS tries to run the task with
-the 'gravest need' for more CPU time. So CFS always tries to split up
-CPU time between runnable tasks as close to 'ideal multitasking
-hardware' as possible.
-
-Most of the rest of CFS's design just falls out of this really simple
-concept, with a few add-on embellishments like nice levels,
-multiprocessing and various algorithm variants to recognize sleepers.
-
-In practice it works like this: the system runs a task a bit, and when
-the task schedules (or a scheduler tick happens) the task's CPU usage is
-'accounted for': the (small) time it just spent using the physical CPU
-is deducted from p->wait_runtime. [minus the 'fair share' it would have
-gotten anyway]. Once p->wait_runtime gets low enough so that another
-task becomes the 'leftmost task' of the time-ordered rbtree it maintains
-(plus a small amount of 'granularity' distance relative to the leftmost
-task so that we do not over-schedule tasks and trash the cache) then the
-new leftmost task is picked and the current task is preempted.
-
-The rq->fair_clock value tracks the 'CPU time a runnable task would have
-fairly gotten, had it been runnable during that time'. So by using
-rq->fair_clock values we can accurately timestamp and measure the
-'expected CPU time' a task should have gotten. All runnable tasks are
-sorted in the rbtree by the "rq->fair_clock - p->wait_runtime" key, and
-CFS picks the 'leftmost' task and sticks to it. As the system progresses
-forwards, newly woken tasks are put into the tree more and more to the
-right - slowly but surely giving a chance for every task to become the
-'leftmost task' and thus get on the CPU within a deterministic amount of
-time.
-
-Some implementation details:
-
- - the introduction of Scheduling Classes: an extensible hierarchy of
-   scheduler modules. These modules encapsulate scheduling policy
-   details and are handled by the scheduler core without the core
-   code assuming about them too much.
-
- - sched_fair.c implements the 'CFS desktop scheduler': it is a
-   replacement for the vanilla scheduler's SCHED_OTHER interactivity
-   code.
-
-   I'd like to give credit to Con Kolivas for the general approach here:
-   he has proven via RSDL/SD that 'fair scheduling' is possible and that
-   it results in better desktop scheduling. Kudos Con!
-
-   The CFS patch uses a completely different approach and implementation
-   from RSDL/SD. My goal was to make CFS's interactivity quality exceed
-   that of RSDL/SD, which is a high standard to meet :-) Testing
-   feedback is welcome to decide this one way or another. [ and, in any
-   case, all of SD's logic could be added via a kernel/sched_sd.c module
-   as well, if Con is interested in such an approach. ]
-
-   CFS's design is quite radical: it does not use runqueues, it uses a
-   time-ordered rbtree to build a 'timeline' of future task execution,
-   and thus has no 'array switch' artifacts (by which both the vanilla
-   scheduler and RSDL/SD are affected).
-
-   CFS uses nanosecond granularity accounting and does not rely on any
-   jiffies or other HZ detail. Thus the CFS scheduler has no notion of
-   'timeslices' and has no heuristics whatsoever. There is only one
-   central tunable (you have to switch on CONFIG_SCHED_DEBUG):
-
-         /proc/sys/kernel/sched_granularity_ns
-
-   which can be used to tune the scheduler from 'desktop' (low
-   latencies) to 'server' (good batching) workloads. It defaults to a
-   setting suitable for desktop workloads. SCHED_BATCH is handled by the
-   CFS scheduler module too.
-
-   Due to its design, the CFS scheduler is not prone to any of the
-   'attacks' that exist today against the heuristics of the stock
-   scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all
-   work fine and do not impact interactivity and produce the expected
-   behavior.
-
-   the CFS scheduler has a much stronger handling of nice levels and
-   SCHED_BATCH: both types of workloads should be isolated much more
-   agressively than under the vanilla scheduler.
-
-   ( another detail: due to nanosec accounting and timeline sorting,
-     sched_yield() support is very simple under CFS, and in fact under
-     CFS sched_yield() behaves much better than under any other
-     scheduler i have tested so far. )
-
- - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler
-   way than the vanilla scheduler does. It uses 100 runqueues (for all
-   100 RT priority levels, instead of 140 in the vanilla scheduler)
-   and it needs no expired array.
-
- - reworked/sanitized SMP load-balancing: the runqueue-walking
-   assumptions are gone from the load-balancing code now, and
-   iterators of the scheduling modules are used. The balancing code got
-   quite a bit simpler as a result.
-
-
-Group scheduler extension to CFS
-================================
-
-Normally the scheduler operates on individual tasks and strives to provide
-fair CPU time to each task. Sometimes, it may be desirable to group tasks
-and provide fair CPU time to each such task group. For example, it may
-be desirable to first provide fair CPU time to each user on the system
-and then to each task belonging to a user.
-
-CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets
-SCHED_NORMAL/BATCH tasks be be grouped and divides CPU time fairly among such
-groups. At present, there are two (mutually exclusive) mechanisms to group
-tasks for CPU bandwidth control purpose:
-
-       - Based on user id (CONFIG_FAIR_USER_SCHED)
-               In this option, tasks are grouped according to their user id.
-       - Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED)
-               This options lets the administrator create arbitrary groups
-               of tasks, using the "cgroup" pseudo filesystem. See
-               Documentation/cgroups.txt for more information about this
-               filesystem.
  
-Only one of these options to group tasks can be chosen and not both.
+1.  OVERVIEW
+
+CFS stands for "Completely Fair Scheduler," and is the new "desktop" process
+scheduler implemented by Ingo Molnar and merged in Linux 2.6.23.  It is the
+replacement for the previous vanilla scheduler's SCHED_OTHER interactivity
+code.
+
+80% of CFS's design can be summed up in a single sentence: CFS basically models
+an "ideal, precise multi-tasking CPU" on real hardware.
+
+"Ideal multi-tasking CPU" is a (non-existent  :-)) CPU that has 100% physical
+power and which can run each task at precise equal speed, in parallel, each at
+1/nr_running speed.  For example: if there are 2 tasks running, then it runs
+each at 50% physical power --- i.e., actually in parallel.
+
+On real hardware, we can run only a single task at once, so we have to
+introduce the concept of "virtual runtime."  The virtual runtime of a task
+specifies when its next timeslice would start execution on the ideal
+multi-tasking CPU described above.  In practice, the virtual runtime of a task
+is its actual runtime normalized to the total number of running tasks.
+
+
+
+2.  A FEW IMPLEMENTATION DETAILS
+
+In CFS the virtual runtime is expressed and tracked via the per-task
+p->se.vruntime (nanosec-unit) value.  This way, it's possible to accurately
+timestamp and measure the "expected CPU time" a task should have gotten.
+
+[ small detail: on "ideal" hardware, at any time all tasks would have the same
+  p->se.vruntime value --- i.e., tasks would execute simultaneously and no task
+  would ever get "out of balance" from the "ideal" share of CPU time.  ]
+
+CFS's task picking logic is based on this p->se.vruntime value and it is thus
+very simple: it always tries to run the task with the smallest p->se.vruntime
+value (i.e., the task which executed least so far).  CFS always tries to split
+up CPU time between runnable tasks as close to "ideal multitasking hardware" as
+possible.
+
+Most of the rest of CFS's design just falls out of this really simple concept,
+with a few add-on embellishments like nice levels, multiprocessing and various
+algorithm variants to recognize sleepers.
+
+
+
+3.  THE RBTREE
+
+CFS's design is quite radical: it does not use the old data structures for the
+runqueues, but it uses a time-ordered rbtree to build a "timeline" of future
+task execution, and thus has no "array switch" artifacts (by which both the
+previous vanilla scheduler and RSDL/SD are affected).
+
+CFS also maintains the rq->cfs.min_vruntime value, which is a monotonically
+increasing value tracking the smallest vruntime among all tasks in the
+runqueue.  The total amount of work done by the system is tracked using
+min_vruntime; that value is used to place newly activated entities on the left
+side of the tree as much as possible.
+
+The total number of running tasks in the runqueue is accounted through the
+rq->cfs.load value, which is the sum of the weights of the tasks queued on the
+runqueue.
+
+CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the
+p->se.vruntime key (there is a subtraction using rq->cfs.min_vruntime to
+account for possible wraparounds).  CFS picks the "leftmost" task from this
+tree and sticks to it.
+As the system progresses forwards, the executed tasks are put into the tree
+more and more to the right --- slowly but surely giving a chance for every task
+to become the "leftmost task" and thus get on the CPU within a deterministic
+amount of time.
+
+Summing up, CFS works like this: it runs a task a bit, and when the task
+schedules (or a scheduler tick happens) the task's CPU usage is "accounted
+for": the (small) time it just spent using the physical CPU is added to
+p->se.vruntime.  Once p->se.vruntime gets high enough so that another task
+becomes the "leftmost task" of the time-ordered rbtree it maintains (plus a
+small amount of "granularity" distance relative to the leftmost task so that we
+do not over-schedule tasks and trash the cache), then the new leftmost task is
+picked and the current task is preempted.
+
+
+
+4.  SOME FEATURES OF CFS
+
+CFS uses nanosecond granularity accounting and does not rely on any jiffies or
+other HZ detail.  Thus the CFS scheduler has no notion of "timeslices" in the
+way the previous scheduler had, and has no heuristics whatsoever.  There is
+only one central tunable (you have to switch on CONFIG_SCHED_DEBUG):
+
+   /proc/sys/kernel/sched_granularity_ns
+
+which can be used to tune the scheduler from "desktop" (i.e., low latencies) to
+"server" (i.e., good batching) workloads.  It defaults to a setting suitable
+for desktop workloads.  SCHED_BATCH is handled by the CFS scheduler module too.
+
+Due to its design, the CFS scheduler is not prone to any of the "attacks" that
+exist today against the heuristics of the stock scheduler: fiftyp.c, thud.c,
+chew.c, ring-test.c, massive_intr.c all work fine and do not impact
+interactivity and produce the expected behavior.
+
+The CFS scheduler has a much stronger handling of nice levels and SCHED_BATCH
+than the previous vanilla scheduler: both types of workloads are isolated much
+more aggressively.
+
+SMP load-balancing has been reworked/sanitized: the runqueue-walking
+assumptions are gone from the load-balancing code now, and iterators of the
+scheduling modules are used.  The balancing code got quite a bit simpler as a
+result.
+
+
+
+5.  SCHEDULING POLICIES
+
+CFS implements three scheduling policies:
+
+  - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling
+    policy that is used for regular tasks.
+
+  - SCHED_BATCH: Does not preempt nearly as often as regular tasks
+    would, thereby allowing tasks to run longer and make better use of
+    caches but at the cost of interactivity. This is well suited for
+    batch jobs.
+
+  - SCHED_IDLE: This is even weaker than nice 19, but it's not a true
+    idle timer scheduler, in order to avoid getting into priority
+    inversion problems which would deadlock the machine.
+
+SCHED_FIFO/_RR are implemented in sched_rt.c and are as specified by
+POSIX.
+
+The command chrt from util-linux-ng 2.13.1.1 can set all of these except
+SCHED_IDLE.
  
-Group scheduler tunables:
  
-When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for
-each new user and a "cpu_share" file is added in that directory.
+
+6.  SCHEDULING CLASSES
+
+The new CFS scheduler has been designed in such a way to introduce "Scheduling
+Classes," an extensible hierarchy of scheduler modules.  These modules
+encapsulate scheduling policy details and are handled by the scheduler core
+without the core code assuming too much about them.
+
+sched_fair.c implements the CFS scheduler described above.
+
+sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler way than
+the previous vanilla scheduler did.  It uses 100 runqueues (for all 100 RT
+priority levels, instead of 140 in the previous scheduler) and it needs no
+expired array.
+
+Scheduling classes are implemented through the sched_class structure, which
+contains hooks to functions that must be called whenever an interesting event
+occurs.
+
+This is the (partial) list of the hooks:
+
+ - enqueue_task(...)
+
+   Called when a task enters a runnable state.
+   It puts the scheduling entity (task) into the red-black tree and
+   increments the nr_running variable.
+
+ - dequeue_task(...)
+
+   When a task is no longer runnable, this function is called to keep the
+   corresponding scheduling entity out of the red-black tree.  It decrements
+   the nr_running variable.
+
+ - yield_task(...)
+
+   This function is basically just a dequeue followed by an enqueue, unless the
+   compat_yield sysctl is turned on; in that case, it places the scheduling
+   entity at the right-most end of the red-black tree.
+
+ - check_preempt_curr(...)
+
+   This function checks if a task that entered the runnable state should
+   preempt the currently running task.
+
+ - pick_next_task(...)
+
+   This function chooses the most appropriate task eligible to run next.
+
+ - set_curr_task(...)
+
+   This function is called when a task changes its scheduling class or changes
+   its task group.
+
+ - task_tick(...)
+
+   This function is mostly called from time tick functions; it might lead to
+   a process switch.  This drives the running preemption.
+
+ - task_new(...)
+
+   The core scheduler gives the scheduling module an opportunity to manage new
+   task startup.  The CFS scheduling module uses it for group scheduling, while
+   the scheduling module for a real-time task does not use it.
+
+
+
+7.  GROUP SCHEDULER EXTENSIONS TO CFS
+
+Normally, the scheduler operates on individual tasks and strives to provide
+fair CPU time to each task.  Sometimes, it may be desirable to group tasks and
+provide fair CPU time to each such task group.  For example, it may be
+desirable to first provide fair CPU time to each user on the system and then to
+each task belonging to a user.
+
+CONFIG_GROUP_SCHED strives to achieve exactly that.  It lets tasks be
+grouped and divides CPU time fairly among such groups.
+
+CONFIG_RT_GROUP_SCHED permits grouping real-time (i.e., SCHED_FIFO and
+SCHED_RR) tasks.
+
+CONFIG_FAIR_GROUP_SCHED permits grouping CFS (i.e., SCHED_NORMAL and
+SCHED_BATCH) tasks.
+
+At present, there are two (mutually exclusive) mechanisms to group tasks for
+CPU bandwidth control purposes:
+
+ - Based on user id (CONFIG_USER_SCHED)
+
+   With this option, tasks are grouped according to their user id.
+
+ - Based on "cgroup" pseudo filesystem (CONFIG_CGROUP_SCHED)
+
+   This option needs CONFIG_CGROUPS to be defined, and lets the administrator
+   create arbitrary groups of tasks, using the "cgroup" pseudo filesystem.  See
+   Documentation/cgroups.txt for more information about this filesystem.
+
+Only one of these options to group tasks can be chosen and not both.
+
+When CONFIG_USER_SCHED is defined, a directory is created in sysfs for each new
+user and a "cpu_share" file is added in that directory.
  
         # cd /sys/kernel/uids
         # cat 512/cpu_share             # Display user 512's CPU share
@@ -155,16 +246,14 @@ each new user and a "cpu_share" file is added in that directory.
         2048
         #
  
-CPU bandwidth between two users are divided in the ratio of their CPU shares.
-For ex: if you would like user "root" to get twice the bandwidth of user
-"guest", then set the cpu_share for both the users such that "root"'s
-cpu_share is twice "guest"'s cpu_share
-
+CPU bandwidth between two users is divided in the ratio of their CPU shares.
+For example: if you would like user "root" to get twice the bandwidth of user
+"guest," then set the cpu_share for both the users such that "root"'s cpu_share
+is twice "guest"'s cpu_share.
  
-When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created
-for each group created using the pseudo filesystem. See example steps
-below to create task groups and modify their CPU share using the "cgroups"
-pseudo filesystem
+When CONFIG_CGROUP_SCHED is defined, a "cpu.shares" file is created for each
+group created using the pseudo filesystem.  See example steps below to create
+task groups and modify their CPU share using the "cgroups" pseudo filesystem.
  
         # mkdir /dev/cpuctl
         # mount -t cgroup -ocpu none /dev/cpuctl
diff --git a/MAINTAINERS b/MAINTAINERS

index 8dae4555f10e1b91f0c2eafb84f04fbf71812e55..7a03bd5a91a3eb37586c13cde383c9a162f08ee0 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3649,8 +3649,9 @@ M:        jmorris@namei.org
  P:     Eric Paris
  M:     eparis@parisplace.org
  L:     linux-kernel@vger.kernel.org (kernel issues)
-L:     selinux@tycho.nsa.gov (subscribers-only, general discussion)
-W:     http://www.nsa.gov/selinux
+L:     selinux@tycho.nsa.gov (subscribers-only, general discussion)
+W:     http://selinuxproject.org
+T:     git kernel.org:pub/scm/linux/kernel/git/jmorris/security-testing-2.6.git
  S:     Supported
  
  SENSABLE PHANTOM
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c

index 83df541650fcec6171e919fca76d1c1acaa1009a..06b6fdab639f879d56529611381f94627ee5e52f 100644 (file)
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -149,6 +149,9 @@ smp_callin(void)
         atomic_inc(&init_mm.mm_count);
         current->active_mm = &init_mm;
  
+       /* inform the notifiers about the new cpu */
+       notify_cpu_starting(cpuid);
+
         /* Must have completely accurate bogos.  */
         local_irq_enable();
  
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c

index e9842f6767f959b3cfb134b7325f4076ca1dcf89..e42a749a56dd5c85abc823e2666ff7b4683c2da0 100644 (file)
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -277,6 +277,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
         /*
          * Enable local interrupts.
          */
+       notify_cpu_starting(cpu);
         local_irq_enable();
         local_fiq_enable();
  
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c

index 952a24b2f5a9c59faec140963bfe03d3aeffacb2..52e16c6436f9bac0733022622fa252d217b40369 100644 (file)
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -178,6 +178,7 @@ void __init smp_callin(void)
         unmask_irq(IPI_INTR_VECT);
         unmask_irq(TIMER0_INTR_VECT);
         preempt_disable();
+       notify_cpu_starting(cpu);
         local_irq_enable();
  
         cpu_set(cpu, cpu_online_map);
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c

index d8f05e504fbf039bb85fa946c40e6ff01349d26b..1dcbb85fc4ee9f05253d5d76b5e8033b39a07cf9 100644 (file)
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -401,6 +401,7 @@ smp_callin (void)
         spin_lock(&vector_lock);
         /* Setup the per cpu irq handling data structures */
         __setup_vector_irq(cpuid);
+       notify_cpu_starting(cpuid);
         cpu_set(cpuid, cpu_online_map);
         per_cpu(cpu_state, cpuid) = CPU_ONLINE;
         spin_unlock(&vector_lock);
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c

index 2c03ac1d005f44cfe9fe7ad61fa03750e9edf9e9..fc2994811f150c991986b6294538ca5b9c6a64ab 100644 (file)
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -498,6 +498,8 @@ static void __init smp_online(void)
  {
         int cpu_id = smp_processor_id();
  
+       notify_cpu_starting(cpu_id);
+
         local_irq_enable();
  
         /* Get our bogomips. */
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c

index 4410f172b8abf055ca4f0bc8c339dca673d90854..7b59cfb7e6022a21cf90cbd93e8e6a2dfe3a53d3 100644 (file)
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -121,6 +121,8 @@ asmlinkage __cpuinit void start_secondary(void)
         cpu = smp_processor_id();
         cpu_data[cpu].udelay_val = loops_per_jiffy;
  
+       notify_cpu_starting(cpu);
+
         mp_ops->smp_finish();
         set_cpu_sibling_map(cpu);
  
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c

index 5337ca7bb649b02999258143947751109fe6c585..c27b10a1bd79adffe0a2686b9c5baaf34d3eb724 100644 (file)
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -453,6 +453,7 @@ int __devinit start_secondary(void *unused)
         secondary_cpu_time_init();
  
         ipi_call_lock();
+       notify_cpu_starting(cpu);
         cpu_set(cpu, cpu_online_map);
         /* Update sibling maps */
         base = cpu_first_thread_in_core(cpu);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c

index 00b9b4dec5eb0850aeda9a373227b190a42e8caf..9e8b1f9b8f4d6bcfcfd477e2b965030bd236cc10 100644 (file)
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -585,6 +585,8 @@ int __cpuinit start_secondary(void *cpuvoid)
         /* Enable pfault pseudo page faults on this cpu. */
         pfault_init();
  
+       /* call cpu notifiers */
+       notify_cpu_starting(smp_processor_id());
         /* Mark this cpu as online */
         spin_lock(&call_lock);
         cpu_set(smp_processor_id(), cpu_online_map);
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c

index 60c50841143e26103b02a41c04d7ff863055663e..001778f9adaf83eee6cb3766866809516d346bc0 100644 (file)
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -82,6 +82,8 @@ asmlinkage void __cpuinit start_secondary(void)
  
         preempt_disable();
  
+       notify_cpu_starting(smp_processor_id());
+
         local_irq_enable();
  
         calibrate_delay();
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c

index 69596402a500092c562b3ec97e7269efcc8981ec..446767e8f5694651b914cce1c1206796bd2f103d 100644 (file)
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -88,6 +88,7 @@ void __init smp4d_callin(void)
         local_flush_cache_all();
         local_flush_tlb_all();
  
+       notify_cpu_starting(cpuid);
         /*
          * Unblock the master CPU _only_ when the scheduler state
          * of all secondary CPUs will be up-to-date, so after
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c

index a14a76ac7f36464642ea95f397326c29d3967002..9964890dc1dbe521b0db97ecef41ea076da8f0ca 100644 (file)
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -71,6 +71,8 @@ void __cpuinit smp4m_callin(void)
         local_flush_cache_all();
         local_flush_tlb_all();
  
+       notify_cpu_starting(cpuid);
+
         /* Get our local ticker going. */
         smp_setup_percpu_timer();
  
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c

index be2d50c3aa95caf483377b8a54c2caf121ac8117..045772142844690f2471d9330b836da594c945f8 100644 (file)
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -85,6 +85,7 @@ static int idle_proc(void *cpup)
         while (!cpu_isset(cpu, smp_commenced_mask))
                 cpu_relax();
  
+       notify_cpu_starting(cpu);
         cpu_set(cpu, cpu_online_map);
         default_idle();
         return 0;
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c

index dd097b835839dd73d4139d807c23e8fca1648344..c24c4a487b7cb05f8ec6ad73998a2e0e01439d32 100644 (file)
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -256,7 +256,8 @@ static u32 get_cur_val(const cpumask_t *mask)
   * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
   * no meaning should be associated with absolute values of these MSRs.
   */
-static unsigned int get_measured_perf(unsigned int cpu)
+static unsigned int get_measured_perf(struct cpufreq_policy *policy,
+                                     unsigned int cpu)
  {
         union {
                 struct {
@@ -326,7 +327,7 @@ static unsigned int get_measured_perf(unsigned int cpu)
  
  #endif
  
-       retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100;
+       retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100;
  
         put_cpu();
         set_cpus_allowed_ptr(current, &saved_mask);
@@ -785,7 +786,11 @@ static int __init acpi_cpufreq_init(void)
         if (ret)
                 return ret;
  
-       return cpufreq_register_driver(&acpi_cpufreq_driver);
+       ret = cpufreq_register_driver(&acpi_cpufreq_driver);
+       if (ret)
+               free_percpu(acpi_perf_data);
+
+       return ret;
  }
  
  static void __exit acpi_cpufreq_exit(void)
@@ -795,8 +800,6 @@ static void __exit acpi_cpufreq_exit(void)
         cpufreq_unregister_driver(&acpi_cpufreq_driver);
  
         free_percpu(acpi_perf_data);
-
-       return;
  }
  
  module_param(acpi_pstate_strict, uint, 0644);
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c

index e4a4bf870e9472e7b3da95ac0d6b9a620ff675b5..fe613c93b3667d73e78ca6747f691232a9233702 100644 (file)
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -25,8 +25,8 @@
  #include <linux/cpufreq.h>
  
  #include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
+#include <linux/timex.h>
+#include <linux/io.h>
  
  #define REG_CSCIR 0x22         /* Chip Setup and Control Index Register    */
  #define REG_CSCDR 0x23         /* Chip Setup and Control Data  Register    */
@@ -82,7 +82,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
         u8 clockspeed_reg;    /* Clock Speed Register */
  
         local_irq_disable();
-       outb_p(0x80,REG_CSCIR);
+       outb_p(0x80, REG_CSCIR);
         clockspeed_reg = inb_p(REG_CSCDR);
         local_irq_enable();
  
@@ -98,10 +98,10 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
         }
  
         /* 33 MHz is not 32 MHz... */
-       if ((clockspeed_reg & 0xE0)==0xA0)
+       if ((clockspeed_reg & 0xE0) == 0xA0)
                 return 33000;
  
-       return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000);
+       return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000;
  }
  
  
@@ -117,7 +117,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
   *     There is no return value.
   */
  
-static void elanfreq_set_cpu_state (unsigned int state)
+static void elanfreq_set_cpu_state(unsigned int state)
  {
         struct cpufreq_freqs    freqs;
  
@@ -144,20 +144,20 @@ static void elanfreq_set_cpu_state (unsigned int state)
          */
  
         local_irq_disable();
-       outb_p(0x40,REG_CSCIR);         /* Disable hyperspeed mode */
-       outb_p(0x00,REG_CSCDR);
+       outb_p(0x40, REG_CSCIR);                /* Disable hyperspeed mode */
+       outb_p(0x00, REG_CSCDR);
         local_irq_enable();             /* wait till internal pipelines and */
         udelay(1000);                   /* buffers have cleaned up          */
  
         local_irq_disable();
  
         /* now, set the CPU clock speed register (0x80) */
-       outb_p(0x80,REG_CSCIR);
-       outb_p(elan_multiplier[state].val80h,REG_CSCDR);
+       outb_p(0x80, REG_CSCIR);
+       outb_p(elan_multiplier[state].val80h, REG_CSCDR);
  
         /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
-       outb_p(0x40,REG_CSCIR);
-       outb_p(elan_multiplier[state].val40h,REG_CSCDR);
+       outb_p(0x40, REG_CSCIR);
+       outb_p(elan_multiplier[state].val40h, REG_CSCDR);
         udelay(10000);
         local_irq_enable();
  
@@ -173,12 +173,12 @@ static void elanfreq_set_cpu_state (unsigned int state)
   *     for the hardware supported by the driver.
   */
  
-static int elanfreq_verify (struct cpufreq_policy *policy)
+static int elanfreq_verify(struct cpufreq_policy *policy)
  {
         return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
  }
  
-static int elanfreq_target (struct cpufreq_policy *policy,
+static int elanfreq_target(struct cpufreq_policy *policy,
                             unsigned int target_freq,
                             unsigned int relation)
  {
@@ -205,7 +205,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
  
         /* capability check */
         if ((c->x86_vendor != X86_VENDOR_AMD) ||
-           (c->x86 != 4) || (c->x86_model!=10))
+           (c->x86 != 4) || (c->x86_model != 10))
                 return -ENODEV;
  
         /* max freq */
@@ -213,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
                 max_freq = elanfreq_get_cpu_frequency(0);
  
         /* table init */
-       for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+       for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
                 if (elanfreq_table[i].frequency > max_freq)
                         elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
         }
@@ -224,7 +224,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy)
  
         result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
         if (result)
-               return (result);
+               return result;
  
         cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
         return 0;
@@ -260,7 +260,7 @@ __setup("elanfreq=", elanfreq_setup);
  #endif
  
  
-static struct freq_attr* elanfreq_attr[] = {
+static struct freq_attr *elanfreq_attr[] = {
         &cpufreq_freq_attr_scaling_available_freqs,
         NULL,
  };
@@ -284,9 +284,9 @@ static int __init elanfreq_init(void)
  
         /* Test if we have the right hardware */
         if ((c->x86_vendor != X86_VENDOR_AMD) ||
-               (c->x86 != 4) || (c->x86_model!=10)) {
+               (c->x86 != 4) || (c->x86_model != 10)) {
                 printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
-                return -ENODEV;
+               return -ENODEV;
         }
         return cpufreq_register_driver(&elanfreq_driver);
  }
@@ -298,7 +298,7 @@ static void __exit elanfreq_exit(void)
  }
  
  
-module_param (max_freq, int, 0444);
+module_param(max_freq, int, 0444);
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c

index eb9b62b0830c8941ee7ea990304596e2a1d82316..b5ced806a316d66b1c4bb1fcc6120f855617a266 100644 (file)
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -15,12 +15,11 @@
  #include <linux/slab.h>
  
  #include <asm/msr.h>
-#include <asm/timex.h>
-#include <asm/io.h>
+#include <linux/timex.h>
+#include <linux/io.h>
  
-
-#define POWERNOW_IOPORT 0xfff0         /* it doesn't matter where, as long
-                                         as it is unused */
+#define POWERNOW_IOPORT 0xfff0          /* it doesn't matter where, as long
+                                          as it is unused */
  
  static unsigned int                     busfreq;   /* FSB, in 10 kHz */
  static unsigned int                     max_multiplier;
@@ -53,7 +52,7 @@ static int powernow_k6_get_cpu_multiplier(void)
  
         msrval = POWERNOW_IOPORT + 0x1;
         wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
-       invalue=inl(POWERNOW_IOPORT + 0x8);
+       invalue = inl(POWERNOW_IOPORT + 0x8);
         msrval = POWERNOW_IOPORT + 0x0;
         wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
  
@@ -67,9 +66,9 @@ static int powernow_k6_get_cpu_multiplier(void)
   *
   *   Tries to change the PowerNow! multiplier
   */
-static void powernow_k6_set_state (unsigned int best_i)
+static void powernow_k6_set_state(unsigned int best_i)
  {
-       unsigned long           outvalue=0, invalue=0;
+       unsigned long           outvalue = 0, invalue = 0;
         unsigned long           msrval;
         struct cpufreq_freqs    freqs;
  
@@ -90,10 +89,10 @@ static void powernow_k6_set_state (unsigned int best_i)
  
         msrval = POWERNOW_IOPORT + 0x1;
         wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
-       invalue=inl(POWERNOW_IOPORT + 0x8);
+       invalue = inl(POWERNOW_IOPORT + 0x8);
         invalue = invalue & 0xf;
         outvalue = outvalue | invalue;
-       outl(outvalue ,(POWERNOW_IOPORT + 0x8));
+       outl(outvalue , (POWERNOW_IOPORT + 0x8));
         msrval = POWERNOW_IOPORT + 0x0;
         wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
  
@@ -124,7 +123,7 @@ static int powernow_k6_verify(struct cpufreq_policy *policy)
   *
   * sets a new CPUFreq policy
   */
-static int powernow_k6_target (struct cpufreq_policy *policy,
+static int powernow_k6_target(struct cpufreq_policy *policy,
                                unsigned int target_freq,
                                unsigned int relation)
  {
@@ -152,7 +151,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
         busfreq = cpu_khz / max_multiplier;
  
         /* table init */
-       for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
+       for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
                 if (clock_ratio[i].index > max_multiplier)
                         clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
                 else
@@ -165,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
  
         result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
         if (result)
-               return (result);
+               return result;
  
         cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
  
@@ -176,8 +175,8 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
  static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
  {
         unsigned int i;
-       for (i=0; i<8; i++) {
-               if (i==max_multiplier)
+       for (i = 0; i < 8; i++) {
+               if (i == max_multiplier)
                         powernow_k6_set_state(i);
         }
         cpufreq_frequency_table_put_attr(policy->cpu);
@@ -189,7 +188,7 @@ static unsigned int powernow_k6_get(unsigned int cpu)
         return busfreq * powernow_k6_get_cpu_multiplier();
  }
  
-static struct freq_attr* powernow_k6_attr[] = {
+static struct freq_attr *powernow_k6_attr[] = {
         &cpufreq_freq_attr_scaling_available_freqs,
         NULL,
  };
@@ -227,7 +226,7 @@ static int __init powernow_k6_init(void)
         }
  
         if (cpufreq_register_driver(&powernow_k6_driver)) {
-               release_region (POWERNOW_IOPORT, 16);
+               release_region(POWERNOW_IOPORT, 16);
                 return -EINVAL;
         }
  
@@ -243,13 +242,13 @@ static int __init powernow_k6_init(void)
  static void __exit powernow_k6_exit(void)
  {
         cpufreq_unregister_driver(&powernow_k6_driver);
-       release_region (POWERNOW_IOPORT, 16);
+       release_region(POWERNOW_IOPORT, 16);
  }
  
  
-MODULE_AUTHOR ("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
+MODULE_LICENSE("GPL");
  
  module_init(powernow_k6_init);
  module_exit(powernow_k6_exit);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c

index 45531e3ba19436ad72001dfbcf3d5700209dc518..4e7ccb0e2a9b01df38dd25c0561c7694e96f9649 100644 (file)
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -257,6 +257,7 @@ static void __cpuinit smp_callin(void)
         end_local_APIC_setup();
         map_cpu_to_logical_apicid();
  
+       notify_cpu_starting(cpuid);
         /*
          * Get our bogomips.
          *
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c

index ee0fba0921572ba89ad56e45e5757e9eb9351385..199a5f4a873c76b33728fbfaaf186a0fb6404530 100644 (file)
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -448,6 +448,8 @@ static void __init start_secondary(void *unused)
  
         VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid));
  
+       notify_cpu_starting(cpuid);
+
         /* enable interrupts */
         local_irq_enable();
  
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig

index 3738cfa209ff2024b77d9790ea91777ab1783c83..f5fc64f89c5c3b71a54a874020e1f8108c9e025e 100644 (file)
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -6,6 +6,7 @@ menuconfig TCG_TPM
         tristate "TPM Hardware Support"
         depends on HAS_IOMEM
         depends on EXPERIMENTAL
+       select SECURITYFS
         ---help---
           If you have a TPM security chip in your system, which
           implements the Trusted Computing Group's specification,
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c

index 8a67f16987db16582e87a49514cf3a99158d542e..31d6f535a79de51d322d8d76af3eccfd2bf11140 100644 (file)
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1467,25 +1467,27 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
                           unsigned int target_freq,
                           unsigned int relation)
  {
-       int ret;
+       int ret = -EINVAL;
  
         policy = cpufreq_cpu_get(policy->cpu);
         if (!policy)
-               return -EINVAL;
+               goto no_policy;
  
         if (unlikely(lock_policy_rwsem_write(policy->cpu)))
-               return -EINVAL;
+               goto fail;
  
         ret = __cpufreq_driver_target(policy, target_freq, relation);
  
         unlock_policy_rwsem_write(policy->cpu);
  
+fail:
         cpufreq_cpu_put(policy);
+no_policy:
         return ret;
  }
  EXPORT_SYMBOL_GPL(cpufreq_driver_target);
  
-int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
+int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
  {
         int ret = 0;
  
@@ -1493,8 +1495,8 @@ int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
         if (!policy)
                 return -EINVAL;
  
-       if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
-               ret = cpufreq_driver->getavg(policy->cpu);
+       if (cpu_online(cpu) && cpufreq_driver->getavg)
+               ret = cpufreq_driver->getavg(policy, cpu);
  
         cpufreq_cpu_put(policy);
         return ret;
@@ -1717,13 +1719,17 @@ int cpufreq_update_policy(unsigned int cpu)
  {
         struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
         struct cpufreq_policy policy;
-       int ret = 0;
+       int ret;
  
-       if (!data)
-               return -ENODEV;
+       if (!data) {
+               ret = -ENODEV;
+               goto no_policy;
+       }
  
-       if (unlikely(lock_policy_rwsem_write(cpu)))
-               return -EINVAL;
+       if (unlikely(lock_policy_rwsem_write(cpu))) {
+               ret = -EINVAL;
+               goto fail;
+       }
  
         dprintk("updating policy for CPU %u\n", cpu);
         memcpy(&policy, data, sizeof(struct cpufreq_policy));
@@ -1750,7 +1756,9 @@ int cpufreq_update_policy(unsigned int cpu)
  
         unlock_policy_rwsem_write(cpu);
  
+fail:
         cpufreq_cpu_put(data);
+no_policy:
         return ret;
  }
  EXPORT_SYMBOL(cpufreq_update_policy);
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c

index ac0bbf2d234f3c72ecb5e6d781bc25afeef1afae..e2657837d954a459a731639811fcf3e4b7d91399 100644 (file)
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -460,6 +460,7 @@ static void do_dbs_timer(struct work_struct *work)
  
  static inline void dbs_timer_init(void)
  {
+       init_timer_deferrable(&dbs_work.timer);
         schedule_delayed_work(&dbs_work,
                         usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
         return;
@@ -575,13 +576,15 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
         return 0;
  }
  
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
+static
+#endif
  struct cpufreq_governor cpufreq_gov_conservative = {
         .name                   = "conservative",
         .governor               = cpufreq_governor_dbs,
         .max_transition_latency = TRANSITION_LATENCY_LIMIT,
         .owner                  = THIS_MODULE,
  };
-EXPORT_SYMBOL(cpufreq_gov_conservative);
  
  static int __init cpufreq_gov_dbs_init(void)
  {
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c

index 33855cb3cf16f1ecd30528f35343c2c029959b1e..2ab3c12b88afbd733fe5fd521a8dcc98540f8f3b 100644 (file)
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -18,13 +18,19 @@
  #include <linux/jiffies.h>
  #include <linux/kernel_stat.h>
  #include <linux/mutex.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/ktime.h>
  
  /*
   * dbs is used in this file as a shortform for demandbased switching
   * It helps to keep variable names smaller, simpler
   */
  
+#define DEF_FREQUENCY_DOWN_DIFFERENTIAL                (10)
  #define DEF_FREQUENCY_UP_THRESHOLD             (80)
+#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL      (3)
+#define MICRO_FREQUENCY_UP_THRESHOLD           (95)
  #define MIN_FREQUENCY_UP_THRESHOLD             (11)
  #define MAX_FREQUENCY_UP_THRESHOLD             (100)
  
@@ -57,6 +63,7 @@ enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
  struct cpu_dbs_info_s {
         cputime64_t prev_cpu_idle;
         cputime64_t prev_cpu_wall;
+       cputime64_t prev_cpu_nice;
         struct cpufreq_policy *cur_policy;
         struct delayed_work work;
         struct cpufreq_frequency_table *freq_table;
@@ -86,21 +93,24 @@ static struct workqueue_struct      *kondemand_wq;
  static struct dbs_tuners {
         unsigned int sampling_rate;
         unsigned int up_threshold;
+       unsigned int down_differential;
         unsigned int ignore_nice;
         unsigned int powersave_bias;
  } dbs_tuners_ins = {
         .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
+       .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
         .ignore_nice = 0,
         .powersave_bias = 0,
  };
  
-static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
+static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
+                                                       cputime64_t *wall)
  {
         cputime64_t idle_time;
-       cputime64_t cur_jiffies;
+       cputime64_t cur_wall_time;
         cputime64_t busy_time;
  
-       cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
+       cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
         busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
                         kstat_cpu(cpu).cpustat.system);
  
@@ -113,7 +123,37 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
                                 kstat_cpu(cpu).cpustat.nice);
         }
  
-       idle_time = cputime64_sub(cur_jiffies, busy_time);
+       idle_time = cputime64_sub(cur_wall_time, busy_time);
+       if (wall)
+               *wall = cur_wall_time;
+
+       return idle_time;
+}
+
+static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
+{
+       u64 idle_time = get_cpu_idle_time_us(cpu, wall);
+
+       if (idle_time == -1ULL)
+               return get_cpu_idle_time_jiffy(cpu, wall);
+
+       if (dbs_tuners_ins.ignore_nice) {
+               cputime64_t cur_nice;
+               unsigned long cur_nice_jiffies;
+               struct cpu_dbs_info_s *dbs_info;
+
+               dbs_info = &per_cpu(cpu_dbs_info, cpu);
+               cur_nice = cputime64_sub(kstat_cpu(cpu).cpustat.nice,
+                                        dbs_info->prev_cpu_nice);
+               /*
+                * Assumption: nice time between sampling periods will be
+                * less than 2^32 jiffies for 32 bit sys
+                */
+               cur_nice_jiffies = (unsigned long)
+                                       cputime64_to_jiffies64(cur_nice);
+               dbs_info->prev_cpu_nice = kstat_cpu(cpu).cpustat.nice;
+               return idle_time + jiffies_to_usecs(cur_nice_jiffies);
+       }
         return idle_time;
  }
  
@@ -277,8 +317,8 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
         for_each_online_cpu(j) {
                 struct cpu_dbs_info_s *dbs_info;
                 dbs_info = &per_cpu(cpu_dbs_info, j);
-               dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
-               dbs_info->prev_cpu_wall = get_jiffies_64();
+               dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
+                                               &dbs_info->prev_cpu_wall);
         }
         mutex_unlock(&dbs_mutex);
  
@@ -334,9 +374,7 @@ static struct attribute_group dbs_attr_group = {
  
  static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
  {
-       unsigned int idle_ticks, total_ticks;
-       unsigned int load = 0;
-       cputime64_t cur_jiffies;
+       unsigned int max_load_freq;
  
         struct cpufreq_policy *policy;
         unsigned int j;
@@ -346,13 +384,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
  
         this_dbs_info->freq_lo = 0;
         policy = this_dbs_info->cur_policy;
-       cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
-       total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
-                       this_dbs_info->prev_cpu_wall);
-       this_dbs_info->prev_cpu_wall = get_jiffies_64();
  
-       if (!total_ticks)
-               return;
         /*
          * Every sampling_rate, we check, if current idle time is less
          * than 20% (default), then we try to increase frequency
@@ -365,27 +397,44 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
          * 5% (default) of current frequency
          */
  
-       /* Get Idle Time */
-       idle_ticks = UINT_MAX;
+       /* Get Absolute Load - in terms of freq */
+       max_load_freq = 0;
+
         for_each_cpu_mask_nr(j, policy->cpus) {
-               cputime64_t total_idle_ticks;
-               unsigned int tmp_idle_ticks;
                 struct cpu_dbs_info_s *j_dbs_info;
+               cputime64_t cur_wall_time, cur_idle_time;
+               unsigned int idle_time, wall_time;
+               unsigned int load, load_freq;
+               int freq_avg;
  
                 j_dbs_info = &per_cpu(cpu_dbs_info, j);
-               total_idle_ticks = get_cpu_idle_time(j);
-               tmp_idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks,
+
+               cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
+
+               wall_time = (unsigned int) cputime64_sub(cur_wall_time,
+                               j_dbs_info->prev_cpu_wall);
+               j_dbs_info->prev_cpu_wall = cur_wall_time;
+
+               idle_time = (unsigned int) cputime64_sub(cur_idle_time,
                                 j_dbs_info->prev_cpu_idle);
-               j_dbs_info->prev_cpu_idle = total_idle_ticks;
+               j_dbs_info->prev_cpu_idle = cur_idle_time;
+
+               if (unlikely(!wall_time || wall_time < idle_time))
+                       continue;
+
+               load = 100 * (wall_time - idle_time) / wall_time;
+
+               freq_avg = __cpufreq_driver_getavg(policy, j);
+               if (freq_avg <= 0)
+                       freq_avg = policy->cur;
  
-               if (tmp_idle_ticks < idle_ticks)
-                       idle_ticks = tmp_idle_ticks;
+               load_freq = load * freq_avg;
+               if (load_freq > max_load_freq)
+                       max_load_freq = load_freq;
         }
-       if (likely(total_ticks > idle_ticks))
-               load = (100 * (total_ticks - idle_ticks)) / total_ticks;
  
         /* Check for frequency increase */
-       if (load > dbs_tuners_ins.up_threshold) {
+       if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
                 /* if we are already at full speed then break out early */
                 if (!dbs_tuners_ins.powersave_bias) {
                         if (policy->cur == policy->max)
@@ -412,15 +461,13 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
          * can support the current CPU usage without triggering the up
          * policy. To be safe, we focus 10 points under the threshold.
          */
-       if (load < (dbs_tuners_ins.up_threshold - 10)) {
-               unsigned int freq_next, freq_cur;
-
-               freq_cur = __cpufreq_driver_getavg(policy);
-               if (!freq_cur)
-                       freq_cur = policy->cur;
-
-               freq_next = (freq_cur * load) /
-                       (dbs_tuners_ins.up_threshold - 10);
+       if (max_load_freq <
+           (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
+            policy->cur) {
+               unsigned int freq_next;
+               freq_next = max_load_freq /
+                               (dbs_tuners_ins.up_threshold -
+                                dbs_tuners_ins.down_differential);
  
                 if (!dbs_tuners_ins.powersave_bias) {
                         __cpufreq_driver_target(policy, freq_next,
@@ -526,8 +573,8 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                         j_dbs_info = &per_cpu(cpu_dbs_info, j);
                         j_dbs_info->cur_policy = policy;
  
-                       j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
-                       j_dbs_info->prev_cpu_wall = get_jiffies_64();
+                       j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
+                                               &j_dbs_info->prev_cpu_wall);
                 }
                 this_dbs_info->cpu = cpu;
                 /*
@@ -579,22 +626,42 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
         return 0;
  }
  
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
+static
+#endif
  struct cpufreq_governor cpufreq_gov_ondemand = {
         .name                   = "ondemand",
         .governor               = cpufreq_governor_dbs,
         .max_transition_latency = TRANSITION_LATENCY_LIMIT,
         .owner                  = THIS_MODULE,
  };
-EXPORT_SYMBOL(cpufreq_gov_ondemand);
  
  static int __init cpufreq_gov_dbs_init(void)
  {
+       int err;
+       cputime64_t wall;
+       u64 idle_time;
+       int cpu = get_cpu();
+
+       idle_time = get_cpu_idle_time_us(cpu, &wall);
+       put_cpu();
+       if (idle_time != -1ULL) {
+               /* Idle micro accounting is supported. Use finer thresholds */
+               dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
+               dbs_tuners_ins.down_differential =
+                                       MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
+       }
+
         kondemand_wq = create_workqueue("kondemand");
         if (!kondemand_wq) {
                 printk(KERN_ERR "Creation of kondemand failed\n");
                 return -EFAULT;
         }
-       return cpufreq_register_governor(&cpufreq_gov_ondemand);
+       err = cpufreq_register_governor(&cpufreq_gov_ondemand);
+       if (err)
+               destroy_workqueue(kondemand_wq);
+
+       return err;
  }
  
  static void __exit cpufreq_gov_dbs_exit(void)
diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c

index e8e1451ef1c1036e30e8b6902ec312457b7f7b5c..7e2e515087f89aba2498508460d1125f2f9f9f68 100644 (file)
--- a/drivers/cpufreq/cpufreq_performance.c
+++ b/drivers/cpufreq/cpufreq_performance.c
@@ -36,12 +36,14 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy,
         return 0;
  }
  
+#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE
+static
+#endif
  struct cpufreq_governor cpufreq_gov_performance = {
         .name           = "performance",
         .governor       = cpufreq_governor_performance,
         .owner          = THIS_MODULE,
  };
-EXPORT_SYMBOL(cpufreq_gov_performance);
  
  
  static int __init cpufreq_gov_performance_init(void)
diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c

index 88d2f44fba480f7a66d00ed2fdeb571120926659..e6db5faf3eb112f118cf8ce920168b3635602b78 100644 (file)
--- a/drivers/cpufreq/cpufreq_powersave.c
+++ b/drivers/cpufreq/cpufreq_powersave.c
@@ -35,12 +35,14 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy,
         return 0;
  }
  
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE
+static
+#endif
  struct cpufreq_governor cpufreq_gov_powersave = {
         .name           = "powersave",
         .governor       = cpufreq_governor_powersave,
         .owner          = THIS_MODULE,
  };
-EXPORT_SYMBOL(cpufreq_gov_powersave);
  
  static int __init cpufreq_gov_powersave_init(void)
  {
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c

index 32244aa7cc0c1d5b8511d3486a1c8de9b1a59f7c..1442bbada05303bae02adcd3026f57aed4d55a4d 100644 (file)
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -187,6 +187,9 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
  }
  
  
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE
+static
+#endif
  struct cpufreq_governor cpufreq_gov_userspace = {
         .name           = "userspace",
         .governor       = cpufreq_governor_userspace,
@@ -194,7 +197,6 @@ struct cpufreq_governor cpufreq_gov_userspace = {
         .show_setspeed  = show_speed,
         .owner          = THIS_MODULE,
  };
-EXPORT_SYMBOL(cpufreq_gov_userspace);
  
  static int __init cpufreq_gov_userspace_init(void)
  {
diff --git a/include/linux/compiler.h b/include/linux/compiler.h

index c8bd2daf95ec51e0f42ad05e6b8c13be1300ce16..8322141ee480c802ee6919d0f13a86218e45bfc4 100644 (file)
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -190,7 +190,9 @@ extern void __chk_io_ptr(const volatile void __iomem *);
   * ACCESS_ONCE() in different C statements.
   *
   * This macro does absolutely -nothing- to prevent the CPU from reordering,
- * merging, or refetching absolutely anything at any time.
+ * merging, or refetching absolutely anything at any time.  Its main intended
+ * use is to mediate communication between process-level code and irq/NMI
+ * handlers, all running on the same CPU.
   */
  #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  
diff --git a/include/linux/completion.h b/include/linux/completion.h

index 02ef8835999cf666c88bb587e47ff7104c0275ac..4a6b604ef7e4ae323d9f71fe2569ef5ed318b70c 100644 (file)
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -10,6 +10,18 @@
  
  #include <linux/wait.h>
  
+/**
+ * struct completion - structure used to maintain state for a "completion"
+ *
+ * This is the opaque structure used to maintain the state for a "completion".
+ * Completions currently use a FIFO to queue threads that have to wait for
+ * the "completion" event.
+ *
+ * See also:  complete(), wait_for_completion() (and friends _timeout,
+ * _interruptible, _interruptible_timeout, and _killable), init_completion(),
+ * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and
+ * INIT_COMPLETION().
+ */
  struct completion {
         unsigned int done;
         wait_queue_head_t wait;
@@ -21,6 +33,14 @@ struct completion {
  #define COMPLETION_INITIALIZER_ONSTACK(work) \
         ({ init_completion(&work); work; })
  
+/**
+ * DECLARE_COMPLETION: - declare and initialize a completion structure
+ * @work:  identifier for the completion structure
+ *
+ * This macro declares and initializes a completion structure. Generally used
+ * for static declarations. You should use the _ONSTACK variant for automatic
+ * variables.
+ */
  #define DECLARE_COMPLETION(work) \
         struct completion work = COMPLETION_INITIALIZER(work)
  
@@ -29,6 +49,13 @@ struct completion {
   * completions - so we use the _ONSTACK() variant for those that
   * are on the kernel stack:
   */
+/**
+ * DECLARE_COMPLETION_ONSTACK: - declare and initialize a completion structure
+ * @work:  identifier for the completion structure
+ *
+ * This macro declares and initializes a completion structure on the kernel
+ * stack.
+ */
  #ifdef CONFIG_LOCKDEP
  # define DECLARE_COMPLETION_ONSTACK(work) \
         struct completion work = COMPLETION_INITIALIZER_ONSTACK(work)
@@ -36,6 +63,13 @@ struct completion {
  # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
  #endif
  
+/**
+ * init_completion: - Initialize a dynamically allocated completion
+ * @x:  completion structure that is to be initialized
+ *
+ * This inline function will initialize a dynamically created completion
+ * structure.
+ */
  static inline void init_completion(struct completion *x)
  {
         x->done = 0;
@@ -55,6 +89,13 @@ extern bool completion_done(struct completion *x);
  extern void complete(struct completion *);
  extern void complete_all(struct completion *);
  
+/**
+ * INIT_COMPLETION: - reinitialize a completion structure
+ * @x:  completion structure to be reinitialized
+ *
+ * This macro should be used to reinitialize a completion structure so it can
+ * be reused. This is especially important after complete_all() is used.
+ */
  #define INIT_COMPLETION(x)     ((x).done = 0)
  
  
diff --git a/include/linux/cpu.h b/include/linux/cpu.h

index d7faf88084973c6a5bfbfca9fe717352b2e1e1a2..c2747ac2ae43b8a7b22bebdef63ee92cbcf1c31a 100644 (file)
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -69,6 +69,7 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
  #endif
  
  int cpu_up(unsigned int cpu);
+void notify_cpu_starting(unsigned int cpu);
  extern void cpu_hotplug_init(void);
  extern void cpu_maps_update_begin(void);
  extern void cpu_maps_update_done(void);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h

index 6fd5668aa572079f4cd81dc36326aa93a4faddde..1ee608fd7b77c35bee3639c449f6f6483de2bc7c 100644 (file)
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -187,7 +187,8 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
                                    unsigned int relation);
  
  
-extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy);
+extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy,
+                                  unsigned int cpu);
  
  int cpufreq_register_governor(struct cpufreq_governor *governor);
  void cpufreq_unregister_governor(struct cpufreq_governor *governor);
@@ -226,7 +227,9 @@ struct cpufreq_driver {
         unsigned int    (*get)  (unsigned int cpu);
  
         /* optional */
-       unsigned int (*getavg)  (unsigned int cpu);
+       unsigned int (*getavg)  (struct cpufreq_policy *policy,
+                                unsigned int cpu);
+
         int     (*exit)         (struct cpufreq_policy *policy);
         int     (*suspend)      (struct cpufreq_policy *policy, pm_message_t pmsg);
         int     (*resume)       (struct cpufreq_policy *policy);
diff --git a/include/linux/notifier.h b/include/linux/notifier.h

index da2698b0fdd1d0989e5f1e0c2739262d002eb3df..b86fa2ffca0c3ca5f613b4b63b79a2260310b9d3 100644 (file)
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -213,9 +213,16 @@ static inline int notifier_to_errno(int ret)
  #define CPU_DOWN_FAILED                0x0006 /* CPU (unsigned)v NOT going down */
  #define CPU_DEAD               0x0007 /* CPU (unsigned)v dead */
  #define CPU_DYING              0x0008 /* CPU (unsigned)v not running any task,
-                                       * not handling interrupts, soon dead */
+                                       * not handling interrupts, soon dead.
+                                       * Called on the dying cpu, interrupts
+                                       * are already disabled. Must not
+                                       * sleep, must not fail */
  #define CPU_POST_DEAD          0x0009 /* CPU (unsigned)v dead, cpu_hotplug
                                         * lock is dropped */
+#define CPU_STARTING           0x000A /* CPU (unsigned)v soon running.
+                                       * Called on the new cpu, just before
+                                       * enabling interrupts. Must not sleep,
+                                       * must not fail */
  
  /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
   * operation in progress
@@ -229,6 +236,7 @@ static inline int notifier_to_errno(int ret)
  #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
  #define CPU_DEAD_FROZEN                (CPU_DEAD | CPU_TASKS_FROZEN)
  #define CPU_DYING_FROZEN       (CPU_DYING | CPU_TASKS_FROZEN)
+#define CPU_STARTING_FROZEN    (CPU_STARTING | CPU_TASKS_FROZEN)
  
  /* Hibernation and suspend events */
  #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */
diff --git a/include/linux/proportions.h b/include/linux/proportions.h

index 5afc1b23346d1f04536ea9339c1790b1cceeef3a..cf793bbbd05e18e6d87e98dd1e3e4f9df767d8d6 100644 (file)
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h
@@ -104,8 +104,8 @@ struct prop_local_single {
          * snapshot of the last seen global state
          * and a lock protecting this state
          */
-       int shift;
         unsigned long period;
+       int shift;
         spinlock_t lock;                /* protect the snapshot state */
  };
  
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h

index 4ab8436227276322042c5b7374e6d0c561e80486..5f89b62e6983192befd7cc827df50fb53cd0a173 100644 (file)
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -40,12 +40,21 @@
  #include <linux/cpumask.h>
  #include <linux/seqlock.h>
  
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+#define RCU_SECONDS_TILL_STALL_CHECK   ( 3 * HZ) /* for rcp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
  
  /* Global control variables for rcupdate callback mechanism. */
  struct rcu_ctrlblk {
         long    cur;            /* Current batch number.                      */
         long    completed;      /* Number of the last completed batch         */
-       int     next_pending;   /* Is the next batch already waiting?         */
+       long    pending;        /* Number of the last pending batch           */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+       unsigned long gp_start; /* Time at which GP started in jiffies. */
+       unsigned long jiffies_stall;
+                               /* Time at which to check for CPU stalls. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
  
         int     signaled;
  
@@ -66,11 +75,7 @@ static inline int rcu_batch_after(long a, long b)
         return (a - b) > 0;
  }
  
-/*
- * Per-CPU data for Read-Copy UPdate.
- * nxtlist - new callbacks are added here
- * curlist - current batch for which quiescent cycle started if any
- */
+/* Per-CPU data for Read-Copy UPdate. */
  struct rcu_data {
         /* 1) quiescent state handling : */
         long            quiescbatch;     /* Batch # for grace period */
@@ -78,12 +83,24 @@ struct rcu_data {
         int             qs_pending;      /* core waits for quiesc state */
  
         /* 2) batch handling */
-       long            batch;           /* Batch # for current RCU batch */
+       /*
+        * if nxtlist is not NULL, then:
+        * batch:
+        *      The batch # for the last entry of nxtlist
+        * [*nxttail[1], NULL = *nxttail[2]):
+        *      Entries whose batch # <= batch
+        * [*nxttail[0], *nxttail[1]):
+        *      Entries whose batch # <= batch - 1
+        * [nxtlist, *nxttail[0]):
+        *      Entries whose batch # <= batch - 2
+        *      The grace period for these entries has completed, and
+        *      the other grace-period-completed entries may be moved
+        *      here temporarily in rcu_process_callbacks().
+        */
+       long            batch;
         struct rcu_head *nxtlist;
-       struct rcu_head **nxttail;
+       struct rcu_head **nxttail[3];
         long            qlen;            /* # of queued callbacks */
-       struct rcu_head *curlist;
-       struct rcu_head **curtail;
         struct rcu_head *donelist;
         struct rcu_head **donetail;
         long            blimit;          /* Upper limit on a processed batch */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h

index eb4443c7e05be213f49596b81b50f209923ab4da..e649bd3f2c976c3f5bed58c067c351a336403e75 100644 (file)
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -198,20 +198,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
         at->prev = last;
  }
  
-/**
- * list_for_each_rcu   -       iterate over an rcu-protected list
- * @pos:       the &struct list_head to use as a loop cursor.
- * @head:      the head for your list.
- *
- * This list-traversal primitive may safely run concurrently with
- * the _rcu list-mutation primitives such as list_add_rcu()
- * as long as the traversal is guarded by rcu_read_lock().
- */
-#define list_for_each_rcu(pos, head) \
-       for (pos = rcu_dereference((head)->next); \
-               prefetch(pos->next), pos != (head); \
-               pos = rcu_dereference(pos->next))
-
  #define __list_for_each_rcu(pos, head) \
         for (pos = rcu_dereference((head)->next); \
                 pos != (head); \
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h

index e8b4039cfb2fddc536e74b86246919970597aacb..86f1f5e43e333766ec6a9fe5276875046c2f2526 100644 (file)
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -132,6 +132,26 @@ struct rcu_head {
   */
  #define rcu_read_unlock_bh() __rcu_read_unlock_bh()
  
+/**
+ * rcu_read_lock_sched - mark the beginning of an RCU-classic critical section
+ *
+ * Should be used with either
+ * - synchronize_sched()
+ * or
+ * - call_rcu_sched() and rcu_barrier_sched()
+ * on the write-side to ensure proper synchronization.
+ */
+#define rcu_read_lock_sched() preempt_disable()
+
+/*
+ * rcu_read_unlock_sched - mark the end of an RCU-classic critical section
+ *
+ * See rcu_read_lock_sched for more information.
+ */
+#define rcu_read_unlock_sched() preempt_enable()
+
+
+
  /**
   * rcu_dereference - fetch an RCU-protected pointer in an
   * RCU read-side critical section.  This pointer may later
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h

index 0967f03b07056928c4176826d4bf216f329cbde9..3e05c09b54a22408db83e0f0a87a5a8bf9a40e8f 100644 (file)
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -57,7 +57,13 @@ static inline void rcu_qsctr_inc(int cpu)
         rdssp->sched_qs++;
  }
  #define rcu_bh_qsctr_inc(cpu)
-#define call_rcu_bh(head, rcu) call_rcu(head, rcu)
+
+/*
+ * Someone might want to pass call_rcu_bh as a function pointer.
+ * So this needs to just be a rename and not a macro function.
+ *  (no parentheses)
+ */
+#define call_rcu_bh            call_rcu
  
  /**
   * call_rcu_sched - Queue RCU callback for invocation after sched grace period.
@@ -111,7 +117,6 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
  struct softirq_action;
  
  #ifdef CONFIG_NO_HZ
-DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
  
  static inline void rcu_enter_nohz(void)
  {
@@ -126,8 +131,8 @@ static inline void rcu_exit_nohz(void)
  {
         static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
  
-       smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
         __get_cpu_var(rcu_dyntick_sched).dynticks++;
+       smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
         WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
                                 &rs);
  }
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 3d9120c5ad1589a0da722e514c370c0a3f1c4fe4..5d0819ee442a471e840b37a5ea1c35ee13030d25 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -451,8 +451,8 @@ struct signal_struct {
          * - everyone except group_exit_task is stopped during signal delivery
          *   of fatal signals, group_exit_task processes the signal.
          */
-       struct task_struct      *group_exit_task;
         int                     notify_count;
+       struct task_struct      *group_exit_task;
  
         /* thread group stop support, overloads group_exit_code too */
         int                     group_stop_count;
@@ -824,6 +824,9 @@ struct sched_domain {
         unsigned int ttwu_move_affine;
         unsigned int ttwu_move_balance;
  #endif
+#ifdef CONFIG_SCHED_DEBUG
+       char *name;
+#endif
  };
  
  extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
@@ -897,7 +900,7 @@ struct sched_class {
         void (*yield_task) (struct rq *rq);
         int  (*select_task_rq)(struct task_struct *p, int sync);
  
-       void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
+       void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
  
         struct task_struct * (*pick_next_task) (struct rq *rq);
         void (*put_prev_task) (struct rq *rq, struct task_struct *p);
@@ -1010,8 +1013,8 @@ struct sched_entity {
  
  struct sched_rt_entity {
         struct list_head run_list;
-       unsigned int time_slice;
         unsigned long timeout;
+       unsigned int time_slice;
         int nr_cpus_allowed;
  
         struct sched_rt_entity *back;
diff --git a/include/linux/security.h b/include/linux/security.h

index 80c4d002864cb2f3e9db8ebb575de2baeafe47c8..f5c4a51eb42ea9ec97f460e14a11c24850de82be 100644 (file)
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1560,11 +1560,6 @@ struct security_operations {
  extern int security_init(void);
  extern int security_module_enable(struct security_operations *ops);
  extern int register_security(struct security_operations *ops);
-extern struct dentry *securityfs_create_file(const char *name, mode_t mode,
-                                            struct dentry *parent, void *data,
-                                            const struct file_operations *fops);
-extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent);
-extern void securityfs_remove(struct dentry *dentry);
  
  /* Security operations */
  int security_ptrace_may_access(struct task_struct *child, unsigned int mode);
@@ -2424,25 +2419,6 @@ static inline int security_netlink_recv(struct sk_buff *skb, int cap)
         return cap_netlink_recv(skb, cap);
  }
  
-static inline struct dentry *securityfs_create_dir(const char *name,
-                                       struct dentry *parent)
-{
-       return ERR_PTR(-ENODEV);
-}
-
-static inline struct dentry *securityfs_create_file(const char *name,
-                                               mode_t mode,
-                                               struct dentry *parent,
-                                               void *data,
-                                               const struct file_operations *fops)
-{
-       return ERR_PTR(-ENODEV);
-}
-
-static inline void securityfs_remove(struct dentry *dentry)
-{
-}
-
  static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
  {
         return -EOPNOTSUPP;
@@ -2806,5 +2782,35 @@ static inline void security_audit_rule_free(void *lsmrule)
  #endif /* CONFIG_SECURITY */
  #endif /* CONFIG_AUDIT */
  
+#ifdef CONFIG_SECURITYFS
+
+extern struct dentry *securityfs_create_file(const char *name, mode_t mode,
+                                            struct dentry *parent, void *data,
+                                            const struct file_operations *fops);
+extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent);
+extern void securityfs_remove(struct dentry *dentry);
+
+#else /* CONFIG_SECURITYFS */
+
+static inline struct dentry *securityfs_create_dir(const char *name,
+                                                  struct dentry *parent)
+{
+       return ERR_PTR(-ENODEV);
+}
+
+static inline struct dentry *securityfs_create_file(const char *name,
+                                                   mode_t mode,
+                                                   struct dentry *parent,
+                                                   void *data,
+                                                   const struct file_operations *fops)
+{
+       return ERR_PTR(-ENODEV);
+}
+
+static inline void securityfs_remove(struct dentry *dentry)
+{}
+
+#endif
+
  #endif /* ! __LINUX_SECURITY_H */
  
diff --git a/include/linux/tick.h b/include/linux/tick.h

index 8cf8cfe2cc9712a176d7d4904107f4d30e74293f..98921a3e1aa8db33b7e743e6711abb671485ea12 100644 (file)
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -126,7 +126,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void)
         return len;
  }
  static inline void tick_nohz_stop_idle(int cpu) { }
-static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; }
+static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
  # endif /* !NO_HZ */
  
  #endif
diff --git a/kernel/cpu.c b/kernel/cpu.c

index f17e9854c24612e1e3f83b389f48224eb2d7b807..86d49045daed050ee3e1c3d18a8d80d8b1793b85 100644 (file)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -199,13 +199,14 @@ static int __ref take_cpu_down(void *_param)
         struct take_cpu_down_param *param = _param;
         int err;
  
-       raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
-                               param->hcpu);
         /* Ensure this CPU doesn't handle any more interrupts. */
         err = __cpu_disable();
         if (err < 0)
                 return err;
  
+       raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+                               param->hcpu);
+
         /* Force idle task to run as soon as we yield: it should
            immediately notice cpu is offline and die quickly. */
         sched_idle_next();
@@ -453,6 +454,25 @@ out:
  }
  #endif /* CONFIG_PM_SLEEP_SMP */
  
+/**
+ * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ * @cpu: cpu that just started
+ *
+ * This function calls the cpu_chain notifiers with CPU_STARTING.
+ * It must be called by the arch code on the new cpu, before the new cpu
+ * enables interrupts and before the "boot" cpu returns from __cpu_up().
+ */
+void notify_cpu_starting(unsigned int cpu)
+{
+       unsigned long val = CPU_STARTING;
+
+#ifdef CONFIG_PM_SLEEP_SMP
+       if (cpu_isset(cpu, frozen_cpus))
+               val = CPU_STARTING_FROZEN;
+#endif /* CONFIG_PM_SLEEP_SMP */
+       raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
+}
+
  #endif /* CONFIG_SMP */
  
  /*
diff --git a/kernel/cpuset.c b/kernel/cpuset.c

index 827cd9adccb272f3d54a0ea8a98ba8c028cde211..eab7bd6628e0ad48ef5f1d4fcea2af05f173bae5 100644 (file)
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1921,7 +1921,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
   * that has tasks along with an empty 'mems'.  But if we did see such
   * a cpuset, we'd handle it just like we do if its 'cpus' was empty.
   */
-static void scan_for_empty_cpusets(const struct cpuset *root)
+static void scan_for_empty_cpusets(struct cpuset *root)
  {
         LIST_HEAD(queue);
         struct cpuset *cp;      /* scans cpusets being updated */
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c

index aad93cdc9f68656b95eb496003c593d5d03beed4..37f72e551542234d2d7d905741e8a5f6fb7532ec 100644 (file)
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -47,6 +47,7 @@
  #include <linux/notifier.h>
  #include <linux/cpu.h>
  #include <linux/mutex.h>
+#include <linux/time.h>
  
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  static struct lock_class_key rcu_lock_key;
@@ -60,12 +61,14 @@ EXPORT_SYMBOL_GPL(rcu_lock_map);
  static struct rcu_ctrlblk rcu_ctrlblk = {
         .cur = -300,
         .completed = -300,
+       .pending = -300,
         .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
         .cpumask = CPU_MASK_NONE,
  };
  static struct rcu_ctrlblk rcu_bh_ctrlblk = {
         .cur = -300,
         .completed = -300,
+       .pending = -300,
         .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
         .cpumask = CPU_MASK_NONE,
  };
@@ -83,7 +86,10 @@ static void force_quiescent_state(struct rcu_data *rdp,
  {
         int cpu;
         cpumask_t cpumask;
+       unsigned long flags;
+
         set_need_resched();
+       spin_lock_irqsave(&rcp->lock, flags);
         if (unlikely(!rcp->signaled)) {
                 rcp->signaled = 1;
                 /*
@@ -109,6 +115,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
                 for_each_cpu_mask_nr(cpu, cpumask)
                         smp_send_reschedule(cpu);
         }
+       spin_unlock_irqrestore(&rcp->lock, flags);
  }
  #else
  static inline void force_quiescent_state(struct rcu_data *rdp,
@@ -118,6 +125,126 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
  }
  #endif
  
+static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
+               struct rcu_data *rdp)
+{
+       long batch;
+
+       head->next = NULL;
+       smp_mb(); /* Read of rcp->cur must happen after any change by caller. */
+
+       /*
+        * Determine the batch number of this callback.
+        *
+        * Using ACCESS_ONCE to avoid the following error when gcc eliminates
+        * local variable "batch" and emits code like this:
+        *      1) rdp->batch = rcp->cur + 1 # gets old value
+        *      ......
+        *      2) rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value
+        * then [*nxttail[0], *nxttail[1]) may contain callbacks
+        * whose batch# = rdp->batch, see the comment of struct rcu_data.
+        */
+       batch = ACCESS_ONCE(rcp->cur) + 1;
+
+       if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) {
+               /* process callbacks */
+               rdp->nxttail[0] = rdp->nxttail[1];
+               rdp->nxttail[1] = rdp->nxttail[2];
+               if (rcu_batch_after(batch - 1, rdp->batch))
+                       rdp->nxttail[0] = rdp->nxttail[2];
+       }
+
+       rdp->batch = batch;
+       *rdp->nxttail[2] = head;
+       rdp->nxttail[2] = &head->next;
+
+       if (unlikely(++rdp->qlen > qhimark)) {
+               rdp->blimit = INT_MAX;
+               force_quiescent_state(rdp, &rcu_ctrlblk);
+       }
+}
+
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+       rcp->gp_start = jiffies;
+       rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       int cpu;
+       long delta;
+       unsigned long flags;
+
+       /* Only let one CPU complain about others per time interval. */
+
+       spin_lock_irqsave(&rcp->lock, flags);
+       delta = jiffies - rcp->jiffies_stall;
+       if (delta < 2 || rcp->cur != rcp->completed) {
+               spin_unlock_irqrestore(&rcp->lock, flags);
+               return;
+       }
+       rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+       spin_unlock_irqrestore(&rcp->lock, flags);
+
+       /* OK, time to rat on our buddy... */
+
+       printk(KERN_ERR "RCU detected CPU stalls:");
+       for_each_possible_cpu(cpu) {
+               if (cpu_isset(cpu, rcp->cpumask))
+                       printk(" %d", cpu);
+       }
+       printk(" (detected by %d, t=%ld jiffies)\n",
+              smp_processor_id(), (long)(jiffies - rcp->gp_start));
+}
+
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       unsigned long flags;
+
+       printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+                       smp_processor_id(), jiffies,
+                       jiffies - rcp->gp_start);
+       dump_stack();
+       spin_lock_irqsave(&rcp->lock, flags);
+       if ((long)(jiffies - rcp->jiffies_stall) >= 0)
+               rcp->jiffies_stall =
+                       jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+       spin_unlock_irqrestore(&rcp->lock, flags);
+       set_need_resched();  /* kick ourselves to get things going. */
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       long delta;
+
+       delta = jiffies - rcp->jiffies_stall;
+       if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
+
+               /* We haven't checked in, so go dump stack. */
+               print_cpu_stall(rcp);
+
+       } else if (rcp->cur != rcp->completed && delta >= 2) {
+
+               /* They had two jiffies to dump stack, so complain. */
+               print_other_cpu_stall(rcp);
+       }
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
  /**
   * call_rcu - Queue an RCU callback for invocation after a grace period.
   * @head: structure to be used for queueing the RCU updates.
@@ -133,18 +260,10 @@ void call_rcu(struct rcu_head *head,
                                 void (*func)(struct rcu_head *rcu))
  {
         unsigned long flags;
-       struct rcu_data *rdp;
  
         head->func = func;
-       head->next = NULL;
         local_irq_save(flags);
-       rdp = &__get_cpu_var(rcu_data);
-       *rdp->nxttail = head;
-       rdp->nxttail = &head->next;
-       if (unlikely(++rdp->qlen > qhimark)) {
-               rdp->blimit = INT_MAX;
-               force_quiescent_state(rdp, &rcu_ctrlblk);
-       }
+       __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data));
         local_irq_restore(flags);
  }
  EXPORT_SYMBOL_GPL(call_rcu);
@@ -169,20 +288,10 @@ void call_rcu_bh(struct rcu_head *head,
                                 void (*func)(struct rcu_head *rcu))
  {
         unsigned long flags;
-       struct rcu_data *rdp;
  
         head->func = func;
-       head->next = NULL;
         local_irq_save(flags);
-       rdp = &__get_cpu_var(rcu_bh_data);
-       *rdp->nxttail = head;
-       rdp->nxttail = &head->next;
-
-       if (unlikely(++rdp->qlen > qhimark)) {
-               rdp->blimit = INT_MAX;
-               force_quiescent_state(rdp, &rcu_bh_ctrlblk);
-       }
-
+       __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
         local_irq_restore(flags);
  }
  EXPORT_SYMBOL_GPL(call_rcu_bh);
@@ -211,12 +320,6 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
  static inline void raise_rcu_softirq(void)
  {
         raise_softirq(RCU_SOFTIRQ);
-       /*
-        * The smp_mb() here is required to ensure that this cpu's
-        * __rcu_process_callbacks() reads the most recently updated
-        * value of rcu->cur.
-        */
-       smp_mb();
  }
  
  /*
@@ -225,6 +328,7 @@ static inline void raise_rcu_softirq(void)
   */
  static void rcu_do_batch(struct rcu_data *rdp)
  {
+       unsigned long flags;
         struct rcu_head *next, *list;
         int count = 0;
  
@@ -239,9 +343,9 @@ static void rcu_do_batch(struct rcu_data *rdp)
         }
         rdp->donelist = list;
  
-       local_irq_disable();
+       local_irq_save(flags);
         rdp->qlen -= count;
-       local_irq_enable();
+       local_irq_restore(flags);
         if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
                 rdp->blimit = blimit;
  
@@ -269,6 +373,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
   *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
   *   period (if necessary).
   */
+
  /*
   * Register a new batch of callbacks, and start it up if there is currently no
   * active batch and the batch to be registered has not already occurred.
@@ -276,15 +381,10 @@ static void rcu_do_batch(struct rcu_data *rdp)
   */
  static void rcu_start_batch(struct rcu_ctrlblk *rcp)
  {
-       if (rcp->next_pending &&
+       if (rcp->cur != rcp->pending &&
                         rcp->completed == rcp->cur) {
-               rcp->next_pending = 0;
-               /*
-                * next_pending == 0 must be visible in
-                * __rcu_process_callbacks() before it can see new value of cur.
-                */
-               smp_wmb();
                 rcp->cur++;
+               record_gp_stall_check_time(rcp);
  
                 /*
                  * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -322,6 +422,8 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
  static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                                         struct rcu_data *rdp)
  {
+       unsigned long flags;
+
         if (rdp->quiescbatch != rcp->cur) {
                 /* start new grace period: */
                 rdp->qs_pending = 1;
@@ -345,7 +447,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                 return;
         rdp->qs_pending = 0;
  
-       spin_lock(&rcp->lock);
+       spin_lock_irqsave(&rcp->lock, flags);
         /*
          * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
          * during cpu startup. Ignore the quiescent state.
@@ -353,7 +455,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
         if (likely(rdp->quiescbatch == rcp->cur))
                 cpu_quiet(rdp->cpu, rcp);
  
-       spin_unlock(&rcp->lock);
+       spin_unlock_irqrestore(&rcp->lock, flags);
  }
  
  
@@ -364,33 +466,38 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
   * which is dead and hence not processing interrupts.
   */
  static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
-                               struct rcu_head **tail)
+                               struct rcu_head **tail, long batch)
  {
-       local_irq_disable();
-       *this_rdp->nxttail = list;
-       if (list)
-               this_rdp->nxttail = tail;
-       local_irq_enable();
+       unsigned long flags;
+
+       if (list) {
+               local_irq_save(flags);
+               this_rdp->batch = batch;
+               *this_rdp->nxttail[2] = list;
+               this_rdp->nxttail[2] = tail;
+               local_irq_restore(flags);
+       }
  }
  
  static void __rcu_offline_cpu(struct rcu_data *this_rdp,
                                 struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
  {
-       /* if the cpu going offline owns the grace period
+       unsigned long flags;
+
+       /*
+        * if the cpu going offline owns the grace period
          * we can block indefinitely waiting for it, so flush
          * it here
          */
-       spin_lock_bh(&rcp->lock);
+       spin_lock_irqsave(&rcp->lock, flags);
         if (rcp->cur != rcp->completed)
                 cpu_quiet(rdp->cpu, rcp);
-       spin_unlock_bh(&rcp->lock);
-       rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
-       rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
-       rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
+       rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1);
+       rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1);
+       spin_unlock(&rcp->lock);
  
-       local_irq_disable();
         this_rdp->qlen += rdp->qlen;
-       local_irq_enable();
+       local_irq_restore(flags);
  }
  
  static void rcu_offline_cpu(int cpu)
@@ -420,38 +527,52 @@ static void rcu_offline_cpu(int cpu)
  static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                                         struct rcu_data *rdp)
  {
-       if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
-               *rdp->donetail = rdp->curlist;
-               rdp->donetail = rdp->curtail;
-               rdp->curlist = NULL;
-               rdp->curtail = &rdp->curlist;
-       }
+       unsigned long flags;
+       long completed_snap;
  
-       if (rdp->nxtlist && !rdp->curlist) {
-               local_irq_disable();
-               rdp->curlist = rdp->nxtlist;
-               rdp->curtail = rdp->nxttail;
-               rdp->nxtlist = NULL;
-               rdp->nxttail = &rdp->nxtlist;
-               local_irq_enable();
+       if (rdp->nxtlist) {
+               local_irq_save(flags);
+               completed_snap = ACCESS_ONCE(rcp->completed);
  
                 /*
-                * start the next batch of callbacks
+                * move the other grace-period-completed entries to
+                * [rdp->nxtlist, *rdp->nxttail[0]) temporarily
                  */
+               if (!rcu_batch_before(completed_snap, rdp->batch))
+                       rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2];
+               else if (!rcu_batch_before(completed_snap, rdp->batch - 1))
+                       rdp->nxttail[0] = rdp->nxttail[1];
  
-               /* determine batch number */
-               rdp->batch = rcp->cur + 1;
-               /* see the comment and corresponding wmb() in
-                * the rcu_start_batch()
+               /*
+                * the grace period for entries in
+                * [rdp->nxtlist, *rdp->nxttail[0]) has completed, so
+                * move these entries to donelist
                  */
-               smp_rmb();
+               if (rdp->nxttail[0] != &rdp->nxtlist) {
+                       *rdp->donetail = rdp->nxtlist;
+                       rdp->donetail = rdp->nxttail[0];
+                       rdp->nxtlist = *rdp->nxttail[0];
+                       *rdp->donetail = NULL;
+
+                       if (rdp->nxttail[1] == rdp->nxttail[0])
+                               rdp->nxttail[1] = &rdp->nxtlist;
+                       if (rdp->nxttail[2] == rdp->nxttail[0])
+                               rdp->nxttail[2] = &rdp->nxtlist;
+                       rdp->nxttail[0] = &rdp->nxtlist;
+               }
+
+               local_irq_restore(flags);
+
+               if (rcu_batch_after(rdp->batch, rcp->pending)) {
+                       unsigned long flags2;
  
-               if (!rcp->next_pending) {
                         /* and start it/schedule start if it's a new batch */
-                       spin_lock(&rcp->lock);
-                       rcp->next_pending = 1;
-                       rcu_start_batch(rcp);
-                       spin_unlock(&rcp->lock);
+                       spin_lock_irqsave(&rcp->lock, flags2);
+                       if (rcu_batch_after(rdp->batch, rcp->pending)) {
+                               rcp->pending = rdp->batch;
+                               rcu_start_batch(rcp);
+                       }
+                       spin_unlock_irqrestore(&rcp->lock, flags2);
                 }
         }
  
@@ -462,21 +583,53 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
  
  static void rcu_process_callbacks(struct softirq_action *unused)
  {
+       /*
+        * Memory references from any prior RCU read-side critical sections
+        * executed by the interrupted code must be seen before any RCU
+        * grace-period manipulations below.
+        */
+
+       smp_mb(); /* See above block comment. */
+
         __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
         __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+
+       /*
+        * Memory references from any later RCU read-side critical sections
+        * executed by the interrupted code must be seen after any RCU
+        * grace-period manipulations above.
+        */
+
+       smp_mb(); /* See above block comment. */
  }
  
  static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
  {
-       /* This cpu has pending rcu entries and the grace period
-        * for them has completed.
-        */
-       if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
-               return 1;
+       /* Check for CPU stalls, if enabled. */
+       check_cpu_stall(rcp);
  
-       /* This cpu has no pending entries, but there are new entries */
-       if (!rdp->curlist && rdp->nxtlist)
-               return 1;
+       if (rdp->nxtlist) {
+               long completed_snap = ACCESS_ONCE(rcp->completed);
+
+               /*
+                * This cpu has pending rcu entries and the grace period
+                * for them has completed.
+                */
+               if (!rcu_batch_before(completed_snap, rdp->batch))
+                       return 1;
+               if (!rcu_batch_before(completed_snap, rdp->batch - 1) &&
+                               rdp->nxttail[0] != rdp->nxttail[1])
+                       return 1;
+               if (rdp->nxttail[0] != &rdp->nxtlist)
+                       return 1;
+
+               /*
+                * This cpu has pending rcu entries and the new batch
+                * for them has been neither started nor scheduled to start
+                */
+               if (rcu_batch_after(rdp->batch, rcp->pending))
+                       return 1;
+       }
  
         /* This cpu has finished callbacks to invoke */
         if (rdp->donelist)
@@ -512,9 +665,15 @@ int rcu_needs_cpu(int cpu)
         struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
         struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
  
-       return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
+       return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu);
  }
  
+/*
+ * Top-level function driving RCU grace-period detection, normally
+ * invoked from the scheduler-clock interrupt.  This function simply
+ * increments counters that are read only from softirq by this same
+ * CPU, so there are no memory barriers required.
+ */
  void rcu_check_callbacks(int cpu, int user)
  {
         if (user ||
@@ -558,14 +717,17 @@ void rcu_check_callbacks(int cpu, int user)
  static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
                                                 struct rcu_data *rdp)
  {
+       unsigned long flags;
+
+       spin_lock_irqsave(&rcp->lock, flags);
         memset(rdp, 0, sizeof(*rdp));
-       rdp->curtail = &rdp->curlist;
-       rdp->nxttail = &rdp->nxtlist;
+       rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;
         rdp->donetail = &rdp->donelist;
         rdp->quiescbatch = rcp->completed;
         rdp->qs_pending = 0;
         rdp->cpu = cpu;
         rdp->blimit = blimit;
+       spin_unlock_irqrestore(&rcp->lock, flags);
  }
  
  static void __cpuinit rcu_online_cpu(int cpu)
@@ -610,6 +772,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
   */
  void __init __rcu_init(void)
  {
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+       printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
         rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
                         (void *)(long)smp_processor_id());
         /* Register notifier for non-boot CPUs */
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c

index 27827931ca0dd6ca905040955616c970b2e7539d..ca4bbbe04aa4db9d150a503c9025dbe83e0d62a3 100644 (file)
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -58,14 +58,6 @@
  #include <linux/cpumask.h>
  #include <linux/rcupreempt_trace.h>
  
-/*
- * Macro that prevents the compiler from reordering accesses, but does
- * absolutely -nothing- to prevent CPUs from reordering.  This is used
- * only to mediate communication between mainline code and hardware
- * interrupt and NMI handlers.
- */
-#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
-
  /*
   * PREEMPT_RCU data structures.
   */
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c

index 5edf82c34bbceab891ecbc6e23d576a03c5763e8..35c2d3360ecf750be63922a89101f6cfac249107 100644 (file)
--- a/kernel/rcupreempt_trace.c
+++ b/kernel/rcupreempt_trace.c
@@ -308,11 +308,16 @@ out:
  
  static int __init rcupreempt_trace_init(void)
  {
+       int ret;
+
         mutex_init(&rcupreempt_trace_mutex);
         rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
         if (!rcupreempt_trace_buf)
                 return 1;
-       return rcupreempt_debugfs_init();
+       ret = rcupreempt_debugfs_init();
+       if (ret)
+               kfree(rcupreempt_trace_buf);
+       return ret;
  }
  
  static void __exit rcupreempt_trace_cleanup(void)
diff --git a/kernel/sched.c b/kernel/sched.c

index ad1962dc0aa20cb865675e8e08e49f5b49bce1df..6f230596bd0c1d21a2c68ffbff8207e93dcd65b5 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -204,11 +204,16 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
         rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
  }
  
+static inline int rt_bandwidth_enabled(void)
+{
+       return sysctl_sched_rt_runtime >= 0;
+}
+
  static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
  {
         ktime_t now;
  
-       if (rt_b->rt_runtime == RUNTIME_INF)
+       if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
                 return;
  
         if (hrtimer_active(&rt_b->rt_period_timer))
@@ -298,9 +303,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
  static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
  static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
  #endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_USER_SCHED */
  #define root_task_group init_task_group
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_USER_SCHED */
  
  /* task_group_lock serializes add/remove of task groups and also changes to
   * a task group's cpu shares.
@@ -604,9 +609,9 @@ struct rq {
  
  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
  
-static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync)
  {
-       rq->curr->sched_class->check_preempt_curr(rq, p);
+       rq->curr->sched_class->check_preempt_curr(rq, p, sync);
  }
  
  static inline int cpu_of(struct rq *rq)
@@ -1102,7 +1107,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
         hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
  }
  
-static void init_hrtick(void)
+static inline void init_hrtick(void)
  {
  }
  #endif /* CONFIG_SMP */
@@ -1121,7 +1126,7 @@ static void init_rq_hrtick(struct rq *rq)
         rq->hrtick_timer.function = hrtick;
         rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
  }
-#else
+#else  /* CONFIG_SCHED_HRTICK */
  static inline void hrtick_clear(struct rq *rq)
  {
  }
@@ -1133,7 +1138,7 @@ static inline void init_rq_hrtick(struct rq *rq)
  static inline void init_hrtick(void)
  {
  }
-#endif
+#endif /* CONFIG_SCHED_HRTICK */
  
  /*
   * resched_task - mark a task 'to be rescheduled now'.
@@ -1380,38 +1385,24 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
         update_load_sub(&rq->load, load);
  }
  
-#ifdef CONFIG_SMP
-static unsigned long source_load(int cpu, int type);
-static unsigned long target_load(int cpu, int type);
-static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
-
-static unsigned long cpu_avg_load_per_task(int cpu)
-{
-       struct rq *rq = cpu_rq(cpu);
-
-       if (rq->nr_running)
-               rq->avg_load_per_task = rq->load.weight / rq->nr_running;
-
-       return rq->avg_load_per_task;
-}
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
+#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
+typedef int (*tg_visitor)(struct task_group *, void *);
  
  /*
   * Iterate the full tree, calling @down when first entering a node and @up when
   * leaving it for the final time.
   */
-static void
-walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
+static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
  {
         struct task_group *parent, *child;
+       int ret;
  
         rcu_read_lock();
         parent = &root_task_group;
  down:
-       (*down)(parent, cpu, sd);
+       ret = (*down)(parent, data);
+       if (ret)
+               goto out_unlock;
         list_for_each_entry_rcu(child, &parent->children, siblings) {
                 parent = child;
                 goto down;
@@ -1419,15 +1410,43 @@ down:
  up:
                 continue;
         }
-       (*up)(parent, cpu, sd);
+       ret = (*up)(parent, data);
+       if (ret)
+               goto out_unlock;
  
         child = parent;
         parent = parent->parent;
         if (parent)
                 goto up;
+out_unlock:
         rcu_read_unlock();
+
+       return ret;
  }
  
+static int tg_nop(struct task_group *tg, void *data)
+{
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+static unsigned long source_load(int cpu, int type);
+static unsigned long target_load(int cpu, int type);
+static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
+
+static unsigned long cpu_avg_load_per_task(int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+
+       if (rq->nr_running)
+               rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+
+       return rq->avg_load_per_task;
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
  static void __set_se_shares(struct sched_entity *se, unsigned long shares);
  
  /*
@@ -1486,11 +1505,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
   * This needs to be done in a bottom-up fashion because the rq weight of a
   * parent group depends on the shares of its child groups.
   */
-static void
-tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_shares_up(struct task_group *tg, void *data)
  {
         unsigned long rq_weight = 0;
         unsigned long shares = 0;
+       struct sched_domain *sd = data;
         int i;
  
         for_each_cpu_mask(i, sd->span) {
@@ -1515,6 +1534,8 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
                 __update_group_shares_cpu(tg, i, shares, rq_weight);
                 spin_unlock_irqrestore(&rq->lock, flags);
         }
+
+       return 0;
  }
  
  /*
@@ -1522,10 +1543,10 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
   * This needs to be done in a top-down fashion because the load of a child
   * group is a fraction of its parents load.
   */
-static void
-tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_load_down(struct task_group *tg, void *data)
  {
         unsigned long load;
+       long cpu = (long)data;
  
         if (!tg->parent) {
                 load = cpu_rq(cpu)->load.weight;
@@ -1536,11 +1557,8 @@ tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
         }
  
         tg->cfs_rq[cpu]->h_load = load;
-}
  
-static void
-tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
-{
+       return 0;
  }
  
  static void update_shares(struct sched_domain *sd)
@@ -1550,7 +1568,7 @@ static void update_shares(struct sched_domain *sd)
  
         if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
                 sd->last_update = now;
-               walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
+               walk_tg_tree(tg_nop, tg_shares_up, sd);
         }
  }
  
@@ -1561,9 +1579,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
         spin_lock(&rq->lock);
  }
  
-static void update_h_load(int cpu)
+static void update_h_load(long cpu)
  {
-       walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
+       walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
  }
  
  #else
@@ -1921,11 +1939,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                 running = task_running(rq, p);
                 on_rq = p->se.on_rq;
                 ncsw = 0;
-               if (!match_state || p->state == match_state) {
-                       ncsw = p->nivcsw + p->nvcsw;
-                       if (unlikely(!ncsw))
-                               ncsw = 1;
-               }
+               if (!match_state || p->state == match_state)
+                       ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
                 task_rq_unlock(rq, &flags);
  
                 /*
@@ -2285,7 +2300,7 @@ out_running:
         trace_mark(kernel_sched_wakeup,
                 "pid %d state %ld ## rq %p task %p rq->curr %p",
                 p->pid, p->state, rq, p, rq->curr);
-       check_preempt_curr(rq, p);
+       check_preempt_curr(rq, p, sync);
  
         p->state = TASK_RUNNING;
  #ifdef CONFIG_SMP
@@ -2420,7 +2435,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
         trace_mark(kernel_sched_wakeup_new,
                 "pid %d state %ld ## rq %p task %p rq->curr %p",
                 p->pid, p->state, rq, p, rq->curr);
-       check_preempt_curr(rq, p);
+       check_preempt_curr(rq, p, 0);
  #ifdef CONFIG_SMP
         if (p->sched_class->task_wake_up)
                 p->sched_class->task_wake_up(rq, p);
@@ -2880,7 +2895,7 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
          * Note that idle threads have a prio of MAX_PRIO, for this test
          * to be always true for them.
          */
-       check_preempt_curr(this_rq, p);
+       check_preempt_curr(this_rq, p, 0);
  }
  
  /*
@@ -4627,6 +4642,15 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
  }
  EXPORT_SYMBOL_GPL(__wake_up_sync);     /* For internal use only */
  
+/**
+ * complete: - signals a single thread waiting on this completion
+ * @x:  holds the state of this particular completion
+ *
+ * This will wake up a single thread waiting on this completion. Threads will be
+ * awakened in the same order in which they were queued.
+ *
+ * See also complete_all(), wait_for_completion() and related routines.
+ */
  void complete(struct completion *x)
  {
         unsigned long flags;
@@ -4638,6 +4662,12 @@ void complete(struct completion *x)
  }
  EXPORT_SYMBOL(complete);
  
+/**
+ * complete_all: - signals all threads waiting on this completion
+ * @x:  holds the state of this particular completion
+ *
+ * This will wake up all threads waiting on this particular completion event.
+ */
  void complete_all(struct completion *x)
  {
         unsigned long flags;
@@ -4658,10 +4688,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
                 wait.flags |= WQ_FLAG_EXCLUSIVE;
                 __add_wait_queue_tail(&x->wait, &wait);
                 do {
-                       if ((state == TASK_INTERRUPTIBLE &&
-                            signal_pending(current)) ||
-                           (state == TASK_KILLABLE &&
-                            fatal_signal_pending(current))) {
+                       if (signal_pending_state(state, current)) {
                                 timeout = -ERESTARTSYS;
                                 break;
                         }
@@ -4689,12 +4716,31 @@ wait_for_common(struct completion *x, long timeout, int state)
         return timeout;
  }
  
+/**
+ * wait_for_completion: - waits for completion of a task
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It is NOT
+ * interruptible and there is no timeout.
+ *
+ * See also similar routines (e.g. wait_for_completion_timeout()) with timeout
+ * and interrupt capability. Also see complete().
+ */
  void __sched wait_for_completion(struct completion *x)
  {
         wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
  }
  EXPORT_SYMBOL(wait_for_completion);
  
+/**
+ * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. The timeout is in jiffies. It is not
+ * interruptible.
+ */
  unsigned long __sched
  wait_for_completion_timeout(struct completion *x, unsigned long timeout)
  {
@@ -4702,6 +4748,13 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
  }
  EXPORT_SYMBOL(wait_for_completion_timeout);
  
+/**
+ * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
+ * @x:  holds the state of this particular completion
+ *
+ * This waits for completion of a specific task to be signaled. It is
+ * interruptible.
+ */
  int __sched wait_for_completion_interruptible(struct completion *x)
  {
         long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
@@ -4711,6 +4764,14 @@ int __sched wait_for_completion_interruptible(struct completion *x)
  }
  EXPORT_SYMBOL(wait_for_completion_interruptible);
  
+/**
+ * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. It is interruptible. The timeout is in jiffies.
+ */
  unsigned long __sched
  wait_for_completion_interruptible_timeout(struct completion *x,
                                           unsigned long timeout)
@@ -4719,6 +4780,13 @@ wait_for_completion_interruptible_timeout(struct completion *x,
  }
  EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
  
+/**
+ * wait_for_completion_killable: - waits for completion of a task (killable)
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It can be
+ * interrupted by a kill signal.
+ */
  int __sched wait_for_completion_killable(struct completion *x)
  {
         long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
@@ -5121,7 +5189,8 @@ recheck:
                  * Do not allow realtime tasks into groups that have no runtime
                  * assigned.
                  */
-               if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+               if (rt_bandwidth_enabled() && rt_policy(policy) &&
+                               task_group(p)->rt_bandwidth.rt_runtime == 0)
                         return -EPERM;
  #endif
  
@@ -5957,7 +6026,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
         set_task_cpu(p, dest_cpu);
         if (on_rq) {
                 activate_task(rq_dest, p, 0);
-               check_preempt_curr(rq_dest, p);
+               check_preempt_curr(rq_dest, p, 0);
         }
  done:
         ret = 1;
@@ -6282,7 +6351,7 @@ set_table_entry(struct ctl_table *entry,
  static struct ctl_table *
  sd_alloc_ctl_domain_table(struct sched_domain *sd)
  {
-       struct ctl_table *table = sd_alloc_ctl_entry(12);
+       struct ctl_table *table = sd_alloc_ctl_entry(13);
  
         if (table == NULL)
                 return NULL;
@@ -6310,7 +6379,9 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
                 sizeof(int), 0644, proc_dointvec_minmax);
         set_table_entry(&table[10], "flags", &sd->flags,
                 sizeof(int), 0644, proc_dointvec_minmax);
-       /* &table[11] is terminator */
+       set_table_entry(&table[11], "name", sd->name,
+               CORENAME_MAX_SIZE, 0444, proc_dostring);
+       /* &table[12] is terminator */
  
         return table;
  }
@@ -7194,13 +7265,21 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
   * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
   */
  
+#ifdef CONFIG_SCHED_DEBUG
+# define SD_INIT_NAME(sd, type)                sd->name = #type
+#else
+# define SD_INIT_NAME(sd, type)                do { } while (0)
+#endif
+
  #define        SD_INIT(sd, type)       sd_init_##type(sd)
+
  #define SD_INIT_FUNC(type)     \
  static noinline void sd_init_##type(struct sched_domain *sd)   \
  {                                                              \
         memset(sd, 0, sizeof(*sd));                             \
         *sd = SD_##type##_INIT;                                 \
         sd->level = SD_LV_##type;                               \
+       SD_INIT_NAME(sd, type);                                 \
  }
  
  SD_INIT_FUNC(CPU)
@@ -8242,20 +8321,25 @@ void __might_sleep(char *file, int line)
  #ifdef in_atomic
         static unsigned long prev_jiffy;        /* ratelimiting */
  
-       if ((in_atomic() || irqs_disabled()) &&
-           system_state == SYSTEM_RUNNING && !oops_in_progress) {
-               if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
-                       return;
-               prev_jiffy = jiffies;
-               printk(KERN_ERR "BUG: sleeping function called from invalid"
-                               " context at %s:%d\n", file, line);
-               printk("in_atomic():%d, irqs_disabled():%d\n",
-                       in_atomic(), irqs_disabled());
-               debug_show_held_locks(current);
-               if (irqs_disabled())
-                       print_irqtrace_events(current);
-               dump_stack();
-       }
+       if ((!in_atomic() && !irqs_disabled()) ||
+                   system_state != SYSTEM_RUNNING || oops_in_progress)
+               return;
+       if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+               return;
+       prev_jiffy = jiffies;
+
+       printk(KERN_ERR
+               "BUG: sleeping function called from invalid context at %s:%d\n",
+                       file, line);
+       printk(KERN_ERR
+               "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
+                       in_atomic(), irqs_disabled(),
+                       current->pid, current->comm);
+
+       debug_show_held_locks(current);
+       if (irqs_disabled())
+               print_irqtrace_events(current);
+       dump_stack();
  #endif
  }
  EXPORT_SYMBOL(__might_sleep);
@@ -8753,73 +8837,95 @@ static DEFINE_MUTEX(rt_constraints_mutex);
  static unsigned long to_ratio(u64 period, u64 runtime)
  {
         if (runtime == RUNTIME_INF)
-               return 1ULL << 16;
+               return 1ULL << 20;
  
-       return div64_u64(runtime << 16, period);
+       return div64_u64(runtime << 20, period);
  }
  
-#ifdef CONFIG_CGROUP_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
  {
-       struct task_group *tgi, *parent = tg->parent;
-       unsigned long total = 0;
+       struct task_struct *g, *p;
  
-       if (!parent) {
-               if (global_rt_period() < period)
-                       return 0;
+       do_each_thread(g, p) {
+               if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+                       return 1;
+       } while_each_thread(g, p);
  
-               return to_ratio(period, runtime) <
-                       to_ratio(global_rt_period(), global_rt_runtime());
-       }
+       return 0;
+}
  
-       if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
-               return 0;
+struct rt_schedulable_data {
+       struct task_group *tg;
+       u64 rt_period;
+       u64 rt_runtime;
+};
  
-       rcu_read_lock();
-       list_for_each_entry_rcu(tgi, &parent->children, siblings) {
-               if (tgi == tg)
-                       continue;
+static int tg_schedulable(struct task_group *tg, void *data)
+{
+       struct rt_schedulable_data *d = data;
+       struct task_group *child;
+       unsigned long total, sum = 0;
+       u64 period, runtime;
  
-               total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-                               tgi->rt_bandwidth.rt_runtime);
+       period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+       runtime = tg->rt_bandwidth.rt_runtime;
+
+       if (tg == d->tg) {
+               period = d->rt_period;
+               runtime = d->rt_runtime;
         }
-       rcu_read_unlock();
  
-       return total + to_ratio(period, runtime) <=
-               to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
-                               parent->rt_bandwidth.rt_runtime);
-}
-#elif defined CONFIG_USER_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
-       struct task_group *tgi;
-       unsigned long total = 0;
-       unsigned long global_ratio =
-               to_ratio(global_rt_period(), global_rt_runtime());
+       /*
+        * Cannot have more runtime than the period.
+        */
+       if (runtime > period && runtime != RUNTIME_INF)
+               return -EINVAL;
  
-       rcu_read_lock();
-       list_for_each_entry_rcu(tgi, &task_groups, list) {
-               if (tgi == tg)
-                       continue;
+       /*
+        * Ensure we don't starve existing RT tasks.
+        */
+       if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+               return -EBUSY;
+
+       total = to_ratio(period, runtime);
+
+       /*
+        * Nobody can have more than the global setting allows.
+        */
+       if (total > to_ratio(global_rt_period(), global_rt_runtime()))
+               return -EINVAL;
+
+       /*
+        * The sum of our children's runtime should not exceed our own.
+        */
+       list_for_each_entry_rcu(child, &tg->children, siblings) {
+               period = ktime_to_ns(child->rt_bandwidth.rt_period);
+               runtime = child->rt_bandwidth.rt_runtime;
+
+               if (child == d->tg) {
+                       period = d->rt_period;
+                       runtime = d->rt_runtime;
+               }
  
-               total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-                               tgi->rt_bandwidth.rt_runtime);
+               sum += to_ratio(period, runtime);
         }
-       rcu_read_unlock();
  
-       return total + to_ratio(period, runtime) < global_ratio;
+       if (sum > total)
+               return -EINVAL;
+
+       return 0;
  }
-#endif
  
-/* Must be called with tasklist_lock held */
-static inline int tg_has_rt_tasks(struct task_group *tg)
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
  {
-       struct task_struct *g, *p;
-       do_each_thread(g, p) {
-               if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
-                       return 1;
-       } while_each_thread(g, p);
-       return 0;
+       struct rt_schedulable_data data = {
+               .tg = tg,
+               .rt_period = period,
+               .rt_runtime = runtime,
+       };
+
+       return walk_tg_tree(tg_schedulable, tg_nop, &data);
  }
  
  static int tg_set_bandwidth(struct task_group *tg,
@@ -8829,14 +8935,9 @@ static int tg_set_bandwidth(struct task_group *tg,
  
         mutex_lock(&rt_constraints_mutex);
         read_lock(&tasklist_lock);
-       if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
-               err = -EBUSY;
+       err = __rt_schedulable(tg, rt_period, rt_runtime);
+       if (err)
                 goto unlock;
-       }
-       if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
-               err = -EINVAL;
-               goto unlock;
-       }
  
         spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
         tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@ -8905,19 +9006,25 @@ long sched_group_rt_period(struct task_group *tg)
  
  static int sched_rt_global_constraints(void)
  {
-       struct task_group *tg = &root_task_group;
-       u64 rt_runtime, rt_period;
+       u64 runtime, period;
         int ret = 0;
  
         if (sysctl_sched_rt_period <= 0)
                 return -EINVAL;
  
-       rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
-       rt_runtime = tg->rt_bandwidth.rt_runtime;
+       runtime = global_rt_runtime();
+       period = global_rt_period();
+
+       /*
+        * Sanity check on the sysctl variables.
+        */
+       if (runtime > period && runtime != RUNTIME_INF)
+               return -EINVAL;
  
         mutex_lock(&rt_constraints_mutex);
-       if (!__rt_schedulable(tg, rt_period, rt_runtime))
-               ret = -EINVAL;
+       read_lock(&tasklist_lock);
+       ret = __rt_schedulable(NULL, 0, 0);
+       read_unlock(&tasklist_lock);
         mutex_unlock(&rt_constraints_mutex);
  
         return ret;
@@ -8991,7 +9098,6 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
  
         if (!cgrp->parent) {
                 /* This is early initialization for the top cgroup */
-               init_task_group.css.cgroup = cgrp;
                 return &init_task_group.css;
         }
  
@@ -9000,9 +9106,6 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
         if (IS_ERR(tg))
                 return ERR_PTR(-ENOMEM);
  
-       /* Bind the cgroup to task_group object we just created */
-       tg->css.cgroup = cgrp;
-
         return &tg->css;
  }
  
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index fb8994c6d4bb4bbe90a71f89341baee3cc6e9806..18fd17172eb66bb567ca4bcc47ca6c0cea923462 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -408,64 +408,6 @@ static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
         return __sched_period(nr_running);
  }
  
-/*
- * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
- * that it favours >=0 over <0.
- *
- *   -20         |
- *               |
- *     0 --------+-------
- *             .'
- *    19     .'
- *
- */
-static unsigned long
-calc_delta_asym(unsigned long delta, struct sched_entity *se)
-{
-       struct load_weight lw = {
-               .weight = NICE_0_LOAD,
-               .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
-       };
-
-       for_each_sched_entity(se) {
-               struct load_weight *se_lw = &se->load;
-               unsigned long rw = cfs_rq_of(se)->load.weight;
-
-#ifdef CONFIG_FAIR_SCHED_GROUP
-               struct cfs_rq *cfs_rq = se->my_q;
-               struct task_group *tg = NULL
-
-               if (cfs_rq)
-                       tg = cfs_rq->tg;
-
-               if (tg && tg->shares < NICE_0_LOAD) {
-                       /*
-                        * scale shares to what it would have been had
-                        * tg->weight been NICE_0_LOAD:
-                        *
-                        *   weight = 1024 * shares / tg->weight
-                        */
-                       lw.weight *= se->load.weight;
-                       lw.weight /= tg->shares;
-
-                       lw.inv_weight = 0;
-
-                       se_lw = &lw;
-                       rw += lw.weight - se->load.weight;
-               } else
-#endif
-
-               if (se->load.weight < NICE_0_LOAD) {
-                       se_lw = &lw;
-                       rw += NICE_0_LOAD - se->load.weight;
-               }
-
-               delta = calc_delta_mine(delta, rw, se_lw);
-       }
-
-       return delta;
-}
-
  /*
   * Update the current task's runtime statistics. Skip current tasks that
   * are not in our scheduling class.
@@ -586,11 +528,12 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
         update_load_add(&cfs_rq->load, se->load.weight);
         if (!parent_entity(se))
                 inc_cpu_load(rq_of(cfs_rq), se->load.weight);
-       if (entity_is_task(se))
+       if (entity_is_task(se)) {
                 add_cfs_task_weight(cfs_rq, se->load.weight);
+               list_add(&se->group_node, &cfs_rq->tasks);
+       }
         cfs_rq->nr_running++;
         se->on_rq = 1;
-       list_add(&se->group_node, &cfs_rq->tasks);
  }
  
  static void
@@ -599,11 +542,12 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
         update_load_sub(&cfs_rq->load, se->load.weight);
         if (!parent_entity(se))
                 dec_cpu_load(rq_of(cfs_rq), se->load.weight);
-       if (entity_is_task(se))
+       if (entity_is_task(se)) {
                 add_cfs_task_weight(cfs_rq, -se->load.weight);
+               list_del_init(&se->group_node);
+       }
         cfs_rq->nr_running--;
         se->on_rq = 0;
-       list_del_init(&se->group_node);
  }
  
  static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1085,7 +1029,6 @@ static long effective_load(struct task_group *tg, int cpu,
                 long wl, long wg)
  {
         struct sched_entity *se = tg->se[cpu];
-       long more_w;
  
         if (!tg->parent)
                 return wl;
@@ -1097,18 +1040,17 @@ static long effective_load(struct task_group *tg, int cpu,
         if (!wl && sched_feat(ASYM_EFF_LOAD))
                 return wl;
  
-       /*
-        * Instead of using this increment, also add the difference
-        * between when the shares were last updated and now.
-        */
-       more_w = se->my_q->load.weight - se->my_q->rq_weight;
-       wl += more_w;
-       wg += more_w;
-
         for_each_sched_entity(se) {
-#define D(n) (likely(n) ? (n) : 1)
-
                 long S, rw, s, a, b;
+               long more_w;
+
+               /*
+                * Instead of using this increment, also add the difference
+                * between when the shares were last updated and now.
+                */
+               more_w = se->my_q->load.weight - se->my_q->rq_weight;
+               wl += more_w;
+               wg += more_w;
  
                 S = se->my_q->tg->shares;
                 s = se->my_q->shares;
@@ -1117,7 +1059,11 @@ static long effective_load(struct task_group *tg, int cpu,
                 a = S*(rw + wl);
                 b = S*rw + s*wg;
  
-               wl = s*(a-b)/D(b);
+               wl = s*(a-b);
+
+               if (likely(b))
+                       wl /= b;
+
                 /*
                  * Assume the group is already running and will
                  * thus already be accounted for in the weight.
@@ -1126,7 +1072,6 @@ static long effective_load(struct task_group *tg, int cpu,
                  * alter the group weight.
                  */
                 wg = 0;
-#undef D
         }
  
         return wl;
@@ -1143,7 +1088,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
  #endif
  
  static int
-wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
+wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
             struct task_struct *p, int prev_cpu, int this_cpu, int sync,
             int idx, unsigned long load, unsigned long this_load,
             unsigned int imbalance)
@@ -1158,6 +1103,11 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
         if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
                 return 0;
  
+       if (!sync && sched_feat(SYNC_WAKEUPS) &&
+           curr->se.avg_overlap < sysctl_sched_migration_cost &&
+           p->se.avg_overlap < sysctl_sched_migration_cost)
+               sync = 1;
+
         /*
          * If sync wakeup then subtract the (maximum possible)
          * effect of the currently running task from the load
@@ -1182,17 +1132,14 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
          * a reasonable amount of time then attract this newly
          * woken task:
          */
-       if (sync && balanced) {
-               if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
-                   p->se.avg_overlap < sysctl_sched_migration_cost)
-                       return 1;
-       }
+       if (sync && balanced)
+               return 1;
  
         schedstat_inc(p, se.nr_wakeups_affine_attempts);
         tl_per_task = cpu_avg_load_per_task(this_cpu);
  
-       if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
-                       balanced) {
+       if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
+                       tl_per_task)) {
                 /*
                  * This domain has SD_WAKE_AFFINE and
                  * p is cache cold in this domain, and
@@ -1211,16 +1158,17 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
         struct sched_domain *sd, *this_sd = NULL;
         int prev_cpu, this_cpu, new_cpu;
         unsigned long load, this_load;
-       struct rq *rq, *this_rq;
+       struct rq *this_rq;
         unsigned int imbalance;
         int idx;
  
         prev_cpu        = task_cpu(p);
-       rq              = task_rq(p);
         this_cpu        = smp_processor_id();
         this_rq         = cpu_rq(this_cpu);
         new_cpu         = prev_cpu;
  
+       if (prev_cpu == this_cpu)
+               goto out;
         /*
          * 'this_sd' is the first domain that both
          * this_cpu and prev_cpu are present in:
@@ -1248,13 +1196,10 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
         load = source_load(prev_cpu, idx);
         this_load = target_load(this_cpu, idx);
  
-       if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
+       if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
                                      load, this_load, imbalance))
                 return this_cpu;
  
-       if (prev_cpu == this_cpu)
-               goto out;
-
         /*
          * Start passive balancing when half the imbalance_pct
          * limit is reached.
@@ -1281,62 +1226,20 @@ static unsigned long wakeup_gran(struct sched_entity *se)
          * + nice tasks.
          */
         if (sched_feat(ASYM_GRAN))
-               gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
-       else
-               gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
+               gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
  
         return gran;
  }
  
-/*
- * Should 'se' preempt 'curr'.
- *
- *             |s1
- *        |s2
- *   |s3
- *         g
- *      |<--->|c
- *
- *  w(c, s1) = -1
- *  w(c, s2) =  0
- *  w(c, s3) =  1
- *
- */
-static int
-wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
-{
-       s64 gran, vdiff = curr->vruntime - se->vruntime;
-
-       if (vdiff < 0)
-               return -1;
-
-       gran = wakeup_gran(curr);
-       if (vdiff > gran)
-               return 1;
-
-       return 0;
-}
-
-/* return depth at which a sched entity is present in the hierarchy */
-static inline int depth_se(struct sched_entity *se)
-{
-       int depth = 0;
-
-       for_each_sched_entity(se)
-               depth++;
-
-       return depth;
-}
-
  /*
   * Preempt the current task with a newly woken task if needed:
   */
-static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
+static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
  {
         struct task_struct *curr = rq->curr;
         struct cfs_rq *cfs_rq = task_cfs_rq(curr);
         struct sched_entity *se = &curr->se, *pse = &p->se;
-       int se_depth, pse_depth;
+       s64 delta_exec;
  
         if (unlikely(rt_prio(p->prio))) {
                 update_rq_clock(rq);
@@ -1350,6 +1253,13 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
  
         cfs_rq_of(pse)->next = pse;
  
+       /*
+        * We can come here with TIF_NEED_RESCHED already set from new task
+        * wake up path.
+        */
+       if (test_tsk_need_resched(curr))
+               return;
+
         /*
          * Batch tasks do not preempt (their preemption is driven by
          * the tick):
@@ -1360,33 +1270,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
         if (!sched_feat(WAKEUP_PREEMPT))
                 return;
  
-       /*
-        * preemption test can be made between sibling entities who are in the
-        * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
-        * both tasks until we find their ancestors who are siblings of common
-        * parent.
-        */
-
-       /* First walk up until both entities are at same depth */
-       se_depth = depth_se(se);
-       pse_depth = depth_se(pse);
-
-       while (se_depth > pse_depth) {
-               se_depth--;
-               se = parent_entity(se);
-       }
-
-       while (pse_depth > se_depth) {
-               pse_depth--;
-               pse = parent_entity(pse);
-       }
-
-       while (!is_same_group(se, pse)) {
-               se = parent_entity(se);
-               pse = parent_entity(pse);
+       if (sched_feat(WAKEUP_OVERLAP) && (sync ||
+                       (se->avg_overlap < sysctl_sched_migration_cost &&
+                        pse->avg_overlap < sysctl_sched_migration_cost))) {
+               resched_task(curr);
+               return;
         }
  
-       if (wakeup_preempt_entity(se, pse) == 1)
+       delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+       if (delta_exec > wakeup_gran(pse))
                 resched_task(curr);
  }
  
@@ -1445,19 +1337,9 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
         if (next == &cfs_rq->tasks)
                 return NULL;
  
-       /* Skip over entities that are not tasks */
-       do {
-               se = list_entry(next, struct sched_entity, group_node);
-               next = next->next;
-       } while (next != &cfs_rq->tasks && !entity_is_task(se));
-
-       if (next == &cfs_rq->tasks)
-               return NULL;
-
-       cfs_rq->balance_iterator = next;
-
-       if (entity_is_task(se))
-               p = task_of(se);
+       se = list_entry(next, struct sched_entity, group_node);
+       p = task_of(se);
+       cfs_rq->balance_iterator = next->next;
  
         return p;
  }
@@ -1507,7 +1389,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
         rcu_read_lock();
         update_h_load(busiest_cpu);
  
-       list_for_each_entry(tg, &task_groups, list) {
+       list_for_each_entry_rcu(tg, &task_groups, list) {
                 struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
                 unsigned long busiest_h_load = busiest_cfs_rq->h_load;
                 unsigned long busiest_weight = busiest_cfs_rq->load.weight;
@@ -1620,10 +1502,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
                  * 'current' within the tree based on its new key value.
                  */
                 swap(curr->vruntime, se->vruntime);
+               resched_task(rq->curr);
         }
  
         enqueue_task_fair(rq, p, 0);
-       resched_task(rq->curr);
  }
  
  /*
@@ -1642,7 +1524,7 @@ static void prio_changed_fair(struct rq *rq, struct task_struct *p,
                 if (p->prio > oldprio)
                         resched_task(rq->curr);
         } else
-               check_preempt_curr(rq, p);
+               check_preempt_curr(rq, p, 0);
  }
  
  /*
@@ -1659,7 +1541,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p,
         if (running)
                 resched_task(rq->curr);
         else
-               check_preempt_curr(rq, p);
+               check_preempt_curr(rq, p, 0);
  }
  
  /* Account for a task changing its policy or group.
diff --git a/kernel/sched_features.h b/kernel/sched_features.h

index 9353ca78154e880c786376108d2ef1be4335cde4..7c9e8f4a049f6c6ec6fb935de7d766b2135e8782 100644 (file)
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -11,3 +11,4 @@ SCHED_FEAT(ASYM_GRAN, 1)
  SCHED_FEAT(LB_BIAS, 1)
  SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
  SCHED_FEAT(ASYM_EFF_LOAD, 1)
+SCHED_FEAT(WAKEUP_OVERLAP, 0)
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c

index 3a4f92dbbe6609b786da0add62f2306e991b791f..dec4ccabe2f5c8af51566dcc323ce52858ff8764 100644 (file)
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync)
  /*
   * Idle tasks are unconditionally rescheduled:
   */
-static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
+static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync)
  {
         resched_task(rq->idle);
  }
@@ -76,7 +76,7 @@ static void switched_to_idle(struct rq *rq, struct task_struct *p,
         if (running)
                 resched_task(rq->curr);
         else
-               check_preempt_curr(rq, p);
+               check_preempt_curr(rq, p, 0);
  }
  
  static void prio_changed_idle(struct rq *rq, struct task_struct *p,
@@ -93,7 +93,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
                 if (p->prio > oldprio)
                         resched_task(rq->curr);
         } else
-               check_preempt_curr(rq, p);
+               check_preempt_curr(rq, p, 0);
  }
  
  /*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c

index 1113157b20581b07cbcdf325d4d3428cdd7cd288..cdf5740ab03e8133c0a2b7713d6c77d2be1f07bf 100644 (file)
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -102,12 +102,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
  
  static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
  {
+       struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
         struct sched_rt_entity *rt_se = rt_rq->rt_se;
  
-       if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) {
-               struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
-
-               enqueue_rt_entity(rt_se);
+       if (rt_rq->rt_nr_running) {
+               if (rt_se && !on_rt_rq(rt_se))
+                       enqueue_rt_entity(rt_se);
                 if (rt_rq->highest_prio < curr->prio)
                         resched_task(curr);
         }
@@ -231,6 +231,9 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
  #endif /* CONFIG_RT_GROUP_SCHED */
  
  #ifdef CONFIG_SMP
+/*
+ * We ran out of runtime, see if we can borrow some from our neighbours.
+ */
  static int do_balance_runtime(struct rt_rq *rt_rq)
  {
         struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
@@ -250,9 +253,18 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
                         continue;
  
                 spin_lock(&iter->rt_runtime_lock);
+               /*
+                * Either all rqs have inf runtime and there's nothing to steal
+                * or __disable_runtime() below sets a specific rq to inf to
+                * indicate it's been disabled and disallow stealing.
+                */
                 if (iter->rt_runtime == RUNTIME_INF)
                         goto next;
  
+               /*
+                * From runqueues with spare time, take 1/n part of their
+                * spare time, but no more than our period.
+                */
                 diff = iter->rt_runtime - iter->rt_time;
                 if (diff > 0) {
                         diff = div_u64((u64)diff, weight);
@@ -274,6 +286,9 @@ next:
         return more;
  }
  
+/*
+ * Ensure this RQ takes back all the runtime it lent to its neighbours.
+ */
  static void __disable_runtime(struct rq *rq)
  {
         struct root_domain *rd = rq->rd;
@@ -289,17 +304,33 @@ static void __disable_runtime(struct rq *rq)
  
                 spin_lock(&rt_b->rt_runtime_lock);
                 spin_lock(&rt_rq->rt_runtime_lock);
+               /*
+                * Either we're all inf and nobody needs to borrow, or we're
+                * already disabled and thus have nothing to do, or we have
+                * exactly the right amount of runtime to take out.
+                */
                 if (rt_rq->rt_runtime == RUNTIME_INF ||
                                 rt_rq->rt_runtime == rt_b->rt_runtime)
                         goto balanced;
                 spin_unlock(&rt_rq->rt_runtime_lock);
  
+               /*
+                * Calculate the difference between what we started out with
+                * and what we currently have; that's the amount of runtime
+                * we lent and now have to reclaim.
+                */
                 want = rt_b->rt_runtime - rt_rq->rt_runtime;
  
+               /*
+                * Greedy reclaim, take back as much as we can.
+                */
                 for_each_cpu_mask(i, rd->span) {
                         struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
                         s64 diff;
  
+                       /*
+                        * Can't reclaim from ourselves or disabled runqueues.
+                        */
                         if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
                                 continue;
  
@@ -319,8 +350,16 @@ static void __disable_runtime(struct rq *rq)
                 }
  
                 spin_lock(&rt_rq->rt_runtime_lock);
+               /*
+                * We cannot be left wanting - that would mean some runtime
+                * leaked out of the system.
+                */
                 BUG_ON(want);
  balanced:
+               /*
+                * Disable all the borrow logic by pretending we have inf
+                * runtime - in which case borrowing doesn't make sense.
+                */
                 rt_rq->rt_runtime = RUNTIME_INF;
                 spin_unlock(&rt_rq->rt_runtime_lock);
                 spin_unlock(&rt_b->rt_runtime_lock);
@@ -343,6 +382,9 @@ static void __enable_runtime(struct rq *rq)
         if (unlikely(!scheduler_running))
                 return;
  
+       /*
+        * Reset each runqueue's bandwidth settings
+        */
         for_each_leaf_rt_rq(rt_rq, rq) {
                 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
  
@@ -389,7 +431,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
         int i, idle = 1;
         cpumask_t span;
  
-       if (rt_b->rt_runtime == RUNTIME_INF)
+       if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
                 return 1;
  
         span = sched_rt_period_mask();
@@ -487,6 +529,9 @@ static void update_curr_rt(struct rq *rq)
         curr->se.exec_start = rq->clock;
         cpuacct_charge(curr, delta_exec);
  
+       if (!rt_bandwidth_enabled())
+               return;
+
         for_each_sched_rt_entity(rt_se) {
                 rt_rq = rt_rq_of_se(rt_se);
  
@@ -784,7 +829,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
  /*
   * Preempt the current task with a newly woken task if needed:
   */
-static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
+static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync)
  {
         if (p->prio < rq->curr->prio) {
                 resched_task(rq->curr);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c

index cb02324bdb88b1c477f055af028bd4eace476d4d..a4d2193981675aa7b5e2048bf229d5dbdbeccdcd 100644 (file)
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -20,6 +20,7 @@
  #include <linux/profile.h>
  #include <linux/sched.h>
  #include <linux/tick.h>
+#include <linux/module.h>
  
  #include <asm/irq_regs.h>
  
@@ -190,9 +191,17 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
  {
         struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
  
-       *last_update_time = ktime_to_us(ts->idle_lastupdate);
+       if (!tick_nohz_enabled)
+               return -1;
+
+       if (ts->idle_active)
+               *last_update_time = ktime_to_us(ts->idle_lastupdate);
+       else
+               *last_update_time = ktime_to_us(ktime_get());
+
         return ktime_to_us(ts->idle_sleeptime);
  }
+EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
  
  /**
   * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
diff --git a/kernel/user.c b/kernel/user.c

index 865ecf57a09604cc1307407cc99f90c1cecc5309..39d6159fae430cf60811839f4e2dbd20aadb9e4a 100644 (file)
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -169,7 +169,7 @@ static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
  {
         struct user_struct *up = container_of(kobj, struct user_struct, kobj);
  
-       return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
+       return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
  }
  
  static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
@@ -180,7 +180,7 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
         unsigned long rt_runtime;
         int rc;
  
-       sscanf(buf, "%lu", &rt_runtime);
+       sscanf(buf, "%ld", &rt_runtime);
  
         rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
  
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug

index 7d7a31d0ddebabd916412e6132fefb1221f4361a..ce697e0b319ea08e8b8127197dd87becd9f0ef99 100644 (file)
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
           Say N here if you want the RCU torture tests to start only
           after being manually enabled via /proc.
  
+config RCU_CPU_STALL_DETECTOR
+       bool "Check for stalled CPUs delaying RCU grace periods"
+       depends on CLASSIC_RCU
+       default n
+       help
+         This option causes RCU to printk information on which
+         CPUs are delaying the current grace period, but only when
+         the grace period extends for excessive time periods.
+
+         Say Y if you want RCU to perform such checks.
+
+         Say N if you are unsure.
+
  config KPROBES_SANITY_TEST
         bool "Kprobes sanity tests"
         depends on DEBUG_KERNEL
diff --git a/scripts/Makefile b/scripts/Makefile

index 1c73c5aea66b06bcecff315356abcecc2a79522f..aafdf064feefdf49bc98d66c72199907e30ca921 100644 (file)
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -20,6 +20,7 @@ hostprogs-y += unifdef
  
  subdir-$(CONFIG_MODVERSIONS) += genksyms
  subdir-y                     += mod
+subdir-$(CONFIG_SECURITY_SELINUX) += selinux
  
  # Let clean descend into subdirs
-subdir-        += basic kconfig package
+subdir-        += basic kconfig package selinux
diff --git a/scripts/selinux/Makefile b/scripts/selinux/Makefile

new file mode 100644 (file)

index 0000000..ca4b1ec
--- /dev/null
+++ b/scripts/selinux/Makefile
@@ -0,0 +1,2 @@
+subdir-y := mdp
+subdir-        += mdp
diff --git a/scripts/selinux/README b/scripts/selinux/README

new file mode 100644 (file)

index 0000000..a936315
--- /dev/null
+++ b/scripts/selinux/README
@@ -0,0 +1,2 @@
+Please see Documentation/SELinux.txt for information on
+installing a dummy SELinux policy.
diff --git a/scripts/selinux/install_policy.sh b/scripts/selinux/install_policy.sh

new file mode 100644 (file)

index 0000000..7b9ccf6
--- /dev/null
+++ b/scripts/selinux/install_policy.sh
@@ -0,0 +1,69 @@
+#!/bin/sh
+if [ `id -u` -ne 0 ]; then
+       echo "$0: must be root to install the selinux policy"
+       exit 1
+fi
+SF=`which setfiles`
+if [ $? -eq 1 ]; then
+       if [ -f /sbin/setfiles ]; then
+               SF="/usr/setfiles"
+       else
+               echo "no selinux tools installed: setfiles"
+               exit 1
+       fi
+fi
+
+cd mdp
+
+CP=`which checkpolicy`
+VERS=`$CP -V | awk '{print $1}'`
+
+./mdp policy.conf file_contexts
+$CP -o policy.$VERS policy.conf
+
+mkdir -p /etc/selinux/dummy/policy
+mkdir -p /etc/selinux/dummy/contexts/files
+
+cp file_contexts /etc/selinux/dummy/contexts/files
+cp dbus_contexts /etc/selinux/dummy/contexts
+cp policy.$VERS /etc/selinux/dummy/policy
+FC_FILE=/etc/selinux/dummy/contexts/files/file_contexts
+
+if [ ! -d /etc/selinux ]; then
+       mkdir -p /etc/selinux
+fi
+if [ ! -f /etc/selinux/config ]; then
+       cat > /etc/selinux/config << EOF
+SELINUX=enforcing
+SELINUXTYPE=dummy
+EOF
+else
+       TYPE=`cat /etc/selinux/config | grep "^SELINUXTYPE" | tail -1 | awk -F= '{ print $2 '}`
+       if [ "eq$TYPE" != "eqdummy" ]; then
+               selinuxenabled
+               if [ $? -eq 0 ]; then
+                       echo "SELinux already enabled with a non-dummy policy."
+                       echo "Exiting.  Please install policy by hand if that"
+                       echo "is what you REALLY want."
+                       exit 1
+               fi
+               mv /etc/selinux/config /etc/selinux/config.mdpbak
+               grep -v "^SELINUXTYPE" /etc/selinux/config.mdpbak >> /etc/selinux/config
+               echo "SELINUXTYPE=dummy" >> /etc/selinux/config
+       fi
+fi
+
+cd /etc/selinux/dummy/contexts/files
+$SF file_contexts /
+
+mounts=`cat /proc/$$/mounts | egrep "ext2|ext3|xfs|jfs|ext4|ext4dev|gfs2" | awk '{ print $2 '}`
+$SF file_contexts $mounts
+
+
+dodev=`cat /proc/$$/mounts | grep "/dev "`
+if [ "eq$dodev" != "eq" ]; then
+       mount --move /dev /mnt
+       $SF file_contexts /dev
+       mount --move /mnt /dev
+fi
+
diff --git a/scripts/selinux/mdp/.gitignore b/scripts/selinux/mdp/.gitignore

new file mode 100644 (file)

index 0000000..654546d
--- /dev/null
+++ b/scripts/selinux/mdp/.gitignore
@@ -0,0 +1,2 @@
+# Generated file
+mdp
diff --git a/scripts/selinux/mdp/Makefile b/scripts/selinux/mdp/Makefile

new file mode 100644 (file)

index 0000000..eb365b3
--- /dev/null
+++ b/scripts/selinux/mdp/Makefile
@@ -0,0 +1,5 @@
+hostprogs-y    := mdp
+HOST_EXTRACFLAGS += -Isecurity/selinux/include
+
+always         := $(hostprogs-y)
+clean-files    := $(hostprogs-y) policy.* file_contexts
diff --git a/scripts/selinux/mdp/dbus_contexts b/scripts/selinux/mdp/dbus_contexts

new file mode 100644 (file)

index 0000000..116e684
--- /dev/null
+++ b/scripts/selinux/mdp/dbus_contexts
@@ -0,0 +1,6 @@
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+<busconfig>
+  <selinux>
+  </selinux>
+</busconfig>
diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c

new file mode 100644 (file)

index 0000000..ca757d4
--- /dev/null
+++ b/scripts/selinux/mdp/mdp.c
@@ -0,0 +1,242 @@
+/*
+ *
+ * mdp - make dummy policy
+ *
+ * When pointed at a kernel tree, builds a dummy policy for that kernel
+ * with exactly one type with full rights to itself.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Authors: Serge E. Hallyn <serue@us.ibm.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "flask.h"
+
+/*
+ * Print the command-line synopsis for @name on stdout, then terminate
+ * the process with exit status 1.  Never returns to the caller.
+ */
+void usage(char *name)
+{
+       fprintf(stdout, "usage: %s [-m] policy_file context_file\n", name);
+       exit(1);
+}
+
+/*
+ * find_common_name - copy the text between the first and second '_' of
+ * @cname into @dest as a NUL-terminated string.
+ *
+ * @cname: identifier of the form "<prefix>_<name>_<rest>"
+ * @dest:  output buffer; must have room for @len + 1 bytes, because a
+ *         name of exactly @len characters is accepted and then terminated
+ * @len:   maximum accepted length of the extracted name
+ *
+ * Prints "Error with commons defines" and exits with status 1 when @cname
+ * does not contain two underscores or when a length check fails.
+ */
+void find_common_name(char *cname, char *dest, int len)
+{
+       char *start, *end;
+       size_t n;
+
+       /*
+        * Look at strchr()'s result before stepping past the '_':
+        * computing NULL + 1 is undefined and the result would never
+        * compare equal to NULL, so a missing '_' would go undetected.
+        */
+       start = strchr(cname, '_');
+       end = start ? strchr(++start, '_') : NULL;
+       if (!end || start-cname > len || end-start > len) {
+               printf("Error with commons defines\n");
+               exit(1);
+       }
+
+       /* No NUL can occur before 'end', so a plain bounded copy suffices. */
+       n = end - start;
+       memcpy(dest, start, n);
+       dest[n] = '\0';
+}
+
+#define S_(x) x,       /* each S_() entry of class_to_string.h becomes one array element */
+static char *classlist[] = {   /* main() indexes this by class number and tolerates NULL slots */
+#include "class_to_string.h"
+       NULL    /* terminator; it is included in sizeof(classlist) */
+};
+#undef S_
+
+#include "initial_sid_to_string.h"     /* presumably defines initial_sid_to_string[] used by main() - confirm */
+
+#define TB_(x) char *x[] = {   /* first pass: TB_(x) opens an array named x */
+#define TE_(x) NULL };         /* ...and TE_() closes it with a NULL terminator */
+#define S_(x) x,
+#include "common_perm_to_string.h"
+#undef TB_
+#undef TE_
+#undef S_
+
+struct common {
+       char *cname;    /* stringified array identifier (#x from TB_) */
+       char **perms;   /* the NULL-terminated array emitted by the first pass */
+};
+struct common common[] = {
+#define TB_(x) { #x, x },      /* second pass: one table row per TB_() */
+#define S_(x)                  /* expands to nothing in this pass */
+#define TE_(x)                 /* expands to nothing in this pass */
+#include "common_perm_to_string.h"
+#undef TB_
+#undef TE_
+#undef S_
+};
+
+#define S_(x, y, z) {x, #y},   /* keeps arg 1 and the stringified arg 2; arg 3 is dropped */
+struct av_inherit {
+       int class;      /* compared with the classlist index in main() */
+       char *common;   /* printed after "inherits" by main() */
+};
+struct av_inherit av_inherit[] = {
+#include "av_inherit.h"
+};
+#undef S_
+
+#include "av_permissions.h"
+#define S_(x, y, z) {x, y, z}, /* all three arguments are stored as-is */
+struct av_perms {
+       int class;      /* compared with the classlist index in main() */
+       int perm_i;     /* second S_() argument; not read by main() */
+       char *perm_s;   /* permission name printed by main() */
+};
+struct av_perms av_perms[] = {
+#include "av_perm_to_string.h"
+};
+#undef S_
+/*
+ * main - write a dummy SELinux policy source and a matching context file.
+ *
+ * Invocation: mdp [-m] policy_file context_file
+ *
+ * The policy text is emitted in this order: class declarations, initial
+ * SID declarations, common permission sets, per-class permission lists,
+ * the single type/role/user with "allow ... *" rules, initial SID
+ * contexts, and the fs_use / genfscon statements.  The context file then
+ * receives two entries, for "/" and "/.*".
+ *
+ * Returns 0 on success.  The only failures detected are too few
+ * arguments, an fopen() failure and the unimplemented -m option; each is
+ * reported on stdout and ends the process via usage() or exit(1).
+ */
+int main(int argc, char *argv[])
+{
+       int i, j, mls = 0;
+       char **arg, *polout, *ctxout;
+       int classlist_len, initial_sid_to_string_len;
+       FILE *fout;
+
+       if (argc < 3)
+               usage(argv[0]);         /* usage() calls exit(1) and does not return */
+       arg = argv+1;
+       if (argc==4 && strcmp(argv[1], "-m") == 0) {
+               mls = 1;
+               arg++;                  /* step over the -m flag */
+       }
+       polout = *arg++;                /* first path: policy output */
+       ctxout = *arg;                  /* second path: context output */
+
+       fout = fopen(polout, "w");
+       if (!fout) {
+               printf("Could not open %s for writing\n", polout);
+               usage(argv[0]);
+       }
+
+       classlist_len = sizeof(classlist) / sizeof(char *);     /* counts the NULL terminator too */
+       /* print out the classes; index 0 is skipped and every NULL slot
+        * (including the terminating NULL, which is inside the loop
+        * range) is emitted under the synthetic name user<index> */
+       for (i=1; i < classlist_len; i++) {
+               if(classlist[i])
+                       fprintf(fout, "class %s\n", classlist[i]);
+               else
+                       fprintf(fout, "class user%d\n", i);
+       }
+       fprintf(fout, "\n");
+
+       initial_sid_to_string_len = sizeof(initial_sid_to_string) / sizeof (char *);
+       /* print out the sids; index 0 is skipped */
+       for (i=1; i < initial_sid_to_string_len; i++)
+               fprintf(fout, "sid %s\n", initial_sid_to_string[i]);
+       fprintf(fout, "\n");
+
+       /* print out the commons: one "common <name> { perms }" block per
+        * row of the common[] table */
+       for (i=0; i< sizeof(common)/sizeof(struct common); i++) {
+               char cname[101];        /* 100 name characters plus the terminator */
+               find_common_name(common[i].cname, cname, 100);
+               cname[100] = '\0';
+               fprintf(fout, "common %s\n{\n", cname);
+               for (j=0; common[i].perms[j]; j++)
+                       fprintf(fout, "\t%s\n", common[i].perms[j]);
+               fprintf(fout, "}\n\n");
+       }
+       fprintf(fout, "\n");
+
+       /* print out the class permissions; NULL class slots are skipped
+        * in this section */
+       for (i=1; i < classlist_len; i++) {
+               if (classlist[i]) {
+                       int firstperm = -1, numperms = 0;
+
+                       fprintf(fout, "class %s\n", classlist[i]);
+                       /* does it inherit from a common? */
+                       for (j=0; j < sizeof(av_inherit)/sizeof(struct av_inherit); j++)
+                               if (av_inherit[j].class == i)
+                                       fprintf(fout, "inherits %s\n", av_inherit[j].common);
+
+                       for (j=0; j < sizeof(av_perms)/sizeof(struct av_perms); j++) {
+                               if (av_perms[j].class == i) {
+                                       if (firstperm == -1)
+                                               firstperm = j;  /* remember the first match */
+                                       numperms++;
+                               }
+                       }
+                       if (!numperms) {
+                               fprintf(fout, "\n");
+                               continue;       /* class has no permissions of its own */
+                       }
+
+                       fprintf(fout, "{\n");
+                       /* print out the av_perms: numperms consecutive rows
+                        * starting at firstperm.
+                        * NOTE(review): this assumes all rows of one class
+                        * are adjacent in av_perms[] - confirm against
+                        * av_perm_to_string.h */
+                       for (j=0; j < numperms; j++) {
+                               fprintf(fout, "\t%s\n", av_perms[firstperm+j].perm_s);
+                       }
+                       fprintf(fout, "}\n\n");
+               }
+       }
+       fprintf(fout, "\n");
+
+       /* NOW PRINT OUT MLS STUFF: -m is accepted but not implemented.
+        * The process exits here after the policy file has already been
+        * partially written and without an explicit fclose(fout). */
+       if (mls) {
+               printf("MLS not yet implemented\n");
+               exit(1);
+       }
+
+       /* types, roles, and allows: one type, one role, and an allow rule
+        * granting every permission ("*") for each class name emitted above */
+       fprintf(fout, "type base_t;\n");
+       fprintf(fout, "role base_r types { base_t };\n");
+       for (i=1; i < classlist_len; i++) {
+               if (classlist[i])
+                       fprintf(fout, "allow base_t base_t:%s *;\n", classlist[i]);
+               else
+                       fprintf(fout, "allow base_t base_t:user%d *;\n", i);
+       }
+       fprintf(fout, "user user_u roles { base_r };\n");
+       fprintf(fout, "\n");
+
+       /* default sids: every initial SID gets the same context */
+       for (i=1; i < initial_sid_to_string_len; i++)
+               fprintf(fout, "sid %s user_u:base_r:base_t\n", initial_sid_to_string[i]);
+       fprintf(fout, "\n");
+
+
+       fprintf(fout, "fs_use_xattr ext2 user_u:base_r:base_t;\n");
+       fprintf(fout, "fs_use_xattr ext3 user_u:base_r:base_t;\n");
+       fprintf(fout, "fs_use_xattr jfs user_u:base_r:base_t;\n");
+       fprintf(fout, "fs_use_xattr xfs user_u:base_r:base_t;\n");
+       fprintf(fout, "fs_use_xattr reiserfs user_u:base_r:base_t;\n");
+
+       fprintf(fout, "fs_use_task pipefs user_u:base_r:base_t;\n");
+       fprintf(fout, "fs_use_task sockfs user_u:base_r:base_t;\n");
+
+       fprintf(fout, "fs_use_trans devpts user_u:base_r:base_t;\n");
+       fprintf(fout, "fs_use_trans tmpfs user_u:base_r:base_t;\n");
+       fprintf(fout, "fs_use_trans shm user_u:base_r:base_t;\n");
+
+       fprintf(fout, "genfscon proc / user_u:base_r:base_t\n");
+
+       fclose(fout);           /* policy file complete; result is not checked */
+
+       fout = fopen(ctxout, "w");
+       if (!fout) {
+               printf("Wrote policy, but cannot open %s for writing\n", ctxout);
+               usage(argv[0]);
+       }
+       fprintf(fout, "/ user_u:base_r:base_t\n");
+       fprintf(fout, "/.* user_u:base_r:base_t\n");
+       fclose(fout);
+
+       return 0;
+}
diff --git a/security/Kconfig b/security/Kconfig

index 559293922a479eabf188b70e963d5264f9032799..d9f47ce7e2076877064e50eb2e34fd0f9c9fbd2a 100644 (file)
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -51,6 +51,14 @@ config SECURITY
  
           If you are unsure how to answer this question, answer N.
  
+config SECURITYFS
+       bool "Enable the securityfs filesystem"
+       help
+         This will build the securityfs filesystem.  It is currently used by
+         the TPM bios character driver.  It is not used by SELinux or SMACK.
+
+         If you are unsure how to answer this question, answer N.
+
  config SECURITY_NETWORK
         bool "Socket and Networking Security Hooks"
         depends on SECURITY
diff --git a/security/Makefile b/security/Makefile

index f65426099aa625be9aac60a1a009ea860956dbb7..c05c127fff9a795d9b3c21d7c1a510c6b7a5a4cc 100644 (file)
--- a/security/Makefile
+++ b/security/Makefile
@@ -10,7 +10,8 @@ subdir-$(CONFIG_SECURITY_SMACK)               += smack
  obj-y          += commoncap.o
  
  # Object file lists
-obj-$(CONFIG_SECURITY)                 += security.o capability.o inode.o
+obj-$(CONFIG_SECURITY)                 += security.o capability.o
+obj-$(CONFIG_SECURITYFS)               += inode.o
  # Must precede capability.o in order to stack properly.
  obj-$(CONFIG_SECURITY_SELINUX)         += selinux/built-in.o
  obj-$(CONFIG_SECURITY_SMACK)           += smack/built-in.o
diff --git a/security/commoncap.c b/security/commoncap.c

index e4c4b3fc0c04e49e553610a8bd4d433e55e9212a..399bfdb9e2da99c4ef81fdd8b0391b1f5571c371 100644 (file)
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -541,7 +541,7 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid,
   * yet with increased caps.
   * So we check for increased caps on the target process.
   */
-static inline int cap_safe_nice(struct task_struct *p)
+static int cap_safe_nice(struct task_struct *p)
  {
         if (!cap_issubset(p->cap_permitted, current->cap_permitted) &&
             !capable(CAP_SYS_NICE))
diff --git a/security/inode.c b/security/inode.c

index acc6cf0d79001fa06d53c5b592fb4c90f5ea88a2..ca4958ebad8d30c7c4f8ce4bdf1b98309fe2e13e 100644 (file)
--- a/security/inode.c
+++ b/security/inode.c
@@ -190,7 +190,7 @@ static int create_by_name(const char *name, mode_t mode,
   * @name: a pointer to a string containing the name of the file to create.
   * @mode: the permission that the file should have
   * @parent: a pointer to the parent dentry for this file.  This should be a
- *          directory dentry if set.  If this paramater is NULL, then the
+ *          directory dentry if set.  If this parameter is %NULL, then the
   *          file will be created in the root of the securityfs filesystem.
   * @data: a pointer to something that the caller will want to get to later
   *        on.  The inode.i_private pointer will point to this value on
@@ -199,18 +199,18 @@ static int create_by_name(const char *name, mode_t mode,
   *        this file.
   *
   * This is the basic "create a file" function for securityfs.  It allows for a
- * wide range of flexibility in createing a file, or a directory (if you
+ * wide range of flexibility in creating a file, or a directory (if you
   * want to create a directory, the securityfs_create_dir() function is
- * recommended to be used instead.)
+ * recommended to be used instead).
   *
- * This function will return a pointer to a dentry if it succeeds.  This
+ * This function returns a pointer to a dentry if it succeeds.  This
   * pointer must be passed to the securityfs_remove() function when the file is
   * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.)  If an error occurs, NULL will be returned.
+ * you are responsible here).  If an error occurs, %NULL is returned.
   *
- * If securityfs is not enabled in the kernel, the value -ENODEV will be
+ * If securityfs is not enabled in the kernel, the value %-ENODEV is
   * returned.  It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
   * code.
   */
  struct dentry *securityfs_create_file(const char *name, mode_t mode,
@@ -252,19 +252,19 @@ EXPORT_SYMBOL_GPL(securityfs_create_file);
   * @name: a pointer to a string containing the name of the directory to
   *        create.
   * @parent: a pointer to the parent dentry for this file.  This should be a
- *          directory dentry if set.  If this paramater is NULL, then the
+ *          directory dentry if set.  If this parameter is %NULL, then the
   *          directory will be created in the root of the securityfs filesystem.
   *
- * This function creates a directory in securityfs with the given name.
+ * This function creates a directory in securityfs with the given @name.
   *
- * This function will return a pointer to a dentry if it succeeds.  This
+ * This function returns a pointer to a dentry if it succeeds.  This
   * pointer must be passed to the securityfs_remove() function when the file is
   * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.)  If an error occurs, NULL will be returned.
+ * you are responsible here).  If an error occurs, %NULL will be returned.
   *
- * If securityfs is not enabled in the kernel, the value -ENODEV will be
+ * If securityfs is not enabled in the kernel, the value %-ENODEV is
   * returned.  It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
   * code.
   */
  struct dentry *securityfs_create_dir(const char *name, struct dentry *parent)
@@ -278,16 +278,15 @@ EXPORT_SYMBOL_GPL(securityfs_create_dir);
  /**
   * securityfs_remove - removes a file or directory from the securityfs filesystem
   *
- * @dentry: a pointer to a the dentry of the file or directory to be
- *          removed.
+ * @dentry: a pointer to the dentry of the file or directory to be removed.
   *
   * This function removes a file or directory in securityfs that was previously
   * created with a call to another securityfs function (like
   * securityfs_create_file() or variants thereof.)
   *
   * This function is required to be called in order for the file to be
- * removed, no automatic cleanup of files will happen when a module is
- * removed, you are responsible here.
+ * removed. No automatic cleanup of files will happen when a module is
+ * removed; you are responsible here.
   */
  void securityfs_remove(struct dentry *dentry)
  {
diff --git a/security/security.c b/security/security.c

index 3a4b4f55b33f373d5a85145d61e6de95cce4bf3c..255b08559b2b62e057d5fda7a8b4396b2bdbb065 100644 (file)
--- a/security/security.c
+++ b/security/security.c
@@ -82,8 +82,8 @@ __setup("security=", choose_lsm);
   *
   * Return true if:
   *     -The passed LSM is the one chosen by user at boot time,
- *     -or user didsn't specify a specific LSM and we're the first to ask
- *      for registeration permissoin,
+ *     -or user didn't specify a specific LSM and we're the first to ask
+ *      for registration permission,
   *     -or the passed LSM is currently loaded.
   * Otherwise, return false.
   */
@@ -101,13 +101,13 @@ int __init security_module_enable(struct security_operations *ops)
   * register_security - registers a security framework with the kernel
   * @ops: a pointer to the struct security_options that is to be registered
   *
- * This function is to allow a security module to register itself with the
+ * This function allows a security module to register itself with the
   * kernel security subsystem.  Some rudimentary checking is done on the @ops
   * value passed to this function. You'll need to check first if your LSM
   * is allowed to register its @ops by calling security_module_enable(@ops).
   *
   * If there is already a security module registered with the kernel,
- * an error will be returned.  Otherwise 0 is returned on success.
+ * an error will be returned.  Otherwise %0 is returned on success.
   */
  int register_security(struct security_operations *ops)
  {
diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig

index a436d1cfa88b8ef68fec2de2addf3af4cc2e54e3..26301dd651d3a4722efde37d16855f3c622b71bc 100644 (file)
--- a/security/selinux/Kconfig
+++ b/security/selinux/Kconfig
@@ -6,9 +6,6 @@ config SECURITY_SELINUX
         help
           This selects NSA Security-Enhanced Linux (SELinux).
           You will also need a policy configuration and a labeled filesystem.
-         You can obtain the policy compiler (checkpolicy), the utility for
-         labeling filesystems (setfiles), and an example policy configuration
-         from <http://www.nsa.gov/selinux/>.
           If you are unsure how to answer this question, answer N.
  
  config SECURITY_SELINUX_BOOTPARAM
diff --git a/security/selinux/avc.c b/security/selinux/avc.c

index 114b4b4c97b23b9acdf25e92d1b250f78167457d..cb30c7e350b356c00a8d77dc5532ff20b6e5f62d 100644 (file)
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -136,7 +136,7 @@ static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass)
   * @tclass: target security class
   * @av: access vector
   */
-static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
+void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
  {
         const char **common_pts = NULL;
         u32 common_base = 0;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c

index 03fc6a81ae32bd783ddd96eca85f118a2ba79bd8..4a7374c12d9ca5a2b9c76c1101ecb42ee44b48af 100644 (file)
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -957,7 +957,8 @@ out_err:
         return rc;
  }
  
-void selinux_write_opts(struct seq_file *m, struct security_mnt_opts *opts)
+static void selinux_write_opts(struct seq_file *m,
+                              struct security_mnt_opts *opts)
  {
         int i;
         char *prefix;
@@ -1290,7 +1291,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
                 /* Default to the fs superblock SID. */
                 isec->sid = sbsec->sid;
  
-               if (sbsec->proc) {
+               if (sbsec->proc && !S_ISLNK(inode->i_mode)) {
                         struct proc_inode *proci = PROC_I(inode);
                         if (proci->pde) {
                                 isec->sclass = inode_mode_to_security_class(inode->i_mode);
@@ -3548,38 +3549,44 @@ out:
  #endif /* IPV6 */
  
  static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad,
-                            char **addrp, int src, u8 *proto)
+                            char **_addrp, int src, u8 *proto)
  {
-       int ret = 0;
+       char *addrp;
+       int ret;
  
         switch (ad->u.net.family) {
         case PF_INET:
                 ret = selinux_parse_skb_ipv4(skb, ad, proto);
-               if (ret || !addrp)
-                       break;
-               *addrp = (char *)(src ? &ad->u.net.v4info.saddr :
-                                       &ad->u.net.v4info.daddr);
-               break;
+               if (ret)
+                       goto parse_error;
+               addrp = (char *)(src ? &ad->u.net.v4info.saddr :
+                                      &ad->u.net.v4info.daddr);
+               goto okay;
  
  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
         case PF_INET6:
                 ret = selinux_parse_skb_ipv6(skb, ad, proto);
-               if (ret || !addrp)
-                       break;
-               *addrp = (char *)(src ? &ad->u.net.v6info.saddr :
-                                       &ad->u.net.v6info.daddr);
-               break;
+               if (ret)
+                       goto parse_error;
+               addrp = (char *)(src ? &ad->u.net.v6info.saddr :
+                                      &ad->u.net.v6info.daddr);
+               goto okay;
  #endif /* IPV6 */
         default:
-               break;
+               addrp = NULL;
+               goto okay;
         }
  
-       if (unlikely(ret))
-               printk(KERN_WARNING
-                      "SELinux: failure in selinux_parse_skb(),"
-                      " unable to parse packet\n");
-
+parse_error:
+       printk(KERN_WARNING
+              "SELinux: failure in selinux_parse_skb(),"
+              " unable to parse packet\n");
         return ret;
+
+okay:
+       if (_addrp)
+               *_addrp = addrp;
+       return 0;
  }
  
  /**
@@ -5219,8 +5226,12 @@ static int selinux_setprocattr(struct task_struct *p,
  
                 if (sid == 0)
                         return -EINVAL;
-
-               /* Only allow single threaded processes to change context */
+               /*
+                * SELinux allows a context change only in the following cases:
+                *  - The process is single-threaded.
+                *  - A multi-threaded process is changing its context to a
+                *    more restricted domain (defined by a TYPEBOUNDS statement).
+                */
                 if (atomic_read(&p->mm->mm_users) != 1) {
                         struct task_struct *g, *t;
                         struct mm_struct *mm = p->mm;
@@ -5228,11 +5239,16 @@ static int selinux_setprocattr(struct task_struct *p,
                         do_each_thread(g, t) {
                                 if (t->mm == mm && t != p) {
                                         read_unlock(&tasklist_lock);
-                                       return -EPERM;
+                                       error = security_bounded_transition(tsec->sid, sid);
+                                       if (!error)
+                                               goto boundary_ok;
+
+                                       return error;
                                 }
                         } while_each_thread(g, t);
                         read_unlock(&tasklist_lock);
                 }
+boundary_ok:
  
                 /* Check permissions for the transition. */
                 error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS,
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h

index 7b9769f5e775e1dcacd24292aa2ba43d60e38691..d12ff1a9c0aa5e383347af29f9f03fe96e626ed2 100644 (file)
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -12,6 +12,7 @@
  #include <linux/kdev_t.h>
  #include <linux/spinlock.h>
  #include <linux/init.h>
+#include <linux/audit.h>
  #include <linux/in6.h>
  #include <linux/path.h>
  #include <asm/system.h>
@@ -126,6 +127,9 @@ int avc_add_callback(int (*callback)(u32 event, u32 ssid, u32 tsid,
                      u32 events, u32 ssid, u32 tsid,
                      u16 tclass, u32 perms);
  
+/* Shows permission in human readable form */
+void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av);
+
  /* Exported to selinuxfs */
  int avc_get_hash_stats(char *page);
  extern unsigned int avc_cache_threshold;
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h

index 7c543003d653676f5d72a34ffd60d1f301b2d11a..72447370bc959f4551583d421993138dc9c513b2 100644 (file)
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -27,13 +27,14 @@
  #define POLICYDB_VERSION_RANGETRANS    21
  #define POLICYDB_VERSION_POLCAP                22
  #define POLICYDB_VERSION_PERMISSIVE    23
+#define POLICYDB_VERSION_BOUNDARY      24
  
  /* Range of policy versions we understand*/
  #define POLICYDB_VERSION_MIN   POLICYDB_VERSION_BASE
  #ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX
  #define POLICYDB_VERSION_MAX   CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
  #else
-#define POLICYDB_VERSION_MAX   POLICYDB_VERSION_PERMISSIVE
+#define POLICYDB_VERSION_MAX   POLICYDB_VERSION_BOUNDARY
  #endif
  
  #define CONTEXT_MNT    0x01
@@ -62,6 +63,16 @@ enum {
  extern int selinux_policycap_netpeer;
  extern int selinux_policycap_openperm;
  
+/*
+ * type_datum properties
+ * available at the kernel policy version >= POLICYDB_VERSION_BOUNDARY
+ */
+#define TYPEDATUM_PROPERTY_PRIMARY     0x0001
+#define TYPEDATUM_PROPERTY_ATTRIBUTE   0x0002
+
+/* limitation of boundary depth  */
+#define POLICYDB_BOUNDS_MAXDEPTH       4
+
  int security_load_policy(void *data, size_t len);
  
  int security_policycap_supported(unsigned int req_cap);
@@ -117,6 +128,8 @@ int security_node_sid(u16 domain, void *addr, u32 addrlen,
  int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid,
                                  u16 tclass);
  
+int security_bounded_transition(u32 oldsid, u32 newsid);
+
  int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid);
  
  int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type,
diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c

index a1be97f8beea0ac0aa0fa7519801128b6eb54b62..1215b8e47dba669a6d86519b342de621b2efbfd2 100644 (file)
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -98,7 +98,7 @@ struct avtab_node *
  avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datum *datum)
  {
         int hvalue;
-       struct avtab_node *prev, *cur, *newnode;
+       struct avtab_node *prev, *cur;
         u16 specified = key->specified & ~(AVTAB_ENABLED|AVTAB_ENABLED_OLD);
  
         if (!h || !h->htable)
@@ -122,9 +122,7 @@ avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datu
                     key->target_class < cur->key.target_class)
                         break;
         }
-       newnode = avtab_insert_node(h, hvalue, prev, cur, key, datum);
-
-       return newnode;
+       return avtab_insert_node(h, hvalue, prev, cur, key, datum);
  }
  
  struct avtab_datum *avtab_search(struct avtab *h, struct avtab_key *key)
@@ -231,7 +229,7 @@ void avtab_destroy(struct avtab *h)
  
         for (i = 0; i < h->nslot; i++) {
                 cur = h->htable[i];
-               while (cur != NULL) {
+               while (cur) {
                         temp = cur;
                         cur = cur->next;
                         kmem_cache_free(avtab_node_cachep, temp);
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c

index fb4efe4f4bc8a5377192052d65242b683a87b438..4a4e35cac22bfc2a7426dd85e582ce113e07e843 100644 (file)
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -29,7 +29,7 @@ static int cond_evaluate_expr(struct policydb *p, struct cond_expr *expr)
         int s[COND_EXPR_MAXDEPTH];
         int sp = -1;
  
-       for (cur = expr; cur != NULL; cur = cur->next) {
+       for (cur = expr; cur; cur = cur->next) {
                 switch (cur->expr_type) {
                 case COND_BOOL:
                         if (sp == (COND_EXPR_MAXDEPTH - 1))
@@ -97,14 +97,14 @@ int evaluate_cond_node(struct policydb *p, struct cond_node *node)
                 if (new_state == -1)
                         printk(KERN_ERR "SELinux: expression result was undefined - disabling all rules.\n");
                 /* turn the rules on or off */
-               for (cur = node->true_list; cur != NULL; cur = cur->next) {
+               for (cur = node->true_list; cur; cur = cur->next) {
                         if (new_state <= 0)
                                 cur->node->key.specified &= ~AVTAB_ENABLED;
                         else
                                 cur->node->key.specified |= AVTAB_ENABLED;
                 }
  
-               for (cur = node->false_list; cur != NULL; cur = cur->next) {
+               for (cur = node->false_list; cur; cur = cur->next) {
                         /* -1 or 1 */
                         if (new_state)
                                 cur->node->key.specified &= ~AVTAB_ENABLED;
@@ -128,7 +128,7 @@ int cond_policydb_init(struct policydb *p)
  static void cond_av_list_destroy(struct cond_av_list *list)
  {
         struct cond_av_list *cur, *next;
-       for (cur = list; cur != NULL; cur = next) {
+       for (cur = list; cur; cur = next) {
                 next = cur->next;
                 /* the avtab_ptr_t node is destroy by the avtab */
                 kfree(cur);
@@ -139,7 +139,7 @@ static void cond_node_destroy(struct cond_node *node)
  {
         struct cond_expr *cur_expr, *next_expr;
  
-       for (cur_expr = node->expr; cur_expr != NULL; cur_expr = next_expr) {
+       for (cur_expr = node->expr; cur_expr; cur_expr = next_expr) {
                 next_expr = cur_expr->next;
                 kfree(cur_expr);
         }
@@ -155,7 +155,7 @@ static void cond_list_destroy(struct cond_node *list)
         if (list == NULL)
                 return;
  
-       for (cur = list; cur != NULL; cur = next) {
+       for (cur = list; cur; cur = next) {
                 next = cur->next;
                 cond_node_destroy(cur);
         }
@@ -239,7 +239,7 @@ int cond_read_bool(struct policydb *p, struct hashtab *h, void *fp)
         rc = next_entry(key, fp, len);
         if (rc < 0)
                 goto err;
-       key[len] = 0;
+       key[len] = '\0';
         if (hashtab_insert(h, key, booldatum))
                 goto err;
  
@@ -291,7 +291,7 @@ static int cond_insertf(struct avtab *a, struct avtab_key *k, struct avtab_datum
                                         goto err;
                                 }
                                 found = 0;
-                               for (cur = other; cur != NULL; cur = cur->next) {
+                               for (cur = other; cur; cur = cur->next) {
                                         if (cur->node == node_ptr) {
                                                 found = 1;
                                                 break;
@@ -485,7 +485,7 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decisi
         if (!ctab || !key || !avd)
                 return;
  
-       for (node = avtab_search_node(ctab, key); node != NULL;
+       for (node = avtab_search_node(ctab, key); node;
                                 node = avtab_search_node_next(node, key->specified)) {
                 if ((u16)(AVTAB_ALLOWED|AVTAB_ENABLED) ==
                     (node->key.specified & (AVTAB_ALLOWED|AVTAB_ENABLED)))
diff --git a/security/selinux/ss/conditional.h b/security/selinux/ss/conditional.h

index 65b9f8366e9c8635020c934e66f12a4901bbc1d8..53ddb013ae573f8bb053da9fa9416fbcf5ee9701 100644 (file)
--- a/security/selinux/ss/conditional.h
+++ b/security/selinux/ss/conditional.h
@@ -28,7 +28,7 @@ struct cond_expr {
  #define COND_XOR       5 /* bool ^ bool */
  #define COND_EQ                6 /* bool == bool */
  #define COND_NEQ       7 /* bool != bool */
-#define COND_LAST      8
+#define COND_LAST      COND_NEQ
         __u32 expr_type;
         __u32 bool;
         struct cond_expr *next;
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c

index ddc275490af89bddfd1a7e37d60e645fb39de358..68c7348d1acc6628f4c6121207edc34f6a7f707a 100644 (file)
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -109,7 +109,7 @@ int ebitmap_netlbl_export(struct ebitmap *ebmap,
         *catmap = c_iter;
         c_iter->startbit = e_iter->startbit & ~(NETLBL_CATMAP_SIZE - 1);
  
-       while (e_iter != NULL) {
+       while (e_iter) {
                 for (i = 0; i < EBITMAP_UNIT_NUMS; i++) {
                         unsigned int delta, e_startbit, c_endbit;
  
@@ -197,7 +197,7 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap,
                         }
                 }
                 c_iter = c_iter->next;
-       } while (c_iter != NULL);
+       } while (c_iter);
         if (e_iter != NULL)
                 ebmap->highbit = e_iter->startbit + EBITMAP_SIZE;
         else
diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c

index 2e7788e13213002832368b8febed33853c423cea..933e735bb1850d8b7d125f4899124ff8f2af0d0d 100644 (file)
--- a/security/selinux/ss/hashtab.c
+++ b/security/selinux/ss/hashtab.c
@@ -81,7 +81,7 @@ void *hashtab_search(struct hashtab *h, const void *key)
  
         hvalue = h->hash_value(h, key);
         cur = h->htable[hvalue];
-       while (cur != NULL && h->keycmp(h, key, cur->key) > 0)
+       while (cur && h->keycmp(h, key, cur->key) > 0)
                 cur = cur->next;
  
         if (cur == NULL || (h->keycmp(h, key, cur->key) != 0))
@@ -100,7 +100,7 @@ void hashtab_destroy(struct hashtab *h)
  
         for (i = 0; i < h->size; i++) {
                 cur = h->htable[i];
-               while (cur != NULL) {
+               while (cur) {
                         temp = cur;
                         cur = cur->next;
                         kfree(temp);
@@ -127,7 +127,7 @@ int hashtab_map(struct hashtab *h,
  
         for (i = 0; i < h->size; i++) {
                 cur = h->htable[i];
-               while (cur != NULL) {
+               while (cur) {
                         ret = apply(cur->key, cur->datum, args);
                         if (ret)
                                 return ret;
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c

index 77d745da48bb6e4c2153fc3a64852b9c08f80e51..b5407f16c2a4e71b06550beb671ddb9de14591b3 100644 (file)
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -283,8 +283,8 @@ int mls_context_to_sid(struct policydb *pol,
                 p++;
  
         delim = *p;
-       if (delim != 0)
-               *p++ = 0;
+       if (delim != '\0')
+               *p++ = '\0';
  
         for (l = 0; l < 2; l++) {
                 levdatum = hashtab_search(pol->p_levels.table, scontextp);
@@ -302,14 +302,14 @@ int mls_context_to_sid(struct policydb *pol,
                                 while (*p && *p != ',' && *p != '-')
                                         p++;
                                 delim = *p;
-                               if (delim != 0)
-                                       *p++ = 0;
+                               if (delim != '\0')
+                                       *p++ = '\0';
  
                                 /* Separate into range if exists */
                                 rngptr = strchr(scontextp, '.');
                                 if (rngptr != NULL) {
                                         /* Remove '.' */
-                                       *rngptr++ = 0;
+                                       *rngptr++ = '\0';
                                 }
  
                                 catdatum = hashtab_search(pol->p_cats.table,
@@ -357,8 +357,8 @@ int mls_context_to_sid(struct policydb *pol,
                                 p++;
  
                         delim = *p;
-                       if (delim != 0)
-                               *p++ = 0;
+                       if (delim != '\0')
+                               *p++ = '\0';
                 } else
                         break;
         }
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c

index 2391761ae42248806d92b2d2e220f5b1dd7ae6ea..72e4a54973aae503c9c9378aa392750f7f4adb20 100644 (file)
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -30,6 +30,7 @@
  #include <linux/slab.h>
  #include <linux/string.h>
  #include <linux/errno.h>
+#include <linux/audit.h>
  #include "security.h"
  
  #include "policydb.h"
@@ -116,7 +117,12 @@ static struct policydb_compat_info policydb_compat[] = {
                 .version        = POLICYDB_VERSION_PERMISSIVE,
                 .sym_num        = SYM_NUM,
                 .ocon_num       = OCON_NUM,
-       }
+       },
+       {
+               .version        = POLICYDB_VERSION_BOUNDARY,
+               .sym_num        = SYM_NUM,
+               .ocon_num       = OCON_NUM,
+       },
  };
  
  static struct policydb_compat_info *policydb_lookup_compat(int version)
@@ -254,7 +260,9 @@ static int role_index(void *key, void *datum, void *datap)
  
         role = datum;
         p = datap;
-       if (!role->value || role->value > p->p_roles.nprim)
+       if (!role->value
+           || role->value > p->p_roles.nprim
+           || role->bounds > p->p_roles.nprim)
                 return -EINVAL;
         p->p_role_val_to_name[role->value - 1] = key;
         p->role_val_to_struct[role->value - 1] = role;
@@ -270,9 +278,12 @@ static int type_index(void *key, void *datum, void *datap)
         p = datap;
  
         if (typdatum->primary) {
-               if (!typdatum->value || typdatum->value > p->p_types.nprim)
+               if (!typdatum->value
+                   || typdatum->value > p->p_types.nprim
+                   || typdatum->bounds > p->p_types.nprim)
                         return -EINVAL;
                 p->p_type_val_to_name[typdatum->value - 1] = key;
+               p->type_val_to_struct[typdatum->value - 1] = typdatum;
         }
  
         return 0;
@@ -285,7 +296,9 @@ static int user_index(void *key, void *datum, void *datap)
  
         usrdatum = datum;
         p = datap;
-       if (!usrdatum->value || usrdatum->value > p->p_users.nprim)
+       if (!usrdatum->value
+           || usrdatum->value > p->p_users.nprim
+           || usrdatum->bounds > p->p_users.nprim)
                 return -EINVAL;
         p->p_user_val_to_name[usrdatum->value - 1] = key;
         p->user_val_to_struct[usrdatum->value - 1] = usrdatum;
@@ -438,6 +451,14 @@ static int policydb_index_others(struct policydb *p)
                 goto out;
         }
  
+       p->type_val_to_struct =
+               kmalloc(p->p_types.nprim * sizeof(*(p->type_val_to_struct)),
+                       GFP_KERNEL);
+       if (!p->type_val_to_struct) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
         if (cond_init_bool_indexes(p)) {
                 rc = -ENOMEM;
                 goto out;
@@ -625,6 +646,7 @@ void policydb_destroy(struct policydb *p)
         kfree(p->class_val_to_struct);
         kfree(p->role_val_to_struct);
         kfree(p->user_val_to_struct);
+       kfree(p->type_val_to_struct);
  
         avtab_destroy(&p->te_avtab);
  
@@ -932,7 +954,7 @@ static int perm_read(struct policydb *p, struct hashtab *h, void *fp)
         rc = next_entry(key, fp, len);
         if (rc < 0)
                 goto bad;
-       key[len] = 0;
+       key[len] = '\0';
  
         rc = hashtab_insert(h, key, perdatum);
         if (rc)
@@ -979,7 +1001,7 @@ static int common_read(struct policydb *p, struct hashtab *h, void *fp)
         rc = next_entry(key, fp, len);
         if (rc < 0)
                 goto bad;
-       key[len] = 0;
+       key[len] = '\0';
  
         for (i = 0; i < nel; i++) {
                 rc = perm_read(p, comdatum->permissions.table, fp);
@@ -1117,7 +1139,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp)
         rc = next_entry(key, fp, len);
         if (rc < 0)
                 goto bad;
-       key[len] = 0;
+       key[len] = '\0';
  
         if (len2) {
                 cladatum->comkey = kmalloc(len2 + 1, GFP_KERNEL);
@@ -1128,7 +1150,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp)
                 rc = next_entry(cladatum->comkey, fp, len2);
                 if (rc < 0)
                         goto bad;
-               cladatum->comkey[len2] = 0;
+               cladatum->comkey[len2] = '\0';
  
                 cladatum->comdatum = hashtab_search(p->p_commons.table,
                                                     cladatum->comkey);
@@ -1176,8 +1198,8 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp)
  {
         char *key = NULL;
         struct role_datum *role;
-       int rc;
-       __le32 buf[2];
+       int rc, to_read = 2;
+       __le32 buf[3];
         u32 len;
  
         role = kzalloc(sizeof(*role), GFP_KERNEL);
@@ -1186,12 +1208,17 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp)
                 goto out;
         }
  
-       rc = next_entry(buf, fp, sizeof buf);
+       if (p->policyvers >= POLICYDB_VERSION_BOUNDARY)
+               to_read = 3;
+
+       rc = next_entry(buf, fp, sizeof(buf[0]) * to_read);
         if (rc < 0)
                 goto bad;
  
         len = le32_to_cpu(buf[0]);
         role->value = le32_to_cpu(buf[1]);
+       if (p->policyvers >= POLICYDB_VERSION_BOUNDARY)
+               role->bounds = le32_to_cpu(buf[2]);
  
         key = kmalloc(len + 1, GFP_KERNEL);
         if (!key) {
@@ -1201,7 +1228,7 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp)
         rc = next_entry(key, fp, len);
         if (rc < 0)
                 goto bad;
-       key[len] = 0;
+       key[len] = '\0';
  
         rc = ebitmap_read(&role->dominates, fp);
         if (rc)
@@ -1236,8 +1263,8 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp)
  {
         char *key = NULL;
         struct type_datum *typdatum;
-       int rc;
-       __le32 buf[3];
+       int rc, to_read = 3;
+       __le32 buf[4];
         u32 len;
  
         typdatum = kzalloc(sizeof(*typdatum), GFP_KERNEL);
@@ -1246,13 +1273,27 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp)
                 return rc;
         }
  
-       rc = next_entry(buf, fp, sizeof buf);
+       if (p->policyvers >= POLICYDB_VERSION_BOUNDARY)
+               to_read = 4;
+
+       rc = next_entry(buf, fp, sizeof(buf[0]) * to_read);
         if (rc < 0)
                 goto bad;
  
         len = le32_to_cpu(buf[0]);
         typdatum->value = le32_to_cpu(buf[1]);
-       typdatum->primary = le32_to_cpu(buf[2]);
+       if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) {
+               u32 prop = le32_to_cpu(buf[2]);
+
+               if (prop & TYPEDATUM_PROPERTY_PRIMARY)
+                       typdatum->primary = 1;
+               if (prop & TYPEDATUM_PROPERTY_ATTRIBUTE)
+                       typdatum->attribute = 1;
+
+               typdatum->bounds = le32_to_cpu(buf[3]);
+       } else {
+               typdatum->primary = le32_to_cpu(buf[2]);
+       }
  
         key = kmalloc(len + 1, GFP_KERNEL);
         if (!key) {
@@ -1262,7 +1303,7 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp)
         rc = next_entry(key, fp, len);
         if (rc < 0)
                 goto bad;
-       key[len] = 0;
+       key[len] = '\0';
  
         rc = hashtab_insert(h, key, typdatum);
         if (rc)
@@ -1309,8 +1350,8 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp)
  {
         char *key = NULL;
         struct user_datum *usrdatum;
-       int rc;
-       __le32 buf[2];
+       int rc, to_read = 2;
+       __le32 buf[3];
         u32 len;
  
         usrdatum = kzalloc(sizeof(*usrdatum), GFP_KERNEL);
@@ -1319,12 +1360,17 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp)
                 goto out;
         }
  
-       rc = next_entry(buf, fp, sizeof buf);
+       if (p->policyvers >= POLICYDB_VERSION_BOUNDARY)
+               to_read = 3;
+
+       rc = next_entry(buf, fp, sizeof(buf[0]) * to_read);
         if (rc < 0)
                 goto bad;
  
         len = le32_to_cpu(buf[0]);
         usrdatum->value = le32_to_cpu(buf[1]);
+       if (p->policyvers >= POLICYDB_VERSION_BOUNDARY)
+               usrdatum->bounds = le32_to_cpu(buf[2]);
  
         key = kmalloc(len + 1, GFP_KERNEL);
         if (!key) {
@@ -1334,7 +1380,7 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp)
         rc = next_entry(key, fp, len);
         if (rc < 0)
                 goto bad;
-       key[len] = 0;
+       key[len] = '\0';
  
         rc = ebitmap_read(&usrdatum->roles, fp);
         if (rc)
@@ -1388,7 +1434,7 @@ static int sens_read(struct policydb *p, struct hashtab *h, void *fp)
         rc = next_entry(key, fp, len);
         if (rc < 0)
                 goto bad;
-       key[len] = 0;
+       key[len] = '\0';
  
         levdatum->level = kmalloc(sizeof(struct mls_level), GFP_ATOMIC);
         if (!levdatum->level) {
@@ -1440,7 +1486,7 @@ static int cat_read(struct policydb *p, struct hashtab *h, void *fp)
         rc = next_entry(key, fp, len);
         if (rc < 0)
                 goto bad;
-       key[len] = 0;
+       key[len] = '\0';
  
         rc = hashtab_insert(h, key, catdatum);
         if (rc)
@@ -1465,6 +1511,133 @@ static int (*read_f[SYM_NUM]) (struct policydb *p, struct hashtab *h, void *fp)
         cat_read,
  };
  
+/*
+ * hashtab_map() callback: fail with -EINVAL if user @key holds a role that a
+ * user on its bounds chain lacks, or if that chain is too deep or looped.
+ */
+static int user_bounds_sanity_check(void *key, void *datum, void *datap)
+{
+       struct user_datum *upper, *user;
+       struct policydb *p = datap;
+       int depth = 0;
+
+       upper = user = datum;
+       while (upper->bounds) {
+               struct ebitmap_node *node;
+               unsigned long bit;
+
+               if (++depth == POLICYDB_BOUNDS_MAXDEPTH) {
+                       printk(KERN_ERR "SELinux: user %s: "
+                              "too deep or looped boundary\n", (char *) key);
+                       return -EINVAL;
+               }
+               /* bounds holds a 1-based user value; 0 ends the chain */
+               upper = p->user_val_to_struct[upper->bounds - 1];
+               ebitmap_for_each_positive_bit(&user->roles, node, bit) {
+                       if (ebitmap_get_bit(&upper->roles, bit))
+                               continue;
+
+                       printk(KERN_ERR "SELinux: boundary violated policy: "
+                              "user=%s role=%s bounds=%s\n",
+                              p->p_user_val_to_name[user->value - 1],
+                              p->p_role_val_to_name[bit],
+                              p->p_user_val_to_name[upper->value - 1]);
+                       return -EINVAL;
+               }
+       }
+       return 0;
+}
+
+static int role_bounds_sanity_check(void *key, void *datum, void *datap)
+{
+       struct role_datum *upper, *role;
+       struct policydb *p = datap;
+       int depth = 0;
+       /* hashtab_map() callback: validate the bounds chain of role @key */
+       upper = role = datum;
+       while (upper->bounds) {
+               struct ebitmap_node *node;
+               unsigned long bit;
+               /* depth counts iterations; hitting the maximum aborts the walk */
+               if (++depth == POLICYDB_BOUNDS_MAXDEPTH) {
+                       printk(KERN_ERR "SELinux: role %s: "
+                              "too deep or looped bounds\n",
+                              (char *) key);
+                       return -EINVAL;
+               }
+               /* bounds holds a 1-based role value; 0 ends the chain */
+               upper = p->role_val_to_struct[upper->bounds - 1];
+               ebitmap_for_each_positive_bit(&role->types, node, bit) {
+                       if (ebitmap_get_bit(&upper->types, bit))
+                               continue;
+                       /* role has a type bit set that this bounding role lacks */
+                       printk(KERN_ERR
+                              "SELinux: boundary violated policy: "
+                              "role=%s type=%s bounds=%s\n",
+                              p->p_role_val_to_name[role->value - 1],
+                              p->p_type_val_to_name[bit],
+                              p->p_role_val_to_name[upper->value - 1]);
+
+                       return -EINVAL;
+               }
+       }
+       /* every role on the chain has all of this role's type bits set */
+       return 0;
+}
+
+/*
+ * hashtab_map() callback: fail with -EINVAL if a type on the bounds chain of
+ * type @key is an attribute, or if that chain is too deep or looped.
+ */
+static int type_bounds_sanity_check(void *key, void *datum, void *datap)
+{
+       struct type_datum *upper = datum;
+       struct policydb *p = datap;
+       int depth = 0;
+
+       while (upper->bounds) {
+               if (++depth == POLICYDB_BOUNDS_MAXDEPTH) {
+                       printk(KERN_ERR "SELinux: type %s: "
+                              "too deep or looped boundary\n", (char *) key);
+                       return -EINVAL;
+               }
+               /* bounds holds a 1-based type value; 0 ends the chain */
+               upper = p->type_val_to_struct[upper->bounds - 1];
+               if (upper->attribute) {
+                       printk(KERN_ERR "SELinux: type %s: "
+                              "bounded by attribute %s\n", (char *) key,
+                              p->p_type_val_to_name[upper->value - 1]);
+                       return -EINVAL;
+               }
+       }
+       return 0;
+}
+
+static int policydb_bounds_sanity_check(struct policydb *p)
+{
+       int rc;
+       /* policies older than POLICYDB_VERSION_BOUNDARY are not checked */
+       if (p->policyvers < POLICYDB_VERSION_BOUNDARY)
+               return 0;
+       /* check every user, then role, then type; the first error is returned */
+       rc = hashtab_map(p->p_users.table,
+                        user_bounds_sanity_check, p);
+       if (rc)
+               return rc;
+
+       rc = hashtab_map(p->p_roles.table,
+                        role_bounds_sanity_check, p);
+       if (rc)
+               return rc;
+
+       rc = hashtab_map(p->p_types.table,
+                        type_bounds_sanity_check, p);
+       if (rc)
+               return rc;
+
+       return 0;
+}
+
  extern int ss_initialized;
  
  /*
@@ -1523,7 +1696,7 @@ int policydb_read(struct policydb *p, void *fp)
                 kfree(policydb_str);
                 goto bad;
         }
-       policydb_str[len] = 0;
+       policydb_str[len] = '\0';
         if (strcmp(policydb_str, POLICYDB_STRING)) {
                 printk(KERN_ERR "SELinux:  policydb string %s does not match "
                        "my string %s\n", policydb_str, POLICYDB_STRING);
@@ -1961,6 +2134,10 @@ int policydb_read(struct policydb *p, void *fp)
                                 goto bad;
         }
  
+       rc = policydb_bounds_sanity_check(p);
+       if (rc)
+               goto bad;
+
         rc = 0;
  out:
         return rc;
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h

index 4253370fda6a22cf7353b0c5c415316fbaa05f8c..55152d498b5342aba65d04c0a6be1b79f784a9f5 100644 (file)
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -61,6 +61,7 @@ struct class_datum {
  /* Role attributes */
  struct role_datum {
         u32 value;                      /* internal role value */
+       u32 bounds;                     /* boundary of role */
         struct ebitmap dominates;       /* set of roles dominated by this role */
         struct ebitmap types;           /* set of authorized types for role */
  };
@@ -81,12 +82,15 @@ struct role_allow {
  /* Type attributes */
  struct type_datum {
         u32 value;              /* internal type value */
+       u32 bounds;             /* boundary of type */
         unsigned char primary;  /* primary name? */
+       unsigned char attribute;/* attribute ?*/
  };
  
  /* User attributes */
  struct user_datum {
         u32 value;                      /* internal user value */
+       u32 bounds;                     /* bounds of user */
         struct ebitmap roles;           /* set of authorized roles for user */
         struct mls_range range;         /* MLS range (min - max) for user */
         struct mls_level dfltlevel;     /* default login MLS level for user */
@@ -209,6 +213,7 @@ struct policydb {
         struct class_datum **class_val_to_struct;
         struct role_datum **role_val_to_struct;
         struct user_datum **user_val_to_struct;
+       struct type_datum **type_val_to_struct;
  
         /* type enforcement access vectors and transitions */
         struct avtab te_avtab;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c

index 8551952ef329bcf62c9fe1516f8a4d181988b543..ab0cc0c7b9444e60a8cf625c0edb1aaa479456ba 100644 (file)
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -88,6 +88,11 @@ static u32 latest_granting;
  static int context_struct_to_string(struct context *context, char **scontext,
                                     u32 *scontext_len);
  
+static int context_struct_compute_av(struct context *scontext,
+                                    struct context *tcontext,
+                                    u16 tclass,
+                                    u32 requested,
+                                    struct av_decision *avd);
  /*
   * Return the boolean value of a constraint expression
   * when it is applied to the specified source and target
@@ -273,6 +278,100 @@ mls_ops:
         return s[0];
  }
  
+/*
+ * type_attribute_bounds_av - drops from avd->allowed the permissions
+ * found to violate a boundary constraint and reports them via audit.
+ */
+static void type_attribute_bounds_av(struct context *scontext,
+                                    struct context *tcontext,
+                                    u16 tclass,
+                                    u32 requested,
+                                    struct av_decision *avd)
+{
+       struct context lo_scontext;
+       struct context lo_tcontext;
+       struct av_decision lo_avd;
+       struct type_datum *source
+               = policydb.type_val_to_struct[scontext->type - 1];
+       struct type_datum *target
+               = policydb.type_val_to_struct[tcontext->type - 1];
+       u32 masked = 0;
+
+       if (source->bounds) {
+               memset(&lo_avd, 0, sizeof(lo_avd));
+               /* same query, but from the source's bounding type */
+               memcpy(&lo_scontext, scontext, sizeof(lo_scontext));
+               lo_scontext.type = source->bounds;
+
+               context_struct_compute_av(&lo_scontext,
+                                         tcontext,
+                                         tclass,
+                                         requested,
+                                         &lo_avd);
+               if ((lo_avd.allowed & avd->allowed) == avd->allowed)
+                       return;         /* no masked permission */
+               masked = ~lo_avd.allowed & avd->allowed;
+       }
+       /* NOTE(review): later stages overwrite masked, not OR into it - confirm */
+       if (target->bounds) {
+               memset(&lo_avd, 0, sizeof(lo_avd));
+               /* same query, but against the target's bounding type */
+               memcpy(&lo_tcontext, tcontext, sizeof(lo_tcontext));
+               lo_tcontext.type = target->bounds;
+
+               context_struct_compute_av(scontext,
+                                         &lo_tcontext,
+                                         tclass,
+                                         requested,
+                                         &lo_avd);
+               if ((lo_avd.allowed & avd->allowed) == avd->allowed)
+                       return;         /* no masked permission */
+               masked = ~lo_avd.allowed & avd->allowed;
+       }
+
+       if (source->bounds && target->bounds) {
+               memset(&lo_avd, 0, sizeof(lo_avd));
+               /*
+                * lo_scontext and lo_tcontext are already
+                * set up.
+                */
+
+               context_struct_compute_av(&lo_scontext,
+                                         &lo_tcontext,
+                                         tclass,
+                                         requested,
+                                         &lo_avd);
+               if ((lo_avd.allowed & avd->allowed) == avd->allowed)
+                       return;         /* no masked permission */
+               masked = ~lo_avd.allowed & avd->allowed;
+       }
+
+       if (masked) {
+               struct audit_buffer *ab;
+               char *stype_name
+                       = policydb.p_type_val_to_name[source->value - 1];
+               char *ttype_name
+                       = policydb.p_type_val_to_name[target->value - 1];
+               char *tclass_name
+                       = policydb.p_class_val_to_name[tclass - 1];
+
+               /* mask violated permissions */
+               avd->allowed &= ~masked;
+
+               /* notice to userspace via audit message */
+               ab = audit_log_start(current->audit_context,
+                                    GFP_ATOMIC, AUDIT_SELINUX_ERR);
+               if (!ab)
+                       return;
+
+               audit_log_format(ab, "av boundary violation: "
+                                "source=%s target=%s tclass=%s",
+                                stype_name, ttype_name, tclass_name);
+               avc_dump_av(ab, tclass, masked);
+               audit_log_end(ab);
+       }
+}
+
  /*
   * Compute access vectors based on a context structure pair for
   * the permissions in a particular class.
@@ -356,7 +455,7 @@ static int context_struct_compute_av(struct context *scontext,
                         avkey.source_type = i + 1;
                         avkey.target_type = j + 1;
                         for (node = avtab_search_node(&policydb.te_avtab, &avkey);
-                            node != NULL;
+                            node;
                              node = avtab_search_node_next(node, avkey.specified)) {
                                 if (node->key.specified == AVTAB_ALLOWED)
                                         avd->allowed |= node->datum.data;
@@ -404,6 +503,14 @@ static int context_struct_compute_av(struct context *scontext,
                                                         PROCESS__DYNTRANSITION);
         }
  
+       /*
+        * If the given source and target types have boundary
+        * constraint, lazy checks have to mask any violated
+        * permission and notice it to userspace via audit.
+        */
+       type_attribute_bounds_av(scontext, tcontext,
+                                tclass, requested, avd);
+
         return 0;
  
  inval_class:
@@ -549,6 +656,69 @@ out:
         return rc;
  }
  
+/*
+ * security_bounded_transition - check whether a transition from
+ * @old_sid to @new_sid stays within the type bounds of @old_sid.
+ * Returns 0 if both SIDs have the same type or @new_sid is bounded by
+ * @old_sid, -EPERM if it is not, -EINVAL if sidtab_search() finds no context.
+ *
+ * @old_sid : current security identifier
+ * @new_sid : destination security identifier
+ */
+int security_bounded_transition(u32 old_sid, u32 new_sid)
+{
+       struct context *old_context, *new_context;
+       struct type_datum *type;
+       int index;
+       int rc = -EINVAL;
+
+       read_lock(&policy_rwlock);
+
+       old_context = sidtab_search(&sidtab, old_sid);
+       if (!old_context) {
+               printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n",
+                      __func__, old_sid);
+               goto out;
+       }
+
+       new_context = sidtab_search(&sidtab, new_sid);
+       if (!new_context) {
+               printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n",
+                      __func__, new_sid);
+               goto out;
+       }
+
+       /* type/domain unchanged */
+       if (old_context->type == new_context->type) {
+               rc = 0;
+               goto out;
+       }
+       /* walk up the bounds chain starting from the new type */
+       index = new_context->type;
+       while (true) {
+               type = policydb.type_val_to_struct[index - 1];
+               BUG_ON(!type);
+
+               /* chain ended without reaching the old type: not bounded */
+               if (!type->bounds) {
+                       rc = -EPERM;
+                       break;
+               }
+
+               /* @new_sid is bounded by @old_sid */
+               if (type->bounds == old_context->type) {
+                       rc = 0;
+                       break;
+               }
+               index = type->bounds;
+       }
+out:
+       read_unlock(&policy_rwlock);
+
+       return rc;
+}
+
+
  /**
   * security_compute_av - Compute access vector decisions.
   * @ssid: source security identifier
@@ -794,7 +964,7 @@ static int string_to_context_struct(struct policydb *pol,
         *p++ = 0;
  
         typdatum = hashtab_search(pol->p_types.table, scontextp);
-       if (!typdatum)
+       if (!typdatum || typdatum->attribute)
                 goto out;
  
         ctx->type = typdatum->value;
@@ -1037,7 +1207,7 @@ static int security_compute_sid(u32 ssid,
         /* If no permanent rule, also check for enabled conditional rules */
         if (!avdatum) {
                 node = avtab_search_node(&policydb.te_cond_avtab, &avkey);
-               for (; node != NULL; node = avtab_search_node_next(node, specified)) {
+               for (; node; node = avtab_search_node_next(node, specified)) {
                         if (node->key.specified & AVTAB_ENABLED) {
                                 avdatum = &node->datum;
                                 break;
@@ -2050,7 +2220,7 @@ int security_set_bools(int len, int *values)
                         policydb.bool_val_to_struct[i]->state = 0;
         }
  
-       for (cur = policydb.cond_list; cur != NULL; cur = cur->next) {
+       for (cur = policydb.cond_list; cur; cur = cur->next) {
                 rc = evaluate_cond_node(&policydb, cur);
                 if (rc)
                         goto out;
@@ -2102,7 +2272,7 @@ static int security_preserve_bools(struct policydb *p)
                 if (booldatum)
                         booldatum->state = bvalues[i];
         }
-       for (cur = p->cond_list; cur != NULL; cur = cur->next) {
+       for (cur = p->cond_list; cur; cur = cur->next) {
                 rc = evaluate_cond_node(p, cur);
                 if (rc)
                         goto out;
diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c

index a81ded10412980ea9d4b186233210fec9b696ac2..e817989764cd48dd4061bc49c30c6cb8da357474 100644 (file)
--- a/security/selinux/ss/sidtab.c
+++ b/security/selinux/ss/sidtab.c
@@ -43,7 +43,7 @@ int sidtab_insert(struct sidtab *s, u32 sid, struct context *context)
         hvalue = SIDTAB_HASH(sid);
         prev = NULL;
         cur = s->htable[hvalue];
-       while (cur != NULL && sid > cur->sid) {
+       while (cur && sid > cur->sid) {
                 prev = cur;
                 cur = cur->next;
         }
@@ -92,7 +92,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force)
  
         hvalue = SIDTAB_HASH(sid);
         cur = s->htable[hvalue];
-       while (cur != NULL && sid > cur->sid)
+       while (cur && sid > cur->sid)
                 cur = cur->next;
  
         if (force && cur && sid == cur->sid && cur->context.len)
@@ -103,7 +103,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force)
                 sid = SECINITSID_UNLABELED;
                 hvalue = SIDTAB_HASH(sid);
                 cur = s->htable[hvalue];
-               while (cur != NULL && sid > cur->sid)
+               while (cur && sid > cur->sid)
                         cur = cur->next;
                 if (!cur || sid != cur->sid)
                         return NULL;
@@ -136,7 +136,7 @@ int sidtab_map(struct sidtab *s,
  
         for (i = 0; i < SIDTAB_SIZE; i++) {
                 cur = s->htable[i];
-               while (cur != NULL) {
+               while (cur) {
                         rc = apply(cur->sid, &cur->context, args);
                         if (rc)
                                 goto out;
@@ -155,7 +155,7 @@ static inline u32 sidtab_search_context(struct sidtab *s,
  
         for (i = 0; i < SIDTAB_SIZE; i++) {
                 cur = s->htable[i];
-               while (cur != NULL) {
+               while (cur) {
                         if (context_cmp(&cur->context, context))
                                 return cur->sid;
                         cur = cur->next;
@@ -242,7 +242,7 @@ void sidtab_destroy(struct sidtab *s)
  
         for (i = 0; i < SIDTAB_SIZE; i++) {
                 cur = s->htable[i];
-               while (cur != NULL) {
+               while (cur) {
                         temp = cur;
                         cur = cur->next;
                         context_destroy(&temp->context);
diff --git a/security/smack/smack.h b/security/smack/smack.h

index 4a4477f5afdcd3468074c72ea05aa5697dea4566..31dce559595ad48e732a306c8d759a5ce5adde80 100644 (file)
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -178,6 +178,7 @@ u32 smack_to_secid(const char *);
  extern int smack_cipso_direct;
  extern int smack_net_nltype;
  extern char *smack_net_ambient;
+extern char *smack_onlycap;
  
  extern struct smack_known *smack_known;
  extern struct smack_known smack_known_floor;
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c

index f6b5f6eed6dd00f6aab7f9873e26dbb700984063..79ff21ed4c3be11b3f10f974874549617d1141a5 100644 (file)
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -157,7 +157,7 @@ int smk_access(char *subject_label, char *object_label, int request)
   *
   * This function checks the current subject label/object label pair
   * in the access rule list and returns 0 if the access is permitted,
- * non zero otherwise. It allows that current my have the capability
+ * non zero otherwise. It allows that current may have the capability
   * to override the rules.
   */
  int smk_curacc(char *obj_label, u32 mode)
@@ -168,6 +168,14 @@ int smk_curacc(char *obj_label, u32 mode)
         if (rc == 0)
                 return 0;
  
+       /*
+        * Return if a specific label has been designated as the
+        * only one that gets privilege and current does not
+        * have that label.
+        */
+       if (smack_onlycap != NULL && smack_onlycap != current->security)
+               return rc;
+
         if (capable(CAP_MAC_OVERRIDE))
                 return 0;
  
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c

index 271a835fbbe3f72f4e5465cbf84b9b271fa7e77a..e7c642458ec9431b1639935880b6082f21492f6a 100644 (file)
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -39,6 +39,7 @@ enum smk_inos {
         SMK_DIRECT      = 6,    /* CIPSO level indicating direct label */
         SMK_AMBIENT     = 7,    /* internet ambient label */
         SMK_NLTYPE      = 8,    /* label scheme to use by default */
+       SMK_ONLYCAP     = 9,    /* the only "capable" label */
  };
  
  /*
@@ -68,6 +69,16 @@ int smack_net_nltype = NETLBL_NLTYPE_CIPSOV4;
   */
  int smack_cipso_direct = SMACK_CIPSO_DIRECT_DEFAULT;
  
+/*
+ * Unless a process is running with this label even
+ * having CAP_MAC_OVERRIDE isn't enough to grant
+ * privilege to violate MAC policy. If no label is
+ * designated (the NULL case) capabilities apply to
+ * everyone. It is expected that the hat (^) label
+ * will be used if any label is used.
+ */
+char *smack_onlycap;
+
  static int smk_cipso_doi_value = SMACK_CIPSO_DOI_DEFAULT;
  struct smk_list_entry *smack_list;
  
@@ -787,6 +798,85 @@ static const struct file_operations smk_ambient_ops = {
         .write          = smk_write_ambient,
  };
  
+/**
+ * smk_read_onlycap - read() for /smack/onlycap
+ * @filp: file pointer, not actually used
+ * @buf: user buffer the label is copied to
+ * @cn: size of @buf; must hold the label plus its terminating NUL
+ * @ppos: file offset; anything other than 0 reads as end of file
+ *
+ * Returns bytes read or an error code; 0 when *@ppos is non-zero
+ */
+static ssize_t smk_read_onlycap(struct file *filp, char __user *buf,
+                               size_t cn, loff_t *ppos)
+{
+       char *smack = "";       /* reported when no onlycap label is set */
+       ssize_t rc = -EINVAL;   /* returned when @cn is too small */
+       int asize;
+
+       if (*ppos != 0)
+               return 0;
+
+       if (smack_onlycap != NULL)
+               smack = smack_onlycap;
+
+       asize = strlen(smack) + 1;      /* label length including the NUL */
+
+       if (cn >= asize)
+               rc = simple_read_from_buffer(buf, cn, ppos, smack, asize);
+
+       return rc;
+}
+
+/**
+ * smk_write_onlycap - write() for /smack/onlycap
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent; must be less than SMK_LABELLEN
+ * @ppos: where to start, not actually used
+ *
+ * Returns @count on success, -EPERM, -EINVAL or -EFAULT on failure
+ */
+static ssize_t smk_write_onlycap(struct file *file, const char __user *buf,
+                                size_t count, loff_t *ppos)
+{
+       char in[SMK_LABELLEN];  /* not NUL-terminated; length goes to smk_import() */
+       char *sp = current->security;   /* label of the writing task */
+
+       if (!capable(CAP_MAC_ADMIN))
+               return -EPERM;
+
+       /*
+        * Once a label is designated, only a task whose own label
+        * (current->security) is that label may change it. This
+        * pointer comparison stands in for a smk_access() check.
+        */
+       if (smack_onlycap != NULL && smack_onlycap != sp)
+               return -EPERM;
+
+       if (count >= SMK_LABELLEN)
+               return -EINVAL;
+
+       if (copy_from_user(in, buf, count) != 0)
+               return -EFAULT;
+
+       /*
+        * If the null string is passed in, unset the onlycap value.
+        * This needs care: smk_import() normally returns NULL only
+        * for errors, and a null string or "\n" is usually bad input
+        * for it. Here the NULL it returns is exactly what is wanted,
+        * so the result is stored without an error check.
+        */
+       smack_onlycap = smk_import(in, count);
+
+       return count;
+}
+
+static const struct file_operations smk_onlycap_ops = {
+       .read           = smk_read_onlycap,     /* report the onlycap label */
+       .write          = smk_write_onlycap,    /* set or clear the onlycap label */
+};
+
  struct option_names {
         int     o_number;
         char    *o_name;
@@ -919,6 +1009,8 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent)
                         {"ambient", &smk_ambient_ops, S_IRUGO|S_IWUSR},
                 [SMK_NLTYPE]    =
                         {"nltype", &smk_nltype_ops, S_IRUGO|S_IWUSR},
+               [SMK_ONLYCAP]   =
+                       {"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR},
                 /* last one */ {""}
         };
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 11 Oct 2008 15:50:01 +0000 (08:50 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 11 Oct 2008 15:50:01 +0000 (08:50 -0700)
Documentation/DocBook/kernel-api.tmpl		patch \| blob \| history
Documentation/RCU/checklist.txt		patch \| blob \| history
Documentation/RCU/rcuref.txt		patch \| blob \| history
Documentation/RCU/whatisRCU.txt		patch \| blob \| history
Documentation/SELinux.txt	[new file with mode: 0644]	patch \| blob
Documentation/kernel-doc-nano-HOWTO.txt		patch \| blob \| history
Documentation/scheduler/sched-design-CFS.txt		patch \| blob \| history
MAINTAINERS		patch \| blob \| history
arch/alpha/kernel/smp.c		patch \| blob \| history
arch/arm/kernel/smp.c		patch \| blob \| history
arch/cris/arch-v32/kernel/smp.c		patch \| blob \| history
arch/ia64/kernel/smpboot.c		patch \| blob \| history
arch/m32r/kernel/smpboot.c		patch \| blob \| history
arch/mips/kernel/smp.c		patch \| blob \| history
arch/powerpc/kernel/smp.c		patch \| blob \| history
arch/s390/kernel/smp.c		patch \| blob \| history
arch/sh/kernel/smp.c		patch \| blob \| history
arch/sparc/kernel/sun4d_smp.c		patch \| blob \| history
arch/sparc/kernel/sun4m_smp.c		patch \| blob \| history
arch/um/kernel/smp.c		patch \| blob \| history
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c		patch \| blob \| history
arch/x86/kernel/cpu/cpufreq/elanfreq.c		patch \| blob \| history
arch/x86/kernel/cpu/cpufreq/powernow-k6.c		patch \| blob \| history
arch/x86/kernel/smpboot.c		patch \| blob \| history
arch/x86/mach-voyager/voyager_smp.c		patch \| blob \| history
drivers/char/tpm/Kconfig		patch \| blob \| history
drivers/cpufreq/cpufreq.c		patch \| blob \| history
drivers/cpufreq/cpufreq_conservative.c		patch \| blob \| history
drivers/cpufreq/cpufreq_ondemand.c		patch \| blob \| history
drivers/cpufreq/cpufreq_performance.c		patch \| blob \| history
drivers/cpufreq/cpufreq_powersave.c		patch \| blob \| history
drivers/cpufreq/cpufreq_userspace.c		patch \| blob \| history
include/linux/compiler.h		patch \| blob \| history
include/linux/completion.h		patch \| blob \| history
include/linux/cpu.h		patch \| blob \| history
include/linux/cpufreq.h		patch \| blob \| history
include/linux/notifier.h		patch \| blob \| history
include/linux/proportions.h		patch \| blob \| history
include/linux/rcuclassic.h		patch \| blob \| history
include/linux/rculist.h		patch \| blob \| history
include/linux/rcupdate.h		patch \| blob \| history
include/linux/rcupreempt.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
include/linux/security.h		patch \| blob \| history
include/linux/tick.h		patch \| blob \| history
kernel/cpu.c		patch \| blob \| history
kernel/cpuset.c		patch \| blob \| history
kernel/rcuclassic.c		patch \| blob \| history
kernel/rcupreempt.c		patch \| blob \| history
kernel/rcupreempt_trace.c		patch \| blob \| history
kernel/sched.c		patch \| blob \| history
kernel/sched_fair.c		patch \| blob \| history
kernel/sched_features.h		patch \| blob \| history
kernel/sched_idletask.c		patch \| blob \| history
kernel/sched_rt.c		patch \| blob \| history
kernel/time/tick-sched.c		patch \| blob \| history
kernel/user.c		patch \| blob \| history
lib/Kconfig.debug		patch \| blob \| history
scripts/Makefile		patch \| blob \| history
scripts/selinux/Makefile	[new file with mode: 0644]	patch \| blob
scripts/selinux/README	[new file with mode: 0644]	patch \| blob
scripts/selinux/install_policy.sh	[new file with mode: 0644]	patch \| blob
scripts/selinux/mdp/.gitignore	[new file with mode: 0644]	patch \| blob
scripts/selinux/mdp/Makefile	[new file with mode: 0644]	patch \| blob
scripts/selinux/mdp/dbus_contexts	[new file with mode: 0644]	patch \| blob
scripts/selinux/mdp/mdp.c	[new file with mode: 0644]	patch \| blob
security/Kconfig		patch \| blob \| history
security/Makefile		patch \| blob \| history
security/commoncap.c		patch \| blob \| history
security/inode.c		patch \| blob \| history
security/security.c		patch \| blob \| history
security/selinux/Kconfig		patch \| blob \| history
security/selinux/avc.c		patch \| blob \| history
security/selinux/hooks.c		patch \| blob \| history
security/selinux/include/avc.h		patch \| blob \| history
security/selinux/include/security.h		patch \| blob \| history
security/selinux/ss/avtab.c		patch \| blob \| history
security/selinux/ss/conditional.c		patch \| blob \| history
security/selinux/ss/conditional.h		patch \| blob \| history
security/selinux/ss/ebitmap.c		patch \| blob \| history
security/selinux/ss/hashtab.c		patch \| blob \| history
security/selinux/ss/mls.c		patch \| blob \| history
security/selinux/ss/policydb.c		patch \| blob \| history
security/selinux/ss/policydb.h		patch \| blob \| history
security/selinux/ss/services.c		patch \| blob \| history
security/selinux/ss/sidtab.c		patch \| blob \| history
security/smack/smack.h		patch \| blob \| history
security/smack/smack_access.c		patch \| blob \| history
security/smack/smackfs.c		patch \| blob \| history