Merge branch 'linus' into core/rcu
author     Ingo Molnar <mingo@elte.hu>
           Fri, 11 Jul 2008 08:46:50 +0000 (10:46 +0200)
committer  Ingo Molnar <mingo@elte.hu>
           Fri, 11 Jul 2008 08:46:50 +0000 (10:46 +0200)
Conflicts:

include/linux/rculist.h
kernel/rcupreempt.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/linux/dcache.h
kernel/rcuclassic.c
kernel/rcupreempt.c

diff --combined include/linux/dcache.h
index 1f5cebf10a23b50a7a6a1f978d72649366f5a090,d982eb89c77d324a7875c1486ac421d7ecdfdae7..98202c672fdebf25e3fc9dd3c7362fc3e9d8e5b8
@@@ -3,7 -3,6 +3,7 @@@
  
  #include <asm/atomic.h>
  #include <linux/list.h>
 +#include <linux/rculist.h>
  #include <linux/spinlock.h>
  #include <linux/cache.h>
  #include <linux/rcupdate.h>
@@@ -301,7 -300,7 +301,7 @@@ extern int d_validate(struct dentry *, 
  extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
  
  extern char *__d_path(const struct path *path, struct path *root, char *, int);
- extern char *d_path(struct path *, char *, int);
+ extern char *d_path(const struct path *, char *, int);
  extern char *dentry_path(struct dentry *, char *, int);
  
  /* Allocation counts.. */
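
For illustration of the const-qualified d_path() prototype above: read-only callers can now pass a const struct path directly. A hypothetical sketch (example_format_path and its parameters are invented; d_path() returns a pointer into the supplied buffer, or an ERR_PTR() value on failure):

	/* Illustrative only; assumes <linux/dcache.h> and <linux/err.h>. */
	static char *example_format_path(const struct path *path, char *buf, int buflen)
	{
		char *name = d_path(path, buf, buflen);	/* pointer into buf, or ERR_PTR() */

		return IS_ERR(name) ? NULL : name;
	}
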
diff --combined kernel/rcuclassic.c
index d8348792f9f59c915bc7df71951229b248b8dd5b,a38895a5b8e2eeb1da28a829bebfe4a963c97561..214e1cde98129b7fca9e23754c5258a190b91eec
@@@ -89,8 -89,22 +89,22 @@@ static void force_quiescent_state(struc
                /*
                 * Don't send IPI to itself. With irqs disabled,
                 * rdp->cpu is the current cpu.
+                *
+                * cpu_online_map is updated by _cpu_down()
+                * using stop_machine_run(). Since we are in an irqs-disabled
+                * section, stop_machine_run() is not executing, hence
+                * cpu_online_map is stable.
+                *
+                * However, a cpu might have been offlined _just_ before
+                * we disabled irqs while entering here.
+                * And the RCU subsystem might not yet have handled the
+                * CPU_DEAD notification, leading to the offlined cpu's bit
+                * being set in rcp->cpumask.
+                *
+                * Hence cpumask = (rcp->cpumask & cpu_online_map) to prevent
+                * sending smp_send_reschedule() to an offlined CPU.
                 */
-               cpumask = rcp->cpumask;
+               cpus_and(cpumask, rcp->cpumask, cpu_online_map);
                cpu_clear(rdp->cpu, cpumask);
                for_each_cpu_mask(cpu, cpumask)
                        smp_send_reschedule(cpu);
@@@ -502,38 -516,10 +516,38 @@@ void rcu_check_callbacks(int cpu, int u
        if (user ||
            (idle_cpu(cpu) && !in_softirq() &&
                                hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
 +
 +              /*
 +               * Get here if this CPU took its interrupt from user
 +               * mode or from the idle loop, and if this is not a
 +               * nested interrupt.  In this case, the CPU is in
 +               * a quiescent state, so count it.
 +               *
 +               * Also do a memory barrier.  This is needed to handle
 +               * the case where writes from a preempt-disable section
 +               * of code get reordered into schedule() by this CPU's
 +               * write buffer.  The memory barrier makes sure that
 +               * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are seen
 +               * by other CPUs to happen after any such write.
 +               */
 +
 +              smp_mb();  /* See above block comment. */
                rcu_qsctr_inc(cpu);
                rcu_bh_qsctr_inc(cpu);
 -      } else if (!in_softirq())
 +
 +      } else if (!in_softirq()) {
 +
 +              /*
 +               * Get here if this CPU did not take its interrupt from
 +               * softirq, in other words, if it is not interrupting
 +               * a rcu_bh read-side critical section.  This is an _bh
 +               * critical section, so count it.  The memory barrier
 +               * is needed for the same reason as is the above one.
 +               */
 +
 +              smp_mb();  /* See above block comment. */
                rcu_bh_qsctr_inc(cpu);
 +      }
        raise_rcu_softirq();
  }
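
The block comments added above describe when a CPU can be counted as having passed through a quiescent state. Restated as a single predicate (the helper name below is invented, purely illustrative):

	static inline int example_cpu_in_quiescent_state(int cpu, int user)
	{
		return user ||				/* interrupt taken from user mode, or ...  */
		       (idle_cpu(cpu) &&		/* ... taken from the idle loop, ...       */
			!in_softirq() &&		/* ... not while servicing softirqs, ...   */
			hardirq_count() <= (1 << HARDIRQ_SHIFT));	/* ... and not nested. */
	}
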
  
diff --combined kernel/rcupreempt.c
index 396b121edfe5393ae7fa7cbb59fa38405edbc515,41d275a81df512229c424c317ed78d246a01ae3a..536ce83c55fe08915e59d69894c080a751894af9
  #include <asm/atomic.h>
  #include <linux/bitops.h>
  #include <linux/module.h>
 +#include <linux/kthread.h>
  #include <linux/completion.h>
  #include <linux/moduleparam.h>
  #include <linux/percpu.h>
  #include <linux/notifier.h>
 -#include <linux/rcupdate.h>
  #include <linux/cpu.h>
  #include <linux/random.h>
  #include <linux/delay.h>
@@@ -82,18 -82,14 +82,18 @@@ struct rcu_data 
        spinlock_t      lock;           /* Protect rcu_data fields. */
        long            completed;      /* Number of last completed batch. */
        int             waitlistcount;
 -      struct tasklet_struct rcu_tasklet;
        struct rcu_head *nextlist;
        struct rcu_head **nexttail;
        struct rcu_head *waitlist[GP_STAGES];
        struct rcu_head **waittail[GP_STAGES];
 -      struct rcu_head *donelist;
 +      struct rcu_head *donelist;      /* from waitlist & waitschedlist */
        struct rcu_head **donetail;
        long rcu_flipctr[2];
 +      struct rcu_head *nextschedlist;
 +      struct rcu_head **nextschedtail;
 +      struct rcu_head *waitschedlist;
 +      struct rcu_head **waitschedtail;
 +      int rcu_sched_sleeping;
  #ifdef CONFIG_RCU_TRACE
        struct rcupreempt_trace trace;
  #endif /* #ifdef CONFIG_RCU_TRACE */
@@@ -135,24 -131,11 +135,24 @@@ enum rcu_try_flip_states 
        rcu_try_flip_waitmb_state,
  };
  
 +/*
 + * States for rcu_ctrlblk.sched_sleep.
 + */
 +
 +enum rcu_sched_sleep_states {
 +      rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP.  */
 +      rcu_sched_sleep_prep,   /* Thinking of sleeping, rechecking. */
 +      rcu_sched_sleeping,     /* Sleeping, awaken if GP needed. */
 +};
 +
  struct rcu_ctrlblk {
        spinlock_t      fliplock;       /* Protect state-machine transitions. */
        long            completed;      /* Number of last completed batch. */
        enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
                                                        the rcu state machine */
 +      spinlock_t      schedlock;      /* Protect rcu_sched sleep state. */
 +      enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
 +      wait_queue_head_t sched_wq;     /* Place for rcu_sched to sleep. */
  };
  
  static DEFINE_PER_CPU(struct rcu_data, rcu_data);
@@@ -160,12 -143,8 +160,12 @@@ static struct rcu_ctrlblk rcu_ctrlblk 
        .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
        .completed = 0,
        .rcu_try_flip_state = rcu_try_flip_idle_state,
 +      .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
 +      .sched_sleep = rcu_sched_not_sleeping,
 +      .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
  };
  
 +static struct task_struct *rcu_sched_grace_period_task;
  
  #ifdef CONFIG_RCU_TRACE
  static char *rcu_try_flip_state_names[] =
@@@ -228,8 -207,6 +228,8 @@@ static DEFINE_PER_CPU_SHARED_ALIGNED(en
   */
  #define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
  
 +#define RCU_SCHED_BATCH_TIME (HZ / 50)
 +
  /*
   * Return the number of RCU batches processed thus far.  Useful
   * for debug and statistics.
@@@ -434,34 -411,32 +434,34 @@@ static void __rcu_advance_callbacks(str
        }
  }
  
 -#ifdef CONFIG_NO_HZ
 +DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
 +      .dynticks = 1,
 +};
  
 -DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
 -static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
 +#ifdef CONFIG_NO_HZ
  static DEFINE_PER_CPU(int, rcu_update_flag);
  
  /**
   * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
   *
   * If the CPU was idle with dynamic ticks active, this updates the
 - * dynticks_progress_counter to let the RCU handling know that the
 + * rcu_dyntick_sched.dynticks to let the RCU handling know that the
   * CPU is active.
   */
  void rcu_irq_enter(void)
  {
        int cpu = smp_processor_id();
 +      struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
  
        if (per_cpu(rcu_update_flag, cpu))
                per_cpu(rcu_update_flag, cpu)++;
  
        /*
         * Only update if we are coming from a stopped ticks mode
 -       * (dynticks_progress_counter is even).
 +       * (rcu_dyntick_sched.dynticks is even).
         */
        if (!in_interrupt() &&
 -          (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
 +          (rdssp->dynticks & 0x1) == 0) {
                /*
                 * The following might seem like we could have a race
                 * with NMI/SMIs. But this really isn't a problem.
                 * RCU read-side critical sections on this CPU would
                 * have already completed.
                 */
 -              per_cpu(dynticks_progress_counter, cpu)++;
 +              rdssp->dynticks++;
                /*
                 * The following memory barrier ensures that any
                 * rcu_read_lock() primitives in the irq handler
                 * are seen by other CPUs to follow the above
 -               * increment to dynticks_progress_counter. This is
 +               * increment to rcu_dyntick_sched.dynticks. This is
                 * required in order for other CPUs to correctly
                 * determine when it is safe to advance the RCU
                 * grace-period state machine.
                smp_mb(); /* see above block comment. */
                /*
                 * Since we can't determine the dynamic tick mode from
 -               * the dynticks_progress_counter after this routine,
 +               * the rcu_dyntick_sched.dynticks after this routine,
                 * we use a second flag to acknowledge that we came
                 * from an idle state with ticks stopped.
                 */
                /*
                 * If we take an NMI/SMI now, they will also increment
                 * the rcu_update_flag, and will not update the
 -               * dynticks_progress_counter on exit. That is for
 +               * rcu_dyntick_sched.dynticks on exit. That is for
                 * this IRQ to do.
                 */
        }
   * rcu_irq_exit - Called from exiting Hard irq context.
   *
   * If the CPU was idle with dynamic ticks active, update the
 - * dynticks_progress_counter to put let the RCU handling be
 + * rcu_dyntick_sched.dynticks to let the RCU handling be
   * aware that the CPU is going back to idle with no ticks.
   */
  void rcu_irq_exit(void)
  {
        int cpu = smp_processor_id();
 +      struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
  
        /*
         * rcu_update_flag is set if we interrupted the CPU
         * Once this occurs, we keep track of interrupt nesting
         * because a NMI/SMI could also come in, and we still
         * only want the IRQ that started the increment of the
 -       * dynticks_progress_counter to be the one that modifies
 +       * rcu_dyntick_sched.dynticks to be the one that modifies
         * it on exit.
         */
        if (per_cpu(rcu_update_flag, cpu)) {
  
                /*
                 * If an NMI/SMI happens now we are still
 -               * protected by the dynticks_progress_counter being odd.
 +               * protected by the rcu_dyntick_sched.dynticks being odd.
                 */
  
                /*
                 * The following memory barrier ensures that any
                 * rcu_read_unlock() primitives in the irq handler
                 * are seen by other CPUs to precede the following
 -               * increment to dynticks_progress_counter. This
 +               * increment to rcu_dyntick_sched.dynticks. This
                 * is required in order for other CPUs to determine
                 * when it is safe to advance the RCU grace-period
                 * state machine.
                 */
                smp_mb(); /* see above block comment. */
 -              per_cpu(dynticks_progress_counter, cpu)++;
 -              WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
 +              rdssp->dynticks++;
 +              WARN_ON(rdssp->dynticks & 0x1);
        }
  }
  
  static void dyntick_save_progress_counter(int cpu)
  {
 -      per_cpu(rcu_dyntick_snapshot, cpu) =
 -              per_cpu(dynticks_progress_counter, cpu);
 +      struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
 +
 +      rdssp->dynticks_snap = rdssp->dynticks;
  }
  
  static inline int
@@@ -571,10 -544,9 +571,10 @@@ rcu_try_flip_waitack_needed(int cpu
  {
        long curr;
        long snap;
 +      struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
  
 -      curr = per_cpu(dynticks_progress_counter, cpu);
 -      snap = per_cpu(rcu_dyntick_snapshot, cpu);
 +      curr = rdssp->dynticks;
 +      snap = rdssp->dynticks_snap;
        smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
  
        /*
         * that this CPU already acknowledged the counter.
         */
  
 -      if ((curr - snap) > 2 || (snap & 0x1) == 0)
 +      if ((curr - snap) > 2 || (curr & 0x1) == 0)
                return 0;
  
        /* We need this CPU to explicitly acknowledge the counter flip. */
@@@ -608,10 -580,9 +608,10 @@@ rcu_try_flip_waitmb_needed(int cpu
  {
        long curr;
        long snap;
 +      struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
  
 -      curr = per_cpu(dynticks_progress_counter, cpu);
 -      snap = per_cpu(rcu_dyntick_snapshot, cpu);
 +      curr = rdssp->dynticks;
 +      snap = rdssp->dynticks_snap;
        smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
  
        /*
        return 1;
  }
  
 +static void dyntick_save_progress_counter_sched(int cpu)
 +{
 +      struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
 +
 +      rdssp->sched_dynticks_snap = rdssp->dynticks;
 +}
 +
 +static int rcu_qsctr_inc_needed_dyntick(int cpu)
 +{
 +      long curr;
 +      long snap;
 +      struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
 +
 +      curr = rdssp->dynticks;
 +      snap = rdssp->sched_dynticks_snap;
 +      smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
 +
 +      /*
 +       * If the CPU remained in dynticks mode for the entire time
 +       * and didn't take any interrupts, NMIs, SMIs, or whatever,
 +       * then it cannot be in the middle of an rcu_read_lock(), so
 +       * the next rcu_read_lock() it executes must use the new value
 +       * of the counter.  Therefore, this CPU has been in a quiescent
 +       * state the entire time, and we don't need to wait for it.
 +       */
 +
 +      if ((curr == snap) && ((curr & 0x1) == 0))
 +              return 0;
 +
 +      /*
 +       * If the CPU passed through or entered a dynticks idle phase with
 +       * no active irq handlers, then, as above, this CPU has already
 +       * passed through a quiescent state.
 +       */
 +
 +      if ((curr - snap) > 2 || (snap & 0x1) == 0)
 +              return 0;
 +
 +      /* We need this CPU to go through a quiescent state. */
 +
 +      return 1;
 +}
 +
  #else /* !CONFIG_NO_HZ */
  
 -# define dyntick_save_progress_counter(cpu)   do { } while (0)
 -# define rcu_try_flip_waitack_needed(cpu)     (1)
 -# define rcu_try_flip_waitmb_needed(cpu)      (1)
 +# define dyntick_save_progress_counter(cpu)           do { } while (0)
 +# define rcu_try_flip_waitack_needed(cpu)             (1)
 +# define rcu_try_flip_waitmb_needed(cpu)              (1)
 +
 +# define dyntick_save_progress_counter_sched(cpu)     do { } while (0)
 +# define rcu_qsctr_inc_needed_dyntick(cpu)            (1)
  
  #endif /* CONFIG_NO_HZ */
  
 +static void save_qsctr_sched(int cpu)
 +{
 +      struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
 +
 +      rdssp->sched_qs_snap = rdssp->sched_qs;
 +}
 +
 +static inline int rcu_qsctr_inc_needed(int cpu)
 +{
 +      struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
 +
 +      /*
 +       * If there has been a quiescent state, no more need to wait
 +       * on this CPU.
 +       */
 +
 +      if (rdssp->sched_qs != rdssp->sched_qs_snap) {
 +              smp_mb(); /* force ordering with cpu entering schedule(). */
 +              return 0;
 +      }
 +
 +      /* We need this CPU to go through a quiescent state. */
 +
 +      return 1;
 +}
 +
  /*
   * Get here when RCU is idle.  Decide whether we need to
   * move out of idle state, and return non-zero if so.
@@@ -920,26 -819,6 +920,26 @@@ void rcu_check_callbacks(int cpu, int u
        unsigned long flags;
        struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  
 +      /*
 +       * If this CPU took its interrupt from user mode or from the
 +       * idle loop, and this is not a nested interrupt, then
 +       * this CPU has to have exited all prior preempt-disable
 +       * sections of code.  So increment the counter to note this.
 +       *
 +       * The memory barrier is needed to handle the case where
 +       * writes from a preempt-disable section of code get reordered
 +       * into schedule() by this CPU's write buffer.  So the memory
 +       * barrier makes sure that the rcu_qsctr_inc() is seen by other
 +       * CPUs to happen after any such write.
 +       */
 +
 +      if (user ||
 +          (idle_cpu(cpu) && !in_softirq() &&
 +           hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
 +              smp_mb();       /* Guard against aggressive schedule(). */
 +              rcu_qsctr_inc(cpu);
 +      }
 +
        rcu_check_mb(cpu);
        if (rcu_ctrlblk.completed == rdp->completed)
                rcu_try_flip();
@@@ -990,8 -869,6 +990,8 @@@ void rcu_offline_cpu(int cpu
        struct rcu_head *list = NULL;
        unsigned long flags;
        struct rcu_data *rdp = RCU_DATA_CPU(cpu);
 +      struct rcu_head *schedlist = NULL;
 +      struct rcu_head **schedtail = &schedlist;
        struct rcu_head **tail = &list;
  
        /*
                rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
                                                list, tail);
        rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
 +      rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
 +                              schedlist, schedtail);
 +      rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
 +                              schedlist, schedtail);
 +      rdp->rcu_sched_sleeping = 0;
        spin_unlock_irqrestore(&rdp->lock, flags);
        rdp->waitlistcount = 0;
  
         * fix.
         */
  
 -      local_irq_save(flags);
 +      local_irq_save(flags);  /* disable preempt till we know what lock. */
        rdp = RCU_DATA_ME();
        spin_lock(&rdp->lock);
        *rdp->nexttail = list;
        if (list)
                rdp->nexttail = tail;
 +      *rdp->nextschedtail = schedlist;
 +      if (schedlist)
 +              rdp->nextschedtail = schedtail;
        spin_unlock_irqrestore(&rdp->lock, flags);
  }
  
- void __devinit rcu_online_cpu(int cpu)
+ #else /* #ifdef CONFIG_HOTPLUG_CPU */
+ void rcu_offline_cpu(int cpu)
+ {
+ }
+ #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
+ void __cpuinit rcu_online_cpu(int cpu)
  {
        unsigned long flags;
 +      struct rcu_data *rdp;
  
        spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
        cpu_set(cpu, rcu_cpu_online_map);
        spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
 +
 +      /*
 +       * The rcu_sched grace-period processing might have bypassed
 +       * this CPU, given that it was not in the rcu_cpu_online_map
 +       * when the grace-period scan started.  This means that the
 +       * grace-period task might sleep.  So make sure that if this
 +       * should happen, the first callback posted to this CPU will
 +       * wake up the grace-period task if need be.
 +       */
 +
 +      rdp = RCU_DATA_CPU(cpu);
 +      spin_lock_irqsave(&rdp->lock, flags);
 +      rdp->rcu_sched_sleeping = 1;
 +      spin_unlock_irqrestore(&rdp->lock, flags);
  }
  
- #else /* #ifdef CONFIG_HOTPLUG_CPU */
- void rcu_offline_cpu(int cpu)
- {
- }
- void __devinit rcu_online_cpu(int cpu)
- {
- }
- #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
  static void rcu_process_callbacks(struct softirq_action *unused)
  {
        unsigned long flags;
@@@ -1132,196 -982,31 +1128,196 @@@ void call_rcu(struct rcu_head *head, vo
        *rdp->nexttail = head;
        rdp->nexttail = &head->next;
        RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
 -      spin_unlock(&rdp->lock);
 -      local_irq_restore(flags);
 +      spin_unlock_irqrestore(&rdp->lock, flags);
  }
  EXPORT_SYMBOL_GPL(call_rcu);
  
 +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 +{
 +      unsigned long flags;
 +      struct rcu_data *rdp;
 +      int wake_gp = 0;
 +
 +      head->func = func;
 +      head->next = NULL;
 +      local_irq_save(flags);
 +      rdp = RCU_DATA_ME();
 +      spin_lock(&rdp->lock);
 +      *rdp->nextschedtail = head;
 +      rdp->nextschedtail = &head->next;
 +      if (rdp->rcu_sched_sleeping) {
 +
 +              /* Grace-period processing might be sleeping... */
 +
 +              rdp->rcu_sched_sleeping = 0;
 +              wake_gp = 1;
 +      }
 +      spin_unlock_irqrestore(&rdp->lock, flags);
 +      if (wake_gp) {
 +
 +              /* Wake up grace-period processing, unless someone beat us. */
 +
 +              spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
 +              if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
 +                      wake_gp = 0;
 +              rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
 +              spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
 +              if (wake_gp)
 +                      wake_up_interruptible(&rcu_ctrlblk.sched_wq);
 +      }
 +}
 +EXPORT_SYMBOL_GPL(call_rcu_sched);
 +
  /*
   * Wait until all currently running preempt_disable() code segments
   * (including hardware-irq-disable segments) complete.  Note that
   * in -rt this does -not- necessarily result in all currently executing
   * interrupt -handlers- having completed.
   */
 -void __synchronize_sched(void)
 +synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
 +EXPORT_SYMBOL_GPL(__synchronize_sched);
 +
 +/*
 + * kthread function that manages call_rcu_sched grace periods.
 + */
 +static int rcu_sched_grace_period(void *arg)
  {
 -      cpumask_t oldmask;
 +      int couldsleep;         /* might sleep after current pass. */
 +      int couldsleepnext = 0; /* might sleep after next pass. */
        int cpu;
 +      unsigned long flags;
 +      struct rcu_data *rdp;
 +      int ret;
  
 -      if (sched_getaffinity(0, &oldmask) < 0)
 -              oldmask = cpu_possible_map;
 -      for_each_online_cpu(cpu) {
 -              sched_setaffinity(0, &cpumask_of_cpu(cpu));
 -              schedule();
 -      }
 -      sched_setaffinity(0, &oldmask);
 +      /*
 +       * Each pass through the following loop handles one
 +       * rcu_sched grace period cycle.
 +       */
 +      do {
 +              /* Save each CPU's current state. */
 +
 +              for_each_online_cpu(cpu) {
 +                      dyntick_save_progress_counter_sched(cpu);
 +                      save_qsctr_sched(cpu);
 +              }
 +
 +              /*
 +               * Sleep for about an RCU grace-period's worth to
 +               * allow better batching and to consume less CPU.
 +               */
 +              schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
 +
 +              /*
 +               * If there was nothing to do last time, prepare to
 +               * sleep at the end of the current grace period cycle.
 +               */
 +              couldsleep = couldsleepnext;
 +              couldsleepnext = 1;
 +              if (couldsleep) {
 +                      spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
 +                      rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
 +                      spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
 +              }
 +
 +              /*
 +               * Wait on each CPU in turn to have either visited
 +               * a quiescent state or been in dynticks-idle mode.
 +               */
 +              for_each_online_cpu(cpu) {
 +                      while (rcu_qsctr_inc_needed(cpu) &&
 +                             rcu_qsctr_inc_needed_dyntick(cpu)) {
 +                              /* resched_cpu(cpu); @@@ */
 +                              schedule_timeout_interruptible(1);
 +                      }
 +              }
 +
 +              /* Advance callbacks for each CPU.  */
 +
 +              for_each_online_cpu(cpu) {
 +
 +                      rdp = RCU_DATA_CPU(cpu);
 +                      spin_lock_irqsave(&rdp->lock, flags);
 +
 +                      /*
 +                       * We are running on this CPU irq-disabled, so no
 +                       * CPU can go offline until we re-enable irqs.
 +                       * The current CPU might have already gone
 +                       * offline (between the for_each_online_cpu and
 +                       * the spin_lock_irqsave), but in that case all its
 +                       * callback lists will be empty, so no harm done.
 +                       *
 +                       * Advance the callbacks!  We share normal RCU's
 +                       * donelist, since callbacks are invoked the
 +                       * same way in either case.
 +                       */
 +                      if (rdp->waitschedlist != NULL) {
 +                              *rdp->donetail = rdp->waitschedlist;
 +                              rdp->donetail = rdp->waitschedtail;
 +
 +                              /*
 +                               * Next rcu_check_callbacks() will
 +                               * do the required raise_softirq().
 +                               */
 +                      }
 +                      if (rdp->nextschedlist != NULL) {
 +                              rdp->waitschedlist = rdp->nextschedlist;
 +                              rdp->waitschedtail = rdp->nextschedtail;
 +                              couldsleep = 0;
 +                              couldsleepnext = 0;
 +                      } else {
 +                              rdp->waitschedlist = NULL;
 +                              rdp->waitschedtail = &rdp->waitschedlist;
 +                      }
 +                      rdp->nextschedlist = NULL;
 +                      rdp->nextschedtail = &rdp->nextschedlist;
 +
 +                      /* Mark sleep intention. */
 +
 +                      rdp->rcu_sched_sleeping = couldsleep;
 +
 +                      spin_unlock_irqrestore(&rdp->lock, flags);
 +              }
 +
 +              /* If we saw callbacks on the last scan, go deal with them. */
 +
 +              if (!couldsleep)
 +                      continue;
 +
 +              /* Attempt to block... */
 +
 +              spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
 +              if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
 +
 +                      /*
 +                       * Someone posted a callback after we scanned.
 +                       * Go take care of it.
 +                       */
 +                      spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
 +                      couldsleepnext = 0;
 +                      continue;
 +              }
 +
 +              /* Block until the next person posts a callback. */
 +
 +              rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
 +              spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
 +              ret = 0;
 +              __wait_event_interruptible(rcu_ctrlblk.sched_wq,
 +                      rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
 +                      ret);
 +
 +              /*
 +               * Signals would prevent us from sleeping, and we cannot
 +               * do much with them in any case.  So flush them.
 +               */
 +              if (ret)
 +                      flush_signals(current);
 +              couldsleepnext = 0;
 +
 +      } while (!kthread_should_stop());
 +
 +      return (0);
  }
 -EXPORT_SYMBOL_GPL(__synchronize_sched);
  
  /*
   * Check to see if any future RCU-related work will need to be done
@@@ -1338,9 -1023,7 +1334,9 @@@ int rcu_needs_cpu(int cpu
  
        return (rdp->donelist != NULL ||
                !!rdp->waitlistcount ||
 -              rdp->nextlist != NULL);
 +              rdp->nextlist != NULL ||
 +              rdp->nextschedlist != NULL ||
 +              rdp->waitschedlist != NULL);
  }
  
  int rcu_pending(int cpu)
  
        if (rdp->donelist != NULL ||
            !!rdp->waitlistcount ||
 -          rdp->nextlist != NULL)
 +          rdp->nextlist != NULL ||
 +          rdp->nextschedlist != NULL ||
 +          rdp->waitschedlist != NULL)
                return 1;
  
        /* The RCU core needs an acknowledgement from this CPU. */
@@@ -1420,11 -1101,6 +1416,11 @@@ void __init __rcu_init(void
                rdp->donetail = &rdp->donelist;
                rdp->rcu_flipctr[0] = 0;
                rdp->rcu_flipctr[1] = 0;
 +              rdp->nextschedlist = NULL;
 +              rdp->nextschedtail = &rdp->nextschedlist;
 +              rdp->waitschedlist = NULL;
 +              rdp->waitschedtail = &rdp->waitschedlist;
 +              rdp->rcu_sched_sleeping = 0;
        }
        register_cpu_notifier(&rcu_nb);
  
  }
  
  /*
 - * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
 + * Late-boot-time RCU initialization that must wait until after the
 + * scheduler has been initialized.
   */
 -void synchronize_kernel(void)
 +void __init rcu_init_sched(void)
  {
 -      synchronize_rcu();
 +      rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
 +                                                NULL,
 +                                                "rcu_sched_grace_period");
 +      WARN_ON(IS_ERR(rcu_sched_grace_period_task));
  }
  
  #ifdef CONFIG_RCU_TRACE
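
For context on how the call_rcu_sched() interface added by this merge is used: a caller queues a callback that runs once every CPU has passed through a quiescent state, i.e. after all preempt-disable/irq-disable regions in flight have completed. A hypothetical sketch (example_obj and its helpers are invented names; kfree() is from <linux/slab.h>):

	struct example_obj {
		int data;
		struct rcu_head rcu;
	};

	static void example_obj_free_rcu(struct rcu_head *head)
	{
		struct example_obj *obj = container_of(head, struct example_obj, rcu);

		kfree(obj);	/* all prior preempt/irq-disabled readers have completed */
	}

	static void example_obj_release(struct example_obj *obj)
	{
		/* Free obj after an rcu_sched grace period elapses. */
		call_rcu_sched(&obj->rcu, example_obj_free_rcu);
	}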