Merge commit 'v2.6.29-rc1' into timers/hrtimers

author Ingo Molnar <mingo@elte.hu>

Mon, 12 Jan 2009 10:32:03 +0000 (11:32 +0100)

committer Ingo Molnar <mingo@elte.hu>

Mon, 12 Jan 2009 10:32:03 +0000 (11:32 +0100)
author Ingo Molnar <mingo@elte.hu>
Mon, 12 Jan 2009 10:32:03 +0000 (11:32 +0100)
committer Ingo Molnar <mingo@elte.hu>
Mon, 12 Jan 2009 10:32:03 +0000 (11:32 +0100)
diff --combined include/linux/clockchips.h

index c6de413c5dd147f79f7479d4bd1fc6b28f588d01,cea153697ec788a3c3d61c2402a3f1188b5b06c7..3a1dbba4d3ae2da500e710387d130cd8ad5dd4c2
--- 1/include/linux/clockchips.h
--- 2/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@@ -36,7 -36,6 +36,7 @@@ enum clock_event_nofitiers 
         CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
         CLOCK_EVT_NOTIFY_SUSPEND,
         CLOCK_EVT_NOTIFY_RESUME,
+ +      CLOCK_EVT_NOTIFY_CPU_DYING,
         CLOCK_EVT_NOTIFY_CPU_DEAD,
   };
   
@@@ -83,13 -82,13 +83,13 @@@ struct clock_event_device 
         int                     shift;
         int                     rating;
         int                     irq;
-       cpumask_t               cpumask;
+       const struct cpumask    *cpumask;
         int                     (*set_next_event)(unsigned long evt,
                                                   struct clock_event_device *);
         void                    (*set_mode)(enum clock_event_mode mode,
                                             struct clock_event_device *);
         void                    (*event_handler)(struct clock_event_device *);
-       void                    (*broadcast)(cpumask_t mask);
+       void                    (*broadcast)(const struct cpumask *mask);
         struct list_head        list;
         enum clock_event_mode   mode;
         ktime_t                 next_event;
diff --combined kernel/hrtimer.c

index 61cb933395baabe54ae8d9c61ab100d8f855253d,1455b7651b6b27f9343809260976a1df3e4678e6..77aa33bb877ce79756aeab18c7c1b3717ff2333d
--- 1/kernel/hrtimer.c
--- 2/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@@ -32,7 -32,6 +32,6 @@@
    */
   
   #include <linux/cpu.h>
- #include <linux/irq.h>
   #include <linux/module.h>
   #include <linux/percpu.h>
   #include <linux/hrtimer.h>
@@@ -635,7 -634,6 +634,6 @@@ static inline void hrtimer_init_timer_h
   {
   }
   
- static void __run_hrtimer(struct hrtimer *timer);
   
   /*
    * When High resolution timers are active, try to reprogram. Note, that in case
@@@ -647,13 -645,9 +645,9 @@@ static inline int hrtimer_enqueue_repro
                                             struct hrtimer_clock_base *base)
   {
         if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
-               /*
-                * XXX: recursion check?
-                * hrtimer_forward() should round up with timer granularity
-                * so that we never get into inf recursion here,
-                * it doesn't do that though
-                */
-               __run_hrtimer(timer);
+               spin_unlock(&base->cpu_base->lock);
+               raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+               spin_lock(&base->cpu_base->lock);
                 return 1;
         }
         return 0;
@@@ -706,11 -700,6 +700,6 @@@ static inline int hrtimer_enqueue_repro
   }
   static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
   static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
- static inline int hrtimer_reprogram(struct hrtimer *timer,
-                                   struct hrtimer_clock_base *base)
- {
-       return 0;
- }
   
   #endif /* CONFIG_HIGH_RES_TIMERS */
   
@@@ -781,9 -770,11 +770,11 @@@ EXPORT_SYMBOL_GPL(hrtimer_forward)
    *
    * The timer is inserted in expiry order. Insertion into the
    * red black tree is O(log(n)). Must hold the base lock.
+  *
+  * Returns 1 when the new timer is the leftmost timer in the tree.
    */
- static void enqueue_hrtimer(struct hrtimer *timer,
-                           struct hrtimer_clock_base *base, int reprogram)
+ static int enqueue_hrtimer(struct hrtimer *timer,
+                          struct hrtimer_clock_base *base)
   {
         struct rb_node **link = &base->active.rb_node;
         struct rb_node *parent = NULL;
@@@ -815,20 -806,8 +806,8 @@@
          * Insert the timer to the rbtree and check whether it
          * replaces the first pending timer
          */
-       if (leftmost) {
-               /*
-                * Reprogram the clock event device. When the timer is already
-                * expired hrtimer_enqueue_reprogram has either called the
-                * callback or added it to the pending list and raised the
-                * softirq.
-                *
-                * This is a NOP for !HIGHRES
-                */
-               if (reprogram && hrtimer_enqueue_reprogram(timer, base))
-                       return;
- 
+       if (leftmost)
                 base->first = &timer->node;
-       }
   
         rb_link_node(&timer->node, parent, link);
         rb_insert_color(&timer->node, &base->active);
@@@ -837,6 -816,8 +816,8 @@@
          * state of a possibly running callback.
          */
         timer->state |= HRTIMER_STATE_ENQUEUED;
+ 
+       return leftmost;
   }
   
   /*
@@@ -913,7 -894,7 +894,7 @@@ hrtimer_start_range_ns(struct hrtimer *
   {
         struct hrtimer_clock_base *base, *new_base;
         unsigned long flags;
-       int ret;
+       int ret, leftmost;
   
         base = lock_hrtimer_base(timer, &flags);
   
@@@ -941,12 -922,16 +922,16 @@@
   
         timer_stats_hrtimer_set_start_info(timer);
   
+       leftmost = enqueue_hrtimer(timer, new_base);
+ 
         /*
          * Only allow reprogramming if the new base is on this CPU.
          * (it might still be on another CPU if the timer was pending)
+        *
+        * XXX send_remote_softirq() ?
          */
-       enqueue_hrtimer(timer, new_base,
-                       new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
+       if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
+               hrtimer_enqueue_reprogram(timer, new_base);
   
         unlock_hrtimer_base(timer, &flags);
   
@@@ -1158,42 -1143,19 +1143,42 @@@ static void __run_hrtimer(struct hrtime
         spin_lock(&cpu_base->lock);
   
         /*
-        * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
-        * reprogramming of the event hardware. This happens at the end of this
-        * function anyway.
+        * Note: We clear the CALLBACK bit after enqueue_hrtimer and
+        * we do not reprogram the event hardware. That happens either in
+        * hrtimer_start_range_ns() or in hrtimer_interrupt().
          */
         if (restart != HRTIMER_NORESTART) {
                 BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
-               enqueue_hrtimer(timer, base, 0);
+               enqueue_hrtimer(timer, base);
         }
         timer->state &= ~HRTIMER_STATE_CALLBACK;
   }
   
   #ifdef CONFIG_HIGH_RES_TIMERS
   
+ +static int force_clock_reprogram;
+ +
+ +/*
+ + * After 5 retry iterations, we consider that hrtimer_interrupt()
+ + * is hanging, which could happen with something that slows the interrupt
+ + * such as tracing. Then we force the clock reprogramming for every future
+ + * hrtimer interrupt to avoid infinite loops, and we overwrite the
+ + * min_delta_ns threshold.
+ + * The next tick event will be scheduled at 3 times the time we currently
+ + * spend in hrtimer_interrupt(). This gives a good compromise: the cpus will
+ + * spend 1/4 of their time processing the hrtimer interrupts. This is enough
+ + * to let the system keep running without serious starvation.
+ + */
+ +
+ +static inline void
+ +hrtimer_interrupt_hanging(struct clock_event_device *dev,
+ +                      ktime_t try_time)
+ +{
+ +      force_clock_reprogram = 1;
+ +      dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
+ +      printk(KERN_WARNING "hrtimer: interrupt too slow, "
+ +              "forcing clock min delta to %lu ns\n", dev->min_delta_ns);
+ +}
   /*
    * High resolution timer interrupt
    * Called with interrupts disabled
@@@ -1203,7 -1165,6 +1188,7 @@@ void hrtimer_interrupt(struct clock_eve
         struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
         struct hrtimer_clock_base *base;
         ktime_t expires_next, now;
+ +      int nr_retries = 0;
         int i;
   
         BUG_ON(!cpu_base->hres_active);
@@@ -1211,10 -1172,6 +1196,10 @@@
         dev->next_event.tv64 = KTIME_MAX;
   
    retry:
+ +      /* 5 retries is enough to notice a hang */
+ +      if (!(++nr_retries % 5))
+ +              hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
+ +
         now = ktime_get();
   
         expires_next.tv64 = KTIME_MAX;
@@@ -1267,11 -1224,27 +1252,27 @@@
   
         /* Reprogramming necessary ? */
         if (expires_next.tv64 != KTIME_MAX) {
- -              if (tick_program_event(expires_next, 0))
+ +              if (tick_program_event(expires_next, force_clock_reprogram))
                         goto retry;
         }
   }
   
+ /*
+  * local version of hrtimer_peek_ahead_timers() called with interrupts
+  * disabled.
+  */
+ static void __hrtimer_peek_ahead_timers(void)
+ {
+       struct tick_device *td;
+ 
+       if (!hrtimer_hres_active())
+               return;
+ 
+       td = &__get_cpu_var(tick_cpu_device);
+       if (td && td->evtdev)
+               hrtimer_interrupt(td->evtdev);
+ }
+ 
   /**
    * hrtimer_peek_ahead_timers -- run soft-expired timers now
    *
@@@ -1283,20 -1256,23 +1284,23 @@@
    */
   void hrtimer_peek_ahead_timers(void)
   {
-       struct tick_device *td;
         unsigned long flags;
   
-       if (!hrtimer_hres_active())
-               return;
- 
         local_irq_save(flags);
-       td = &__get_cpu_var(tick_cpu_device);
-       if (td && td->evtdev)
-               hrtimer_interrupt(td->evtdev);
+       __hrtimer_peek_ahead_timers();
         local_irq_restore(flags);
   }
   
- #endif        /* CONFIG_HIGH_RES_TIMERS */
+ static void run_hrtimer_softirq(struct softirq_action *h)
+ {
+       hrtimer_peek_ahead_timers();
+ }
+ 
+ #else /* CONFIG_HIGH_RES_TIMERS */
+ 
+ static inline void __hrtimer_peek_ahead_timers(void) { }
+ 
+ #endif        /* !CONFIG_HIGH_RES_TIMERS */
   
   /*
    * Called from timer softirq every jiffy, expire hrtimers:
@@@ -1542,39 -1518,36 +1546,36 @@@ static void migrate_hrtimer_list(struc
                 __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
                 timer->base = new_base;
                 /*
-                * Enqueue the timers on the new cpu, but do not reprogram 
-                * the timer as that would enable a deadlock between
-                * hrtimer_enqueue_reprogramm() running the timer and us still
-                * holding a nested base lock.
-                *
-                * Instead we tickle the hrtimer interrupt after the migration
-                * is done, which will run all expired timers and re-programm
-                * the timer device.
+                * Enqueue the timers on the new cpu. This does not
+                * reprogram the event device in case the timer
+                * expires before the earliest on this CPU, but we run
+                * hrtimer_interrupt after we migrated everything to
+                * sort out already expired timers and reprogram the
+                * event device.
                  */
-               enqueue_hrtimer(timer, new_base, 0);
+               enqueue_hrtimer(timer, new_base);
   
                 /* Clear the migration state bit */
                 timer->state &= ~HRTIMER_STATE_MIGRATE;
         }
   }
   
- static int migrate_hrtimers(int scpu)
+ static void migrate_hrtimers(int scpu)
   {
         struct hrtimer_cpu_base *old_base, *new_base;
-       int dcpu, i;
+       int i;
   
         BUG_ON(cpu_online(scpu));
-       old_base = &per_cpu(hrtimer_bases, scpu);
-       new_base = &get_cpu_var(hrtimer_bases);
- 
-       dcpu = smp_processor_id();
- 
         tick_cancel_sched_timer(scpu);
+ 
+       local_irq_disable();
+       old_base = &per_cpu(hrtimer_bases, scpu);
+       new_base = &__get_cpu_var(hrtimer_bases);
         /*
          * The caller is globally serialized and nobody else
          * takes two locks at once, deadlock is not possible.
          */
-       spin_lock_irq(&new_base->lock);
+       spin_lock(&new_base->lock);
         spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
   
         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
@@@ -1583,15 -1556,11 +1584,11 @@@
         }
   
         spin_unlock(&old_base->lock);
-       spin_unlock_irq(&new_base->lock);
-       put_cpu_var(hrtimer_bases);
+       spin_unlock(&new_base->lock);
   
-       return dcpu;
- }
- 
- static void tickle_timers(void *arg)
- {
-       hrtimer_peek_ahead_timers();
+       /* Check, if we got expired work to do */
+       __hrtimer_peek_ahead_timers();
+       local_irq_enable();
   }
   
   #endif /* CONFIG_HOTPLUG_CPU */
@@@ -1609,18 -1578,11 +1606,15 @@@ static int __cpuinit hrtimer_cpu_notify
                 break;
   
   #ifdef CONFIG_HOTPLUG_CPU
+ +      case CPU_DYING:
+ +      case CPU_DYING_FROZEN:
+ +              clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
+ +              break;
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
         {
-               int dcpu;
- 
                 clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
-               dcpu = migrate_hrtimers(scpu);
-               smp_call_function_single(dcpu, tickle_timers, NULL, 0);
+               migrate_hrtimers(scpu);
                 break;
         }
   #endif
@@@ -1641,6 -1603,9 +1635,9 @@@ void __init hrtimers_init(void
         hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
                           (void *)(long)smp_processor_id());
         register_cpu_notifier(&hrtimers_nb);
+ #ifdef CONFIG_HIGH_RES_TIMERS
+       open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
+ #endif
   }
   
   /**
diff --combined kernel/time/tick-common.c

index 457d281258ee9d683c0d91dbf60f9811aad0290a,63e05d423a09903907558d7d2aba661f5b426db6..21a5ca849514b40b689c5b34e2c9caaf9cbf051b
--- 1/kernel/time/tick-common.c
--- 2/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@@ -136,7 -136,7 +136,7 @@@ void tick_setup_periodic(struct clock_e
    */
   static void tick_setup_device(struct tick_device *td,
                               struct clock_event_device *newdev, int cpu,
-                             const cpumask_t *cpumask)
+                             const struct cpumask *cpumask)
   {
         ktime_t next_event;
         void (*handler)(struct clock_event_device *) = NULL;
@@@ -171,8 -171,8 +171,8 @@@
          * When the device is not per cpu, pin the interrupt to the
          * current cpu:
          */
-       if (!cpus_equal(newdev->cpumask, *cpumask))
-               irq_set_affinity(newdev->irq, *cpumask);
+       if (!cpumask_equal(newdev->cpumask, cpumask))
+               irq_set_affinity(newdev->irq, cpumask);
   
         /*
          * When global broadcasting is active, check if the current
@@@ -202,14 -202,14 +202,14 @@@ static int tick_check_new_device(struc
         spin_lock_irqsave(&tick_device_lock, flags);
   
         cpu = smp_processor_id();
-       if (!cpu_isset(cpu, newdev->cpumask))
+       if (!cpumask_test_cpu(cpu, newdev->cpumask))
                 goto out_bc;
   
         td = &per_cpu(tick_cpu_device, cpu);
         curdev = td->evtdev;
   
         /* cpu local device ? */
-       if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) {
+       if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {
   
                 /*
                  * If the cpu affinity of the device interrupt can not
@@@ -222,7 -222,7 +222,7 @@@
                  * If we have a cpu local device already, do not replace it
                  * by a non cpu local device
                  */
-               if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu)))
+               if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
                         goto out_bc;
         }
   
@@@ -254,7 -254,7 +254,7 @@@
                 curdev = NULL;
         }
         clockevents_exchange_device(curdev, newdev);
-       tick_setup_device(td, newdev, cpu, &cpumask_of_cpu(cpu));
+       tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
         if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
                 tick_oneshot_notify();
   
@@@ -273,21 -273,6 +273,21 @@@ out_bc
         return ret;
   }
   
-               int cpu = first_cpu(cpu_online_map);
+ +/*
+ + * Transfer the do_timer job away from a dying cpu.
+ + *
+ + * Called with interrupts disabled.
+ + */
+ +static void tick_handover_do_timer(int *cpup)
+ +{
+ +      if (*cpup == tick_do_timer_cpu) {
-               tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu :
++              int cpu = cpumask_first(cpu_online_mask);
+ +
++              tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
+ +                      TICK_DO_TIMER_NONE;
+ +      }
+ +}
+ +
   /*
    * Shutdown an event device on a given cpu:
    *
@@@ -312,6 -297,13 +312,6 @@@ static void tick_shutdown(unsigned int 
                 clockevents_exchange_device(dev, NULL);
                 td->evtdev = NULL;
         }
- -      /* Transfer the do_timer job away from this cpu */
- -      if (*cpup == tick_do_timer_cpu) {
- -              int cpu = cpumask_first(cpu_online_mask);
- -
- -              tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
- -                      TICK_DO_TIMER_NONE;
- -      }
         spin_unlock_irqrestore(&tick_device_lock, flags);
   }
   
@@@ -365,10 -357,6 +365,10 @@@ static int tick_notify(struct notifier_
                 tick_broadcast_oneshot_control(reason);
                 break;
   
+ +      case CLOCK_EVT_NOTIFY_CPU_DYING:
+ +              tick_handover_do_timer(dev);
+ +              break;
+ +
         case CLOCK_EVT_NOTIFY_CPU_DEAD:
                 tick_shutdown_broadcast_oneshot(dev);
                 tick_shutdown_broadcast(dev);
author	Ingo Molnar <mingo@elte.hu>
	Mon, 12 Jan 2009 10:32:03 +0000 (11:32 +0100)
committer	Ingo Molnar <mingo@elte.hu>
	Mon, 12 Jan 2009 10:32:03 +0000 (11:32 +0100)
		1	2
include/linux/clockchips.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/hrtimer.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/tick-common.c	patch \|	diff1 \|	diff2 \|	blob \| history