OMAP: dmtimer: enable all timers to be wakeup events

[linux-2.6-omap-h63xx.git] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 5f21658b0f674f5fb533ecb523a961d8a32d9c30..196d48babbef87c088214e4f252b4397d2bf25be 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3190,7 +3190,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
         return 0;
  }
  /********** Helpers for find_busiest_group ************************/
-/**
+/*
   * sd_lb_stats - Structure to store the statistics of a sched_domain
   *             during load balancing.
   */
@@ -3222,7 +3222,7 @@ struct sd_lb_stats {
  #endif
  };
  
-/**
+/*
   * sg_lb_stats - stats of a sched_group required for load_balancing
   */
  struct sg_lb_stats {
@@ -3360,16 +3360,17 @@ static inline void update_sd_power_savings_stats(struct sched_group *group,
  }
  
  /**
- * check_power_save_busiest_group - Check if we have potential to perform
- *     some power-savings balance. If yes, set the busiest group to be
- *     the least loaded group in the sched_domain, so that it's CPUs can
- *     be put to idle.
- *
+ * check_power_save_busiest_group - see if there is potential for some power-savings balance
   * @sds: Variable containing the statistics of the sched_domain
   *     under consideration.
   * @this_cpu: Cpu at which we're currently performing load-balancing.
   * @imbalance: Variable to store the imbalance.
   *
+ * Description:
+ * Check if we have potential to perform some power-savings balance.
+ * If yes, set the busiest group to be the least loaded group in the
+ * sched_domain, so that its CPUs can be put to idle.
+ *
   * Returns 1 if there is potential to perform power-savings balance.
   * Else returns 0.
   */
@@ -3676,10 +3677,30 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
  }
  /******* find_busiest_group() helpers end here *********************/
  
-/*
- * find_busiest_group finds and returns the busiest CPU group within the
- * domain. It calculates and returns the amount of weighted load which
- * should be moved to restore balance via the imbalance parameter.
+/**
+ * find_busiest_group - Returns the busiest group within the sched_domain
+ * if there is an imbalance. If there isn't an imbalance, and
+ * the user has opted for power-savings, it returns a group whose
+ * CPUs can be put to idle by rebalancing those tasks elsewhere, if
+ * such a group exists.
+ *
+ * Also calculates the amount of weighted load which should be moved
+ * to restore balance.
+ *
+ * @sd: The sched_domain whose busiest group is to be returned.
+ * @this_cpu: The cpu for which load balancing is currently being performed.
+ * @imbalance: Variable which stores amount of weighted load which should
+ *             be moved to restore balance/put a group to idle.
+ * @idle: The idle status of this_cpu.
+ * @sd_idle: The idleness of sd
+ * @cpus: The set of CPUs under consideration for load-balancing.
+ * @balance: Pointer to a variable indicating if this_cpu
+ *     is the appropriate cpu to perform load balancing at this level.
+ *
+ * Returns:    - the busiest group if imbalance exists.
+ *             - If no imbalance and user has opted for power-savings balance,
+ *                return the least loaded group whose CPUs can be
+ *                put to idle by rebalancing its tasks onto our group.
   */
  static struct sched_group *
  find_busiest_group(struct sched_domain *sd, int this_cpu,
@@ -3697,17 +3718,31 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
         update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus,
                                         balance, &sds);
  
+       /* Cases where imbalance does not exist from POV of this_cpu */
+       /* 1) this_cpu is not the appropriate cpu to perform load balancing
+        *    at this level.
+        * 2) There is no busy sibling group to pull from.
+        * 3) This group is the busiest group.
+        * 4) This group is more busy than the avg busyness at this
+        *    sched_domain.
+        * 5) The imbalance is within the specified limit.
+        * 6) Any rebalance would lead to ping-pong
+        */
         if (balance && !(*balance))
                 goto ret;
  
-       if (!sds.busiest || sds.this_load >= sds.max_load
-               || sds.busiest_nr_running == 0)
+       if (!sds.busiest || sds.busiest_nr_running == 0)
+               goto out_balanced;
+
+       if (sds.this_load >= sds.max_load)
                 goto out_balanced;
  
         sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
  
-       if (sds.this_load >= sds.avg_load ||
-                       100*sds.max_load <= sd->imbalance_pct * sds.this_load)
+       if (sds.this_load >= sds.avg_load)
+               goto out_balanced;
+
+       if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
                 goto out_balanced;
  
         sds.busiest_load_per_task /= sds.busiest_nr_running;
@@ -4907,15 +4942,13 @@ pick_next_task(struct rq *rq)
  /*
   * schedule() is the main scheduler function.
   */
-asmlinkage void __sched schedule(void)
+asmlinkage void __sched __schedule(void)
  {
         struct task_struct *prev, *next;
         unsigned long *switch_count;
         struct rq *rq;
         int cpu;
  
-need_resched:
-       preempt_disable();
         cpu = smp_processor_id();
         rq = cpu_rq(cpu);
         rcu_qsctr_inc(cpu);
@@ -4972,13 +5005,80 @@ need_resched_nonpreemptible:
  
         if (unlikely(reacquire_kernel_lock(current) < 0))
                 goto need_resched_nonpreemptible;
+}
  
+asmlinkage void __sched schedule(void)
+{
+need_resched:
+       preempt_disable();
+       __schedule();
         preempt_enable_no_resched();
         if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
                 goto need_resched;
  }
  EXPORT_SYMBOL(schedule);
  
+#ifdef CONFIG_SMP
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+{
+       unsigned int cpu;
+       struct rq *rq;
+
+       if (!sched_feat(OWNER_SPIN))
+               return 0;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       /*
+        * Need to access the cpu field knowing that
+        * DEBUG_PAGEALLOC could have unmapped it if
+        * the mutex owner just released it and exited.
+        */
+       if (probe_kernel_address(&owner->cpu, cpu))
+               goto out;
+#else
+       cpu = owner->cpu;
+#endif
+
+       /*
+        * Even if the access succeeded (likely case),
+        * the cpu field may no longer be valid.
+        */
+       if (cpu >= nr_cpumask_bits)
+               goto out;
+
+       /*
+        * We need to validate that we can do a
+        * get_cpu() and that we have the percpu area.
+        */
+       if (!cpu_online(cpu))
+               goto out;
+
+       rq = cpu_rq(cpu);
+
+       for (;;) {
+               /*
+                * Owner changed, break to re-assess state.
+                */
+               if (lock->owner != owner)
+                       break;
+
+               /*
+                * Stop if owner isn't running on that cpu or resched is due.
+                */
+               if (task_thread_info(rq->curr) != owner || need_resched())
+                       return 0;
+
+               cpu_relax();
+       }
+out:
+       return 1;
+}
+#endif
+
  #ifdef CONFIG_PREEMPT
  /*
   * this is the entry point to schedule() from in-kernel preemption
@@ -6308,12 +6408,7 @@ void sched_show_task(struct task_struct *p)
                 printk(KERN_CONT " %016lx ", thread_saved_pc(p));
  #endif
  #ifdef CONFIG_DEBUG_STACK_USAGE
-       {
-               unsigned long *n = end_of_stack(p);
-               while (!*n)
-                       n++;
-               free = (unsigned long)n - (unsigned long)end_of_stack(p);
-       }
+       free = stack_not_used(p);
  #endif
         printk(KERN_CONT "%5lu %5d %6d\n", free,
                 task_pid_nr(p), task_pid_nr(p->real_parent));
@@ -9858,7 +9953,7 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
  
  static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
  {
-       u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+       u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
         u64 data;
  
  #ifndef CONFIG_64BIT
@@ -9877,7 +9972,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
  
  static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
  {
-       u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+       u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
  
  #ifndef CONFIG_64BIT
         /*
@@ -9973,7 +10068,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
         ca = task_ca(tsk);
  
         for (; ca; ca = ca->parent) {
-               u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+               u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
                 *cpuusage += cputime;
         }
  }