Merge branch 'sched/latest' of git://git.kernel.org/pub/scm/linux/kernel/git/ghaskins...
[linux-2.6-omap-h63xx.git] / kernel / sched.c
index deb5ac8c12f37c44e71dcc46484149d073430948..dd1a1466c1e6a7412ca43022185c8aed0f76c88d 100644
@@ -464,11 +464,15 @@ struct rt_rq {
        struct rt_prio_array active;
        unsigned long rt_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-       int highest_prio; /* highest queued rt task prio */
+       struct {
+               int curr; /* highest queued rt task prio */
+               int next; /* next highest */
+       } highest_prio;
 #endif
 #ifdef CONFIG_SMP
        unsigned long rt_nr_migratory;
        int overloaded;
+       struct plist_head pushable_tasks;
 #endif
        int rt_throttled;
        u64 rt_time;
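
The struct change above replaces the single cached "highest queued prio" with a two-level cache and adds a priority-sorted list of tasks that could be pushed to another CPU. Knowing the next-highest priority as well as the current one lets the push logic see whether another runnable RT task is waiting behind the one that runs. A minimal userspace sketch of how an enqueue keeps the pair consistent (assumed names, not kernel code):

/* Userspace sketch (not kernel code) of the two-level priority cache:
 * lower numbers mean higher priority, MAX_RT_PRIO means "nothing queued". */
#include <stdio.h>

#define MAX_RT_PRIO 100

struct prio_cache {
        int curr;       /* highest queued prio */
        int next;       /* next-highest queued prio */
};

static void prio_cache_init(struct prio_cache *pc)
{
        pc->curr = MAX_RT_PRIO;
        pc->next = MAX_RT_PRIO;
}

/* Account for a task of priority 'prio' being enqueued. */
static void prio_cache_enqueue(struct prio_cache *pc, int prio)
{
        if (prio < pc->curr) {
                pc->next = pc->curr;
                pc->curr = prio;
        } else if (prio < pc->next) {
                pc->next = prio;
        }
}

int main(void)
{
        struct prio_cache pc;

        prio_cache_init(&pc);
        prio_cache_enqueue(&pc, 10);
        prio_cache_enqueue(&pc, 5);
        printf("curr=%d next=%d\n", pc.curr, pc.next);  /* curr=5 next=10 */
        return 0;
}

A dequeue path is omitted here; on the kernel side the cached values are recomputed from the priority bitmap when the top of the queue changes.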
@@ -1607,21 +1611,42 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+#ifdef CONFIG_PREEMPT
+
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations.  This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below.  However, it
+ * also adds more overhead and therefore may reduce throughput.
  */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+       __releases(this_rq->lock)
+       __acquires(busiest->lock)
+       __acquires(this_rq->lock)
+{
+       spin_unlock(&this_rq->lock);
+       double_rq_lock(this_rq, busiest);
+
+       return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry.  This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
        __releases(this_rq->lock)
        __acquires(busiest->lock)
        __acquires(this_rq->lock)
 {
        int ret = 0;
 
-       if (unlikely(!irqs_disabled())) {
-               /* printk() doesn't work good under rq->lock */
-               spin_unlock(&this_rq->lock);
-               BUG_ON(1);
-       }
        if (unlikely(!spin_trylock(&busiest->lock))) {
                if (busiest < this_rq) {
                        spin_unlock(&this_rq->lock);
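
Two build-dependent implementations now sit behind the same name: with CONFIG_PREEMPT, the "fair" variant simply drops this_rq->lock and takes both queues via double_rq_lock(), always reporting that the lock was released; without it, the "unfair" variant tries the second lock opportunistically and only falls back to a fixed ordering on contention. Below is a userspace sketch of the unfair path using pthread mutexes as a stand-in; the lines elided by the hunk are assumed to re-take both locks in address order.

/* Userspace sketch of the "unfair" variant with pthread mutexes (not the
 * kernel spinlock API).  The body elided by the hunk is assumed to take
 * both locks in ascending address order and set ret = 1 when this_rq's
 * lock had to be dropped. */
#include <pthread.h>

struct runq_sketch {
        pthread_mutex_t lock;
        /* ... per-runqueue state ... */
};

/* Returns 1 if this_rq->lock was dropped and re-acquired. */
static int double_lock_balance_sketch(struct runq_sketch *this_rq,
                                      struct runq_sketch *busiest)
{
        int ret = 0;

        if (pthread_mutex_trylock(&busiest->lock) != 0) {
                if (busiest < this_rq) {
                        /* Wrong order: drop ours, then take both in
                         * ascending address order to avoid deadlock. */
                        pthread_mutex_unlock(&this_rq->lock);
                        pthread_mutex_lock(&busiest->lock);
                        pthread_mutex_lock(&this_rq->lock);
                        ret = 1;
                } else {
                        pthread_mutex_lock(&busiest->lock);
                }
        }
        return ret;
}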
@@ -1634,6 +1659,22 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
        return ret;
 }
 
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+       if (unlikely(!irqs_disabled())) {
+               /* printk() doesn't work good under rq->lock */
+               spin_unlock(&this_rq->lock);
+               BUG_ON(1);
+       }
+
+       return _double_lock_balance(this_rq, busiest);
+}
+
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
        __releases(busiest->lock)
 {
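
The wrapper keeps the irqs-disabled sanity check in one place and defers to whichever variant was compiled in. Continuing the pthread sketch above, a hypothetical caller would treat a nonzero return as a signal that its own lock was dropped and any cached runqueue state must be revalidated:

/* Hypothetical caller, reusing double_lock_balance_sketch() and
 * struct runq_sketch from the previous sketch. */
static void pull_one_task_sketch(struct runq_sketch *this_rq,
                                 struct runq_sketch *busiest)
{
        /* precondition: this_rq->lock is held, interrupts are off */
        if (double_lock_balance_sketch(this_rq, busiest)) {
                /* this_rq->lock was dropped and re-taken: revalidate
                 * anything read from this_rq before this point */
        }

        /* ... move a task from busiest to this_rq ... */

        pthread_mutex_unlock(&busiest->lock);   /* double_unlock_balance() analogue */
}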
@@ -2445,6 +2486,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
        /* Want to start with kernel preemption disabled. */
        task_thread_info(p)->preempt_count = 1;
 #endif
+       plist_node_init(&p->pushable_tasks, MAX_PRIO);
+
        put_cpu();
 }
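
sched_fork() now gives every task a plist node, initialised with MAX_PRIO (below any real priority), presumably so the node is in a sane state before the task is ever queued as pushable. The kernel's plist is a doubly linked, priority-sorted list with O(1) removal; the following is only a simplified singly linked stand-in to show what "priority-sorted" means here (lower prio value sorts first):

/* Simplified, singly linked stand-in for a priority-sorted list (the real
 * <linux/plist.h> is doubly linked and supports O(1) removal).  Lower
 * prio values sort first, so the head is the best push candidate. */
#include <stddef.h>

struct pnode_sketch {
        int prio;
        struct pnode_sketch *next;
};

struct phead_sketch {
        struct pnode_sketch *first;
};

static void phead_init_sketch(struct phead_sketch *h)
{
        h->first = NULL;
}

static void pnode_init_sketch(struct pnode_sketch *n, int prio)
{
        n->prio = prio;
        n->next = NULL;
}

static void plist_add_sketch(struct phead_sketch *h, struct pnode_sketch *n)
{
        struct pnode_sketch **pp = &h->first;

        /* walk past everything of equal or better (lower) priority */
        while (*pp && (*pp)->prio <= n->prio)
                pp = &(*pp)->next;
        n->next = *pp;
        *pp = n;
}

The plist_head_init() call in the last hunk is the head-side counterpart of this per-task node init; the &rq->lock argument is there, presumably, for the list debugging checks that verify the list is only touched under the runqueue lock.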
 
@@ -2585,6 +2628,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 {
        struct mm_struct *mm = rq->prev_mm;
        long prev_state;
+#ifdef CONFIG_SMP
+       int post_schedule = 0;
+
+       if (current->sched_class->needs_post_schedule)
+               post_schedule = current->sched_class->needs_post_schedule(rq);
+#endif
 
        rq->prev_mm = NULL;
 
@@ -2603,7 +2652,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
        finish_arch_switch(prev);
        finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
-       if (current->sched_class->post_schedule)
+       if (post_schedule)
                current->sched_class->post_schedule(rq);
 #endif
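
These two hunks change how the optional post-switch hook is driven: instead of checking for class->post_schedule at the end of finish_task_switch(), the new code asks a separate needs_post_schedule() method early on and only consults that cached answer after finish_lock_switch(). A simplified sketch of the pattern (the names mirror the diff, but this is not the real struct sched_class):

/* Simplified sketch: an optional query hook sampled early, with only the
 * cached result consulted after the arch/lock switch bookkeeping. */
struct rq_sketch;                       /* opaque for this illustration */

struct sched_class_sketch {
        int  (*needs_post_schedule)(struct rq_sketch *rq);  /* optional */
        void (*post_schedule)(struct rq_sketch *rq);
};

static void finish_task_switch_sketch(struct rq_sketch *rq,
                                      const struct sched_class_sketch *sc)
{
        int post_schedule = 0;

        /* sample the decision while the state it depends on is stable */
        if (sc->needs_post_schedule)
                post_schedule = sc->needs_post_schedule(rq);

        /* ... finish_arch_switch() / finish_lock_switch() equivalents ... */

        if (post_schedule)
                sc->post_schedule(rq);
}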
 
@@ -2984,6 +3033,16 @@ next:
        pulled++;
        rem_load_move -= p->se.load.weight;
 
+#ifdef CONFIG_PREEMPT
+       /*
+        * NEWIDLE balancing is a source of latency, so preemptible kernels
+        * will stop after the first task is pulled to minimize the critical
+        * section.
+        */
+       if (idle == CPU_NEWLY_IDLE)
+               goto out;
+#endif
+
        /*
         * We only want to steal up to the prescribed amount of weighted load.
         */
@@ -3030,9 +3089,15 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
                                sd, idle, all_pinned, &this_best_prio);
                class = class->next;
 
+#ifdef CONFIG_PREEMPT
+               /*
+                * NEWIDLE balancing is a source of latency, so preemptible
+                * kernels will stop after the first task is pulled to minimize
+                * the critical section.
+                */
                if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
                        break;
-
+#endif
        } while (class && max_load_move > total_load_moved);
 
        return total_load_moved > 0;
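
The same policy appears twice above, once inside the per-class pull loop and once in the loop over scheduling classes: on a preemptible kernel, balancing triggered because a CPU just became idle stops as soon as one task has been pulled, keeping the time spent under two runqueue locks short at the cost of moving less load. A minimal control-flow sketch, with a runtime flag standing in for the compile-time CONFIG_PREEMPT switch:

/* Illustrative sketch (not the kernel's move_tasks()): a newly-idle
 * balance on a preemptible build settles for the first pulled task,
 * trading total load moved for a shorter locked section. */
enum idle_kind_sketch { CPU_IDLE_SKETCH, CPU_NEWLY_IDLE_SKETCH };

static int balance_sketch(enum idle_kind_sketch idle, int candidates,
                          int preemptible)
{
        int pulled = 0;

        for (int i = 0; i < candidates; i++) {
                /* ... pick and migrate one runnable task ... */
                pulled++;

                if (preemptible && idle == CPU_NEWLY_IDLE_SKETCH)
                        break;  /* latency matters more than throughput here */
        }
        return pulled;
}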
@@ -8201,11 +8266,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
        __set_bit(MAX_RT_PRIO, array->bitmap);
 
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-       rt_rq->highest_prio = MAX_RT_PRIO;
+       rt_rq->highest_prio.curr = MAX_RT_PRIO;
+       rt_rq->highest_prio.next = MAX_RT_PRIO;
 #endif
 #ifdef CONFIG_SMP
        rt_rq->rt_nr_migratory = 0;
        rt_rq->overloaded = 0;
+       plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
 #endif
 
        rt_rq->rt_time = 0;