Noticed by Peter Zijlstra.
Fix: move the CPU check into ->task_new_fair(). This way we can
call place_entity() and get the child's ->vruntime right at
initial wakeup time, even when the child is not on the parent's CPU.
(Without this there can be large latencies.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
 {
        unsigned long flags;
        struct rq *rq;
-       int this_cpu;
 
        rq = task_rq_lock(p, &flags);
        BUG_ON(p->state != TASK_RUNNING);
-       this_cpu = smp_processor_id(); /* parent's CPU */
        update_rq_clock(rq);
 
        p->prio = effective_prio(p);
 
-       if (task_cpu(p) != this_cpu || !p->sched_class->task_new ||
-                                                       !current->se.on_rq) {
+       if (!p->sched_class->task_new || !current->se.on_rq || !rq->cfs.curr) {
                activate_task(rq, p, 0);
        } else {
                /*
 
 {
        struct cfs_rq *cfs_rq = task_cfs_rq(p);
        struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
+       int this_cpu = smp_processor_id();
 
        sched_info_queued(p);
 
        update_curr(cfs_rq);
        place_entity(cfs_rq, se, 1);
 
-       if (sysctl_sched_child_runs_first &&
+       if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
                        curr->vruntime < se->vruntime) {
                /*
                 * Upon rescheduling, sched_class::put_prev_task() will place