 ACPI_MODULE_NAME("processor_idle");
 #define ACPI_PROCESSOR_FILE_POWER      "power"
 #define US_TO_PM_TIMER_TICKS(t)                ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
+#define PM_TIMER_TICK_NS               (1000000000ULL/PM_TIMER_FREQUENCY)
 #define C2_OVERHEAD                    4       /* 1us (3.579 ticks per us) */
 #define C3_OVERHEAD                    4       /* 1us (3.579 ticks per us) */
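
For reference, the ACPI PM timer runs at PM_TIMER_FREQUENCY = 3579545 Hz, so the new PM_TIMER_TICK_NS macro works out to roughly 279 ns per tick. A quick sanity check of the conversion used by the sched_clock_idle_wakeup_event() calls below (illustrative arithmetic only, assuming that frequency constant):

        /* illustrative arithmetic, assuming PM_TIMER_FREQUENCY == 3579545 Hz:
         *   PM_TIMER_TICK_NS = 1000000000 / 3579545 = 279 ns per tick
         *   e.g. ~1 ms of idle -> ticks_elapsed() ~= 3580 ticks
         *                      -> reported to the scheduler as 3580 * 279 = 998820 ns
         */
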
 static void (*pm_idle_save) (void) __read_mostly;
                 * TBD: Can't get time duration while in C1, as resumes
                 *      go to an ISR rather than here.  Need to instrument
                 *      base interrupt handler.
+                *
+                * Note: the TSC better not stop in C1, sched_clock() will
+                *       skew otherwise.
                 */
                sleep_ticks = 0xFFFFFFFF;
                break;
        case ACPI_STATE_C2:
                /* Get start time (ticks) */
                t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+               /* Tell the scheduler that we are going deep-idle: */
+               sched_clock_idle_sleep_event();
                /* Invoke C2 */
                acpi_state_timer_broadcast(pr, cx, 1);
                acpi_cstate_enter(cx);
                /* Get end time (ticks) */
                t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
                /* TSC halts in C2, so notify users */
                mark_tsc_unstable("possible TSC halt in C2");
 #endif
+               /* Compute time (ticks) that we were actually asleep */
+               sleep_ticks = ticks_elapsed(t1, t2);
+
+               /* Tell the scheduler how much we idled: */
+               sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
+
                /* Re-enable interrupts */
                local_irq_enable();
+               /* Do not account our idle-switching overhead: */
+               sleep_ticks -= cx->latency_ticks + C2_OVERHEAD;
+
                current_thread_info()->status |= TS_POLLING;
-               /* Compute time (ticks) that we were actually asleep */
-               sleep_ticks =
-                   ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
                acpi_state_timer_broadcast(pr, cx, 0);
                break;
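
ticks_elapsed() itself is not part of this excerpt. Note that it has to be wraparound-safe: the PM timer is only 24 bits wide unless the FADT advertises a 32-bit timer, and a deep-idle period can easily span a rollover. A sketch of the idea (not necessarily the exact in-tree helper):

        /* sketch: wrap-safe delta between two PM-timer reads */
        static u32 ticks_elapsed(u32 t1, u32 t2)
        {
                if (t2 >= t1)
                        return t2 - t1;                         /* no rollover between reads */
                else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
                        return ((0x00FFFFFF - t1) + t2) & 0x00FFFFFF;   /* 24-bit timer wrapped */
                else
                        return (0xFFFFFFFF - t1) + t2;          /* 32-bit timer wrapped */
        }
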
 
        case ACPI_STATE_C3:
-
                /*
                 * disable bus master
                 * bm_check implies we need ARB_DIS
                t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
                /* Invoke C3 */
                acpi_state_timer_broadcast(pr, cx, 1);
+               /* Tell the scheduler that we are going deep-idle: */
+               sched_clock_idle_sleep_event();
                acpi_cstate_enter(cx);
                /* Get end time (ticks) */
                t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
                /* TSC halts in C3, so notify users */
                mark_tsc_unstable("TSC halts in C3");
 #endif
+               /* Compute time (ticks) that we were actually asleep */
+               sleep_ticks = ticks_elapsed(t1, t2);
+               /* Tell the scheduler how much we idled: */
+               sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
+
                /* Re-enable interrupts */
                local_irq_enable();
+               /* Do not account our idle-switching overhead: */
+               sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
+
                current_thread_info()->status |= TS_POLLING;
-               /* Compute time (ticks) that we were actually asleep */
-               sleep_ticks =
-                   ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
                acpi_state_timer_broadcast(pr, cx, 0);
                break;
 
 
        s64 clock_max_delta;
 
        unsigned int clock_warps, clock_overflows;
-       unsigned int clock_unstable_events;
+       u64 idle_clock;
+       unsigned int clock_deep_idle_events;
        u64 tick_timestamp;
 
        atomic_t nr_iowait;
 }
 
 /*
- * CPU frequency is/was unstable - start new by setting prev_clock_raw:
+ * We are going deep-idle (irqs are disabled):
  */
-void sched_clock_unstable_event(void)
+void sched_clock_idle_sleep_event(void)
 {
-       unsigned long flags;
-       struct rq *rq;
+       struct rq *rq = cpu_rq(smp_processor_id());
 
-       rq = task_rq_lock(current, &flags);
-       rq->prev_clock_raw = sched_clock();
-       rq->clock_unstable_events++;
-       task_rq_unlock(rq, &flags);
+       spin_lock(&rq->lock);
+       __update_rq_clock(rq);
+       spin_unlock(&rq->lock);
+       rq->clock_deep_idle_events++;
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+       struct rq *rq = cpu_rq(smp_processor_id());
+       u64 now = sched_clock();
+
+       rq->idle_clock += delta_ns;
+       /*
+        * Override the previous timestamp and ignore all
+        * sched_clock() deltas that occurred while we idled,
+        * and use the PM-provided delta_ns to advance the
+        * rq clock:
+        */
+       spin_lock(&rq->lock);
+       rq->prev_clock_raw = now;
+       rq->clock += delta_ns;
+       spin_unlock(&rq->lock);
 }
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
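
Taken together with the processor_idle.c changes above, the contract for the two new hooks is: announce the sleep with interrupts disabled, measure the idle period with a clock that keeps running across the C-state (the PM timer in the ACPI path), then feed the measured nanoseconds back on wakeup so the rq clock does not depend on a TSC that may have halted. A minimal sketch of a caller, with enter_deep_idle() and read_always_running_ns() as invented placeholders:

        /* hypothetical deep-idle path (irqs already disabled) */
        u64 t1_ns, t2_ns;

        sched_clock_idle_sleep_event();         /* rq->clock synced before the TSC can stop */
        t1_ns = read_always_running_ns();       /* placeholder: any clock that survives deep idle */
        enter_deep_idle();                      /* TSC may halt while we are here */
        t2_ns = read_always_running_ns();
        sched_clock_idle_wakeup_event(t2_ns - t1_ns);   /* advance rq->clock by the measured idle time */
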
 
 /*
  * resched_task - mark a task 'to be rescheduled now'.