]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - arch/i386/kernel/timers/timer_tsc.c
[CPUFREQ] Check for not initialized freq on cpufreq changes
[linux-2.6-omap-h63xx.git] / arch / i386 / kernel / timers / timer_tsc.c
1 /*
2  * This code largely moved from arch/i386/kernel/time.c.
3  * See comments there for proper credits.
4  *
5  * 2004-06-25    Jesper Juhl
6  *      moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7  *      failing to inline.
8  */
9
10 #include <linux/spinlock.h>
11 #include <linux/init.h>
12 #include <linux/timex.h>
13 #include <linux/errno.h>
14 #include <linux/cpufreq.h>
15 #include <linux/string.h>
16 #include <linux/jiffies.h>
17
18 #include <asm/timer.h>
19 #include <asm/io.h>
20 /* processor.h for tsc_disable flag */
21 #include <asm/processor.h>
22
23 #include "io_ports.h"
24 #include "mach_timer.h"
25
26 #include <asm/hpet.h>
27 #include <asm/i8253.h>
28
/* HPET-only state; compiled in only with CONFIG_HPET_TIMER. */
29 #ifdef CONFIG_HPET_TIMER
/* HPET-count -> usec multiplier scaled by 2^32; computed in init_tsc(). */
30 static unsigned long hpet_usec_quotient;
/* HPET counter value recorded by mark_offset_tsc_hpet() and tsc_resume(). */
31 static unsigned long hpet_last;
/* Forward declaration so init_tsc() can patch timer_tsc.mark_offset. */
32 static struct timer_opts timer_tsc;
33 #endif
34
/* Forward declaration; the real and the stub variant are defined further down. */
35 static inline void cpufreq_delayed_get(void);
36
/* Set to 1 by tsc_setup() ("notsc") on kernels built without CONFIG_X86_TSC. */
37 int tsc_disable __devinitdata = 0;
38
/* Set to 1 by init_tsc() once calibration has produced a non-zero quotient. */
39 static int use_tsc;
40 /* Number of usecs that the last interrupt was delayed */
41 static int delay_at_last_interrupt;
42
/* TSC value latched at the last mark_offset call (or at resume); written under monotonic_lock. */
43 static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
44 static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
/* Nanoseconds accumulated up to the latched TSC value above. */
45 static unsigned long long monotonic_base;
/* Seqlock taken by writers of last_tsc_low/high and monotonic_base. */
46 static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
47
48 /* convert from cycles(64bits) => nanoseconds (64bits)
49  *  basic equation:
50  *              ns = cycles / (freq / ns_per_sec)
51  *              ns = cycles * (ns_per_sec / freq)
52  *              ns = cycles * (10^9 / (cpu_khz * 10^3))
53  *              ns = cycles * (10^6 / cpu_khz)
54  *
55  *      Then we use scaling math (suggested by george@mvista.com) to get:
56  *              ns = cycles * (10^6 * SC / cpu_khz) / SC
57  *              ns = cycles * cyc2ns_scale / SC
58  *
59  *      And since SC is a constant power of two, we can convert the div
60  *  into a shift.
61  *
62  *  We can use khz divisor instead of mhz to keep a better precision, since
63  *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
64  *  (mathieu.desnoyers@polymtl.ca)
65  *
66  *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
67  */
/*
 * Fixed-point multiplier consumed by cycles_2_ns():
 *	cyc2ns_scale = (10^6 << CYC2NS_SCALE_FACTOR) / cpu_khz
 */
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

/*
 * set_cyc2ns_scale - recompute cyc2ns_scale for a CPU clock of @cpu_khz kHz.
 *
 * A zero @cpu_khz (frequency not known) is ignored and the previous scale
 * is kept; dividing by it would raise a divide error.
 */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
{
	if (!cpu_khz)
		return;

	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / cpu_khz;
}
75
76 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
77 {
78         return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
79 }
80
/*
 * Older of the two previous PIT counts kept for the pit_latch_buggy filter
 * in mark_offset_tsc(); seeded with LATCH in init_tsc().
 */
81 static int count2; /* counter for mark_offset_tsc() */
82
83 /* Cached *multiplier* to convert TSC counts to microseconds.
84  * (see the equation below).
85  * Equal to 2^32 * (1 / (clocks per usec) ).
86  * Set in init_tsc() from the TSC calibration result.
87  */
88 static unsigned long fast_gettimeoffset_quotient;
89
90 static unsigned long get_offset_tsc(void)
91 {
92         register unsigned long eax, edx;
93
94         /* Read the Time Stamp Counter */
95
96         rdtsc(eax,edx);
97
98         /* .. relative to previous jiffy (32 bits is enough) */
99         eax -= last_tsc_low;    /* tsc_low delta */
100
101         /*
102          * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
103          *             = (tsc_low delta) * (usecs_per_clock)
104          *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
105          *
106          * Using a mull instead of a divl saves up to 31 clock cycles
107          * in the critical path.
108          */
109
110         __asm__("mull %2"
111                 :"=a" (eax), "=d" (edx)
112                 :"rm" (fast_gettimeoffset_quotient),
113                  "0" (eax));
114
115         /* our adjusted time offset in microseconds */
116         return delay_at_last_interrupt + edx;
117 }
118
119 static unsigned long long monotonic_clock_tsc(void)
120 {
121         unsigned long long last_offset, this_offset, base;
122         unsigned seq;
123         
124         /* atomically read monotonic base & last_offset */
125         do {
126                 seq = read_seqbegin(&monotonic_lock);
127                 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
128                 base = monotonic_base;
129         } while (read_seqretry(&monotonic_lock, seq));
130
131         /* Read the Time Stamp Counter */
132         rdtscll(this_offset);
133
134         /* return the value in ns */
135         return base + cycles_2_ns(this_offset - last_offset);
136 }
137
138 /*
139  * Scheduler clock - returns current time in nanosec units.
140  */
141 unsigned long long sched_clock(void)
142 {
143         unsigned long long this_offset;
144
145         /*
146          * In the NUMA case we dont use the TSC as they are not
147          * synchronized across all CPUs.
148          */
149 #ifndef CONFIG_NUMA
150         if (!use_tsc)
151 #endif
152                 /* no locking but a rare wrong value is not a big deal */
153                 return jiffies_64 * (1000000000 / HZ);
154
155         /* Read the Time Stamp Counter */
156         rdtscll(this_offset);
157
158         /* return the value in ns */
159         return cycles_2_ns(this_offset);
160 }
161
/*
 * delay_tsc - busy-wait until at least @loops TSC cycles (low 32 bits)
 * have elapsed, calling rep_nop() on every iteration.
 */
static void delay_tsc(unsigned long loops)
{
	unsigned long start, now;

	rdtscl(start);
	for (;;) {
		rep_nop();
		rdtscl(now);
		if ((now - start) >= loops)
			break;
	}
}
173
174 #ifdef CONFIG_HPET_TIMER
/*
 * mark_offset_tsc_hpet - mark_offset hook used when the HPET timer is in use
 * (installed into timer_tsc.mark_offset by init_tsc()).
 *
 * On each call it:
 *  - samples the HPET main counter and latches the TSC into
 *    last_tsc_low/last_tsc_high,
 *  - adds missed ticks to jiffies_64,
 *  - advances monotonic_base by the TSC cycles since the previous latch,
 *  - recomputes delay_at_last_interrupt from the HPET delta.
 */
175 static void mark_offset_tsc_hpet(void)
176 {
177         unsigned long long this_offset, last_offset;
178         unsigned long offset, temp, hpet_current;
179
180         write_seqlock(&monotonic_lock);
        /* previous latched TSC value, captured before it is overwritten below */
181         last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
182         /*
183          * It is important that these two operations happen almost at
184          * the same time. We do the RDTSC stuff first, since it's
185          * faster. To avoid any inconsistencies, we need interrupts
186          * disabled locally.
187          */
188         /*
189          * Interrupts are just disabled locally since the timer irq
190          * has the SA_INTERRUPT flag set. -arca
191          */
192         /* read Pentium cycle counter */
193
        /*
         * NOTE(review): despite the comment above, the HPET counter is
         * read before the TSC here.
         */
194         hpet_current = hpet_readl(HPET_COUNTER);
195         rdtsc(last_tsc_low, last_tsc_high);
196
197         /* lost tick compensation */
        /* offset = timer 0 comparator minus one tick period */
198         offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
        /* hpet_last == 0 means no previous sample yet, so nothing to compensate */
199         if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
200                 int lost_ticks = (offset - hpet_last) / hpet_tick;
201                 jiffies_64 += lost_ticks;
202         }
203         hpet_last = hpet_current;
204
205         /* update the monotonic base value */
206         this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
207         monotonic_base += cycles_2_ns(this_offset - last_offset);
208         write_sequnlock(&monotonic_lock);
209
210         /* calculate delay_at_last_interrupt */
211         /*
212          * Time offset = (hpet delta) * ( usecs per HPET clock )
213          *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
214          *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
215          * Where,
216          * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
217          */
218         delay_at_last_interrupt = hpet_current - offset;
        /*
         * NOTE(review): ASM_MUL64_REG is defined elsewhere; going by the
         * formula above it presumably leaves the high 32 bits of
         * (delta * hpet_usec_quotient) in delay_at_last_interrupt and the
         * low half in temp -- confirm against its definition.
         */
219         ASM_MUL64_REG(temp, delay_at_last_interrupt,
220                         hpet_usec_quotient, delay_at_last_interrupt);
221 }
222 #endif
223
224
225 #ifdef CONFIG_CPU_FREQ
226 #include <linux/workqueue.h>
227
/* Non-zero while the delayed cpufreq_get() work is queued and has not finished. */
228 static unsigned int cpufreq_delayed_issched = 0;
/* Set to 1 by cpufreq_tsc() once the transition notifier has been registered. */
229 static unsigned int cpufreq_init = 0;
/* Work item running handle_cpufreq_delayed_get(); initialised in cpufreq_tsc(). */
230 static struct work_struct cpufreq_delayed_get_work;
231
232 static void handle_cpufreq_delayed_get(void *v)
233 {
234         unsigned int cpu;
235         for_each_online_cpu(cpu) {
236                 cpufreq_get(cpu);
237         }
238         cpufreq_delayed_issched = 0;
239 }
240
241 /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
242  * to verify the CPU frequency the timing core thinks the CPU is running
243  * at is still correct.
244  */
245 static inline void cpufreq_delayed_get(void) 
246 {
247         if (cpufreq_init && !cpufreq_delayed_issched) {
248                 cpufreq_delayed_issched = 1;
249                 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
250                 schedule_work(&cpufreq_delayed_get_work);
251         }
252 }
253
254 /* If the CPU frequency is scaled, TSC-based delays will need a different
255  * loops_per_jiffy value to function properly.
256  */
257
/*
 * Reference values recorded by time_cpufreq_notifier() the first time it
 * runs; every later rescale is computed from them.  ref_freq == 0 means
 * "not recorded yet".
 */
258 static unsigned int  ref_freq = 0;
259 static unsigned long loops_per_jiffy_ref = 0;
260
/* The TSC conversion factors are only rescaled on non-SMP builds. */
261 #ifndef CONFIG_SMP
262 static unsigned long fast_gettimeoffset_ref = 0;
263 static unsigned int cpu_khz_ref = 0;
264 #endif
265
/*
 * time_cpufreq_notifier - cpufreq transition callback.
 *
 * @nb:   notifier block (unused)
 * @val:  transition event (CPUFREQ_PRECHANGE, CPUFREQ_POSTCHANGE or
 *        CPUFREQ_RESUMECHANGE are acted upon)
 * @data: struct cpufreq_freqs describing the transition
 *
 * Rescales cpu_data[].loops_per_jiffy and, on non-SMP builds, cpu_khz,
 * fast_gettimeoffset_quotient and cyc2ns_scale from the reference values
 * recorded on the first call.  xtime_lock is taken for writing except for
 * CPUFREQ_RESUMECHANGE.  Always returns 0.
 */
266 static int
267 time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
268                        void *data)
269 {
270         struct cpufreq_freqs *freq = data;
271
272         if (val != CPUFREQ_RESUMECHANGE)
273                 write_seqlock_irq(&xtime_lock);
        /* first call: record the reference frequency and matching values */
274         if (!ref_freq) {
                /*
                 * Old frequency not initialized: remember only the new one
                 * and skip the rescale.
                 * NOTE(review): loops_per_jiffy_ref (and the !SMP refs) stay
                 * 0 on this path and are never recorded later because
                 * ref_freq is now non-zero -- confirm this is intended.
                 */
275                 if (!freq->old){
276                         ref_freq = freq->new;
277                         goto end;
278                 }
279                 ref_freq = freq->old;
280                 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
281 #ifndef CONFIG_SMP
282                 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
283                 cpu_khz_ref = cpu_khz;
284 #endif
285         }
286
        /*
         * Rescale before the frequency goes up, after it has gone down,
         * and unconditionally on resume.
         */
287         if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
288             (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
289             (val == CPUFREQ_RESUMECHANGE)) {
290                 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
291                         cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
292 #ifndef CONFIG_SMP
293                 if (cpu_khz)
294                         cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
295                 if (use_tsc) {
296                         if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
                                /* usecs-per-cycle scales inversely: note the swapped frequency arguments */
297                                 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
298                                 set_cyc2ns_scale(cpu_khz);
299                         }
300                 }
301 #endif
302         }
303
304 end:
        /* release xtime_lock under the same condition it was taken */
305         if (val != CPUFREQ_RESUMECHANGE)
306                 write_sequnlock_irq(&xtime_lock);
307
308         return 0;
309 }
310
/* Notifier block registered by cpufreq_tsc() for CPUFREQ_TRANSITION_NOTIFIER events. */
311 static struct notifier_block time_cpufreq_notifier_block = {
312         .notifier_call  = time_cpufreq_notifier
313 };
314
315
316 static int __init cpufreq_tsc(void)
317 {
318         int ret;
319         INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
320         ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
321                                         CPUFREQ_TRANSITION_NOTIFIER);
322         if (!ret)
323                 cpufreq_init = 1;
324         return ret;
325 }
326 core_initcall(cpufreq_tsc);
327
328 #else /* CONFIG_CPU_FREQ */
/* Without CONFIG_CPU_FREQ there is no frequency to re-check: empty stub. */
static inline void cpufreq_delayed_get(void)
{
}
330 #endif 
331
332 int recalibrate_cpu_khz(void)
333 {
334 #ifndef CONFIG_SMP
335         unsigned int cpu_khz_old = cpu_khz;
336
337         if (cpu_has_tsc) {
338                 local_irq_disable();
339                 init_cpu_khz();
340                 local_irq_enable();
341                 cpu_data[0].loops_per_jiffy =
342                     cpufreq_scale(cpu_data[0].loops_per_jiffy,
343                                   cpu_khz_old,
344                                   cpu_khz);
345                 return 0;
346         } else
347                 return -ENODEV;
348 #else
349         return -ENODEV;
350 #endif
351 }
352 EXPORT_SYMBOL(recalibrate_cpu_khz);
353
/*
 * mark_offset_tsc - default mark_offset hook of the "tsc" timer.
 *
 * On each call it:
 *  - latches the TSC into last_tsc_low/last_tsc_high and reads the PIT
 *    channel 0 count under i8253_lock,
 *  - converts the TSC delta since the previous latch into microseconds
 *    and adds missed ticks to jiffies_64,
 *  - calls clock_fallback() when ticks keep being lost, and
 *    cpufreq_delayed_get() somewhat earlier,
 *  - advances monotonic_base,
 *  - recomputes delay_at_last_interrupt from the PIT count.
 */
354 static void mark_offset_tsc(void)
355 {
356         unsigned long lost,delay;
        /* starts as the previous TSC low word; turned into the delta further down */
357         unsigned long delta = last_tsc_low;
358         int count;
359         int countmp;
        /* count1: PIT count of the previous call (count2, file scope, is the one before) */
360         static int count1 = 0;
361         unsigned long long this_offset, last_offset;
        /* number of consecutive calls that detected lost ticks */
362         static int lost_count = 0;
363
364         write_seqlock(&monotonic_lock);
365         last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
366         /*
367          * It is important that these two operations happen almost at
368          * the same time. We do the RDTSC stuff first, since it's
369          * faster. To avoid any inconsistencies, we need interrupts
370          * disabled locally.
371          */
372
373         /*
374          * Interrupts are just disabled locally since the timer irq
375          * has the SA_INTERRUPT flag set. -arca
376          */
377
378         /* read Pentium cycle counter */
379
380         rdtsc(last_tsc_low, last_tsc_high);
381
382         spin_lock(&i8253_lock);
383         outb_p(0x00, PIT_MODE);     /* latch the count ASAP */
384
385         count = inb_p(PIT_CH0);    /* read the latched count */
386         count |= inb(PIT_CH0) << 8;
387
388         /*
389          * VIA686a test code... reset the latch if count > max + 1
390          * from timer_pit.c - cjb
391          */
392         if (count > LATCH) {
393                 outb_p(0x34, PIT_MODE);
394                 outb_p(LATCH & 0xff, PIT_CH0);
395                 outb(LATCH >> 8, PIT_CH0);
396                 count = LATCH - 1;
397         }
398
399         spin_unlock(&i8253_lock);
400
401         if (pit_latch_buggy) {
402                 /* get center value of last 3 time latch */
403                 if ((count2 >= count && count >= count1)
404                     || (count1 >= count && count >= count2)) {
405                         count2 = count1; count1 = count;
406                 } else if ((count1 >= count2 && count2 >= count)
407                            || (count >= count2 && count2 >= count1)) {
408                         countmp = count;count = count2;
409                         count2 = count1;count1 = countmp;
410                 } else {
411                         count2 = count1; count1 = count; count = count1;
412                 }
413         }
414
415         /* lost tick compensation */
416         delta = last_tsc_low - delta;
417         {
                /* delta (cycles) -> usecs: high word of delta * fast_gettimeoffset_quotient */
418                 register unsigned long eax, edx;
419                 eax = delta;
420                 __asm__("mull %2"
421                 :"=a" (eax), "=d" (edx)
422                 :"rm" (fast_gettimeoffset_quotient),
423                  "0" (eax));
424                 delta = edx;
425         }
426         delta += delay_at_last_interrupt;
        /* lost: whole tick periods covered by delta; delay: usecs left over */
427         lost = delta/(1000000/HZ);
428         delay = delta%(1000000/HZ);
429         if (lost >= 2) {
                /* NOTE(review): lost-1, presumably because the caller accounts for the current tick -- confirm */
430                 jiffies_64 += lost-1;
431
432                 /* sanity check to ensure we're not always losing ticks */
433                 if (lost_count++ > 100) {
434                         printk(KERN_WARNING "Losing too many ticks!\n");
435                         printk(KERN_WARNING "TSC cannot be used as a timesource.  \n");
436                         printk(KERN_WARNING "Possible reasons for this are:\n");
437                         printk(KERN_WARNING "  You're running with Speedstep,\n");
438                         printk(KERN_WARNING "  You don't have DMA enabled for your hard disk (see hdparm),\n");
439                         printk(KERN_WARNING "  Incorrect TSC synchronization on an SMP system (see dmesg).\n");
440                         printk(KERN_WARNING "Falling back to a sane timesource now.\n");
441
442                         clock_fallback();
443                 }
444                 /* ... but give the TSC a fair chance */
445                 if (lost_count > 25)
446                         cpufreq_delayed_get();
447         } else
448                 lost_count = 0;
449         /* update the monotonic base value */
450         this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
451         monotonic_base += cycles_2_ns(this_offset - last_offset);
452         write_sequnlock(&monotonic_lock);
453
454         /* calculate delay_at_last_interrupt */
        /* PIT counts elapsed in this tick, scaled to usecs and rounded to nearest */
455         count = ((LATCH-1) - count) * TICK_SIZE;
456         delay_at_last_interrupt = (count + LATCH/2) / LATCH;
457
458         /* catch corner case where tick rollover occurred
459          * between tsc and pit reads (as noted when
460          * usec delta is > 90% # of usecs/tick)
461          */
462         if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
463                 jiffies_64++;
464 }
465
/*
 * init_tsc - init hook of the "tsc" timer (see timer_tsc_init).
 *
 * @override: clock override string; empty means no override
 *
 * Calibrates the TSC (against the HPET when it is enabled and used as
 * timer, via calibrate_tsc() otherwise), then sets
 * fast_gettimeoffset_quotient, use_tsc, cpu_khz and cyc2ns_scale.
 *
 * Returns 0 on success.  Returns -ENODEV when another clock was requested
 * (and the HPET does not force the TSC), when the CPU has no TSC, or when
 * calibration yields 0.
 */
466 static int __init init_tsc(char* override)
467 {
468
469         /* check clock override */
        /* a non-empty override that does not start with "tsc" asks for another clock */
470         if (override[0] && strncmp(override,"tsc",3)) {
471 #ifdef CONFIG_HPET_TIMER
472                 if (is_hpet_enabled()) {
473                         printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
474                 } else
475 #endif
476                 {
477                         return -ENODEV;
478                 }
479         }
480
481         /*
482          * If we have APM enabled or the CPU clock speed is variable
483          * (CPU stops clock on HLT or slows clock to save power)
484          * then the TSC timestamps may diverge by up to 1 jiffy from
485          * 'real time' but nothing will break.
486          * The most frequent case is that the CPU is "woken" from a halt
487          * state by the timer interrupt itself, so we get 0 error. In the
488          * rare cases where a driver would "wake" the CPU and request a
489          * timestamp, the maximum error is < 1 jiffy. But timestamps are
490          * still perfectly ordered.
491          * Note that the TSC counter will be reset if APM suspends
492          * to disk; this won't break the kernel, though, 'cuz we're
493          * smart.  See arch/i386/kernel/apm.c.
494          */
495         /*
496          *      Firstly we have to do a CPU check for chips with
497          *      a potentially buggy TSC. At this point we haven't run
498          *      the ident/bugs checks so we must run this hook as it
499          *      may turn off the TSC flag.
500          *
501          *      NOTE: this doesn't yet handle SMP 486 machines where only
502          *      some CPU's have a TSC. Thats never worked and nobody has
503          *      moaned if you have the only one in the world - you fix it!
504          */
505
506         count2 = LATCH; /* initialize counter for mark_offset_tsc() */
507
508         if (cpu_has_tsc) {
509                 unsigned long tsc_quotient;
510 #ifdef CONFIG_HPET_TIMER
511                 if (is_hpet_enabled() && hpet_use_timer) {
512                         unsigned long result, remain;
513                         printk("Using TSC for gettimeofday\n");
514                         tsc_quotient = calibrate_tsc_hpet(NULL);
                        /* switch the tick hook to the HPET variant */
515                         timer_tsc.mark_offset = &mark_offset_tsc_hpet;
516                         /*
517                          * Math to calculate hpet to usec multiplier
518                          * Look for the comments at mark_offset_tsc_hpet()
519                          */
520                         ASM_DIV64_REG(result, remain, hpet_tick,
521                                         0, KERNEL_TICK_USEC);
522                         if (remain > (hpet_tick >> 1))
523                                 result++; /* rounding the result */
524
525                         hpet_usec_quotient = result;
526                 } else
527 #endif
528                 {
529                         tsc_quotient = calibrate_tsc();
530                 }
531
                /* a zero quotient means calibration failed: fall through to -ENODEV */
532                 if (tsc_quotient) {
533                         fast_gettimeoffset_quotient = tsc_quotient;
534                         use_tsc = 1;
535                         /*
536                          *      We could be more selective here I suspect
537                          *      and just enable this for the next intel chips ?
538                          */
539                         /* compute the CPU clock rate in kHz (printed as MHz):
540                          * cpu_khz = (1000 * 2^32) / tsc_quotient, with the dividend
541                          * passed to divl as edx:eax = 1000:0. Our precision is about 100 ppm.
542                          */
543                         {       unsigned long eax=0, edx=1000;
544                                 __asm__("divl %2"
545                                 :"=a" (cpu_khz), "=d" (edx)
546                                 :"r" (tsc_quotient),
547                                 "0" (eax), "1" (edx));
548                                 printk("Detected %u.%03u MHz processor.\n",
549                                         cpu_khz / 1000, cpu_khz % 1000);
550                         }
551                         set_cyc2ns_scale(cpu_khz);
552                         return 0;
553                 }
554         }
555         return -ENODEV;
556 }
557
/*
 * tsc_resume - resume hook of the "tsc" timer.
 *
 * Re-latches the current TSC value (and, when the HPET is enabled and used
 * as timer, the HPET counter) under monotonic_lock, so that the next
 * mark_offset call measures its delta from this point.  Always returns 0.
 */
558 static int tsc_resume(void)
559 {
560         write_seqlock(&monotonic_lock);
561         /* Assume this is the last mark offset time */
562         rdtsc(last_tsc_low, last_tsc_high);
563 #ifdef CONFIG_HPET_TIMER
564         if (is_hpet_enabled() && hpet_use_timer)
565                 hpet_last = hpet_readl(HPET_COUNTER);
566 #endif
567         write_sequnlock(&monotonic_lock);
568         return 0;
569 }
570
571 #ifndef CONFIG_X86_TSC
572 /* disable flag for tsc.  Takes effect by clearing the TSC cpu flag
573  * in cpu/common.c */
574 static int __init tsc_setup(char *str)
575 {
576         tsc_disable = 1;
577         return 1;
578 }
579 #else
580 static int __init tsc_setup(char *str)
581 {
582         printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
583                                 "cannot disable TSC.\n");
584         return 1;
585 }
586 #endif
587 __setup("notsc", tsc_setup);
588
589
590
591 /************************************************************/
592
593 /* tsc timer_opts struct */
/*
 * Operations table of the "tsc" timer.  mark_offset is replaced with
 * mark_offset_tsc_hpet by init_tsc() when the HPET is enabled and used as
 * timer.  read_timer_tsc is not defined in this file.
 */
594 static struct timer_opts timer_tsc = {
595         .name = "tsc",
596         .mark_offset = mark_offset_tsc, 
597         .get_offset = get_offset_tsc,
598         .monotonic_clock = monotonic_clock_tsc,
599         .delay = delay_tsc,
600         .read_timer = read_timer_tsc,
601         .resume = tsc_resume,
602 };
603
/* Exported descriptor pairing the init_tsc() probe with the timer_tsc operations. */
604 struct init_timer_opts __initdata timer_tsc_init = {
605         .init = init_tsc,
606         .opts = &timer_tsc,
607 };