arch/x86/kernel/cpu/common_64.c

   1 #include <linux/init.h>
   2 #include <linux/kernel.h>
   3 #include <linux/sched.h>
   4 #include <linux/string.h>
   5 #include <linux/bootmem.h>
   6 #include <linux/bitops.h>
   7 #include <linux/module.h>
   8 #include <linux/kgdb.h>
   9 #include <linux/topology.h>
  10 #include <linux/delay.h>
  11 #include <linux/smp.h>
  12 #include <linux/percpu.h>
  13 #include <asm/i387.h>
  14 #include <asm/msr.h>
  15 #include <asm/io.h>
  16 #include <asm/linkage.h>
  17 #include <asm/mmu_context.h>
  18 #include <asm/mtrr.h>
  19 #include <asm/mce.h>
  20 #include <asm/pat.h>
  21 #include <asm/asm.h>
  22 #include <asm/numa.h>
  23 #ifdef CONFIG_X86_LOCAL_APIC
  24 #include <asm/mpspec.h>
  25 #include <asm/apic.h>
  26 #include <mach_apic.h>
  27 #endif
  28 #include <asm/pda.h>
  29 #include <asm/pgtable.h>
  30 #include <asm/processor.h>
  31 #include <asm/desc.h>
  32 #include <asm/atomic.h>
  33 #include <asm/proto.h>
  34 #include <asm/sections.h>
  35 #include <asm/setup.h>
  36 #include <asm/genapic.h>
  37
  38 #include "cpu.h"
  39
/* Vendor descriptor chosen by get_cpu_vendor() for the CPU being identified. */
static struct cpu_dev *this_cpu __cpuinitdata;
  41
/*
 * We need valid kernel segments for data and code in long mode too:
 * IRET will check the segment types (kkeil 2000/10/28).
 * SYSRET also mandates a special GDT layout.
 *
 * The TLS descriptors are currently at a different place compared to i386.
 * Hopefully nobody expects them at a fixed place (Wine?).
 */
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
        [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
        [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
        [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
        [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
        [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
        [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
  57
/*
 * Capability bits that identify_cpu() masks out of every CPU's
 * x86_capability[] words (presumably filled in by setup_clear_cpu_cap();
 * confirm against its definition).
 */
__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
  59
  60 /* Current gdt points %fs at the "master" per-cpu area: after this,
  61  * it's on the real one. */
  62 void switch_to_new_gdt(void)
  63 {
  64         struct desc_ptr gdt_descr;
  65
  66         gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
  67         gdt_descr.size = GDT_SIZE - 1;
  68         load_gdt(&gdt_descr);
  69 }
  70
/*
 * Known vendor descriptors, filled front to back by early_cpu_init() and
 * searched by get_cpu_vendor(); unused trailing slots stay NULL.
 */
static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
  72
/* c_init hook of the fallback descriptor: only reports the cache sizes. */
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
        display_cacheinfo(c);
}
  77
/*
 * Fallback descriptor that get_cpu_vendor() selects when the CPUID vendor
 * string matches none of the entries in cpu_devs[].
 */
static struct cpu_dev __cpuinitdata default_cpu = {
        .c_init = default_init,
        .c_vendor = "Unknown",
        .c_x86_vendor = X86_VENDOR_UNKNOWN,
};
  83
  84 int __cpuinit get_model_name(struct cpuinfo_x86 *c)
  85 {
  86         unsigned int *v;
  87         char *p, *q;
  88
  89         if (c->extended_cpuid_level < 0x80000004)
  90                 return 0;
  91
  92         v = (unsigned int *) c->x86_model_id;
  93         cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
  94         cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
  95         cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
  96         c->x86_model_id[48] = 0;
  97
  98         /* Intel chips right-justify this string for some dumb reason;
  99            undo that brain damage */
 100         p = q = &c->x86_model_id[0];
 101         while (*p == ' ')
 102              p++;
 103         if (p != q) {
 104              while (*p)
 105                   *q++ = *p++;
 106              while (q <= &c->x86_model_id[48])
 107                   *q++ = '\0';  /* Zero-pad the rest */
 108         }
 109
 110         return 1;
 111 }
 112
 113
 114 void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 115 {
 116         unsigned int n, dummy, ebx, ecx, edx, l2size;
 117
 118         n = c->extended_cpuid_level;
 119
 120         if (n >= 0x80000005) {
 121                 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
 122                 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
 123                                 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
 124                 c->x86_cache_size = (ecx>>24) + (edx>>24);
 125                 /* On K8 L1 TLB is inclusive, so don't count it */
 126                 c->x86_tlbsize = 0;
 127         }
 128
 129         if (n < 0x80000006)     /* Some chips just has a large L1. */
 130                 return;
 131
 132         cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
 133         l2size = ecx >> 16;
 134         c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
 135
 136         c->x86_cache_size = l2size;
 137
 138         printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
 139                         l2size, ecx & 0xFF);
 140 }
 141
 142 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 143 {
 144 #ifdef CONFIG_SMP
 145         u32 eax, ebx, ecx, edx;
 146         int index_msb, core_bits;
 147
 148         if (!cpu_has(c, X86_FEATURE_HT))
 149                 return;
 150         if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
 151                 goto out;
 152
 153         if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
 154                 return;
 155
 156         cpuid(1, &eax, &ebx, &ecx, &edx);
 157
 158         smp_num_siblings = (ebx & 0xff0000) >> 16;
 159
 160         if (smp_num_siblings == 1) {
 161                 printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
 162         } else if (smp_num_siblings > 1) {
 163
 164                 if (smp_num_siblings > NR_CPUS) {
 165                         printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
 166                                         smp_num_siblings);
 167                         smp_num_siblings = 1;
 168                         return;
 169                 }
 170
 171                 index_msb = get_count_order(smp_num_siblings);
 172                 c->phys_proc_id = phys_pkg_id(index_msb);
 173
 174                 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
 175
 176                 index_msb = get_count_order(smp_num_siblings);
 177
 178                 core_bits = get_count_order(c->x86_max_cores);
 179
 180                 c->cpu_core_id = phys_pkg_id(index_msb) &
 181                                                ((1 << core_bits) - 1);
 182         }
 183
 184 out:
 185         if ((c->x86_max_cores * smp_num_siblings) > 1) {
 186                 printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
 187                        c->phys_proc_id);
 188                 printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
 189                        c->cpu_core_id);
 190         }
 191 #endif
 192 }
 193
 194 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 195 {
 196         char *v = c->x86_vendor_id;
 197         int i;
 198         static int printed;
 199
 200         for (i = 0; i < X86_VENDOR_NUM; i++) {
 201                 if (!cpu_devs[i])
 202                         break;
 203
 204                 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
 205                     (cpu_devs[i]->c_ident[1] &&
 206                      !strcmp(v, cpu_devs[i]->c_ident[1]))) {
 207                         this_cpu = cpu_devs[i];
 208                         c->x86_vendor = this_cpu->c_x86_vendor;
 209                         return;
 210                 }
 211         }
 212
 213         if (!printed) {
 214                 printed++;
 215                 printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
 216                 printk(KERN_ERR "CPU: Your system may be unstable.\n");
 217         }
 218
 219         c->x86_vendor = X86_VENDOR_UNKNOWN;
 220         this_cpu = &default_cpu;
 221 }
 222
 223 void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
 224 {
 225         /* Get vendor name */
 226         cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
 227               (unsigned int *)&c->x86_vendor_id[0],
 228               (unsigned int *)&c->x86_vendor_id[8],
 229               (unsigned int *)&c->x86_vendor_id[4]);
 230
 231         c->x86 = 4;
 232         /* Intel-defined flags: level 0x00000001 */
 233         if (c->cpuid_level >= 0x00000001) {
 234                 u32 junk, tfms, cap0, misc;
 235                 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
 236                 c->x86 = (tfms >> 8) & 0xf;
 237                 c->x86_model = (tfms >> 4) & 0xf;
 238                 c->x86_mask = tfms & 0xf;
 239                 if (c->x86 == 0xf)
 240                         c->x86 += (tfms >> 20) & 0xff;
 241                 if (c->x86 >= 0x6)
 242                         c->x86_model += ((tfms >> 16) & 0xf) << 4;
 243                 if (cap0 & (1<<19)) {
 244                         c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
 245                         c->x86_cache_alignment = c->x86_clflush_size;
 246                 }
 247         }
 248 }
 249
 250
 251 static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
 252 {
 253         u32 tfms, xlvl;
 254         u32 ebx;
 255
 256         /* Intel-defined flags: level 0x00000001 */
 257         if (c->cpuid_level >= 0x00000001) {
 258                 u32 capability, excap;
 259
 260                 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
 261                 c->x86_capability[0] = capability;
 262                 c->x86_capability[4] = excap;
 263         }
 264
 265         /* AMD-defined flags: level 0x80000001 */
 266         xlvl = cpuid_eax(0x80000000);
 267         c->extended_cpuid_level = xlvl;
 268         if ((xlvl & 0xffff0000) == 0x80000000) {
 269                 if (xlvl >= 0x80000001) {
 270                         c->x86_capability[1] = cpuid_edx(0x80000001);
 271                         c->x86_capability[6] = cpuid_ecx(0x80000001);
 272                 }
 273         }
 274
 275         /* Transmeta-defined flags: level 0x80860001 */
 276         xlvl = cpuid_eax(0x80860000);
 277         if ((xlvl & 0xffff0000) == 0x80860000) {
 278                 /* Don't set x86_cpuid_level here for now to not confuse. */
 279                 if (xlvl >= 0x80860001)
 280                         c->x86_capability[2] = cpuid_edx(0x80860001);
 281         }
 282
 283         if (c->extended_cpuid_level >= 0x80000007)
 284                 c->x86_power = cpuid_edx(0x80000007);
 285
 286         if (c->extended_cpuid_level >= 0x80000008) {
 287                 u32 eax = cpuid_eax(0x80000008);
 288
 289                 c->x86_virt_bits = (eax >> 8) & 0xff;
 290                 c->x86_phys_bits = eax & 0xff;
 291         }
 292 }
 293
 294 /* Do some early cpuid on the boot CPU to get some parameter that are
 295    needed before check_bugs. Everything advanced is in identify_cpu
 296    below. */
 297 static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 298 {
 299
 300         c->x86_clflush_size = 64;
 301         c->x86_cache_alignment = c->x86_clflush_size;
 302
 303         memset(&c->x86_capability, 0, sizeof c->x86_capability);
 304
 305         c->extended_cpuid_level = 0;
 306
 307         cpu_detect(c);
 308
 309         get_cpu_vendor(c);
 310
 311         get_cpu_cap(c);
 312
 313         if (this_cpu->c_early_init)
 314                 this_cpu->c_early_init(c);
 315
 316         validate_pat_support(c);
 317 }
 318
 319 void __init early_cpu_init(void)
 320 {
 321         struct cpu_dev **cdev;
 322         int count = 0;
 323
 324         printk("KERNEL supported cpus:\n");
 325         for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
 326                 struct cpu_dev *cpudev = *cdev;
 327                 unsigned int j;
 328
 329                 if (count >= X86_VENDOR_NUM)
 330                         break;
 331                 cpu_devs[count] = cpudev;
 332                 count++;
 333
 334                 for (j = 0; j < 2; j++) {
 335                         if (!cpudev->c_ident[j])
 336                                 continue;
 337                         printk("  %s %s\n", cpudev->c_vendor,
 338                                 cpudev->c_ident[j]);
 339                 }
 340         }
 341
 342         early_identify_cpu(&boot_cpu_data);
 343 }
 344
 345 /*
 346  * The NOPL instruction is supposed to exist on all CPUs with
 347  * family >= 6, unfortunately, that's not true in practice because
 348  * of early VIA chips and (more importantly) broken virtualizers that
 349  * are not easy to detect.  Hence, probe for it based on first
 350  * principles.
 351  *
 352  * Note: no 64-bit chip is known to lack these, but put the code here
 353  * for consistency with 32 bits, and to make it utterly trivial to
 354  * diagnose the problem should it ever surface.
 355  */
 356 static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
 357 {
 358         const u32 nopl_signature = 0x888c53b1; /* Random number */
 359         u32 has_nopl = nopl_signature;
 360
 361         clear_cpu_cap(c, X86_FEATURE_NOPL);
 362         if (c->x86 >= 6) {
 363                 asm volatile("\n"
 364                              "1:      .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
 365                              "2:\n"
 366                              "        .section .fixup,\"ax\"\n"
 367                              "3:      xor %0,%0\n"
 368                              "        jmp 2b\n"
 369                              "        .previous\n"
 370                              _ASM_EXTABLE(1b,3b)
 371                              : "+a" (has_nopl));
 372
 373                 if (has_nopl == nopl_signature)
 374                         set_cpu_cap(c, X86_FEATURE_NOPL);
 375         }
 376 }
 377
 378 static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 379 {
 380         c->extended_cpuid_level = 0;
 381
 382         cpu_detect(c);
 383
 384         get_cpu_vendor(c);
 385
 386         get_cpu_cap(c);
 387
 388         c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
 389 #ifdef CONFIG_SMP
 390         c->phys_proc_id = c->initial_apicid;
 391 #endif
 392
 393         if (c->extended_cpuid_level >= 0x80000004)
 394                 get_model_name(c); /* Default name */
 395
 396         init_scattered_cpuid_features(c);
 397         detect_nopl(c);
 398 }
 399
/*
 * This does the hard work of actually picking apart the CPU stuff...
 *
 * Resets the identification fields of @c to defaults, runs the generic and
 * the vendor-specific identification, folds the resulting capability words
 * into boot_cpu_data (for any CPU other than the boot CPU), applies the
 * cleared_cpu_caps[] overrides, and finally hands @c to the machine-check
 * (CONFIG_X86_MCE), idle-routine and NUMA (CONFIG_NUMA) setup.
 */
static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
{
        int i;

        c->loops_per_jiffy = loops_per_jiffy;
        c->x86_cache_size = -1;
        c->x86_vendor = X86_VENDOR_UNKNOWN;
        c->x86_model = c->x86_mask = 0; /* So far unknown... */
        c->x86_vendor_id[0] = '\0'; /* Unset */
        c->x86_model_id[0] = '\0';  /* Unset */
        c->x86_max_cores = 1;
        c->x86_coreid_bits = 0;
        c->x86_clflush_size = 64;
        c->x86_cache_alignment = c->x86_clflush_size;
        memset(&c->x86_capability, 0, sizeof c->x86_capability);

        generic_identify(c);

        c->apicid = phys_pkg_id(0);

        /*
         * Vendor-specific initialization.  In this section we
         * canonicalize the feature flags, meaning if there are
         * features a certain CPU supports which CPUID doesn't
         * tell us, CPUID claiming incorrect flags, or other bugs,
         * we handle them here.
         *
         * At the end of this section, c->x86_capability better
         * indicate the features this CPU genuinely supports!
         */
        if (this_cpu->c_init)
                this_cpu->c_init(c);

        /* Must follow c_init(): detect_ht() divides by c->x86_max_cores. */
        detect_ht(c);

        /*
         * On SMP, boot_cpu_data holds the common feature set between
         * all CPUs; so make sure that we indicate which features are
         * common between the CPUs.  The first time this routine gets
         * executed, c == &boot_cpu_data.
         */
        if (c != &boot_cpu_data) {
                /* AND the already accumulated flags with these */
                for (i = 0; i < NCAPINTS; i++)
                        boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
        }

        /* Clear all flags overridden by options */
        for (i = 0; i < NCAPINTS; i++)
                c->x86_capability[i] &= ~cleared_cpu_caps[i];

#ifdef CONFIG_X86_MCE
        mcheck_init(c);
#endif
        select_idle_routine(c);

#ifdef CONFIG_NUMA
        numa_add_cpu(smp_processor_id());
#endif

}
 464
/* Run the full CPU identification on the boot CPU's boot_cpu_data. */
void __init identify_boot_cpu(void)
{
        identify_cpu(&boot_cpu_data);
}
 469
/*
 * Run the full CPU identification on a secondary CPU's descriptor @c and
 * then call mtrr_ap_init(). @c must not be boot_cpu_data (BUG otherwise).
 */
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
{
        BUG_ON(c == &boot_cpu_data);
        identify_cpu(c);
        mtrr_ap_init();
}
 476
/*
 * A range of MSR indices for print_cpu_msr(), which walks it from min up to
 * but not including max.
 */
struct msr_range {
        unsigned min;
        unsigned max;
};
 481
/* MSR index ranges dumped by print_cpu_msr(). */
static struct msr_range msr_range_array[] __cpuinitdata = {
        { 0x00000000, 0x00000418},
        { 0xc0000000, 0xc000040b},
        { 0xc0010000, 0xc0010142},
        { 0xc0011000, 0xc001103b},
};
 488
 489 static void __cpuinit print_cpu_msr(void)
 490 {
 491         unsigned index;
 492         u64 val;
 493         int i;
 494         unsigned index_min, index_max;
 495
 496         for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
 497                 index_min = msr_range_array[i].min;
 498                 index_max = msr_range_array[i].max;
 499                 for (index = index_min; index < index_max; index++) {
 500                         if (rdmsrl_amd_safe(index, &val))
 501                                 continue;
 502                         printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
 503                 }
 504         }
 505 }
 506
 507 static int show_msr __cpuinitdata;
 508 static __init int setup_show_msr(char *arg)
 509 {
 510         int num;
 511
 512         get_option(&arg, &num);
 513
 514         if (num > 0)
 515                 show_msr = num;
 516         return 1;
 517 }
 518 __setup("show_msr=", setup_show_msr);
 519
/*
 * "noclflush" boot option: request that X86_FEATURE_CLFLSH be cleared.
 * @arg is unused. Always returns 1.
 */
static __init int setup_noclflush(char *arg)
{
        setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
        return 1;
}
__setup("noclflush", setup_noclflush);
 526
 527 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 528 {
 529         if (c->x86_model_id[0])
 530                 printk(KERN_CONT "%s", c->x86_model_id);
 531
 532         if (c->x86_mask || c->cpuid_level >= 0)
 533                 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
 534         else
 535                 printk(KERN_CONT "\n");
 536
 537 #ifdef CONFIG_SMP
 538         if (c->cpu_index < show_msr)
 539                 print_cpu_msr();
 540 #else
 541         if (show_msr)
 542                 print_cpu_msr();
 543 #endif
 544 }
 545
 546 static __init int setup_disablecpuid(char *arg)
 547 {
 548         int bit;
 549         if (get_option(&arg, &bit) && bit < NCAPINTS*32)
 550                 setup_clear_cpu_cap(bit);
 551         else
 552                 return 0;
 553         return 1;
 554 }
 555 __setup("clearcpuid=", setup_disablecpuid);
 556
/* CPUs that have run cpu_init(); cpu_init() panics if its CPU is already set. */
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;

/* Table of PDA pointers (presumably indexed by CPU number; confirm against cpu_pda()). */
struct x8664_pda **_cpu_pda __read_mostly;
EXPORT_SYMBOL(_cpu_pda);

/* IDT descriptor loaded by cpu_init(): limit of 256 * 16 - 1, base at idt_table. */
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };

/* Interrupt stack used for CPU 0 by pda_init(). */
char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;

/* PTE bit mask; _PAGE_NX is removed from it by "noexec=off" and by check_efer(). */
unsigned long __supported_pte_mask __read_mostly = ~0UL;
EXPORT_SYMBOL_GPL(__supported_pte_mask);

/* Set by "noexec=off"; makes check_efer() drop _PAGE_NX regardless of EFER. */
static int do_not_nx __cpuinitdata;
 570
 571 /* noexec=on|off
 572 Control non executable mappings for 64bit processes.
 573
 574 on      Enable(default)
 575 off     Disable
 576 */
 577 static int __init nonx_setup(char *str)
 578 {
 579         if (!str)
 580                 return -EINVAL;
 581         if (!strncmp(str, "on", 2)) {
 582                 __supported_pte_mask |= _PAGE_NX;
 583                 do_not_nx = 0;
 584         } else if (!strncmp(str, "off", 3)) {
 585                 do_not_nx = 1;
 586                 __supported_pte_mask &= ~_PAGE_NX;
 587         }
 588         return 0;
 589 }
 590 early_param("noexec", nonx_setup);
 591
 592 int force_personality32;
 593
 594 /* noexec32=on|off
 595 Control non executable heap for 32bit processes.
 596 To control the stack too use noexec=off
 597
 598 on      PROT_READ does not imply PROT_EXEC for 32bit processes (default)
 599 off     PROT_READ implies PROT_EXEC
 600 */
 601 static int __init nonx32_setup(char *str)
 602 {
 603         if (!strcmp(str, "on"))
 604                 force_personality32 &= ~READ_IMPLIES_EXEC;
 605         else if (!strcmp(str, "off"))
 606                 force_personality32 |= READ_IMPLIES_EXEC;
 607         return 1;
 608 }
 609 __setup("noexec32=", nonx32_setup);
 610
/*
 * Point MSR_GS_BASE at the PDA of @cpu and fill in the PDA's initial fields.
 *
 * CPU 0 uses the static boot_cpu_stack as its interrupt stack. Every other
 * CPU gets one allocated here unless irqstackptr is already set, and
 * panics if that allocation fails.
 */
void pda_init(int cpu)
{
        struct x8664_pda *pda = cpu_pda(cpu);

        /* Setup up data that may be needed in __get_free_pages early */
        loadsegment(fs, 0);
        loadsegment(gs, 0);
        /* Memory clobbers used to order PDA accessed */
        mb();
        wrmsrl(MSR_GS_BASE, pda);
        mb();

        pda->cpunumber = cpu;
        pda->irqcount = -1;
        pda->kernelstack = (unsigned long)stack_thread_info() -
                                 PDA_STACKOFFSET + THREAD_SIZE;
        pda->active_mm = &init_mm;
        pda->mmu_state = 0;

        if (cpu == 0) {
                /* others are initialized in smpboot.c */
                pda->pcurrent = &init_task;
                pda->irqstackptr = boot_cpu_stack;
                /* Start 64 bytes below the end of the stack area. */
                pda->irqstackptr += IRQSTACKSIZE - 64;
        } else {
                /* Allocate an interrupt stack only if none was set up before. */
                if (!pda->irqstackptr) {
                        pda->irqstackptr = (char *)
                                __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
                        if (!pda->irqstackptr)
                                panic("cannot allocate irqstack for cpu %d",
                                      cpu);
                        pda->irqstackptr += IRQSTACKSIZE - 64;
                }

                /* Fill in the node number only while it is still 0 and a node is known. */
                if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
                        pda->nodenumber = cpu_to_node(cpu);
        }
}
 649
/*
 * Exception stacks of CPU 0, carved up by cpu_init(): N_EXCEPTION_STACKS - 1
 * stacks of EXCEPTION_STKSZ bytes plus one of DEBUG_STKSZ bytes.
 */
char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
                           DEBUG_STKSZ] __page_aligned_bss;

/* Entry point that syscall_init() installs in MSR_CSTAR. */
extern asmlinkage void ignore_sysret(void);
 654
 655 /* May not be marked __init: used by software suspend */
 656 void syscall_init(void)
 657 {
 658         /*
 659          * LSTAR and STAR live in a bit strange symbiosis.
 660          * They both write to the same internal register. STAR allows to
 661          * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
 662          */
 663         wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32);
 664         wrmsrl(MSR_LSTAR, system_call);
 665         wrmsrl(MSR_CSTAR, ignore_sysret);
 666
 667 #ifdef CONFIG_IA32_EMULATION
 668         syscall32_cpu_init();
 669 #endif
 670
 671         /* Flags to clear on syscall */
 672         wrmsrl(MSR_SYSCALL_MASK,
 673                X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
 674 }
 675
 676 void __cpuinit check_efer(void)
 677 {
 678         unsigned long efer;
 679
 680         rdmsrl(MSR_EFER, efer);
 681         if (!(efer & EFER_NX) || do_not_nx)
 682                 __supported_pte_mask &= ~_PAGE_NX;
 683 }
 684
/* Flags value saved at the end of cpu_init() via raw_local_save_flags(). */
unsigned long kernel_eflags;

/*
 * Copies of the original IST values from the TSS are only accessed during
 * debugging; no special alignment is required.
 */
DEFINE_PER_CPU(struct orig_ist, orig_ist);
 692
/*
 * cpu_init() initializes state that is per-CPU. Some data is already
 * initialized (naturally) in the bootstrap process, such as the GDT
 * and IDT. We reload them nevertheless, this function acts as a
 * 'CPU state barrier', nothing should get across.
 * A lot of state is already set up in PDA init.
 *
 * Panics if this CPU is already marked in cpu_initialized or if an
 * exception stack cannot be allocated.
 */
void __cpuinit cpu_init(void)
{
        int cpu = stack_smp_processor_id();
        struct tss_struct *t = &per_cpu(init_tss, cpu);
        struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
        unsigned long v;
        char *estacks = NULL;
        struct task_struct *me;
        int i;

        /* CPU 0 is initialised in head64.c */
        if (cpu != 0)
                pda_init(cpu);
        else
                estacks = boot_exception_stacks;

        me = current;

        if (cpu_test_and_set(cpu, cpu_initialized))
                panic("CPU#%d already initialized!\n", cpu);

        printk(KERN_INFO "Initializing CPU#%d\n", cpu);

        clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

        /*
         * Initialize the per-CPU GDT with the boot GDT,
         * and set up the GDT descriptor:
         */

        switch_to_new_gdt();
        load_idt((const struct desc_ptr *)&idt_descr);

        /* Clear the task's TLS descriptors (8 bytes each). */
        memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
        syscall_init();

        wrmsrl(MSR_FS_BASE, 0);
        wrmsrl(MSR_KERNEL_GS_BASE, 0);
        barrier();

        check_efer();
        if (cpu != 0 && x2apic)
                enable_x2apic();

        /*
         * set up and load the per-CPU TSS
         */
        /* The IST entries are set up only once: skip if ist[0] is already set. */
        if (!orig_ist->ist[0]) {
                static const unsigned int order[N_EXCEPTION_STACKS] = {
                  [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
                  [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
                };
                for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                        /*
                         * CPU 0 walks through the static
                         * boot_exception_stacks; every other CPU
                         * allocates each stack separately.
                         */
                        if (cpu) {
                                estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
                                if (!estacks)
                                        panic("Cannot allocate exception "
                                              "stack %ld %d\n", v, cpu);
                        }
                        /* The IST entry is the end address of the stack. */
                        estacks += PAGE_SIZE << order[v];
                        orig_ist->ist[v] = t->x86_tss.ist[v] =
                                        (unsigned long)estacks;
                }
        }

        t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
        /*
         * <= is required because the CPU will access up to
         * 8 bits beyond the end of the IO permission bitmap.
         */
        for (i = 0; i <= IO_BITMAP_LONGS; i++)
                t->io_bitmap[i] = ~0UL;

        /* Take a reference on init_mm and use it as this task's active mm. */
        atomic_inc(&init_mm.mm_count);
        me->active_mm = &init_mm;
        if (me->mm)
                BUG();
        enter_lazy_tlb(&init_mm, me);

        load_sp0(t, &current->thread);
        set_tss_desc(cpu, t);
        load_TR_desc();
        load_LDT(&init_mm.context);

#ifdef CONFIG_KGDB
        /*
         * If the kgdb is connected no debug regs should be altered.  This
         * is only applicable when KGDB and a KGDB I/O module are built
         * into the kernel and you are using early debugging with
         * kgdbwait. KGDB will control the kernel HW breakpoint registers.
         */
        if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
                arch_kgdb_ops.correct_hw_break();
        else {
#endif
        /*
         * Clear all 6 debug registers:
         */

        set_debugreg(0UL, 0);
        set_debugreg(0UL, 1);
        set_debugreg(0UL, 2);
        set_debugreg(0UL, 3);
        set_debugreg(0UL, 6);
        set_debugreg(0UL, 7);
#ifdef CONFIG_KGDB
        /* If the kgdb is connected no debug regs should be altered. */
        }
#endif

        fpu_init();

        raw_local_save_flags(kernel_eflags);

        if (is_uv_system())
                uv_cpu_init();
}
 815                 uv_cpu_init();
 816 }