]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'x86/cpu' into x86/x2apic
authorIngo Molnar <mingo@elte.hu>
Sat, 23 Aug 2008 15:46:59 +0000 (17:46 +0200)
committerIngo Molnar <mingo@elte.hu>
Sat, 23 Aug 2008 15:46:59 +0000 (17:46 +0200)
12 files changed:
1  2 
arch/x86/Kconfig
arch/x86/kernel/apic_32.c
arch/x86/kernel/cpu/common_64.c
arch/x86/kernel/cpu/feature_names.c
arch/x86/kernel/io_apic_32.c
arch/x86/kernel/io_apic_64.c
arch/x86/kernel/mpparse.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/vmi_32.c
include/asm-x86/cpufeature.h
include/asm-x86/hw_irq.h

diff --combined arch/x86/Kconfig
index 608a12ff483ac2a9066f39560d0fdab336ebf449,ac2fb0641a041e4a28055867140f458613673dc2..53ab36878736ab4c65bded4b22314256f2c54000
@@@ -22,7 -22,6 +22,6 @@@ config X8
        select HAVE_IDE
        select HAVE_OPROFILE
        select HAVE_IOREMAP_PROT
-       select HAVE_GET_USER_PAGES_FAST
        select HAVE_KPROBES
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select HAVE_KRETPROBES
@@@ -1658,14 -1657,6 +1657,14 @@@ config DMAR_FLOPPY_W
         workaround will setup a 1:1 mapping for the first
         16M to make floppy (an ISA device) work.
  
 +config INTR_REMAP
 +      bool "Support for Interrupt Remapping (EXPERIMENTAL)"
 +      depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
 +      help
 +       Supports Interrupt remapping for IO-APIC and MSI devices.
 +       To use x2apic mode in the CPUs which support x2APIC enhancements or
 +       to support platforms with CPUs having > 8 bit APIC ID, say Y.
 +
  source "drivers/pci/pcie/Kconfig"
  
  source "drivers/pci/Kconfig"
index f93c18f5b79dc2e5741fd932a391112eadedf07c,039a8d4aaf62db88eeb36040ad15ff64be9ed852..8228222ec9175a32e25b8a1fc20ffdeeed4b0a3e
@@@ -145,18 -145,13 +145,18 @@@ static int modern_apic(void
        return lapic_get_version() >= 0x14;
  }
  
 -void apic_wait_icr_idle(void)
 +/*
 + * Paravirt kernels might also be using these ops below. So we still
 + * use the generic apic_read()/apic_write(), which might point to
 + * different ops in the PARAVIRT case.
 + */
 +void xapic_wait_icr_idle(void)
  {
        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
                cpu_relax();
  }
  
 -u32 safe_apic_wait_icr_idle(void)
 +u32 safe_xapic_wait_icr_idle(void)
  {
        u32 send_status;
        int timeout;
        return send_status;
  }
  
 +void xapic_icr_write(u32 low, u32 id)
 +{
 +      apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
 +      apic_write(APIC_ICR, low);
 +}
 +
 +u64 xapic_icr_read(void)
 +{
 +      u32 icr1, icr2;
 +
 +      icr2 = apic_read(APIC_ICR2);
 +      icr1 = apic_read(APIC_ICR);
 +
 +      return icr1 | ((u64)icr2 << 32);
 +}
 +
 +static struct apic_ops xapic_ops = {
 +      .read = native_apic_mem_read,
 +      .write = native_apic_mem_write,
 +      .icr_read = xapic_icr_read,
 +      .icr_write = xapic_icr_write,
 +      .wait_icr_idle = xapic_wait_icr_idle,
 +      .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
 +};
 +
 +struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 +EXPORT_SYMBOL_GPL(apic_ops);
 +
  /**
   * enable_NMI_through_LVT0 - enable NMI through local vector table 0
   */
@@@ -1238,7 -1205,7 +1238,7 @@@ void __init init_apic_mappings(void
         * default configuration (or the MP table is broken).
         */
        if (boot_cpu_physical_apicid == -1U)
 -              boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 +              boot_cpu_physical_apicid = read_apic_id();
  
  }
  
@@@ -1275,7 -1242,7 +1275,7 @@@ int __init APIC_init_uniprocessor(void
         * might be zero if read from MP tables. Get it from LAPIC.
         */
  #ifdef CONFIG_CRASH_DUMP
 -      boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 +      boot_cpu_physical_apicid = read_apic_id();
  #endif
        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
  
@@@ -1753,15 -1720,19 +1753,19 @@@ static int __init parse_lapic_timer_c2_
  }
  early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
  
- static int __init apic_set_verbosity(char *str)
+ static int __init apic_set_verbosity(char *arg)
  {
-       if (strcmp("debug", str) == 0)
+       if (!arg)
+               return -EINVAL;
+       if (strcmp(arg, "debug") == 0)
                apic_verbosity = APIC_DEBUG;
-       else if (strcmp("verbose", str) == 0)
+       else if (strcmp(arg, "verbose") == 0)
                apic_verbosity = APIC_VERBOSE;
-       return 1;
+       return 0;
  }
__setup("apic=", apic_set_verbosity);
early_param("apic", apic_set_verbosity);
  
  static int __init lapic_insert_resource(void)
  {
index 6f9b8924bdc088d5ff4811767683532e608a28cb,c3afba5a81a7bfb1780e715c153349de42106df2..af569a964e74369a577aef35144e66ae3c1b4021
@@@ -18,6 -18,7 +18,7 @@@
  #include <asm/mtrr.h>
  #include <asm/mce.h>
  #include <asm/pat.h>
+ #include <asm/asm.h>
  #include <asm/numa.h>
  #ifdef CONFIG_X86_LOCAL_APIC
  #include <asm/mpspec.h>
@@@ -215,6 -216,39 +216,39 @@@ static void __init early_cpu_support_pr
        }
  }
  
+ /*
+  * The NOPL instruction is supposed to exist on all CPUs with
+  * family >= 6, unfortunately, that's not true in practice because
+  * of early VIA chips and (more importantly) broken virtualizers that
+  * are not easy to detect.  Hence, probe for it based on first
+  * principles.
+  *
+  * Note: no 64-bit chip is known to lack these, but put the code here
+  * for consistency with 32 bits, and to make it utterly trivial to
+  * diagnose the problem should it ever surface.
+  */
+ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+ {
+       const u32 nopl_signature = 0x888c53b1; /* Random number */
+       u32 has_nopl = nopl_signature;
+       clear_cpu_cap(c, X86_FEATURE_NOPL);
+       if (c->x86 >= 6) {
+               asm volatile("\n"
+                            "1:      .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
+                            "2:\n"
+                            "        .section .fixup,\"ax\"\n"
+                            "3:      xor %0,%0\n"
+                            "        jmp 2b\n"
+                            "        .previous\n"
+                            _ASM_EXTABLE(1b,3b)
+                            : "+a" (has_nopl));
+               if (has_nopl == nopl_signature)
+                       set_cpu_cap(c, X86_FEATURE_NOPL);
+       }
+ }
  static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
  
  void __init early_cpu_init(void)
@@@ -313,6 -347,8 +347,8 @@@ static void __cpuinit early_identify_cp
                c->x86_phys_bits = eax & 0xff;
        }
  
+       detect_nopl(c);
        if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
            cpu_devs[c->x86_vendor]->c_early_init)
                cpu_devs[c->x86_vendor]->c_early_init(c);
@@@ -597,8 -633,6 +633,8 @@@ void __cpuinit cpu_init(void
        barrier();
  
        check_efer();
 +      if (cpu != 0 && x2apic)
 +              enable_x2apic();
  
        /*
         * set up and load the per-CPU TSS
index 0bf4d37a04833e5b5532fa497453eb6410938130,c9017799497c5f99db3df1ce96d010b6ea5e3230..b96b69545fbf8f3324cf2124425ce3446a787d82
@@@ -39,13 -39,14 +39,14 @@@ const char * const x86_cap_flags[NCAPIN
        NULL, NULL, NULL, NULL,
        "constant_tsc", "up", NULL, "arch_perfmon",
        "pebs", "bts", NULL, NULL,
-       "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+       "rep_good", NULL, NULL, NULL,
+       "nopl", NULL, NULL, NULL,
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  
        /* Intel-defined (#2) */
        "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
        "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
 -      NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
 +      NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt",
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  
        /* VIA/Cyrix/Centaur-defined */
index 39e063a9a28adee09a11d93fdbfea858d2b48de9,09cddb57bec45e9ddd60ba376b3a70aca647ad8d..923be1f4a6d0d0d36c9a75e244fb995c4c1e6944
@@@ -57,7 -57,7 +57,7 @@@ atomic_t irq_mis_count
  static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
  
  static DEFINE_SPINLOCK(ioapic_lock);
static DEFINE_SPINLOCK(vector_lock);
+ DEFINE_SPINLOCK(vector_lock);
  
  int timer_through_8259 __initdata;
  
@@@ -1209,10 -1209,6 +1209,6 @@@ static int assign_irq_vector(int irq
        return vector;
  }
  
- void setup_vector_irq(int cpu)
- {
- }
  static struct irq_chip ioapic_chip;
  
  #define IOAPIC_AUTO   -1
@@@ -1486,7 -1482,6 +1482,7 @@@ static void print_APIC_bitfield(int bas
  void /*__init*/ print_local_APIC(void *dummy)
  {
        unsigned int v, ver, maxlvt;
 +      u64 icr;
  
        if (apic_verbosity == APIC_QUIET)
                return;
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
        printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
 -                      GET_APIC_ID(read_apic_id()));
 +                      GET_APIC_ID(v));
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
                printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
        }
  
 -      v = apic_read(APIC_ICR);
 -      printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
 -      v = apic_read(APIC_ICR2);
 -      printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
 +      icr = apic_icr_read();
 +      printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
 +      printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
  
        v = apic_read(APIC_LVTT);
        printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@@ -1702,7 -1698,8 +1698,7 @@@ void disable_IO_APIC(void
                entry.dest_mode       = 0; /* Physical */
                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                entry.vector          = 0;
 -              entry.dest.physical.physical_dest =
 -                                      GET_APIC_ID(read_apic_id());
 +              entry.dest.physical.physical_dest = read_apic_id();
  
                /*
                 * Add it to the IO-APIC irq-routing table:
index b9950dae59b7f7ddc961c3d36669ae50af33eaf2,61a83b70c18fcc65ce60b965ee3a6e0456622dc4..e63282e788640402828dc17cf641a9a8c71c003a
@@@ -37,7 -37,6 +37,7 @@@
  #include <acpi/acpi_bus.h>
  #endif
  #include <linux/bootmem.h>
 +#include <linux/dmar.h>
  
  #include <asm/idle.h>
  #include <asm/io.h>
@@@ -50,7 -49,6 +50,7 @@@
  #include <asm/nmi.h>
  #include <asm/msidef.h>
  #include <asm/hypertransport.h>
 +#include <asm/irq_remapping.h>
  
  #include <mach_ipi.h>
  #include <mach_apic.h>
@@@ -103,16 -101,13 +103,16 @@@ int timer_through_8259 __initdata
  static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
  
  static DEFINE_SPINLOCK(ioapic_lock);
- DEFINE_SPINLOCK(vector_lock);
static DEFINE_SPINLOCK(vector_lock);
  
  /*
   * # of IRQ routing registers
   */
  int nr_ioapic_registers[MAX_IO_APICS];
  
 +/* I/O APIC RTE contents at the OS boot up */
 +struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
 +
  /* I/O APIC entries */
  struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
  int nr_ioapics;
@@@ -308,12 -303,7 +308,12 @@@ static void __target_IO_APIC_irq(unsign
                pin = entry->pin;
                if (pin == -1)
                        break;
 -              io_apic_write(apic, 0x11 + pin*2, dest);
 +              /*
 +               * With interrupt-remapping, destination information comes
 +               * from interrupt-remapping table entry.
 +               */
 +              if (!irq_remapped(irq))
 +                      io_apic_write(apic, 0x11 + pin*2, dest);
                reg = io_apic_read(apic, 0x10 + pin*2);
                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
                reg |= vector;
@@@ -450,69 -440,6 +450,69 @@@ static void clear_IO_APIC (void
                        clear_IO_APIC_pin(apic, pin);
  }
  
 +/*
 + * Saves and masks all the unmasked IO-APIC RTE's
 + */
 +int save_mask_IO_APIC_setup(void)
 +{
 +      union IO_APIC_reg_01 reg_01;
 +      unsigned long flags;
 +      int apic, pin;
 +
 +      /*
 +       * The number of IO-APIC IRQ registers (== #pins):
 +       */
 +      for (apic = 0; apic < nr_ioapics; apic++) {
 +              spin_lock_irqsave(&ioapic_lock, flags);
 +              reg_01.raw = io_apic_read(apic, 1);
 +              spin_unlock_irqrestore(&ioapic_lock, flags);
 +              nr_ioapic_registers[apic] = reg_01.bits.entries+1;
 +      }
 +
 +      for (apic = 0; apic < nr_ioapics; apic++) {
 +              early_ioapic_entries[apic] =
 +                      kzalloc(sizeof(struct IO_APIC_route_entry) *
 +                              nr_ioapic_registers[apic], GFP_KERNEL);
 +              if (!early_ioapic_entries[apic])
 +                      return -ENOMEM;
 +      }
 +
 +      for (apic = 0; apic < nr_ioapics; apic++)
 +              for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 +                      struct IO_APIC_route_entry entry;
 +
 +                      entry = early_ioapic_entries[apic][pin] =
 +                              ioapic_read_entry(apic, pin);
 +                      if (!entry.mask) {
 +                              entry.mask = 1;
 +                              ioapic_write_entry(apic, pin, entry);
 +                      }
 +              }
 +      return 0;
 +}
 +
 +void restore_IO_APIC_setup(void)
 +{
 +      int apic, pin;
 +
 +      for (apic = 0; apic < nr_ioapics; apic++)
 +              for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
 +                      ioapic_write_entry(apic, pin,
 +                                         early_ioapic_entries[apic][pin]);
 +}
 +
 +void reinit_intr_remapped_IO_APIC(int intr_remapping)
 +{
 +      /*
 +       * for now plain restore of previous settings.
 +       * TBD: In the case of OS enabling interrupt-remapping,
 +       * IO-APIC RTE's need to be setup to point to interrupt-remapping
 +       * table entries. for now, do a plain restore, and wait for
 +       * the setup_IO_APIC_irqs() to do proper initialization.
 +       */
 +      restore_IO_APIC_setup();
 +}
 +
  int skip_ioapic_setup;
  int ioapic_force;
  
@@@ -770,6 -697,19 +770,19 @@@ static int pin_2_irq(int idx, int apic
        return irq;
  }
  
+ void lock_vector_lock(void)
+ {
+       /* Used to ensure the online set of cpus does not change
+        * during assign_irq_vector.
+        */
+       spin_lock(&vector_lock);
+ }
+ void unlock_vector_lock(void)
+ {
+       spin_unlock(&vector_lock);
+ }
  static int __assign_irq_vector(int irq, cpumask_t mask)
  {
        /*
@@@ -875,7 -815,7 +888,7 @@@ static void __clear_irq_vector(int irq
        cpus_clear(cfg->domain);
  }
  
static void __setup_vector_irq(int cpu)
+ void __setup_vector_irq(int cpu)
  {
        /* Initialize vector_irq on a new cpu */
        /* This function must be called with vector_lock held */
        }
  }
  
- void setup_vector_irq(int cpu)
- {
-       spin_lock(&vector_lock);
-       __setup_vector_irq(smp_processor_id());
-       spin_unlock(&vector_lock);
- }
  static struct irq_chip ioapic_chip;
 +#ifdef CONFIG_INTR_REMAP
 +static struct irq_chip ir_ioapic_chip;
 +#endif
  
  static void ioapic_register_intr(int irq, unsigned long trigger)
  {
 -      if (trigger) {
 +      if (trigger)
                irq_desc[irq].status |= IRQ_LEVEL;
 -              set_irq_chip_and_handler_name(irq, &ioapic_chip,
 -                                            handle_fasteoi_irq, "fasteoi");
 -      } else {
 +      else
                irq_desc[irq].status &= ~IRQ_LEVEL;
 +
 +#ifdef CONFIG_INTR_REMAP
 +      if (irq_remapped(irq)) {
 +              irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
 +              if (trigger)
 +                      set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
 +                                                    handle_fasteoi_irq,
 +                                                   "fasteoi");
 +              else
 +                      set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
 +                                                    handle_edge_irq, "edge");
 +              return;
 +      }
 +#endif
 +      if (trigger)
 +              set_irq_chip_and_handler_name(irq, &ioapic_chip,
 +                                            handle_fasteoi_irq,
 +                                            "fasteoi");
 +      else
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                              handle_edge_irq, "edge");
 +}
 +
 +static int setup_ioapic_entry(int apic, int irq,
 +                            struct IO_APIC_route_entry *entry,
 +                            unsigned int destination, int trigger,
 +                            int polarity, int vector)
 +{
 +      /*
 +       * add it to the IO-APIC irq-routing table:
 +       */
 +      memset(entry,0,sizeof(*entry));
 +
 +#ifdef CONFIG_INTR_REMAP
 +      if (intr_remapping_enabled) {
 +              struct intel_iommu *iommu = map_ioapic_to_ir(apic);
 +              struct irte irte;
 +              struct IR_IO_APIC_route_entry *ir_entry =
 +                      (struct IR_IO_APIC_route_entry *) entry;
 +              int index;
 +
 +              if (!iommu)
 +                      panic("No mapping iommu for ioapic %d\n", apic);
 +
 +              index = alloc_irte(iommu, irq, 1);
 +              if (index < 0)
 +                      panic("Failed to allocate IRTE for ioapic %d\n", apic);
 +
 +              memset(&irte, 0, sizeof(irte));
 +
 +              irte.present = 1;
 +              irte.dst_mode = INT_DEST_MODE;
 +              irte.trigger_mode = trigger;
 +              irte.dlvry_mode = INT_DELIVERY_MODE;
 +              irte.vector = vector;
 +              irte.dest_id = IRTE_DEST(destination);
 +
 +              modify_irte(irq, &irte);
 +
 +              ir_entry->index2 = (index >> 15) & 0x1;
 +              ir_entry->zero = 0;
 +              ir_entry->format = 1;
 +              ir_entry->index = (index & 0x7fff);
 +      } else
 +#endif
 +      {
 +              entry->delivery_mode = INT_DELIVERY_MODE;
 +              entry->dest_mode = INT_DEST_MODE;
 +              entry->dest = destination;
        }
 +
 +      entry->mask = 0;                                /* enable IRQ */
 +      entry->trigger = trigger;
 +      entry->polarity = polarity;
 +      entry->vector = vector;
 +
 +      /* Mask level triggered irqs.
 +       * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
 +       */
 +      if (trigger)
 +              entry->mask = 1;
 +      return 0;
  }
  
  static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
                    irq, trigger, polarity);
  
 -      /*
 -       * add it to the IO-APIC irq-routing table:
 -       */
 -      memset(&entry,0,sizeof(entry));
 -
 -      entry.delivery_mode = INT_DELIVERY_MODE;
 -      entry.dest_mode = INT_DEST_MODE;
 -      entry.dest = cpu_mask_to_apicid(mask);
 -      entry.mask = 0;                         /* enable IRQ */
 -      entry.trigger = trigger;
 -      entry.polarity = polarity;
 -      entry.vector = cfg->vector;
  
 -      /* Mask level triggered irqs.
 -       * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
 -       */
 -      if (trigger)
 -              entry.mask = 1;
 +      if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
 +                             cpu_mask_to_apicid(mask), trigger, polarity,
 +                             cfg->vector)) {
 +              printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 +                     mp_ioapics[apic].mp_apicid, pin);
 +              __clear_irq_vector(irq);
 +              return;
 +      }
  
        ioapic_register_intr(irq, trigger);
        if (irq < 16)
@@@ -1083,9 -944,6 +1088,9 @@@ static void __init setup_timer_IRQ0_pin
  {
        struct IO_APIC_route_entry entry;
  
 +      if (intr_remapping_enabled)
 +              return;
 +
        memset(&entry, 0, sizeof(entry));
  
        /*
@@@ -1232,7 -1090,6 +1237,7 @@@ static __apicdebuginit void print_APIC_
  void __apicdebuginit print_local_APIC(void * dummy)
  {
        unsigned int v, ver, maxlvt;
 +      unsigned long icr;
  
        if (apic_verbosity == APIC_QUIET)
                return;
        printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
 -      printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
 +      printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
        v = apic_read(APIC_ESR);
        printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
  
 -      v = apic_read(APIC_ICR);
 -      printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
 -      v = apic_read(APIC_ICR2);
 -      printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
 +      icr = apic_icr_read();
 +      printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
 +      printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
  
        v = apic_read(APIC_LVTT);
        printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@@ -1433,7 -1291,7 +1438,7 @@@ void disable_IO_APIC(void
                entry.dest_mode       = 0; /* Physical */
                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                entry.vector          = 0;
 -              entry.dest          = GET_APIC_ID(read_apic_id());
 +              entry.dest            = read_apic_id();
  
                /*
                 * Add it to the IO-APIC irq-routing table:
@@@ -1539,147 -1397,6 +1544,147 @@@ static int ioapic_retrigger_irq(unsigne
   */
  
  #ifdef CONFIG_SMP
 +
 +#ifdef CONFIG_INTR_REMAP
 +static void ir_irq_migration(struct work_struct *work);
 +
 +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
 +
 +/*
 + * Migrate the IO-APIC irq in the presence of intr-remapping.
 + *
 + * For edge triggered, irq migration is a simple atomic update(of vector
 + * and cpu destination) of IRTE and flush the hardware cache.
 + *
 + * For level triggered, we need to modify the io-apic RTE as well with the updated
 + * vector information, along with modifying IRTE with vector and destination.
 + * So irq migration for level triggered is a little bit more complex compared to
 + * edge triggered migration. But the good news is, we use the same algorithm
 + * for level triggered migration as we have today, only difference being,
 + * we now initiate the irq migration from process context instead of the
 + * interrupt context.
 + *
 + * In future, when we do a directed EOI (combined with cpu EOI broadcast
 + * suppression) to the IO-APIC, level triggered irq migration will also be
 + * as simple as edge triggered migration and we can do the irq migration
 + * with a simple atomic update to IO-APIC RTE.
 + */
 +static void migrate_ioapic_irq(int irq, cpumask_t mask)
 +{
 +      struct irq_cfg *cfg = irq_cfg + irq;
 +      struct irq_desc *desc = irq_desc + irq;
 +      cpumask_t tmp, cleanup_mask;
 +      struct irte irte;
 +      int modify_ioapic_rte = desc->status & IRQ_LEVEL;
 +      unsigned int dest;
 +      unsigned long flags;
 +
 +      cpus_and(tmp, mask, cpu_online_map);
 +      if (cpus_empty(tmp))
 +              return;
 +
 +      if (get_irte(irq, &irte))
 +              return;
 +
 +      if (assign_irq_vector(irq, mask))
 +              return;
 +
 +      cpus_and(tmp, cfg->domain, mask);
 +      dest = cpu_mask_to_apicid(tmp);
 +
 +      if (modify_ioapic_rte) {
 +              spin_lock_irqsave(&ioapic_lock, flags);
 +              __target_IO_APIC_irq(irq, dest, cfg->vector);
 +              spin_unlock_irqrestore(&ioapic_lock, flags);
 +      }
 +
 +      irte.vector = cfg->vector;
 +      irte.dest_id = IRTE_DEST(dest);
 +
 +      /*
 +       * Modify the IRTE and flush the Interrupt entry cache.
 +       */
 +      modify_irte(irq, &irte);
 +
 +      if (cfg->move_in_progress) {
 +              cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 +              cfg->move_cleanup_count = cpus_weight(cleanup_mask);
 +              send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 +              cfg->move_in_progress = 0;
 +      }
 +
 +      irq_desc[irq].affinity = mask;
 +}
 +
 +static int migrate_irq_remapped_level(int irq)
 +{
 +      int ret = -1;
 +
 +      mask_IO_APIC_irq(irq);
 +
 +      if (io_apic_level_ack_pending(irq)) {
 +              /*
 +               * Interrupt in progress. Migrating irq now will change the
 +               * vector information in the IO-APIC RTE and that will confuse
 +               * the EOI broadcast performed by cpu.
 +               * So, delay the irq migration to the next instance.
 +               */
 +              schedule_delayed_work(&ir_migration_work, 1);
 +              goto unmask;
 +      }
 +
 +      /* everything is clear. we have right of way */
 +      migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
 +
 +      ret = 0;
 +      irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
 +      cpus_clear(irq_desc[irq].pending_mask);
 +
 +unmask:
 +      unmask_IO_APIC_irq(irq);
 +      return ret;
 +}
 +
 +static void ir_irq_migration(struct work_struct *work)
 +{
 +      int irq;
 +
 +      for (irq = 0; irq < NR_IRQS; irq++) {
 +              struct irq_desc *desc = irq_desc + irq;
 +              if (desc->status & IRQ_MOVE_PENDING) {
 +                      unsigned long flags;
 +
 +                      spin_lock_irqsave(&desc->lock, flags);
 +                      if (!desc->chip->set_affinity ||
 +                          !(desc->status & IRQ_MOVE_PENDING)) {
 +                              desc->status &= ~IRQ_MOVE_PENDING;
 +                              spin_unlock_irqrestore(&desc->lock, flags);
 +                              continue;
 +                      }
 +
 +                      desc->chip->set_affinity(irq,
 +                                               irq_desc[irq].pending_mask);
 +                      spin_unlock_irqrestore(&desc->lock, flags);
 +              }
 +      }
 +}
 +
 +/*
 + * Migrates the IRQ destination in the process context.
 + */
 +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 +{
 +      if (irq_desc[irq].status & IRQ_LEVEL) {
 +              irq_desc[irq].status |= IRQ_MOVE_PENDING;
 +              irq_desc[irq].pending_mask = mask;
 +              migrate_irq_remapped_level(irq);
 +              return;
 +      }
 +
 +      migrate_ioapic_irq(irq, mask);
 +}
 +#endif
 +
  asmlinkage void smp_irq_move_cleanup_interrupt(void)
  {
        unsigned vector, me;
@@@ -1736,17 -1453,6 +1741,17 @@@ static void irq_complete_move(unsigned 
  #else
  static inline void irq_complete_move(unsigned int irq) {}
  #endif
 +#ifdef CONFIG_INTR_REMAP
 +static void ack_x2apic_level(unsigned int irq)
 +{
 +      ack_x2APIC_irq();
 +}
 +
 +static void ack_x2apic_edge(unsigned int irq)
 +{
 +      ack_x2APIC_irq();
 +}
 +#endif
  
  static void ack_apic_edge(unsigned int irq)
  {
@@@ -1821,21 -1527,6 +1826,21 @@@ static struct irq_chip ioapic_chip __re
        .retrigger      = ioapic_retrigger_irq,
  };
  
 +#ifdef CONFIG_INTR_REMAP
 +static struct irq_chip ir_ioapic_chip __read_mostly = {
 +      .name           = "IR-IO-APIC",
 +      .startup        = startup_ioapic_irq,
 +      .mask           = mask_IO_APIC_irq,
 +      .unmask         = unmask_IO_APIC_irq,
 +      .ack            = ack_x2apic_edge,
 +      .eoi            = ack_x2apic_level,
 +#ifdef CONFIG_SMP
 +      .set_affinity   = set_ir_ioapic_affinity_irq,
 +#endif
 +      .retrigger      = ioapic_retrigger_irq,
 +};
 +#endif
 +
  static inline void init_IO_APIC_traps(void)
  {
        int irq;
@@@ -2021,8 -1712,6 +2026,8 @@@ static inline void __init check_timer(v
         * 8259A.
         */
        if (pin1 == -1) {
 +              if (intr_remapping_enabled)
 +                      panic("BIOS bug: timer not connected to IO-APIC");
                pin1 = pin2;
                apic1 = apic2;
                no_pin1 = 1;
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
                }
 +              if (intr_remapping_enabled)
 +                      panic("timer doesn't work through Interrupt-remapped IO-APIC");
                clear_IO_APIC_pin(apic1, pin1);
                if (!no_pin1)
                        apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@@ -2290,9 -1977,6 +2295,9 @@@ void destroy_irq(unsigned int irq
  
        dynamic_irq_cleanup(irq);
  
 +#ifdef CONFIG_INTR_REMAP
 +      free_irte(irq);
 +#endif
        spin_lock_irqsave(&vector_lock, flags);
        __clear_irq_vector(irq);
        spin_unlock_irqrestore(&vector_lock, flags);
@@@ -2311,41 -1995,10 +2316,41 @@@ static int msi_compose_msg(struct pci_d
  
        tmp = TARGET_CPUS;
        err = assign_irq_vector(irq, tmp);
 -      if (!err) {
 -              cpus_and(tmp, cfg->domain, tmp);
 -              dest = cpu_mask_to_apicid(tmp);
 +      if (err)
 +              return err;
 +
 +      cpus_and(tmp, cfg->domain, tmp);
 +      dest = cpu_mask_to_apicid(tmp);
 +
 +#ifdef CONFIG_INTR_REMAP
 +      if (irq_remapped(irq)) {
 +              struct irte irte;
 +              int ir_index;
 +              u16 sub_handle;
 +
 +              ir_index = map_irq_to_irte_handle(irq, &sub_handle);
 +              BUG_ON(ir_index == -1);
 +
 +              memset (&irte, 0, sizeof(irte));
 +
 +              irte.present = 1;
 +              irte.dst_mode = INT_DEST_MODE;
 +              irte.trigger_mode = 0; /* edge */
 +              irte.dlvry_mode = INT_DELIVERY_MODE;
 +              irte.vector = cfg->vector;
 +              irte.dest_id = IRTE_DEST(dest);
 +
 +              modify_irte(irq, &irte);
  
 +              msg->address_hi = MSI_ADDR_BASE_HI;
 +              msg->data = sub_handle;
 +              msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
 +                                MSI_ADDR_IR_SHV |
 +                                MSI_ADDR_IR_INDEX1(ir_index) |
 +                                MSI_ADDR_IR_INDEX2(ir_index);
 +      } else
 +#endif
 +      {
                msg->address_hi = MSI_ADDR_BASE_HI;
                msg->address_lo =
                        MSI_ADDR_BASE_LO |
@@@ -2396,55 -2049,6 +2401,55 @@@ static void set_msi_irq_affinity(unsign
        write_msi_msg(irq, &msg);
        irq_desc[irq].affinity = mask;
  }
 +
 +#ifdef CONFIG_INTR_REMAP
 +/*
 + * Migrate the MSI irq to another cpumask. This migration is
 + * done in the process context using interrupt-remapping hardware.
 + */
 +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 +{
 +      struct irq_cfg *cfg = irq_cfg + irq;
 +      unsigned int dest;
 +      cpumask_t tmp, cleanup_mask;
 +      struct irte irte;
 +
 +      cpus_and(tmp, mask, cpu_online_map);
 +      if (cpus_empty(tmp))
 +              return;
 +
 +      if (get_irte(irq, &irte))
 +              return;
 +
 +      if (assign_irq_vector(irq, mask))
 +              return;
 +
 +      cpus_and(tmp, cfg->domain, mask);
 +      dest = cpu_mask_to_apicid(tmp);
 +
 +      irte.vector = cfg->vector;
 +      irte.dest_id = IRTE_DEST(dest);
 +
 +      /*
 +       * atomically update the IRTE with the new destination and vector.
 +       */
 +      modify_irte(irq, &irte);
 +
 +      /*
 +       * After this point, all the interrupts will start arriving
 +       * at the new destination. So, time to cleanup the previous
 +       * vector allocation.
 +       */
 +      if (cfg->move_in_progress) {
 +              cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 +              cfg->move_cleanup_count = cpus_weight(cleanup_mask);
 +              send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 +              cfg->move_in_progress = 0;
 +      }
 +
 +      irq_desc[irq].affinity = mask;
 +}
 +#endif
  #endif /* CONFIG_SMP */
  
  /*
@@@ -2462,157 -2066,26 +2467,157 @@@ static struct irq_chip msi_chip = 
        .retrigger      = ioapic_retrigger_irq,
  };
  
 -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 +#ifdef CONFIG_INTR_REMAP
 +static struct irq_chip msi_ir_chip = {
 +      .name           = "IR-PCI-MSI",
 +      .unmask         = unmask_msi_irq,
 +      .mask           = mask_msi_irq,
 +      .ack            = ack_x2apic_edge,
 +#ifdef CONFIG_SMP
 +      .set_affinity   = ir_set_msi_irq_affinity,
 +#endif
 +      .retrigger      = ioapic_retrigger_irq,
 +};
 +
 +/*
 + * Map the PCI dev to the corresponding remapping hardware unit
 + * and allocate 'nvec' consecutive interrupt-remapping table entries
 + * in it.
 + */
 +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
  {
 +      struct intel_iommu *iommu;
 +      int index;
 +
 +      iommu = map_dev_to_ir(dev);
 +      if (!iommu) {
 +              printk(KERN_ERR
 +                     "Unable to map PCI %s to iommu\n", pci_name(dev));
 +              return -ENOENT;
 +      }
 +
 +      index = alloc_irte(iommu, irq, nvec);
 +      if (index < 0) {
 +              printk(KERN_ERR
 +                     "Unable to allocate %d IRTE for PCI %s\n", nvec,
 +                      pci_name(dev));
 +              return -ENOSPC;
 +      }
 +      return index;
 +}
 +#endif
 +
 +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 +{
 +      int ret;
        struct msi_msg msg;
 +
 +      ret = msi_compose_msg(dev, irq, &msg);
 +      if (ret < 0)
 +              return ret;
 +
 +      set_irq_msi(irq, desc);
 +      write_msi_msg(irq, &msg);
 +
 +#ifdef CONFIG_INTR_REMAP
 +      if (irq_remapped(irq)) {
 +              struct irq_desc *desc = irq_desc + irq;
 +              /*
 +               * irq migration in process context
 +               */
 +              desc->status |= IRQ_MOVE_PCNTXT;
 +              set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
 +      } else
 +#endif
 +              set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 +
 +      return 0;
 +}
 +
 +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 +{
        int irq, ret;
 +
        irq = create_irq();
        if (irq < 0)
                return irq;
  
 -      ret = msi_compose_msg(dev, irq, &msg);
 +#ifdef CONFIG_INTR_REMAP
 +      if (!intr_remapping_enabled)
 +              goto no_ir;
 +
 +      ret = msi_alloc_irte(dev, irq, 1);
 +      if (ret < 0)
 +              goto error;
 +no_ir:
 +#endif
 +      ret = setup_msi_irq(dev, desc, irq);
        if (ret < 0) {
                destroy_irq(irq);
                return ret;
        }
 +      return 0;
  
 -      set_irq_msi(irq, desc);
 -      write_msi_msg(irq, &msg);
 +#ifdef CONFIG_INTR_REMAP
 +error:
 +      destroy_irq(irq);
 +      return ret;
 +#endif
 +}
  
 -      set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 +{
 +      int irq, ret, sub_handle;
 +      struct msi_desc *desc;
 +#ifdef CONFIG_INTR_REMAP
 +      struct intel_iommu *iommu = 0;
 +      int index = 0;
 +#endif
 +
 +      sub_handle = 0;
 +      list_for_each_entry(desc, &dev->msi_list, list) {
 +              irq = create_irq();
 +              if (irq < 0)
 +                      return irq;
 +#ifdef CONFIG_INTR_REMAP
 +              if (!intr_remapping_enabled)
 +                      goto no_ir;
  
 +              if (!sub_handle) {
 +                      /*
 +                       * allocate the consecutive block of IRTE's
 +                       * for 'nvec'
 +                       */
 +                      index = msi_alloc_irte(dev, irq, nvec);
 +                      if (index < 0) {
 +                              ret = index;
 +                              goto error;
 +                      }
 +              } else {
 +                      iommu = map_dev_to_ir(dev);
 +                      if (!iommu) {
 +                              ret = -ENOENT;
 +                              goto error;
 +                      }
 +                      /*
 +                       * setup the mapping between the irq and the IRTE
 +                       * base index, the sub_handle pointing to the
 +                       * appropriate interrupt remap table entry.
 +                       */
 +                      set_irte_irq(irq, iommu, index, sub_handle);
 +              }
 +no_ir:
 +#endif
 +              ret = setup_msi_irq(dev, desc, irq);
 +              if (ret < 0)
 +                      goto error;
 +              sub_handle++;
 +      }
        return 0;
 +
 +error:
 +      destroy_irq(irq);
 +      return ret;
  }
  
  void arch_teardown_msi_irq(unsigned int irq)
@@@ -2860,10 -2333,6 +2865,10 @@@ void __init setup_ioapic_dest(void
                                setup_IO_APIC_irq(ioapic, pin, irq,
                                                  irq_trigger(irq_entry),
                                                  irq_polarity(irq_entry));
 +#ifdef CONFIG_INTR_REMAP
 +                      else if (intr_remapping_enabled)
 +                              set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
 +#endif
                        else
                                set_ioapic_affinity_irq(irq, TARGET_CPUS);
                }
index e362c6ab4d3596038e3ab30d30253a47168f487e,678090508a6240996aee5eedbdd5774cc86ab629..e5d23675bb7cc9acb106e323a82724f1e5b40409
@@@ -83,7 -83,7 +83,7 @@@ static void __init MP_bus_info(struct m
        if (x86_quirks->mpc_oem_bus_info)
                x86_quirks->mpc_oem_bus_info(m, str);
        else
-               printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
+               apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
  
  #if MAX_MP_BUSSES < 256
        if (m->mpc_busid >= MAX_MP_BUSSES) {
@@@ -154,7 -154,7 +154,7 @@@ static void __init MP_ioapic_info(struc
  
  static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
  {
-       printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+       apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
                " IRQ %02x, APIC ID %x, APIC INT %02x\n",
                m->mpc_irqtype, m->mpc_irqflag & 3,
                (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
  
  static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
  {
-       printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+       apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
                " IRQ %02x, APIC ID %x, APIC INT %02x\n",
                mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
                (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
@@@ -235,7 -235,7 +235,7 @@@ static void __init MP_intsrc_info(struc
  
  static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
  {
-       printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x,"
+       apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
                " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
                m->mpc_irqtype, m->mpc_irqflag & 3,
                (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@@ -397,9 -397,7 +397,9 @@@ static int __init smp_read_mpc(struct m
         generic_bigsmp_probe();
  #endif
  
 +#ifdef CONFIG_X86_32
        setup_apic_routing();
 +#endif
        if (!num_processors)
                printk(KERN_ERR "MPTABLE: no processors registered!\n");
        return num_processors;
@@@ -697,7 -695,8 +697,8 @@@ static int __init smp_scan_config(unsig
        unsigned int *bp = phys_to_virt(base);
        struct intel_mp_floating *mpf;
  
-       printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length);
+       apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
+                       bp, length);
        BUILD_BUG_ON(sizeof(*mpf) != 16);
  
        while (length > 0) {
diff --combined arch/x86/kernel/setup.c
index 60e8de19644b0aae3e2763dfa6c08fe3275053dc,68b48e3fbcbd92b7115ac90cb59cd9c1d3bc4054..59f07e14d0830a01a7098e337dc2ab554aa76207
@@@ -604,6 -604,14 +604,14 @@@ void __init setup_arch(char **cmdline_p
        early_cpu_init();
        early_ioremap_init();
  
+ #if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
+       /*
+        * Must be before kernel pagetables are setup
+        * or fixmap area is touched.
+        */
+       vmi_init();
+ #endif
        ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
        screen_info = boot_params.screen_info;
        edid_info = boot_params.edid_info;
        num_physpages = max_pfn;
  
        check_efer();
 +      if (cpu_has_x2apic)
 +              check_x2apic();
  
        /* How many end-of-memory variables you have, grandma! */
        /* need this before calling reserve_initrd */
        kvmclock_init();
  #endif
  
- #if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
-       /*
-        * Must be after max_low_pfn is determined, and before kernel
-        * pagetables are setup.
-        */
-       vmi_init();
- #endif
        paravirt_pagetable_setup_start(swapper_pg_dir);
        paging_init();
        paravirt_pagetable_setup_done(swapper_pg_dir);
        init_apic_mappings();
        ioapic_init_mappings();
  
- #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
-       if (def_to_bigsmp)
-               printk(KERN_WARNING "More than 8 CPUs detected and "
-                       "CONFIG_X86_PC cannot handle it.\nUse "
-                       "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
- #endif
        kvm_guest_init();
  
        e820_reserve_resources();
index 626618bf2f819f378053ba4be9476c0bf9a70903,91055d7fc1b0444d3e1446181216a06db2ce6978..04f78ab51b45a8a7121c82e960c133a2d6ddc81c
@@@ -123,6 -123,7 +123,6 @@@ EXPORT_PER_CPU_SYMBOL(cpu_info)
  
  static atomic_t init_deasserted;
  
 -static int boot_cpu_logical_apicid;
  
  /* representing cpus for which sibling maps can be computed */
  static cpumask_t cpu_sibling_setup_map;
@@@ -164,8 -165,6 +164,8 @@@ static void unmap_cpu_to_node(int cpu
  #endif
  
  #ifdef CONFIG_X86_32
 +static int boot_cpu_logical_apicid;
 +
  u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
                                        { [0 ... NR_CPUS-1] = BAD_APICID };
  
@@@ -211,7 -210,7 +211,7 @@@ static void __cpuinit smp_callin(void
        /*
         * (This works even if the APIC is not enabled.)
         */
 -      phys_id = GET_APIC_ID(read_apic_id());
 +      phys_id = read_apic_id();
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@@ -327,12 -326,16 +327,16 @@@ static void __cpuinit start_secondary(v
         * for which cpus receive the IPI. Holding this
         * lock helps us to not include this cpu in a currently in progress
         * smp_call_function().
+        *
+        * We need to hold vector_lock so there the set of online cpus
+        * does not change while we are assigning vectors to cpus.  Holding
+        * this lock ensures we don't half assign or remove an irq from a cpu.
         */
        ipi_call_lock_irq();
- #ifdef CONFIG_X86_IO_APIC
-       setup_vector_irq(smp_processor_id());
- #endif
+       lock_vector_lock();
+       __setup_vector_irq(smp_processor_id());
        cpu_set(smp_processor_id(), cpu_online_map);
+       unlock_vector_lock();
        ipi_call_unlock_irq();
        per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
  
@@@ -547,7 -550,8 +551,7 @@@ static inline void __inquire_remote_api
                        printk(KERN_CONT
                               "a previous APIC delivery may have failed\n");
  
 -              apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
 -              apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
 +              apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
  
                timeout = 0;
                do {
@@@ -579,9 -583,11 +583,9 @@@ wakeup_secondary_cpu(int logical_apicid
        int maxlvt;
  
        /* Target chip */
 -      apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
 -
        /* Boot on the stack */
        /* Kick the second */
 -      apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
 +      apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
  
        pr_debug("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
@@@ -634,11 -640,13 +638,11 @@@ wakeup_secondary_cpu(int phys_apicid, u
        /*
         * Turn INIT on target chip
         */
 -      apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 -
        /*
         * Send IPI
         */
 -      apic_write(APIC_ICR,
 -                 APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
 +      apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
 +                     phys_apicid);
  
        pr_debug("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
        pr_debug("Deasserting INIT.\n");
  
        /* Target chip */
 -      apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 -
        /* Send IPI */
 -      apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
 +      apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
  
        pr_debug("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
                 */
  
                /* Target chip */
 -              apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 -
                /* Boot on the stack */
                /* Kick the second */
 -              apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
 +              apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
 +                             phys_apicid);
  
                /*
                 * Give the other CPU some time to accept the IPI.
@@@ -983,7 -994,17 +987,17 @@@ int __cpuinit native_cpu_up(unsigned in
        flush_tlb_all();
        low_mappings = 1;
  
+ #ifdef CONFIG_X86_PC
+       if (def_to_bigsmp && apicid > 8) {
+               printk(KERN_WARNING
+                       "More than 8 CPUs detected - skipping them.\n"
+                       "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
+               err = -1;
+       } else
+               err = do_boot_cpu(apicid, cpu);
+ #else
        err = do_boot_cpu(apicid, cpu);
+ #endif
  
        zap_low_mappings();
        low_mappings = 0;
@@@ -1129,17 -1150,10 +1143,17 @@@ void __init native_smp_prepare_cpus(uns
         * Setup boot CPU information
         */
        smp_store_cpu_info(0); /* Final full version of the data */
 +#ifdef CONFIG_X86_32
        boot_cpu_logical_apicid = logical_smp_processor_id();
 +#endif
        current_thread_info()->cpu = 0;  /* needed? */
        set_cpu_sibling_map(0);
  
 +#ifdef CONFIG_X86_64
 +      enable_IR_x2apic();
 +      setup_apic_routing();
 +#endif
 +
        if (smp_sanity_check(max_cpus) < 0) {
                printk(KERN_INFO "SMP disabled\n");
                disable_smp();
        }
  
        preempt_disable();
 -      if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
 +      if (read_apic_id() != boot_cpu_physical_apicid) {
                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
 -                   GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
 +                   read_apic_id(), boot_cpu_physical_apicid);
                /* Or can we switch back to PIC here? */
        }
        preempt_enable();
@@@ -1336,7 -1350,9 +1350,9 @@@ int __cpu_disable(void
        remove_siblinginfo(cpu);
  
        /* It's now safe to remove this processor from the online map */
+       lock_vector_lock();
        remove_cpu_from_maps(cpu);
+       unlock_vector_lock();
        fixup_irqs(cpu_online_map);
        return 0;
  }
diff --combined arch/x86/kernel/vmi_32.c
index 45c27c4e2a6ec03fce1430dd210507b92386bcac,6ca515d6db543b9a7739b9f2f9ab39d181ed3f9b..61531d5c9507e2ce3eea7e177c2e1a7a985cda27
@@@ -37,6 -37,7 +37,7 @@@
  #include <asm/timer.h>
  #include <asm/vmi_time.h>
  #include <asm/kmap_types.h>
+ #include <asm/setup.h>
  
  /* Convenient for calling VMI functions indirectly in the ROM */
  typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@@ -683,7 -684,7 +684,7 @@@ void vmi_bringup(void
  {
        /* We must establish the lowmem mapping for MMU ops to work */
        if (vmi_ops.set_linear_mapping)
-               vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0);
+               vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
  }
  
  /*
@@@ -904,8 -905,8 +905,8 @@@ static inline int __init activate_vmi(v
  #endif
  
  #ifdef CONFIG_X86_LOCAL_APIC
 -      para_fill(pv_apic_ops.apic_read, APICRead);
 -      para_fill(pv_apic_ops.apic_write, APICWrite);
 +       para_fill(apic_ops->read, APICRead);
 +       para_fill(apic_ops->write, APICWrite);
  #endif
  
        /*
index 89a7af37e37e567c4a53923ea7e39f1c23062a34,be8b2ad5d411addee3e436ad0043800ef2d37515..5fc4d55906d46461a1aac760df66efc72b9dc06a
  #define X86_FEATURE_UP                (3*32+ 9) /* smp kernel running on up */
  #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
  #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
- #define X86_FEATURE_PEBS      (3*32+12)  /* Precise-Event Based Sampling */
- #define X86_FEATURE_BTS               (3*32+13)  /* Branch Trace Store */
- #define X86_FEATURE_SYSCALL32 (3*32+14)  /* syscall in ia32 userspace */
- #define X86_FEATURE_SYSENTER32        (3*32+15)  /* sysenter in ia32 userspace */
+ #define X86_FEATURE_PEBS      (3*32+12) /* Precise-Event Based Sampling */
+ #define X86_FEATURE_BTS               (3*32+13) /* Branch Trace Store */
+ #define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */
+ #define X86_FEATURE_SYSENTER32        (3*32+15) /* sysenter in ia32 userspace */
  #define X86_FEATURE_REP_GOOD  (3*32+16) /* rep microcode works well on this CPU */
  #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
  #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
- #define X86_FEATURE_11AP      (3*32+19)  /* Bad local APIC aka 11AP */
+ #define X86_FEATURE_11AP      (3*32+19) /* Bad local APIC aka 11AP */
+ #define X86_FEATURE_NOPL      (3*32+20) /* The NOPL (0F 1F) instructions */
  
  /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
  #define X86_FEATURE_XMM3      (4*32+ 0) /* Streaming SIMD Extensions-3 */
@@@ -91,7 -92,6 +92,7 @@@
  #define X86_FEATURE_CX16      (4*32+13) /* CMPXCHG16B */
  #define X86_FEATURE_XTPR      (4*32+14) /* Send Task Priority Messages */
  #define X86_FEATURE_DCA               (4*32+18) /* Direct Cache Access */
 +#define X86_FEATURE_X2APIC    (4*32+21) /* x2APIC */
  
  /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
  #define X86_FEATURE_XSTORE    (5*32+ 2) /* on-CPU RNG present (xstore insn) */
@@@ -190,7 -190,6 +191,7 @@@ extern const char * const x86_power_fla
  #define cpu_has_gbpages               boot_cpu_has(X86_FEATURE_GBPAGES)
  #define cpu_has_arch_perfmon  boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
  #define cpu_has_pat           boot_cpu_has(X86_FEATURE_PAT)
 +#define cpu_has_x2apic                boot_cpu_has(X86_FEATURE_X2APIC)
  
  #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
  # define cpu_has_invlpg               1
diff --combined include/asm-x86/hw_irq.h
index ef7a995ee81fe320a650bd32cddcd5e522d581c5,edd0b95f14d0df6dd9b4f4cfb01d02548d485c9b..6c3651759e44053ce5200ced61477b18b15bd7e3
@@@ -73,9 -73,7 +73,9 @@@ extern void enable_IO_APIC(void)
  #endif
  
  /* IPI functions */
 +#ifdef CONFIG_X86_32
  extern void send_IPI_self(int vector);
 +#endif
  extern void send_IPI(int dest, int vector);
  
  /* Statistics */
@@@ -100,9 -98,17 +100,17 @@@ extern void (*const interrupt[NR_IRQS])
  #else
  typedef int vector_irq_t[NR_VECTORS];
  DECLARE_PER_CPU(vector_irq_t, vector_irq);
- extern spinlock_t vector_lock;
  #endif
- extern void setup_vector_irq(int cpu);
+ #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_X86_64)
+ extern void lock_vector_lock(void);
+ extern void unlock_vector_lock(void);
+ extern void __setup_vector_irq(int cpu);
+ #else
+ static inline void lock_vector_lock(void) {}
+ static inline void unlock_vector_lock(void) {}
+ static inline void __setup_vector_irq(int cpu) {}
+ #endif
  
  #endif /* !ASSEMBLY_ */