Merge branch 'linus' into x86/x2apic
author Ingo Molnar <mingo@elte.hu>
Tue, 22 Jul 2008 07:06:21 +0000 (09:06 +0200)
committer Ingo Molnar <mingo@elte.hu>
Tue, 22 Jul 2008 07:06:21 +0000 (09:06 +0200)
16 files changed:
Documentation/kernel-parameters.txt
arch/x86/Kconfig
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/apic_32.c
arch/x86/kernel/apic_64.c
arch/x86/kernel/cpu/common_64.c
arch/x86/kernel/mpparse.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/xen/enlighten.c
drivers/pci/intel-iommu.c
include/asm-x86/apic.h
include/asm-x86/paravirt.h
include/asm-x86/smp.h

diff --combined Documentation/kernel-parameters.txt
index 556b4187d016bb7eba08de99d4ffa173f44313b1,30d44b78171a53e5dcc9e3b42d67266f61975a75..7432ba98cfecb266470ae94fa68808ccbcdaff02
@@@ -1206,7 -1206,7 +1206,7 @@@ and is between 256 and 4096 characters
                                 or
                                 memmap=0x10000$0x18690000
  
-       memtest=        [KNL,X86_64] Enable memtest
+       memtest=        [KNL,X86] Enable memtest
                        Format: <integer>
                        range: 0,4 : pattern number
                        default : 0 <disable>
                        This usage is only documented in each driver source
                        file if at all.
  
+       nf_conntrack.acct=
+                       [NETFILTER] Enable connection tracking flow accounting
+                       0 to disable accounting
+                       1 to enable accounting
+                       The default value depends on CONFIG_NF_CT_ACCT, which
+                       is going to be removed in 2.6.29.
        nfsaddrs=       [NFS]
                        See Documentation/filesystems/nfsroot.txt.
  
  
        nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
  
 +      nox2apic        [X86-64,APIC] Do not enable x2APIC mode.
 +
 +      x2apic_phys     [X86-64,APIC] Use x2apic physical mode instead of
 +                      default x2apic cluster mode on platforms
 +                      supporting x2apic.
 +
        noltlbs         [PPC] Do not use large page/tlb entries for kernel
                        lowmem mapping on PPC40x.
  
                        Note that genuine overcurrent events won't be
                        reported either.
  
+       unknown_nmi_panic
+                       [X86-32,X86-64]
+                       Set unknown_nmi_panic=1 early on boot.
        usbcore.autosuspend=
                        [USB] The autosuspend time delay (in seconds) used
                        for newly-detected USB devices (default 2).  This
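
For reference, boot parameters like the new nox2apic option above are consumed through the early_param() mechanism, which runs before the ordinary __setup() handlers. A minimal sketch of the pattern, using the handler this same merge adds in the apic_64.c hunks further below:

        static int __init setup_nox2apic(char *str)
        {
                /* Record the override before the APIC is probed. */
                disable_x2apic = 1;
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
                return 0;
        }
        early_param("nox2apic", setup_nox2apic);
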
diff --combined arch/x86/Kconfig
index baca5545500548d4f1e65c7083d9f5e0b9f89c4c,03980cb042916c6f37bd131c1d43553f51c8ade6..e32960c447a655c97bd639845d4c68e7f6c6ac84
@@@ -447,7 -447,6 +447,6 @@@ config PARAVIRT_DEBU
  
  config MEMTEST
        bool "Memtest"
-       depends on X86_64
        help
          This option adds a kernel parameter 'memtest', which allows memtest
          to be set.
@@@ -1650,14 -1649,6 +1649,14 @@@ config DMAR_FLOPPY_W
         workaround will setup a 1:1 mapping for the first
         16M to make floppy (an ISA device) work.
  
 +config INTR_REMAP
 +      bool "Support for Interrupt Remapping (EXPERIMENTAL)"
 +      depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
 +      help
 +       Supports Interrupt remapping for IO-APIC and MSI devices.
 +       To use x2apic mode on CPUs which support x2APIC enhancements, or
 +       to support platforms with CPUs having > 8-bit APIC IDs, say Y.
 +
  source "drivers/pci/pcie/Kconfig"
  
  source "drivers/pci/Kconfig"
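
The new INTR_REMAP option is the compile-time gate for the x2apic work in this merge: in this implementation the kernel only switches the APIC into x2apic mode once interrupt remapping is up. A minimal sketch of how the guard is used, condensed from the enable_IR_x2apic() hunk in apic_64.c below:

        void enable_IR_x2apic(void)
        {
        #ifdef CONFIG_INTR_REMAP
                /* Probe the DMAR tables, then turn on remapping and x2apic. */
                if (!dmar_table_init() && !enable_intr_remapping(1))
                        enable_x2apic();
        #else
                /* Without CONFIG_INTR_REMAP, x2apic stays off. */
        #endif
        }
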
diff --combined arch/x86/kernel/Makefile
index ea000467553f21fcdd54334473cbdcf7668ffac2,3db651fc8ec5828468355151fae160ae65b7a267..a07ec14f33122c5531586d15a7e48251f31f9464
@@@ -7,9 -7,10 +7,10 @@@ extra-y                := head_$(BITS).
  CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
  
  ifdef CONFIG_FTRACE
- # Do not profile debug utilities
+ # Do not profile debug and lowlevel utilities
  CFLAGS_REMOVE_tsc.o = -pg
  CFLAGS_REMOVE_rtc.o = -pg
+ CFLAGS_REMOVE_paravirt.o = -pg
  endif
  
  #
@@@ -103,8 -104,6 +104,8 @@@ obj-$(CONFIG_OLPC)         += olpc.
  ifeq ($(CONFIG_X86_64),y)
          obj-y                         += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
        obj-y                           += bios_uv.o
 +        obj-y                         += genx2apic_cluster.o
 +        obj-y                         += genx2apic_phys.o
          obj-$(CONFIG_X86_PM_TIMER)    += pmtimer_64.o
          obj-$(CONFIG_AUDIT)           += audit_64.o
  
diff --combined arch/x86/kernel/acpi/boot.c
index b41b27af33e6cb9cf31e8196397f2ab9f17fc1f4,fa88a1d7129094fcc85b1b2425234a80e2418581..12e260e8fb2a9c35a2434bf13d156187191fc257
@@@ -761,7 -761,7 +761,7 @@@ static void __init acpi_register_lapic_
  
        set_fixmap_nocache(FIX_APIC_BASE, address);
        if (boot_cpu_physical_apicid == -1U) {
 -              boot_cpu_physical_apicid  = GET_APIC_ID(read_apic_id());
 +              boot_cpu_physical_apicid  = read_apic_id();
  #ifdef CONFIG_X86_32
                apic_version[boot_cpu_physical_apicid] =
                         GET_APIC_VERSION(apic_read(APIC_LVR));
@@@ -1021,7 -1021,7 +1021,7 @@@ void __init mp_config_acpi_legacy_irqs(
        mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
  #endif
        set_bit(MP_ISA_BUS, mp_bus_not_pci);
-       Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+       pr_debug("Bus #%d is ISA\n", MP_ISA_BUS);
  
  #ifdef CONFIG_X86_ES7000
        /*
@@@ -1127,8 -1127,8 +1127,8 @@@ int mp_register_gsi(u32 gsi, int trigge
                return gsi;
        }
        if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
-               Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
-                       mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+               pr_debug("Pin %d-%d already programmed\n",
+                        mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
  #ifdef CONFIG_X86_32
                return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
  #else
@@@ -1337,9 -1337,7 +1337,9 @@@ static void __init acpi_process_madt(vo
                                acpi_ioapic = 1;
  
                                smp_found_config = 1;
 +#ifdef CONFIG_X86_32
                                setup_apic_routing();
 +#endif
                        }
                }
                if (error == -EINVAL) {
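
The Dprintk() to pr_debug() conversions in this file (and in mpparse.c and smpboot.c below) rely on pr_debug() compiling away when debugging is off, roughly as defined in <linux/kernel.h> of this era (simplified; the real stub also carries a printf format attribute):

        #ifdef DEBUG
        #define pr_debug(fmt, arg...) \
                printk(KERN_DEBUG fmt, ##arg)
        #else
        /* No-op when DEBUG is not defined. */
        static inline int pr_debug(const char *fmt, ...)
        {
                return 0;
        }
        #endif

Note that pr_debug() already supplies the KERN_DEBUG level itself, so passing KERN_DEBUG to it again is redundant.
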
diff --combined arch/x86/kernel/apic_32.c
index 8728f54a93d8dd4f02e7f40aadbe9daf26401018,d6c8983583713d747790587861318a5fb58eb342..f93c18f5b79dc2e5741fd932a391112eadedf07c
@@@ -145,18 -145,13 +145,18 @@@ static int modern_apic(void
        return lapic_get_version() >= 0x14;
  }
  
 -void apic_wait_icr_idle(void)
 +/*
 + * Paravirt kernels also might be using these below ops. So we still
 + * use generic apic_read()/apic_write(), which might be pointing to different
 + * ops in PARAVIRT case.
 + */
 +void xapic_wait_icr_idle(void)
  {
        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
                cpu_relax();
  }
  
 -u32 safe_apic_wait_icr_idle(void)
 +u32 safe_xapic_wait_icr_idle(void)
  {
        u32 send_status;
        int timeout;
        return send_status;
  }
  
 +void xapic_icr_write(u32 low, u32 id)
 +{
 +      apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
 +      apic_write(APIC_ICR, low);
 +}
 +
 +u64 xapic_icr_read(void)
 +{
 +      u32 icr1, icr2;
 +
 +      icr2 = apic_read(APIC_ICR2);
 +      icr1 = apic_read(APIC_ICR);
 +
 +      return icr1 | ((u64)icr2 << 32);
 +}
 +
 +static struct apic_ops xapic_ops = {
 +      .read = native_apic_mem_read,
 +      .write = native_apic_mem_write,
 +      .icr_read = xapic_icr_read,
 +      .icr_write = xapic_icr_write,
 +      .wait_icr_idle = xapic_wait_icr_idle,
 +      .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
 +};
 +
 +struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 +EXPORT_SYMBOL_GPL(apic_ops);
 +
  /**
   * enable_NMI_through_LVT0 - enable NMI through local vector table 0
   */
@@@ -1238,7 -1205,7 +1238,7 @@@ void __init init_apic_mappings(void
         * default configuration (or the MP table is broken).
         */
        if (boot_cpu_physical_apicid == -1U)
 -              boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 +              boot_cpu_physical_apicid = read_apic_id();
  
  }
  
@@@ -1251,9 -1218,6 +1251,6 @@@ int apic_version[MAX_APICS]
  
  int __init APIC_init_uniprocessor(void)
  {
-       if (disable_apic)
-               clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
        if (!smp_found_config && !cpu_has_apic)
                return -1;
  
         * might be zero if read from MP tables. Get it from LAPIC.
         */
  #ifdef CONFIG_CRASH_DUMP
 -      boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 +      boot_cpu_physical_apicid = read_apic_id();
  #endif
        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
  
@@@ -1737,7 -1701,7 +1734,7 @@@ early_param("lapic", parse_lapic)
  static int __init parse_nolapic(char *arg)
  {
        disable_apic = 1;
-       clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+       setup_clear_cpu_cap(X86_FEATURE_APIC);
        return 0;
  }
  early_param("nolapic", parse_nolapic);
diff --combined arch/x86/kernel/apic_64.c
index a850bc63fb1c2578f384383a9831243c5f4fc93a,7f1f030da7ee4c048990eecf9dc00229472028ca..cd63c0bc61802a5415a3f8917af5e02bffd459ed
@@@ -27,7 -27,6 +27,7 @@@
  #include <linux/clockchips.h>
  #include <linux/acpi_pmtmr.h>
  #include <linux/module.h>
 +#include <linux/dmar.h>
  
  #include <asm/atomic.h>
  #include <asm/smp.h>
@@@ -40,7 -39,6 +40,7 @@@
  #include <asm/proto.h>
  #include <asm/timex.h>
  #include <asm/apic.h>
 +#include <asm/i8259.h>
  
  #include <mach_ipi.h>
  #include <mach_apic.h>
  static int disable_apic_timer __cpuinitdata;
  static int apic_calibrate_pmtmr __initdata;
  int disable_apic;
 +int disable_x2apic;
 +int x2apic;
 +
 +/* x2apic enabled before OS handover */
 +int x2apic_preenabled;
  
  /* Local APIC timer works in C2 */
  int local_apic_timer_c2_ok;
@@@ -126,13 -119,13 +126,13 @@@ static int modern_apic(void
        return lapic_get_version() >= 0x14;
  }
  
 -void apic_wait_icr_idle(void)
 +void xapic_wait_icr_idle(void)
  {
        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
                cpu_relax();
  }
  
 -u32 safe_apic_wait_icr_idle(void)
 +u32 safe_xapic_wait_icr_idle(void)
  {
        u32 send_status;
        int timeout;
        return send_status;
  }
  
 +void xapic_icr_write(u32 low, u32 id)
 +{
 +      apic_write(APIC_ICR2, id << 24);
 +      apic_write(APIC_ICR, low);
 +}
 +
 +u64 xapic_icr_read(void)
 +{
 +      u32 icr1, icr2;
 +
 +      icr2 = apic_read(APIC_ICR2);
 +      icr1 = apic_read(APIC_ICR);
 +
 +      return (icr1 | ((u64)icr2 << 32));
 +}
 +
 +static struct apic_ops xapic_ops = {
 +      .read = native_apic_mem_read,
 +      .write = native_apic_mem_write,
 +      .icr_read = xapic_icr_read,
 +      .icr_write = xapic_icr_write,
 +      .wait_icr_idle = xapic_wait_icr_idle,
 +      .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
 +};
 +
 +struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 +
 +EXPORT_SYMBOL_GPL(apic_ops);
 +
 +static void x2apic_wait_icr_idle(void)
 +{
 +      /* no need to wait for icr idle in x2apic */
 +      return;
 +}
 +
 +static u32 safe_x2apic_wait_icr_idle(void)
 +{
 +      /* no need to wait for icr idle in x2apic */
 +      return 0;
 +}
 +
 +void x2apic_icr_write(u32 low, u32 id)
 +{
 +      wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
 +}
 +
 +u64 x2apic_icr_read(void)
 +{
 +      unsigned long val;
 +
 +      rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
 +      return val;
 +}
 +
 +static struct apic_ops x2apic_ops = {
 +      .read = native_apic_msr_read,
 +      .write = native_apic_msr_write,
 +      .icr_read = x2apic_icr_read,
 +      .icr_write = x2apic_icr_write,
 +      .wait_icr_idle = x2apic_wait_icr_idle,
 +      .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
 +};
 +
  /**
   * enable_NMI_through_LVT0 - enable NMI through local vector table 0
   */
@@@ -700,10 -630,10 +700,10 @@@ int __init verify_local_APIC(void
        /*
         * The ID register is read/write in a real APIC.
         */
 -      reg0 = read_apic_id();
 +      reg0 = apic_read(APIC_ID);
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
        apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
 -      reg1 = read_apic_id();
 +      reg1 = apic_read(APIC_ID);
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
        apic_write(APIC_ID, reg0);
        if (reg1 != (reg0 ^ APIC_ID_MASK))
@@@ -904,125 -834,6 +904,125 @@@ void __cpuinit end_local_APIC_setup(voi
        apic_pm_activate();
  }
  
 +void check_x2apic(void)
 +{
 +      int msr, msr2;
 +
 +      rdmsr(MSR_IA32_APICBASE, msr, msr2);
 +
 +      if (msr & X2APIC_ENABLE) {
 +              printk("x2apic enabled by BIOS, switching to x2apic ops\n");
 +              x2apic_preenabled = x2apic = 1;
 +              apic_ops = &x2apic_ops;
 +      }
 +}
 +
 +void enable_x2apic(void)
 +{
 +      int msr, msr2;
 +
 +      rdmsr(MSR_IA32_APICBASE, msr, msr2);
 +      if (!(msr & X2APIC_ENABLE)) {
 +              printk("Enabling x2apic\n");
 +              wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
 +      }
 +}
 +
 +void enable_IR_x2apic(void)
 +{
 +#ifdef CONFIG_INTR_REMAP
 +      int ret;
 +      unsigned long flags;
 +
 +      if (!cpu_has_x2apic)
 +              return;
 +
 +      if (!x2apic_preenabled && disable_x2apic) {
 +              printk(KERN_INFO
 +                     "Skipped enabling x2apic and Interrupt-remapping "
 +                     "because of nox2apic\n");
 +              return;
 +      }
 +
 +      if (x2apic_preenabled && disable_x2apic)
 +              panic("Bios already enabled x2apic, can't enforce nox2apic");
 +
 +      if (!x2apic_preenabled && skip_ioapic_setup) {
 +              printk(KERN_INFO
 +                     "Skipped enabling x2apic and Interrupt-remapping "
 +                     "because of skipping io-apic setup\n");
 +              return;
 +      }
 +
 +      ret = dmar_table_init();
 +      if (ret) {
 +              printk(KERN_INFO
 +                     "dmar_table_init() failed with %d:\n", ret);
 +
 +              if (x2apic_preenabled)
 +                      panic("x2apic enabled by bios. But IR enabling failed");
 +              else
 +                      printk(KERN_INFO
 +                             "Not enabling x2apic, Intr-remapping\n");
 +              return;
 +      }
 +
 +      local_irq_save(flags);
 +      mask_8259A();
 +      save_mask_IO_APIC_setup();
 +
 +      ret = enable_intr_remapping(1);
 +
 +      if (ret && x2apic_preenabled) {
 +              local_irq_restore(flags);
 +              panic("x2apic enabled by bios. But IR enabling failed");
 +      }
 +
 +      if (ret)
 +              goto end;
 +
 +      if (!x2apic) {
 +              x2apic = 1;
 +              apic_ops = &x2apic_ops;
 +              enable_x2apic();
 +      }
 +end:
 +      if (ret)
 +              /*
 +               * IR enabling failed
 +               */
 +              restore_IO_APIC_setup();
 +      else
 +              reinit_intr_remapped_IO_APIC(x2apic_preenabled);
 +
 +      unmask_8259A();
 +      local_irq_restore(flags);
 +
 +      if (!ret) {
 +              if (!x2apic_preenabled)
 +                      printk(KERN_INFO
 +                             "Enabled x2apic and interrupt-remapping\n");
 +              else
 +                      printk(KERN_INFO
 +                             "Enabled Interrupt-remapping\n");
 +      } else
 +              printk(KERN_ERR
 +                     "Failed to enable Interrupt-remapping and x2apic\n");
 +#else
 +      if (!cpu_has_x2apic)
 +              return;
 +
 +      if (x2apic_preenabled)
 +              panic("x2apic enabled prior to OS handover,"
 +                    " enable CONFIG_INTR_REMAP");
 +
 +      printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
 +             " and x2apic\n");
 +#endif
 +
 +      return;
 +}
 +
  /*
   * Detect and enable local APICs on non-SMP boards.
   * Original code written by Keir Fraser.
@@@ -1062,7 -873,7 +1062,7 @@@ void __init early_init_lapic_mapping(vo
         * Fetch the APIC ID of the BSP in case we have a
         * default configuration (or the MP table is broken).
         */
 -      boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 +      boot_cpu_physical_apicid = read_apic_id();
  }
  
  /**
   */
  void __init init_apic_mappings(void)
  {
 +      if (x2apic) {
 +              boot_cpu_physical_apicid = read_apic_id();
 +              return;
 +      }
 +
        /*
         * If no local APIC can be found then set up a fake all
         * zeroes page to simulate the local APIC and another
         * Fetch the APIC ID of the BSP in case we have a
         * default configuration (or the MP table is broken).
         */
 -      boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 +      boot_cpu_physical_apicid = read_apic_id();
  }
  
  /*
@@@ -1113,9 -919,6 +1113,9 @@@ int __init APIC_init_uniprocessor(void
                return -1;
        }
  
 +      enable_IR_x2apic();
 +      setup_apic_routing();
 +
        verify_local_APIC();
  
        connect_bsp_APIC();
@@@ -1297,11 -1100,6 +1297,11 @@@ void __cpuinit generic_processor_info(i
        cpu_set(cpu, cpu_present_map);
  }
  
 +int hard_smp_processor_id(void)
 +{
 +      return read_apic_id();
 +}
 +
  /*
   * Power management
   */
@@@ -1338,7 -1136,7 +1338,7 @@@ static int lapic_suspend(struct sys_dev
  
        maxlvt = lapic_get_maxlvt();
  
 -      apic_pm_state.apic_id = read_apic_id();
 +      apic_pm_state.apic_id = apic_read(APIC_ID);
        apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
        apic_pm_state.apic_ldr = apic_read(APIC_LDR);
        apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@@ -1373,14 -1171,10 +1373,14 @@@ static int lapic_resume(struct sys_devi
        maxlvt = lapic_get_maxlvt();
  
        local_irq_save(flags);
 -      rdmsr(MSR_IA32_APICBASE, l, h);
 -      l &= ~MSR_IA32_APICBASE_BASE;
 -      l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
 -      wrmsr(MSR_IA32_APICBASE, l, h);
 +      if (!x2apic) {
 +              rdmsr(MSR_IA32_APICBASE, l, h);
 +              l &= ~MSR_IA32_APICBASE_BASE;
 +              l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
 +              wrmsr(MSR_IA32_APICBASE, l, h);
 +      } else
 +              enable_x2apic();
 +
        apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
        apic_write(APIC_ID, apic_pm_state.apic_id);
        apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@@ -1520,15 -1314,6 +1520,15 @@@ __cpuinit int apic_is_clustered_box(voi
        return (clusters > 2);
  }
  
 +static __init int setup_nox2apic(char *str)
 +{
 +      disable_x2apic = 1;
 +      clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
 +      return 0;
 +}
 +early_param("nox2apic", setup_nox2apic);
 +
 +
  /*
   * APIC command line parameters
   */
@@@ -1556,7 -1341,7 +1556,7 @@@ early_param("apic", apic_set_verbosity)
  static __init int setup_disableapic(char *str)
  {
        disable_apic = 1;
-       clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+       setup_clear_cpu_cap(X86_FEATURE_APIC);
        return 0;
  }
  early_param("disableapic", setup_disableapic);
diff --combined arch/x86/kernel/cpu/common_64.c
index c6bee77ca9e6797fbd87f292aefb2f9dc4c0c892,dd6e3f15017eb87b04885fd5164681bbdd9a3073..6f9b8924bdc088d5ff4811767683532e608a28cb
@@@ -7,15 -7,13 +7,13 @@@
  #include <linux/module.h>
  #include <linux/kgdb.h>
  #include <linux/topology.h>
- #include <linux/string.h>
  #include <linux/delay.h>
  #include <linux/smp.h>
- #include <linux/module.h>
  #include <linux/percpu.h>
- #include <asm/processor.h>
  #include <asm/i387.h>
  #include <asm/msr.h>
  #include <asm/io.h>
+ #include <asm/linkage.h>
  #include <asm/mmu_context.h>
  #include <asm/mtrr.h>
  #include <asm/mce.h>
@@@ -305,7 -303,6 +303,6 @@@ static void __cpuinit early_identify_cp
                        c->x86_capability[2] = cpuid_edx(0x80860001);
        }
  
-       c->extended_cpuid_level = cpuid_eax(0x80000000);
        if (c->extended_cpuid_level >= 0x80000007)
                c->x86_power = cpuid_edx(0x80000007);
  
                c->x86_phys_bits = eax & 0xff;
        }
  
-       /* Assume all 64-bit CPUs support 32-bit syscall */
-       set_cpu_cap(c, X86_FEATURE_SYSCALL32);
        if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
            cpu_devs[c->x86_vendor]->c_early_init)
                cpu_devs[c->x86_vendor]->c_early_init(c);
  
        validate_pat_support(c);
-       /* early_param could clear that, but recall get it set again */
-       if (disable_apic)
-               clear_cpu_cap(c, X86_FEATURE_APIC);
  }
  
  /*
@@@ -517,8 -507,7 +507,7 @@@ void pda_init(int cpu
  }
  
  char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-                          DEBUG_STKSZ]
- __attribute__((section(".bss.page_aligned")));
+                          DEBUG_STKSZ] __page_aligned_bss;
  
  extern asmlinkage void ignore_sysret(void);
  
@@@ -608,8 -597,6 +597,8 @@@ void __cpuinit cpu_init(void
        barrier();
  
        check_efer();
 +      if (cpu != 0 && x2apic)
 +              enable_x2apic();
  
        /*
         * set up and load the per-CPU TSS
diff --combined arch/x86/kernel/mpparse.c
index 70e1f3e287fbeff08ddf497565570192b9d892f5,6ae005ccaed83bc46a6666f1d65b80baf7737f8d..e362c6ab4d3596038e3ab30d30253a47168f487e
@@@ -27,6 -27,7 +27,7 @@@
  #include <asm/bios_ebda.h>
  #include <asm/e820.h>
  #include <asm/trampoline.h>
+ #include <asm/setup.h>
  
  #include <mach_apic.h>
  #ifdef CONFIG_X86_32
@@@ -48,76 -49,6 +49,6 @@@ static int __init mpf_checksum(unsigne
        return sum & 0xFF;
  }
  
- #ifdef CONFIG_X86_NUMAQ
- int found_numaq;
- /*
-  * Have to match translation table entries to main table entries by counter
-  * hence the mpc_record variable .... can't see a less disgusting way of
-  * doing this ....
-  */
- struct mpc_config_translation {
-       unsigned char mpc_type;
-       unsigned char trans_len;
-       unsigned char trans_type;
-       unsigned char trans_quad;
-       unsigned char trans_global;
-       unsigned char trans_local;
-       unsigned short trans_reserved;
- };
- static int mpc_record;
- static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
-     __cpuinitdata;
- static inline int generate_logical_apicid(int quad, int phys_apicid)
- {
-       return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
- }
- static inline int mpc_apic_id(struct mpc_config_processor *m,
-                       struct mpc_config_translation *translation_record)
- {
-       int quad = translation_record->trans_quad;
-       int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
-       printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
-              m->mpc_apicid,
-              (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-              (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-              m->mpc_apicver, quad, logical_apicid);
-       return logical_apicid;
- }
- int mp_bus_id_to_node[MAX_MP_BUSSES];
- int mp_bus_id_to_local[MAX_MP_BUSSES];
- static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
-       struct mpc_config_translation *translation)
- {
-       int quad = translation->trans_quad;
-       int local = translation->trans_local;
-       mp_bus_id_to_node[m->mpc_busid] = quad;
-       mp_bus_id_to_local[m->mpc_busid] = local;
-       printk(KERN_INFO "Bus #%d is %s (node %d)\n",
-              m->mpc_busid, name, quad);
- }
- int quad_local_to_mp_bus_id [NR_CPUS/4][4];
- static void mpc_oem_pci_bus(struct mpc_config_bus *m,
-       struct mpc_config_translation *translation)
- {
-       int quad = translation->trans_quad;
-       int local = translation->trans_local;
-       quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
- }
- #endif
  static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
  {
        int apicid;
                disabled_cpus++;
                return;
        }
- #ifdef CONFIG_X86_NUMAQ
-       if (found_numaq)
-               apicid = mpc_apic_id(m, translation_table[mpc_record]);
+       if (x86_quirks->mpc_apic_id)
+               apicid = x86_quirks->mpc_apic_id(m);
        else
                apicid = m->mpc_apicid;
- #else
-       apicid = m->mpc_apicid;
- #endif
        if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
                bootup_cpu = " (Bootup-CPU)";
                boot_cpu_physical_apicid = m->mpc_apicid;
@@@ -151,12 -80,10 +80,10 @@@ static void __init MP_bus_info(struct m
        memcpy(str, m->mpc_bustype, 6);
        str[6] = 0;
  
- #ifdef CONFIG_X86_NUMAQ
-       if (found_numaq)
-               mpc_oem_bus_info(m, str, translation_table[mpc_record]);
- #else
-       printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
- #endif
+       if (x86_quirks->mpc_oem_bus_info)
+               x86_quirks->mpc_oem_bus_info(m, str);
+       else
+               printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
  
  #if MAX_MP_BUSSES < 256
        if (m->mpc_busid >= MAX_MP_BUSSES) {
                mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
  #endif
        } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
- #ifdef CONFIG_X86_NUMAQ
-               if (found_numaq)
-                       mpc_oem_pci_bus(m, translation_table[mpc_record]);
- #endif
+               if (x86_quirks->mpc_oem_pci_bus)
+                       x86_quirks->mpc_oem_pci_bus(m);
                clear_bit(m->mpc_busid, mp_bus_not_pci);
  #if defined(CONFIG_EISA) || defined (CONFIG_MCA)
                mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
@@@ -316,83 -242,6 +242,6 @@@ static void __init MP_lintsrc_info(stru
                m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
  }
  
- #ifdef CONFIG_X86_NUMAQ
- static void __init MP_translation_info(struct mpc_config_translation *m)
- {
-       printk(KERN_INFO
-              "Translation: record %d, type %d, quad %d, global %d, local %d\n",
-              mpc_record, m->trans_type, m->trans_quad, m->trans_global,
-              m->trans_local);
-       if (mpc_record >= MAX_MPC_ENTRY)
-               printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
-       else
-               translation_table[mpc_record] = m;      /* stash this for later */
-       if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
-               node_set_online(m->trans_quad);
- }
- /*
-  * Read/parse the MPC oem tables
-  */
- static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
-                                   unsigned short oemsize)
- {
-       int count = sizeof(*oemtable);  /* the header size */
-       unsigned char *oemptr = ((unsigned char *)oemtable) + count;
-       mpc_record = 0;
-       printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
-              oemtable);
-       if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
-               printk(KERN_WARNING
-                      "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
-                      oemtable->oem_signature[0], oemtable->oem_signature[1],
-                      oemtable->oem_signature[2], oemtable->oem_signature[3]);
-               return;
-       }
-       if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
-               printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
-               return;
-       }
-       while (count < oemtable->oem_length) {
-               switch (*oemptr) {
-               case MP_TRANSLATION:
-                       {
-                               struct mpc_config_translation *m =
-                                   (struct mpc_config_translation *)oemptr;
-                               MP_translation_info(m);
-                               oemptr += sizeof(*m);
-                               count += sizeof(*m);
-                               ++mpc_record;
-                               break;
-                       }
-               default:
-                       {
-                               printk(KERN_WARNING
-                                      "Unrecognised OEM table entry type! - %d\n",
-                                      (int)*oemptr);
-                               return;
-                       }
-               }
-       }
- }
- void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
-                                char *productid)
- {
-       if (strncmp(oem, "IBM NUMA", 8))
-               printk("Warning!  Not a NUMA-Q system!\n");
-       else
-               found_numaq = 1;
-       if (mpc->mpc_oemptr)
-               smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
-                                mpc->mpc_oemsize);
- }
- #endif /* CONFIG_X86_NUMAQ */
  /*
   * Read/parse the MPC
   */
@@@ -457,7 -306,6 +306,6 @@@ static int __init smp_read_mpc(struct m
        } else
                mps_oem_check(mpc, oem, str);
  #endif
        /* save the local APIC address, it might be non-default */
        if (!acpi_lapic)
                mp_lapic_addr = mpc->mpc_lapic;
        if (early)
                return 1;
  
+       if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
+               struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr;
+               x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
+       }
        /*
         *      Now process the configuration blocks.
         */
- #ifdef CONFIG_X86_NUMAQ
-       mpc_record = 0;
- #endif
+       if (x86_quirks->mpc_record)
+               *x86_quirks->mpc_record = 0;
        while (count < mpc->mpc_length) {
                switch (*mpt) {
                case MP_PROCESSOR:
                        count = mpc->mpc_length;
                        break;
                }
- #ifdef CONFIG_X86_NUMAQ
-               ++mpc_record;
- #endif
+               if (x86_quirks->mpc_record)
+                       (*x86_quirks->mpc_record)++;
        }
  
  #ifdef CONFIG_X86_GENERICARCH
         generic_bigsmp_probe();
  #endif
  
 +#ifdef CONFIG_X86_32
        setup_apic_routing();
 +#endif
        if (!num_processors)
                printk(KERN_ERR "MPTABLE: no processors registered!\n");
        return num_processors;
@@@ -727,12 -577,6 +579,6 @@@ static inline void __init construct_def
  
  static struct intel_mp_floating *mpf_found;
  
- /*
-  * Machine specific quirk for finding the SMP config before other setup
-  * activities destroy the table:
-  */
- int (*mach_get_smp_config_quirk)(unsigned int early);
  /*
   * Scan the memory blocks for an SMP configuration block.
   */
@@@ -740,8 -584,8 +586,8 @@@ static void __init __get_smp_config(uns
  {
        struct intel_mp_floating *mpf = mpf_found;
  
-       if (mach_get_smp_config_quirk) {
-               if (mach_get_smp_config_quirk(early))
+       if (x86_quirks->mach_get_smp_config) {
+               if (x86_quirks->mach_get_smp_config(early))
                        return;
        }
        if (acpi_lapic && early)
@@@ -901,14 -745,12 +747,12 @@@ static int __init smp_scan_config(unsig
        return 0;
  }
  
- int (*mach_find_smp_config_quirk)(unsigned int reserve);
  static void __init __find_smp_config(unsigned int reserve)
  {
        unsigned int address;
  
-       if (mach_find_smp_config_quirk) {
-               if (mach_find_smp_config_quirk(reserve))
+       if (x86_quirks->mach_find_smp_config) {
+               if (x86_quirks->mach_find_smp_config(reserve))
                        return;
        }
        /*
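
Every CONFIG_X86_NUMAQ block deleted above turns into an indirect call through x86_quirks, moving the NUMA-Q special cases out of the generic MP-table parser. From the call sites left in this file, the struct (declared in asm-x86/setup.h, not shown on this page) must contain at least:

        struct x86_quirks {
                int (*mach_get_smp_config)(unsigned int early);
                int (*mach_find_smp_config)(unsigned int reserve);

                int *mpc_record;
                int (*mpc_apic_id)(struct mpc_config_processor *m);
                void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name);
                void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
                void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
                                         unsigned short oemsize);
        };
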
diff --combined arch/x86/kernel/paravirt.c
index e0f139106c7e9288202c9aec1dc48344ebae07c7,097d8a6797fa8142882d817a5221419cf4aeb591..00c53a049756b3af0f41563c68103d35a3b4d8b9
@@@ -29,6 -29,7 +29,7 @@@
  #include <asm/desc.h>
  #include <asm/setup.h>
  #include <asm/arch_hooks.h>
+ #include <asm/pgtable.h>
  #include <asm/time.h>
  #include <asm/pgalloc.h>
  #include <asm/irq.h>
@@@ -123,6 -124,7 +124,7 @@@ static void *get_call_destination(u8 ty
                .pv_irq_ops = pv_irq_ops,
                .pv_apic_ops = pv_apic_ops,
                .pv_mmu_ops = pv_mmu_ops,
+               .pv_lock_ops = pv_lock_ops,
        };
        return *((void **)&tmpl + type);
  }
@@@ -266,6 -268,17 +268,17 @@@ enum paravirt_lazy_mode paravirt_get_la
        return __get_cpu_var(paravirt_lazy_mode);
  }
  
+ void __init paravirt_use_bytelocks(void)
+ {
+ #ifdef CONFIG_SMP
+       pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
+       pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
+       pv_lock_ops.spin_lock = __byte_spin_lock;
+       pv_lock_ops.spin_trylock = __byte_spin_trylock;
+       pv_lock_ops.spin_unlock = __byte_spin_unlock;
+ #endif
+ }
  struct pv_info pv_info = {
        .name = "bare hardware",
        .paravirt_enabled = 0,
@@@ -360,6 -373,8 +373,6 @@@ struct pv_cpu_ops pv_cpu_ops = 
  
  struct pv_apic_ops pv_apic_ops = {
  #ifdef CONFIG_X86_LOCAL_APIC
 -      .apic_write = native_apic_write,
 -      .apic_read = native_apic_read,
        .setup_boot_clock = setup_boot_APIC_clock,
        .setup_secondary_clock = setup_secondary_APIC_clock,
        .startup_ipi_hook = paravirt_nop,
@@@ -370,6 -385,9 +383,9 @@@ struct pv_mmu_ops pv_mmu_ops = 
  #ifndef CONFIG_X86_64
        .pagetable_setup_start = native_pagetable_setup_start,
        .pagetable_setup_done = native_pagetable_setup_done,
+ #else
+       .pagetable_setup_start = paravirt_nop,
+       .pagetable_setup_done = paravirt_nop,
  #endif
  
        .read_cr2 = native_read_cr2,
        .set_fixmap = native_set_fixmap,
  };
  
+ struct pv_lock_ops pv_lock_ops = {
+ #ifdef CONFIG_SMP
+       .spin_is_locked = __ticket_spin_is_locked,
+       .spin_is_contended = __ticket_spin_is_contended,
+       .spin_lock = __ticket_spin_lock,
+       .spin_trylock = __ticket_spin_trylock,
+       .spin_unlock = __ticket_spin_unlock,
+ #endif
+ };
+ EXPORT_SYMBOL_GPL(pv_lock_ops);
  EXPORT_SYMBOL_GPL(pv_time_ops);
  EXPORT_SYMBOL    (pv_cpu_ops);
  EXPORT_SYMBOL    (pv_mmu_ops);
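
pv_lock_ops extends the paravirt-ops pattern to spinlocks: bare hardware keeps the ticket-lock implementations installed above, while a guest can call paravirt_use_bytelocks() (or install its own table) to swap them at boot. Lock call sites then dispatch through the table; conceptually, a simplified sketch of the asm-x86/spinlock.h side, ignoring any patching machinery:

        static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
        {
                pv_lock_ops.spin_lock(lock);
        }

        static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
        {
                return pv_lock_ops.spin_trylock(lock);
        }
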
diff --combined arch/x86/kernel/setup.c
index 6121ffd46b9eb587ebbd7e380eadb29f698ec638,ec952aa5394a403a42308de114a370a656968a34..a50f9550cbec9e867fc47e5053b6c825abb7b941
  #include <linux/slab.h>
  #include <linux/user.h>
  #include <linux/delay.h>
- #include <linux/highmem.h>
  
  #include <linux/kallsyms.h>
- #include <linux/edd.h>
- #include <linux/iscsi_ibft.h>
- #include <linux/kexec.h>
  #include <linux/cpufreq.h>
  #include <linux/dma-mapping.h>
  #include <linux/ctype.h>
@@@ -96,7 -92,7 +92,7 @@@
  #include <asm/smp.h>
  #include <asm/desc.h>
  #include <asm/dma.h>
- #include <asm/gart.h>
+ #include <asm/iommu.h>
  #include <asm/mmu_context.h>
  #include <asm/proto.h>
  
  #include <asm/paravirt.h>
  
  #include <asm/percpu.h>
- #include <asm/sections.h>
  #include <asm/topology.h>
  #include <asm/apicdef.h>
  #ifdef CONFIG_X86_64
@@@ -579,6 -574,10 +574,10 @@@ static int __init setup_elfcorehdr(cha
  early_param("elfcorehdr", setup_elfcorehdr);
  #endif
  
+ static struct x86_quirks default_x86_quirks __initdata;
+ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
  /*
   * Determine if we were loaded by an EFI loader.  If so, then we have also been
   * passed the efi memmap, systab, etc., so we should use these data structures
@@@ -735,8 -734,6 +734,8 @@@ void __init setup_arch(char **cmdline_p
        num_physpages = max_pfn;
  
        check_efer();
 +      if (cpu_has_x2apic)
 +              check_x2apic();
  
        /* How many end-of-memory variables you have, grandma! */
        /* need this before calling reserve_initrd */
        vmi_init();
  #endif
  
+       paravirt_pagetable_setup_start(swapper_pg_dir);
        paging_init();
+       paravirt_pagetable_setup_done(swapper_pg_dir);
+       paravirt_post_allocator_init();
  
  #ifdef CONFIG_X86_64
        map_vsyscall();
        init_cpu_to_node();
  #endif
  
- #ifdef CONFIG_X86_NUMAQ
-       /*
-        * need to check online nodes num, call it
-        * here before time_init/tsc_init
-        */
-       numaq_tsc_disable();
- #endif
        init_apic_mappings();
        ioapic_init_mappings();
  
diff --combined arch/x86/kernel/smpboot.c
index a4f2d8f06e4824f2f7f038429c72241bcf2914ab,4b53a647bc0affd6bbfc3a47d7a57419b8449f24..52eb1484a48ac1d106c2c6920d0ff309f35d5d42
@@@ -123,6 -123,7 +123,6 @@@ EXPORT_PER_CPU_SYMBOL(cpu_info)
  
  static atomic_t init_deasserted;
  
 -static int boot_cpu_logical_apicid;
  
  /* representing cpus for which sibling maps can be computed */
  static cpumask_t cpu_sibling_setup_map;
@@@ -164,8 -165,6 +164,8 @@@ static void unmap_cpu_to_node(int cpu
  #endif
  
  #ifdef CONFIG_X86_32
 +static int boot_cpu_logical_apicid;
 +
  u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
                                        { [0 ... NR_CPUS-1] = BAD_APICID };
  
@@@ -211,13 -210,13 +211,13 @@@ static void __cpuinit smp_callin(void
        /*
         * (This works even if the APIC is not enabled.)
         */
 -      phys_id = GET_APIC_ID(read_apic_id());
 +      phys_id = read_apic_id();
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
                                        phys_id, cpuid);
        }
-       Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+       pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
  
        /*
         * STARTUP IPIs are fragile beasts as they might sometimes
         * boards)
         */
  
-       Dprintk("CALLIN, before setup_local_APIC().\n");
+       pr_debug("CALLIN, before setup_local_APIC().\n");
        smp_callin_clear_local_apic();
        setup_local_APIC();
        end_local_APIC_setup();
        local_irq_enable();
        calibrate_delay();
        local_irq_disable();
-       Dprintk("Stack at about %p\n", &cpuid);
+       pr_debug("Stack at about %p\n", &cpuid);
  
        /*
         * Save our processor parameters
@@@ -514,7 -513,7 +514,7 @@@ static void impress_friends(void
        /*
         * Allow the user to impress friends.
         */
-       Dprintk("Before bogomips.\n");
+       pr_debug("Before bogomips.\n");
        for_each_possible_cpu(cpu)
                if (cpu_isset(cpu, cpu_callout_map))
                        bogosum += cpu_data(cpu).loops_per_jiffy;
                bogosum/(500000/HZ),
                (bogosum/(5000/HZ))%100);
  
-       Dprintk("Before bogocount - setting activated=1.\n");
+       pr_debug("Before bogocount - setting activated=1.\n");
  }
  
  static inline void __inquire_remote_apic(int apicid)
                        printk(KERN_CONT
                               "a previous APIC delivery may have failed\n");
  
 -              apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
 -              apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
 +              apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
  
                timeout = 0;
                do {
@@@ -579,11 -579,13 +579,11 @@@ wakeup_secondary_cpu(int logical_apicid
        int maxlvt;
  
        /* Target chip */
 -      apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
 -
        /* Boot on the stack */
        /* Kick the second */
 -      apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
 +      apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
  
-       Dprintk("Waiting for send to finish...\n");
+       pr_debug("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
  
        /*
        if (maxlvt > 3)                 /* Due to the Pentium erratum 3AP.  */
                apic_write(APIC_ESR, 0);
        accept_status = (apic_read(APIC_ESR) & 0xEF);
-       Dprintk("NMI sent.\n");
+       pr_debug("NMI sent.\n");
  
        if (send_status)
                printk(KERN_ERR "APIC never delivered???\n");
@@@ -629,29 -631,33 +629,29 @@@ wakeup_secondary_cpu(int phys_apicid, u
                apic_read(APIC_ESR);
        }
  
-       Dprintk("Asserting INIT.\n");
+       pr_debug("Asserting INIT.\n");
  
        /*
         * Turn INIT on target chip
         */
 -      apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 -
        /*
         * Send IPI
         */
 -      apic_write(APIC_ICR,
 -                 APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
 +      apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
 +                     phys_apicid);
  
-       Dprintk("Waiting for send to finish...\n");
+       pr_debug("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
  
        mdelay(10);
  
-       Dprintk("Deasserting INIT.\n");
+       pr_debug("Deasserting INIT.\n");
  
        /* Target chip */
 -      apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 -
        /* Send IPI */
 -      apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
 +      apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
  
-       Dprintk("Waiting for send to finish...\n");
+       pr_debug("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
  
        mb();
        /*
         * Run STARTUP IPI loop.
         */
-       Dprintk("#startup loops: %d.\n", num_starts);
+       pr_debug("#startup loops: %d.\n", num_starts);
  
        for (j = 1; j <= num_starts; j++) {
-               Dprintk("Sending STARTUP #%d.\n", j);
+               pr_debug("Sending STARTUP #%d.\n", j);
                if (maxlvt > 3)         /* Due to the Pentium erratum 3AP.  */
                        apic_write(APIC_ESR, 0);
                apic_read(APIC_ESR);
-               Dprintk("After apic_write.\n");
+               pr_debug("After apic_write.\n");
  
                /*
                 * STARTUP IPI
                 */
  
                /* Target chip */
 -              apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 -
                /* Boot on the stack */
                /* Kick the second */
 -              apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
 +              apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
 +                             phys_apicid);
  
                /*
                 * Give the other CPU some time to accept the IPI.
                 */
                udelay(300);
  
-               Dprintk("Startup point 1.\n");
+               pr_debug("Startup point 1.\n");
  
-               Dprintk("Waiting for send to finish...\n");
+               pr_debug("Waiting for send to finish...\n");
                send_status = safe_apic_wait_icr_idle();
  
                /*
                if (send_status || accept_status)
                        break;
        }
-       Dprintk("After Startup.\n");
+       pr_debug("After Startup.\n");
  
        if (send_status)
                printk(KERN_ERR "APIC never delivered???\n");
@@@ -750,7 -757,7 +750,7 @@@ static void __cpuinit do_fork_idle(stru
   *
   * Must be called after the _cpu_pda pointer table is initialized.
   */
- static int __cpuinit get_local_pda(int cpu)
+ int __cpuinit get_local_pda(int cpu)
  {
        struct x8664_pda *oldpda, *newpda;
        unsigned long size = sizeof(struct x8664_pda);
@@@ -868,7 -875,7 +868,7 @@@ do_rest
  
        if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
  
-               Dprintk("Setting warm reset code and vector.\n");
+               pr_debug("Setting warm reset code and vector.\n");
  
                store_NMI_vector(&nmi_high, &nmi_low);
  
                /*
                 * allow APs to start initializing.
                 */
-               Dprintk("Before Callout %d.\n", cpu);
+               pr_debug("Before Callout %d.\n", cpu);
                cpu_set(cpu, cpu_callout_map);
-               Dprintk("After Callout %d.\n", cpu);
+               pr_debug("After Callout %d.\n", cpu);
  
                /*
                 * Wait 5s total for a response
  
                if (cpu_isset(cpu, cpu_callin_map)) {
                        /* number CPUs logically, starting from 1 (BSP is 0) */
-                       Dprintk("OK.\n");
+                       pr_debug("OK.\n");
                        printk(KERN_INFO "CPU%d: ", cpu);
                        print_cpu_info(&cpu_data(cpu));
-                       Dprintk("CPU has booted.\n");
+                       pr_debug("CPU has booted.\n");
                } else {
                        boot_error = 1;
                        if (*((volatile unsigned char *)trampoline_base)
@@@ -952,7 -959,7 +952,7 @@@ int __cpuinit native_cpu_up(unsigned in
  
        WARN_ON(irqs_disabled());
  
-       Dprintk("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
+       pr_debug("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
  
        if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
            !physid_isset(apicid, phys_cpu_present_map)) {
         * Already booted CPU?
         */
        if (cpu_isset(cpu, cpu_callin_map)) {
-               Dprintk("do_boot_cpu %d Already started\n", cpu);
+               pr_debug("do_boot_cpu %d Already started\n", cpu);
                return -ENOSYS;
        }
  
        err = do_boot_cpu(apicid, cpu);
  #endif
        if (err) {
-               Dprintk("do_boot_cpu failed %d\n", err);
+               pr_debug("do_boot_cpu failed %d\n", err);
                return -EIO;
        }
  
@@@ -1129,17 -1136,10 +1129,17 @@@ void __init native_smp_prepare_cpus(uns
         * Setup boot CPU information
         */
        smp_store_cpu_info(0); /* Final full version of the data */
 +#ifdef CONFIG_X86_32
        boot_cpu_logical_apicid = logical_smp_processor_id();
 +#endif
        current_thread_info()->cpu = 0;  /* needed? */
        set_cpu_sibling_map(0);
  
 +#ifdef CONFIG_X86_64
 +      enable_IR_x2apic();
 +      setup_apic_routing();
 +#endif
 +
        if (smp_sanity_check(max_cpus) < 0) {
                printk(KERN_INFO "SMP disabled\n");
                disable_smp();
        }
  
        preempt_disable();
 -      if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
 +      if (read_apic_id() != boot_cpu_physical_apicid) {
                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
 -                   GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
 +                   read_apic_id(), boot_cpu_physical_apicid);
                /* Or can we switch back to PIC here? */
        }
        preempt_enable();
@@@ -1202,7 -1202,7 +1202,7 @@@ void __init native_smp_prepare_boot_cpu
  
  void __init native_smp_cpus_done(unsigned int max_cpus)
  {
-       Dprintk("Boot done.\n");
+       pr_debug("Boot done.\n");
  
        impress_friends();
        smp_checks();
@@@ -1300,7 -1300,7 +1300,7 @@@ static void __ref remove_cpu_from_maps(
        cpu_clear(cpu, cpu_callout_map);
        cpu_clear(cpu, cpu_callin_map);
        /* was set by cpu_init() */
-       clear_bit(cpu, (unsigned long *)&cpu_initialized);
+       cpu_clear(cpu, cpu_initialized);
        numa_remove_cpu(cpu);
  }
  
@@@ -1379,7 -1379,8 +1379,8 @@@ static int __init parse_maxcpus(char *a
  {
        extern unsigned int maxcpus;
  
-       maxcpus = simple_strtoul(arg, NULL, 0);
+       if (arg)
+               maxcpus = simple_strtoul(arg, NULL, 0);
        return 0;
  }
  early_param("maxcpus", parse_maxcpus);
diff --combined arch/x86/xen/enlighten.c
index e4d1459a63dfc635a642fd27446b326ba5440c16,194bbd6e32410dd4c00c1aa360bca80b312d41b1..c910345860c3d29a36bef692864d02b52f47cd94
  #include <xen/interface/sched.h>
  #include <xen/features.h>
  #include <xen/page.h>
+ #include <xen/hvc-console.h>
  
  #include <asm/paravirt.h>
 +#include <asm/apic.h>
  #include <asm/page.h>
  #include <asm/xen/hypercall.h>
  #include <asm/xen/hypervisor.h>
  #include <asm/fixmap.h>
  #include <asm/processor.h>
+ #include <asm/msr-index.h>
  #include <asm/setup.h>
  #include <asm/desc.h>
  #include <asm/pgtable.h>
  #include <asm/tlbflush.h>
  #include <asm/reboot.h>
- #include <asm/pgalloc.h>
  
  #include "xen-ops.h"
  #include "mmu.h"
@@@ -57,6 -57,18 +58,18 @@@ EXPORT_SYMBOL_GPL(hypercall_page)
  DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
  DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
  
+ /*
+  * Identity map, in addition to plain kernel map.  This needs to be
+  * large enough to allocate page table pages to allocate the rest.
+  * Each page can map 2MB.
+  */
+ static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
+ #ifdef CONFIG_X86_64
+ /* l3 pud for userspace vsyscall mapping */
+ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
+ #endif /* CONFIG_X86_64 */
  /*
   * Note about cr3 (pagetable base) values:
   *
@@@ -168,10 -180,14 +181,14 @@@ void xen_vcpu_restore(void
  
  static void __init xen_banner(void)
  {
+       unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
+       struct xen_extraversion extra;
+       HYPERVISOR_xen_version(XENVER_extraversion, &extra);
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
               pv_info.name);
-       printk(KERN_INFO "Hypervisor signature: %s%s\n",
-              xen_start_info->magic,
+       printk(KERN_INFO "Xen version: %d.%d%s%s\n",
+              version >> 16, version & 0xffff, extra.extraversion,
               xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
  }
  
@@@ -364,14 -380,6 +381,6 @@@ static void load_TLS_descriptor(struct 
  
  static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
  {
-       xen_mc_batch();
-       load_TLS_descriptor(t, cpu, 0);
-       load_TLS_descriptor(t, cpu, 1);
-       load_TLS_descriptor(t, cpu, 2);
-       xen_mc_issue(PARAVIRT_LAZY_CPU);
        /*
         * XXX sleazy hack: If we're being called in a lazy-cpu zone,
         * it means we're in a context switch, and %gs has just been
         * Either way, it has been saved, and the new value will get
         * loaded properly.  This will go away as soon as Xen has been
         * modified to not save/restore %gs for normal hypercalls.
+        *
+        * On x86_64, this hack is not used for %gs, because gs points
+        * to KERNEL_GS_BASE (and uses it for PDA references), so we
+        * must not zero %gs on x86_64
+        *
+        * For x86_64, we need to zero %fs, otherwise we may get an
+        * exception between the new %fs descriptor being loaded and
+        * %fs being effectively cleared at __switch_to().
         */
-       if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
+       if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+ #ifdef CONFIG_X86_32
                loadsegment(gs, 0);
+ #else
+               loadsegment(fs, 0);
+ #endif
+       }
+       xen_mc_batch();
+       load_TLS_descriptor(t, cpu, 0);
+       load_TLS_descriptor(t, cpu, 1);
+       load_TLS_descriptor(t, cpu, 2);
+       xen_mc_issue(PARAVIRT_LAZY_CPU);
+ }
+ #ifdef CONFIG_X86_64
+ static void xen_load_gs_index(unsigned int idx)
+ {
+       if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
+               BUG();
  }
+ #endif
  
  static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
                                const void *ptr)
        preempt_enable();
  }
  
- static int cvt_gate_to_trap(int vector, u32 low, u32 high,
+ static int cvt_gate_to_trap(int vector, const gate_desc *val,
                            struct trap_info *info)
  {
-       u8 type, dpl;
-       type = (high >> 8) & 0x1f;
-       dpl = (high >> 13) & 3;
-       if (type != 0xf && type != 0xe)
+       if (val->type != 0xf && val->type != 0xe)
                return 0;
  
        info->vector = vector;
-       info->address = (high & 0xffff0000) | (low & 0x0000ffff);
-       info->cs = low >> 16;
-       info->flags = dpl;
+       info->address = gate_offset(*val);
+       info->cs = gate_segment(*val);
+       info->flags = val->dpl;
        /* interrupt gates clear IF */
-       if (type == 0xe)
+       if (val->type == 0xe)
                info->flags |= 4;
  
        return 1;
@@@ -444,11 -476,10 +477,10 @@@ static void xen_write_idt_entry(gate_de
  
        if (p >= start && (p + 8) <= end) {
                struct trap_info info[2];
-               u32 *desc = (u32 *)g;
  
                info[1].address = 0;
  
-               if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0]))
+               if (cvt_gate_to_trap(entrynum, g, &info[0]))
                        if (HYPERVISOR_set_trap_table(info))
                                BUG();
        }
@@@ -461,13 -492,13 +493,13 @@@ static void xen_convert_trap_info(cons
  {
        unsigned in, out, count;
  
-       count = (desc->size+1) / 8;
+       count = (desc->size+1) / sizeof(gate_desc);
        BUG_ON(count > 256);
  
        for (in = out = 0; in < count; in++) {
-               const u32 *entry = (u32 *)(desc->address + in * 8);
+               gate_desc *entry = (gate_desc*)(desc->address) + in;
  
-               if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out]))
+               if (cvt_gate_to_trap(in, entry, &traps[out]))
                        out++;
        }
        traps[out].address = 0;
@@@ -549,47 -580,16 +581,47 @@@ static void xen_io_delay(void
  }
  
  #ifdef CONFIG_X86_LOCAL_APIC
 -static u32 xen_apic_read(unsigned long reg)
 +static u32 xen_apic_read(u32 reg)
  {
        return 0;
  }
  
 -static void xen_apic_write(unsigned long reg, u32 val)
 +static void xen_apic_write(u32 reg, u32 val)
  {
        /* Warn to see if there's any stray references */
        WARN_ON(1);
  }
 +
 +static u64 xen_apic_icr_read(void)
 +{
 +      return 0;
 +}
 +
 +static void xen_apic_icr_write(u32 low, u32 id)
 +{
 +      /* Warn to see if there's any stray references */
 +      WARN_ON(1);
 +}
 +
 +static void xen_apic_wait_icr_idle(void)
 +{
 +        return;
 +}
 +
 +static u32 xen_safe_apic_wait_icr_idle(void)
 +{
 +        return 0;
 +}
 +
 +static struct apic_ops xen_basic_apic_ops = {
 +      .read = xen_apic_read,
 +      .write = xen_apic_write,
 +      .icr_read = xen_apic_icr_read,
 +      .icr_write = xen_apic_icr_write,
 +      .wait_icr_idle = xen_apic_wait_icr_idle,
 +      .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
 +};
 +
  #endif
  
  static void xen_flush_tlb(void)
@@@ -727,33 -727,89 +759,89 @@@ static void set_current_cr3(void *v
        x86_write_percpu(xen_current_cr3, (unsigned long)v);
  }
  
- static void xen_write_cr3(unsigned long cr3)
+ static void __xen_write_cr3(bool kernel, unsigned long cr3)
  {
        struct mmuext_op *op;
        struct multicall_space mcs;
-       unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+       unsigned long mfn;
  
-       BUG_ON(preemptible());
+       if (cr3)
+               mfn = pfn_to_mfn(PFN_DOWN(cr3));
+       else
+               mfn = 0;
  
-       mcs = xen_mc_entry(sizeof(*op));  /* disables interrupts */
+       WARN_ON(mfn == 0 && kernel);
  
-       /* Update while interrupts are disabled, so its atomic with
-          respect to ipis */
-       x86_write_percpu(xen_cr3, cr3);
+       mcs = __xen_mc_entry(sizeof(*op));
  
        op = mcs.args;
-       op->cmd = MMUEXT_NEW_BASEPTR;
+       op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
        op->arg1.mfn = mfn;
  
        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
  
-       /* Update xen_update_cr3 once the batch has actually
-          been submitted. */
-       xen_mc_callback(set_current_cr3, (void *)cr3);
+       if (kernel) {
+               x86_write_percpu(xen_cr3, cr3);
+               /* Update xen_current_cr3 once the batch has actually
+                  been submitted. */
+               xen_mc_callback(set_current_cr3, (void *)cr3);
+       }
+ }
+ static void xen_write_cr3(unsigned long cr3)
+ {
+       BUG_ON(preemptible());
+       xen_mc_batch();  /* disables interrupts */
+       /* Update while interrupts are disabled, so its atomic with
+          respect to ipis */
+       x86_write_percpu(xen_cr3, cr3);
+       __xen_write_cr3(true, cr3);
+ #ifdef CONFIG_X86_64
+       {
+               pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+               if (user_pgd)
+                       __xen_write_cr3(false, __pa(user_pgd));
+               else
+                       __xen_write_cr3(false, 0);
+       }
+ #endif
  
        xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
  }
  
+ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+ {
+       int ret;
+       ret = 0;
+       switch(msr) {
+ #ifdef CONFIG_X86_64
+               unsigned which;
+               u64 base;
+       case MSR_FS_BASE:               which = SEGBASE_FS; goto set;
+       case MSR_KERNEL_GS_BASE:        which = SEGBASE_GS_USER; goto set;
+       case MSR_GS_BASE:               which = SEGBASE_GS_KERNEL; goto set;
+       set:
+               base = ((u64)high << 32) | low;
+               if (HYPERVISOR_set_segment_base(which, base) != 0)
+                       ret = -EFAULT;
+               break;
+ #endif
+       default:
+               ret = native_write_msr_safe(msr, low, high);
+       }
+       return ret;
+ }
+ 
  /* Early in boot, while setting up the initial pagetable, assume
     everything is pinned. */
  static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
@@@ -810,6 -866,48 +898,48 @@@ static void xen_alloc_pmd(struct mm_str
        xen_alloc_ptpage(mm, pfn, PT_PMD);
  }
  
+ static int xen_pgd_alloc(struct mm_struct *mm)
+ {
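+       /* On x86_64, each pagetable has a companion user pagetable kept
+          in the kernel pgd page's ->private; its vsyscall slot is
+          seeded below. */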
+       pgd_t *pgd = mm->pgd;
+       int ret = 0;
+       BUG_ON(PagePinned(virt_to_page(pgd)));
+ #ifdef CONFIG_X86_64
+       {
+               struct page *page = virt_to_page(pgd);
+               pgd_t *user_pgd;
+               BUG_ON(page->private != 0);
+               ret = -ENOMEM;
+               user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+               page->private = (unsigned long)user_pgd;
+               if (user_pgd != NULL) {
+                       user_pgd[pgd_index(VSYSCALL_START)] =
+                               __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+                       ret = 0;
+               }
+               BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
+       }
+ #endif
+       return ret;
+ }
+ 
+ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+ {
+ #ifdef CONFIG_X86_64
+       pgd_t *user_pgd = xen_get_user_pgd(pgd);
+       if (user_pgd)
+               free_page((unsigned long)user_pgd);
+ #endif
+ }
+ 
  /* This should never happen until we're OK to use struct page */
  static void xen_release_ptpage(u32 pfn, unsigned level)
  {
@@@ -835,6 -933,18 +965,18 @@@ static void xen_release_pmd(u32 pfn
        xen_release_ptpage(pfn, PT_PMD);
  }
  
+ #if PAGETABLE_LEVELS == 4
+ static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
+ {
+       xen_alloc_ptpage(mm, pfn, PT_PUD);
+ }
+ 
+ static void xen_release_pud(u32 pfn)
+ {
+       xen_release_ptpage(pfn, PT_PUD);
+ }
+ #endif
+ 
  #ifdef CONFIG_HIGHPTE
  static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
  {
@@@ -873,68 -983,16 +1015,16 @@@ static __init void xen_set_pte_init(pte
  
  static __init void xen_pagetable_setup_start(pgd_t *base)
  {
-       pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
-       int i;
-       /* special set_pte for pagetable initialization */
-       pv_mmu_ops.set_pte = xen_set_pte_init;
-       init_mm.pgd = base;
-       /*
-        * copy top-level of Xen-supplied pagetable into place.  This
-        * is a stand-in while we copy the pmd pages.
-        */
-       memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
-       /*
-        * For PAE, need to allocate new pmds, rather than
-        * share Xen's, since Xen doesn't like pmd's being
-        * shared between address spaces.
-        */
-       for (i = 0; i < PTRS_PER_PGD; i++) {
-               if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
-                       pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-                       memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
-                              PAGE_SIZE);
-                       make_lowmem_page_readonly(pmd);
-                       set_pgd(&base[i], __pgd(1 + __pa(pmd)));
-               } else
-                       pgd_clear(&base[i]);
-       }
-       /* make sure zero_page is mapped RO so we can use it in pagetables */
-       make_lowmem_page_readonly(empty_zero_page);
-       make_lowmem_page_readonly(base);
-       /*
-        * Switch to new pagetable.  This is done before
-        * pagetable_init has done anything so that the new pages
-        * added to the table can be prepared properly for Xen.
-        */
-       xen_write_cr3(__pa(base));
-       /* Unpin initial Xen pagetable */
-       pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
-                         PFN_DOWN(__pa(xen_start_info->pt_base)));
  }
  
  void xen_setup_shared_info(void)
  {
        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-               unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-               /*
-                * Create a mapping for the shared info page.
-                * Should be set_fixmap(), but shared_info is a machine
-                * address with no corresponding pseudo-phys address.
-                */
-               set_pte_mfn(addr,
-                           PFN_DOWN(xen_start_info->shared_info),
-                           PAGE_KERNEL);
-               HYPERVISOR_shared_info = (struct shared_info *)addr;
+               set_fixmap(FIX_PARAVIRT_BOOTMAP,
+                          xen_start_info->shared_info);
+               HYPERVISOR_shared_info =
+                       (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
        } else
                HYPERVISOR_shared_info =
                        (struct shared_info *)__va(xen_start_info->shared_info);
  
  static __init void xen_pagetable_setup_done(pgd_t *base)
  {
-       /* This will work as long as patching hasn't happened yet
-          (which it hasn't) */
-       pv_mmu_ops.alloc_pte = xen_alloc_pte;
-       pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
-       pv_mmu_ops.release_pte = xen_release_pte;
-       pv_mmu_ops.release_pmd = xen_release_pmd;
-       pv_mmu_ops.set_pte = xen_set_pte;
        xen_setup_shared_info();
-       /* Actually pin the pagetable down, but we can't set PG_pinned
-          yet because the page structures don't exist yet. */
-       pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
  }
  
  static __init void xen_post_allocator_init(void)
  {
+       pv_mmu_ops.set_pte = xen_set_pte;
        pv_mmu_ops.set_pmd = xen_set_pmd;
        pv_mmu_ops.set_pud = xen_set_pud;
+ #if PAGETABLE_LEVELS == 4
+       pv_mmu_ops.set_pgd = xen_set_pgd;
+ #endif
+       /* This will work as long as patching hasn't happened yet
+          (which it hasn't) */
+       pv_mmu_ops.alloc_pte = xen_alloc_pte;
+       pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
+       pv_mmu_ops.release_pte = xen_release_pte;
+       pv_mmu_ops.release_pmd = xen_release_pmd;
+ #if PAGETABLE_LEVELS == 4
+       pv_mmu_ops.alloc_pud = xen_alloc_pud;
+       pv_mmu_ops.release_pud = xen_release_pud;
+ #endif
  
+ #ifdef CONFIG_X86_64
+       SetPagePinned(virt_to_page(level3_user_vsyscall));
+ #endif
        xen_mark_init_mm_pinned();
  }
  
@@@ -982,6 -1046,7 +1078,7 @@@ void xen_setup_vcpu_info_placement(void
  
        /* xen_vcpu_setup managed to place the vcpu_info within the
           percpu area for all cpus, so make use of it */
+ #ifdef CONFIG_X86_32
        if (have_vcpu_info_placement) {
                printk(KERN_INFO "Xen: using vcpu_info placement\n");
  
                pv_irq_ops.irq_enable = xen_irq_enable_direct;
                pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
        }
+ #endif
  }
  
  static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
        goto patch_site
  
        switch (type) {
+ #ifdef CONFIG_X86_32
                SITE(pv_irq_ops, irq_enable);
                SITE(pv_irq_ops, irq_disable);
                SITE(pv_irq_ops, save_fl);
                SITE(pv_irq_ops, restore_fl);
+ #endif /* CONFIG_X86_32 */
  #undef SITE
  
        patch_site:
@@@ -1057,8 -1125,15 +1157,15 @@@ static void xen_set_fixmap(unsigned idx
  #ifdef CONFIG_X86_F00F_BUG
        case FIX_F00F_IDT:
  #endif
+ #ifdef CONFIG_X86_32
        case FIX_WP_TEST:
        case FIX_VDSO:
+ # ifdef CONFIG_HIGHMEM
+       case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+ # endif
+ #else
+       case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+ #endif
  #ifdef CONFIG_X86_LOCAL_APIC
        case FIX_APIC_BASE:     /* maps dummy local APIC */
  #endif
        }
  
        __native_set_fixmap(idx, pte);
+ #ifdef CONFIG_X86_64
+       /* Replicate changes to map the vsyscall page into the user
+          pagetable vsyscall mapping. */
+       if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+               unsigned long vaddr = __fix_to_virt(idx);
+               set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
+       }
+ #endif
  }
  
  static const struct pv_info xen_info __initdata = {
@@@ -1116,18 -1200,25 +1232,25 @@@ static const struct pv_cpu_ops xen_cpu_
        .wbinvd = native_wbinvd,
  
        .read_msr = native_read_msr_safe,
-       .write_msr = native_write_msr_safe,
+       .write_msr = xen_write_msr_safe,
        .read_tsc = native_read_tsc,
        .read_pmc = native_read_pmc,
  
        .iret = xen_iret,
        .irq_enable_sysexit = xen_sysexit,
+ #ifdef CONFIG_X86_64
+       .usergs_sysret32 = xen_sysret32,
+       .usergs_sysret64 = xen_sysret64,
+ #endif
  
        .load_tr_desc = paravirt_nop,
        .set_ldt = xen_set_ldt,
        .load_gdt = xen_load_gdt,
        .load_idt = xen_load_idt,
        .load_tls = xen_load_tls,
+ #ifdef CONFIG_X86_64
+       .load_gs_index = xen_load_gs_index,
+ #endif
  
        .store_gdt = native_store_gdt,
        .store_idt = native_store_idt,
        .set_iopl_mask = xen_set_iopl_mask,
        .io_delay = xen_io_delay,
  
+       /* Xen takes care of %gs when switching to usermode for us */
+       .swapgs = paravirt_nop,
        .lazy_mode = {
                .enter = paravirt_enter_lazy_cpu,
                .leave = xen_leave_lazy,
        },
  };
  
+ static void __init __xen_init_IRQ(void)
+ {
+ #ifdef CONFIG_X86_64
+       int i;
+       /* Create identity vector->irq map */
+       for (i = 0; i < NR_VECTORS; i++) {
+               int cpu;
+               for_each_possible_cpu(cpu)
+                       per_cpu(vector_irq, cpu)[i] = i;
+       }
+ #endif        /* CONFIG_X86_64 */
+       xen_init_IRQ();
+ }
+ 
  static const struct pv_irq_ops xen_irq_ops __initdata = {
-       .init_IRQ = xen_init_IRQ,
+       .init_IRQ = __xen_init_IRQ,
        .save_fl = xen_save_fl,
        .restore_fl = xen_restore_fl,
        .irq_disable = xen_irq_disable,
        .safe_halt = xen_safe_halt,
        .halt = xen_halt,
  #ifdef CONFIG_X86_64
-       .adjust_exception_frame = paravirt_nop,
+       .adjust_exception_frame = xen_adjust_exception_frame,
  #endif
  };
  
  static const struct pv_apic_ops xen_apic_ops __initdata = {
  #ifdef CONFIG_X86_LOCAL_APIC
 -      .apic_write = xen_apic_write,
 -      .apic_read = xen_apic_read,
        .setup_boot_clock = paravirt_nop,
        .setup_secondary_clock = paravirt_nop,
        .startup_ipi_hook = paravirt_nop,
@@@ -1186,8 -1299,8 +1329,8 @@@ static const struct pv_mmu_ops xen_mmu_
        .pte_update = paravirt_nop,
        .pte_update_defer = paravirt_nop,
  
-       .pgd_alloc = __paravirt_pgd_alloc,
-       .pgd_free = paravirt_nop,
+       .pgd_alloc = xen_pgd_alloc,
+       .pgd_free = xen_pgd_free,
  
        .alloc_pte = xen_alloc_pte_init,
        .release_pte = xen_release_pte_init,
        .kmap_atomic_pte = xen_kmap_atomic_pte,
  #endif
  
-       .set_pte = NULL,        /* see xen_pagetable_setup_* */
+ #ifdef CONFIG_X86_64
+       .set_pte = xen_set_pte,
+ #else
+       .set_pte = xen_set_pte_init,
+ #endif
        .set_pte_at = xen_set_pte_at,
        .set_pmd = xen_set_pmd_hyper,
  
        .make_pte = xen_make_pte,
        .make_pgd = xen_make_pgd,
  
+ #ifdef CONFIG_X86_PAE
        .set_pte_atomic = xen_set_pte_atomic,
        .set_pte_present = xen_set_pte_at,
-       .set_pud = xen_set_pud_hyper,
        .pte_clear = xen_pte_clear,
        .pmd_clear = xen_pmd_clear,
+ #endif        /* CONFIG_X86_PAE */
+       .set_pud = xen_set_pud_hyper,
  
        .make_pmd = xen_make_pmd,
        .pmd_val = xen_pmd_val,
  
+ #if PAGETABLE_LEVELS == 4
+       .pud_val = xen_pud_val,
+       .make_pud = xen_make_pud,
+       .set_pgd = xen_set_pgd_hyper,
+       .alloc_pud = xen_alloc_pte_init,
+       .release_pud = xen_release_pte_init,
+ #endif        /* PAGETABLE_LEVELS == 4 */
        .activate_mm = xen_activate_mm,
        .dup_mmap = xen_dup_mmap,
        .exit_mmap = xen_exit_mmap,
        .set_fixmap = xen_set_fixmap,
  };
  
- #ifdef CONFIG_SMP
- static const struct smp_ops xen_smp_ops __initdata = {
-       .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
-       .smp_prepare_cpus = xen_smp_prepare_cpus,
-       .cpu_up = xen_cpu_up,
-       .smp_cpus_done = xen_smp_cpus_done,
-       .smp_send_stop = xen_smp_send_stop,
-       .smp_send_reschedule = xen_smp_send_reschedule,
-       .send_call_func_ipi = xen_smp_send_call_function_ipi,
-       .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
- };
- #endif        /* CONFIG_SMP */
  static void xen_reboot(int reason)
  {
        struct sched_shutdown r = { .reason = reason };
@@@ -1293,6 -1406,7 +1436,7 @@@ static const struct machine_ops __initd
  
  static void __init xen_reserve_top(void)
  {
+ #ifdef CONFIG_X86_32
        unsigned long top = HYPERVISOR_VIRT_START;
        struct xen_platform_parameters pp;
  
                top = pp.virt_start;
  
        reserve_top_address(-top + 2 * PAGE_SIZE);
+ #endif        /* CONFIG_X86_32 */
+ }
+ 
+ /*
+  * Like __va(), but returns an address in the kernel mapping (which is
+  * all we have until the physical memory mapping has been set up).
+  */
+ static void *__ka(phys_addr_t paddr)
+ {
+ #ifdef CONFIG_X86_64
+       return (void *)(paddr + __START_KERNEL_map);
+ #else
+       return __va(paddr);
+ #endif
  }
  
+ /* Convert a machine address to physical address */
+ static unsigned long m2p(phys_addr_t maddr)
+ {
+       phys_addr_t paddr;
+       maddr &= PTE_MASK;
+       paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
+       return paddr;
+ }
+ 
+ /* Convert a machine address to kernel virtual */
+ static void *m2v(phys_addr_t maddr)
+ {
+       return __ka(m2p(maddr));
+ }
+ 
+ #ifdef CONFIG_X86_64
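+ /* Debug helper: dump each level of the 4-level lookup for addr,
+  * following machine addresses with m2v().  Only referenced from the
+  * #if 0 block in xen_start_kernel(). */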
+ static void walk(pgd_t *pgd, unsigned long addr)
+ {
+       unsigned l4idx = pgd_index(addr);
+       unsigned l3idx = pud_index(addr);
+       unsigned l2idx = pmd_index(addr);
+       unsigned l1idx = pte_index(addr);
+       pgd_t l4;
+       pud_t l3;
+       pmd_t l2;
+       pte_t l1;
+       xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
+                      pgd, addr, l4idx, l3idx, l2idx, l1idx);
+       l4 = pgd[l4idx];
+       xen_raw_printk("  l4: %016lx\n", l4.pgd);
+       xen_raw_printk("      %016lx\n", pgd_val(l4));
+       l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
+       xen_raw_printk("  l3: %016lx\n", l3.pud);
+       xen_raw_printk("      %016lx\n", pud_val(l3));
+       l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
+       xen_raw_printk("  l2: %016lx\n", l2.pmd);
+       xen_raw_printk("      %016lx\n", pmd_val(l2));
+       l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
+       xen_raw_printk("  l1: %016lx\n", l1.pte);
+       xen_raw_printk("      %016lx\n", pte_val(l1));
+ }
+ #endif
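+ 
+ /* Change the protection of a single kernel page with a direct
+  * update_va_mapping hypercall; used below to make pagetable pages
+  * read-only before they are pinned. */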
+ static void set_page_prot(void *addr, pgprot_t prot)
+ {
+       unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
+       pte_t pte = pfn_pte(pfn, prot);
+       xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
+                      addr, pfn, get_phys_to_machine(pfn),
+                      pgprot_val(prot), pte.pte);
+       if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+               BUG();
+ }
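+ 
+ /*
+  * Build a 1:1 mapping for the first max_pfn pages under pmd, reusing
+  * Xen-provided pte pages where they exist and allocating the rest from
+  * level1_ident_pgt, then mark all the pte pages read-only.
+  */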
+ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+ {
+       unsigned pmdidx, pteidx;
+       unsigned ident_pte = 0;
+       unsigned long pfn = 0;
+ 
+       for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
+               pte_t *pte_page;
+               /* Reuse or allocate a page of ptes */
+               if (pmd_present(pmd[pmdidx]))
+                       pte_page = m2v(pmd[pmdidx].pmd);
+               else {
+                       /* Check for free pte pages */
+                       if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
+                               break;
+                       pte_page = &level1_ident_pgt[ident_pte];
+                       ident_pte += PTRS_PER_PTE;
+                       pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
+               }
+               /* Install mappings */
+               for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
+                       pte_t pte;
+                       if (pfn > max_pfn_mapped)
+                               max_pfn_mapped = pfn;
+                       if (!pte_none(pte_page[pteidx]))
+                               continue;
+                       pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
+                       pte_page[pteidx] = pte;
+               }
+       }
+ 
+       for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
+               set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+       set_page_prot(pmd, PAGE_KERNEL_RO);
+ }
+ 
+ #ifdef CONFIG_X86_64
+ static void convert_pfn_mfn(void *v)
+ {
+       pte_t *pte = v;
+       int i;
+       /* All levels are converted the same way, so just treat them
+          as ptes. */
+       for (i = 0; i < PTRS_PER_PTE; i++)
+               pte[i] = xen_make_pte(pte[i].pte);
+ }
+ 
+ /*
+  * Set up the initial kernel pagetable.
+  *
+  * We can construct this by grafting the Xen-provided pagetable into
+  * head_64.S's preconstructed pagetables.  We copy the Xen L2s into
+  * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt.  This
+  * means that only the kernel has a physical mapping to start with -
+  * but that's enough to get __va working.  We need to fill in the rest
+  * of the physical mapping once some sort of allocator has been set
+  * up.
+  */
+ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+ {
+       pud_t *l3;
+       pmd_t *l2;
+       /* Zap identity mapping */
+       init_level4_pgt[0] = __pgd(0);
+       /* Pre-constructed entries are in pfn, so convert to mfn */
+       convert_pfn_mfn(init_level4_pgt);
+       convert_pfn_mfn(level3_ident_pgt);
+       convert_pfn_mfn(level3_kernel_pgt);
+       l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
+       l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
+       memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+       memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+       l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
+       l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
+       memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+       /* Set up identity map */
+       xen_map_identity_early(level2_ident_pgt, max_pfn);
+       /* Make pagetable pieces RO */
+       set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+       set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+       set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+       set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+       set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+       set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+       /* Pin down new L4 */
+       pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+                         PFN_DOWN(__pa_symbol(init_level4_pgt)));
+       /* Unpin Xen-provided one */
+       pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+       /* Switch over */
+       pgd = init_level4_pgt;
+       /*
+        * At this stage there can be no user pgd, and no page
+        * structure to attach it to, so we only set the kernel
+        * pgd.
+        */
+       xen_mc_batch();
+       __xen_write_cr3(true, __pa(pgd));
+       xen_mc_issue(PARAVIRT_LAZY_CPU);
+       reserve_early(__pa(xen_start_info->pt_base),
+                     __pa(xen_start_info->pt_base +
+                          xen_start_info->nr_pt_frames * PAGE_SIZE),
+                     "XEN PAGETABLES");
+       return pgd;
+ }
+ #else /* !CONFIG_X86_64 */
+ static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
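+ 
+ /* 32-bit counterpart: graft the Xen-provided kernel pmd into
+  * swapper_pg_dir, map the initial identity range and switch to
+  * swapper_pg_dir before pinning it. */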
+ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+ {
+       pmd_t *kernel_pmd;
+       init_pg_tables_start = __pa(pgd);
+       init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+       max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+       kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
+       memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+       xen_map_identity_early(level2_kernel_pgt, max_pfn);
+       memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+       set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
+                       __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+       set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+       set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+       set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
+       pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+       xen_write_cr3(__pa(swapper_pg_dir));
+       pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+       return swapper_pg_dir;
+ }
+ #endif        /* CONFIG_X86_64 */
  /* First C function to be called on Xen boot */
  asmlinkage void __init xen_start_kernel(void)
  {
        pv_apic_ops = xen_apic_ops;
        pv_mmu_ops = xen_mmu_ops;
  
 +#ifdef CONFIG_X86_LOCAL_APIC
 +      /*
 +       * set up the basic apic ops.
 +       */
 +      apic_ops = &xen_basic_apic_ops;
 +#endif
 +
        if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
                pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
                pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
  
        machine_ops = xen_machine_ops;
  
- #ifdef CONFIG_SMP
-       smp_ops = xen_smp_ops;
+ #ifdef CONFIG_X86_64
+       /* Disable until we can access per-cpu data directly. */
+       have_vcpu_info_placement = 0;
+       x86_64_init_pda();
  #endif
  
+       xen_smp_init();
        /* Get mfn list */
        if (!xen_feature(XENFEAT_auto_translated_physmap))
                xen_build_dynamic_phys_to_machine();
  
        pgd = (pgd_t *)xen_start_info->pt_base;
  
-       init_pg_tables_start = __pa(pgd);
-       init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
-       max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
-       init_mm.pgd = pgd; /* use the Xen pagetables to start */
-       /* keep using Xen gdt for now; no urgent need to change it */
-       x86_write_percpu(xen_cr3, __pa(pgd));
-       x86_write_percpu(xen_current_cr3, __pa(pgd));
+       /* Prevent unwanted bits from being set in PTEs. */
+       __supported_pte_mask &= ~_PAGE_GLOBAL;
+       if (!is_initial_xendomain())
+               __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
  
        /* Don't do the full vcpu_info placement stuff until we have a
           possible map and a non-dummy shared_info. */
        per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
  
+       xen_raw_console_write("mapping kernel into physical memory\n");
+       pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
+       init_mm.pgd = pgd;
+       /* keep using Xen gdt for now; no urgent need to change it */
        pv_info.kernel_rpl = 1;
        if (xen_feature(XENFEAT_supervisor_mode_kernel))
                pv_info.kernel_rpl = 0;
  
-       /* Prevent unwanted bits from being set in PTEs. */
-       __supported_pte_mask &= ~_PAGE_GLOBAL;
-       if (!is_initial_xendomain())
-               __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
        /* set the limit of our address space */
        xen_reserve_top();
  
+ #ifdef CONFIG_X86_32
        /* set up basic CPUID stuff */
        cpu_detect(&new_cpu_data);
        new_cpu_data.hard_math = 1;
        new_cpu_data.x86_capability[0] = cpuid_edx(1);
+ #endif
  
        /* Poke various useful things into boot_params */
        boot_params.hdr.type_of_loader = (9 << 4) | 0;
        boot_params.hdr.ramdisk_image = xen_start_info->mod_start
                ? __pa(xen_start_info->mod_start) : 0;
        boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
+       boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
  
        if (!is_initial_xendomain()) {
                add_preferred_console("xenboot", 0, NULL);
                add_preferred_console("hvc", 0, NULL);
        }
  
+       xen_raw_console_write("about to get started...\n");
+ #if 0
+       xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
+                      &boot_params, __pa_symbol(&boot_params),
+                      __va(__pa_symbol(&boot_params)));
+       walk(pgd, &boot_params);
+       walk(pgd, __va(__pa(&boot_params)));
+ #endif
        /* Start the world */
+ #ifdef CONFIG_X86_32
        i386_start_kernel();
+ #else
+       x86_64_start_reservations((char *)__pa_symbol(&boot_params));
+ #endif
  }
index ffccf2341b98e6bff0f4b6750de185a1b0287c99,8d0e60ac849cb5f34e609fdaf9215688091728a7..389fdd6f4a9f6afcdd0877150887dc1f1571a914
@@@ -37,7 -37,7 +37,7 @@@
  #include "intel-iommu.h"
  #include <asm/proto.h> /* force_iommu in this header in x86-64*/
  #include <asm/cacheflush.h>
- #include <asm/gart.h>
+ #include <asm/iommu.h>
  #include "pci.h"
  
  #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
@@@ -49,6 -49,8 +49,6 @@@
  
  #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
  
 -#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
 -
  #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
  
  
@@@ -56,6 -58,8 +56,6 @@@ static void flush_unmaps_timeout(unsign
  
  DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
  
 -static struct intel_iommu *g_iommus;
 -
  #define HIGH_WATER_MARK 250
  struct deferred_flush_tables {
        int next;
@@@ -76,7 -80,7 +76,7 @@@ static long list_size
  
  static void domain_remove_dev_info(struct dmar_domain *domain);
  
 -static int dmar_disabled;
 +int dmar_disabled;
  static int __initdata dmar_map_gfx = 1;
  static int dmar_forcedac;
  static int intel_iommu_strict;
@@@ -181,6 -185,13 +181,6 @@@ void free_iova_mem(struct iova *iova
        kmem_cache_free(iommu_iova_cache, iova);
  }
  
 -static inline void __iommu_flush_cache(
 -      struct intel_iommu *iommu, void *addr, int size)
 -{
 -      if (!ecap_coherent(iommu->ecap))
 -              clflush_cache_range(addr, size);
 -}
 -
  /* Gets context entry for a given bus and devfn */
  static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
                u8 bus, u8 devfn)
@@@ -477,6 -488,19 +477,6 @@@ static int iommu_alloc_root_entry(struc
        return 0;
  }
  
 -#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
 -{\
 -      cycles_t start_time = get_cycles();\
 -      while (1) {\
 -              sts = op (iommu->reg + offset);\
 -              if (cond)\
 -                      break;\
 -              if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
 -                      panic("DMAR hardware is malfunctioning\n");\
 -              cpu_relax();\
 -      }\
 -}
 -
  static void iommu_set_root_entry(struct intel_iommu *iommu)
  {
        void *addr;
@@@ -966,8 -990,6 +966,8 @@@ static int iommu_init_domains(struct in
                return -ENOMEM;
        }
  
 +      spin_lock_init(&iommu->lock);
 +
        /*
         * if Caching mode is set, then invalid translations are tagged
         * with domainid 0. Hence we need to pre-allocate it.
                set_bit(0, iommu->domain_ids);
        return 0;
  }
 -static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
 -                                      struct dmar_drhd_unit *drhd)
 -{
 -      int ret;
 -      int map_size;
 -      u32 ver;
 -
 -      iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
 -      if (!iommu->reg) {
 -              printk(KERN_ERR "IOMMU: can't map the region\n");
 -              goto error;
 -      }
 -      iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
 -      iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
 -
 -      /* the registers might be more than one page */
 -      map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
 -              cap_max_fault_reg_offset(iommu->cap));
 -      map_size = PAGE_ALIGN_4K(map_size);
 -      if (map_size > PAGE_SIZE_4K) {
 -              iounmap(iommu->reg);
 -              iommu->reg = ioremap(drhd->reg_base_addr, map_size);
 -              if (!iommu->reg) {
 -                      printk(KERN_ERR "IOMMU: can't map the region\n");
 -                      goto error;
 -              }
 -      }
 -
 -      ver = readl(iommu->reg + DMAR_VER_REG);
 -      pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
 -              drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
 -              iommu->cap, iommu->ecap);
 -      ret = iommu_init_domains(iommu);
 -      if (ret)
 -              goto error_unmap;
 -      spin_lock_init(&iommu->lock);
 -      spin_lock_init(&iommu->register_lock);
  
 -      drhd->iommu = iommu;
 -      return iommu;
 -error_unmap:
 -      iounmap(iommu->reg);
 -error:
 -      kfree(iommu);
 -      return NULL;
 -}
  
  static void domain_exit(struct dmar_domain *domain);
 -static void free_iommu(struct intel_iommu *iommu)
 +
 +void free_dmar_iommu(struct intel_iommu *iommu)
  {
        struct dmar_domain *domain;
        int i;
  
 -      if (!iommu)
 -              return;
 -
        i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
        for (; i < cap_ndoms(iommu->cap); ) {
                domain = iommu->domains[i];
  
        /* free context mapping */
        free_context_table(iommu);
 -
 -      if (iommu->reg)
 -              iounmap(iommu->reg);
 -      kfree(iommu);
  }
  
  static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@@ -1353,6 -1426,37 +1353,6 @@@ find_domain(struct pci_dev *pdev
        return NULL;
  }
  
 -static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
 -     struct pci_dev *dev)
 -{
 -      int index;
 -
 -      while (dev) {
 -              for (index = 0; index < cnt; index++)
 -                      if (dev == devices[index])
 -                              return 1;
 -
 -              /* Check our parent */
 -              dev = dev->bus->self;
 -      }
 -
 -      return 0;
 -}
 -
 -static struct dmar_drhd_unit *
 -dmar_find_matched_drhd_unit(struct pci_dev *dev)
 -{
 -      struct dmar_drhd_unit *drhd = NULL;
 -
 -      list_for_each_entry(drhd, &dmar_drhd_units, list) {
 -              if (drhd->include_all || dmar_pci_device_match(drhd->devices,
 -                                              drhd->devices_cnt, dev))
 -                      return drhd;
 -      }
 -
 -      return NULL;
 -}
 -
  /* domain is initialized */
  static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
  {
@@@ -1625,6 -1729,8 +1625,6 @@@ int __init init_dmars(void
         * endfor
         */
        for_each_drhd_unit(drhd) {
 -              if (drhd->ignored)
 -                      continue;
                g_num_of_iommus++;
                /*
                 * lock not needed as this is only incremented in the single
                 */
        }
  
 -      g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
 -      if (!g_iommus) {
 -              ret = -ENOMEM;
 -              goto error;
 -      }
 -
        deferred_flush = kzalloc(g_num_of_iommus *
                sizeof(struct deferred_flush_tables), GFP_KERNEL);
        if (!deferred_flush) {
                goto error;
        }
  
 -      i = 0;
        for_each_drhd_unit(drhd) {
                if (drhd->ignored)
                        continue;
 -              iommu = alloc_iommu(&g_iommus[i], drhd);
 -              i++;
 -              if (!iommu) {
 -                      ret = -ENOMEM;
 +
 +              iommu = drhd->iommu;
 +
 +              ret = iommu_init_domains(iommu);
 +              if (ret)
                        goto error;
 -              }
  
                /*
                 * TBD:
@@@ -1732,6 -1845,7 +1732,6 @@@ error
                iommu = drhd->iommu;
                free_iommu(iommu);
        }
 -      kfree(g_iommus);
        return ret;
  }
  
@@@ -1888,10 -2002,7 +1888,10 @@@ static void flush_unmaps(void
        /* just flush them all */
        for (i = 0; i < g_num_of_iommus; i++) {
                if (deferred_flush[i].next) {
 -                      iommu_flush_iotlb_global(&g_iommus[i], 0);
 +                      struct intel_iommu *iommu =
 +                              deferred_flush[i].domain[0]->iommu;
 +
 +                      iommu_flush_iotlb_global(iommu, 0);
                        for (j = 0; j < deferred_flush[i].next; j++) {
                                __free_iova(&deferred_flush[i].domain[j]->iovad,
                                                deferred_flush[i].iova[j]);
@@@ -1921,8 -2032,7 +1921,8 @@@ static void add_unmap(struct dmar_domai
        if (list_size == HIGH_WATER_MARK)
                flush_unmaps();
  
 -      iommu_id = dom->iommu - g_iommus;
 +      iommu_id = dom->iommu->seq_id;
 +
        next = deferred_flush[iommu_id].next;
        deferred_flush[iommu_id].domain[next] = dom;
        deferred_flush[iommu_id].iova[next] = iova;
@@@ -2238,6 -2348,15 +2238,6 @@@ static void __init iommu_exit_mempool(v
  
  }
  
 -void __init detect_intel_iommu(void)
 -{
 -      if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
 -              return;
 -      if (early_dmar_detect()) {
 -              iommu_detected = 1;
 -      }
 -}
 -
  static void __init init_no_remapping_devices(void)
  {
        struct dmar_drhd_unit *drhd;
@@@ -2284,19 -2403,12 +2284,19 @@@ int __init intel_iommu_init(void
  {
        int ret = 0;
  
 -      if (no_iommu || swiotlb || dmar_disabled)
 -              return -ENODEV;
 -
        if (dmar_table_init())
                return  -ENODEV;
  
 +      if (dmar_dev_scope_init())
 +              return  -ENODEV;
 +
 +      /*
 +       * Check the need for DMA-remapping initialization now.
 +       * The initialization above is also used by Interrupt-remapping.
 +       */
 +      if (no_iommu || swiotlb || dmar_disabled)
 +              return -ENODEV;
 +
        iommu_init_mempool();
        dmar_init_reserved_ranges();
  
diff --combined include/asm-x86/apic.h
index 300b65e57240902d8b7527724de5c7c6a2938939,133c998161ca4930dae0efe954802a45af2d62d4..51339910fdc0c705d7089f7dff5a9a7bd94d562b
@@@ -9,13 -9,9 +9,11 @@@
  #include <asm/apicdef.h>
  #include <asm/processor.h>
  #include <asm/system.h>
 +#include <asm/cpufeature.h>
 +#include <asm/msr.h>
  
  #define ARCH_APICTIMER_STOPS_ON_C3    1
  
- #define Dprintk printk
  /*
   * Debugging macros
   */
@@@ -51,13 -47,15 +49,13 @@@ extern int disable_apic
  #ifdef CONFIG_PARAVIRT
  #include <asm/paravirt.h>
  #else
 -#define apic_write native_apic_write
 -#define apic_read native_apic_read
  #define setup_boot_clock setup_boot_APIC_clock
  #define setup_secondary_clock setup_secondary_APIC_clock
  #endif
  
  extern int is_vsmp_box(void);
  
 -static inline void native_apic_write(unsigned long reg, u32 v)
 +static inline void native_apic_mem_write(u32 reg, u32 v)
  {
        volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
  
                       ASM_OUTPUT2("0" (v), "m" (*addr)));
  }
  
 -static inline u32 native_apic_read(unsigned long reg)
 +static inline u32 native_apic_mem_read(u32 reg)
  {
        return *((volatile u32 *)(APIC_BASE + reg));
  }
  
 -extern void apic_wait_icr_idle(void);
 -extern u32 safe_apic_wait_icr_idle(void);
 +static inline void native_apic_msr_write(u32 reg, u32 v)
 +{
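 +      /* APIC_DFR does not exist in x2APIC mode, and APIC_ID, APIC_LDR
 +       * and APIC_LVR are read-only there, so drop writes to them. */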
 +      if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
 +          reg == APIC_LVR)
 +              return;
 +
 +      wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
 +}
 +
 +static inline u32 native_apic_msr_read(u32 reg)
 +{
 +      u32 low, high;
 +
 +      if (reg == APIC_DFR)
 +              return -1;
 +
 +      rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
 +      return low;
 +}
 +
 +#ifndef CONFIG_X86_32
 +extern int x2apic, x2apic_preenabled;
 +extern void check_x2apic(void);
 +extern void enable_x2apic(void);
 +extern void enable_IR_x2apic(void);
 +extern void x2apic_icr_write(u32 low, u32 id);
 +#endif
 +
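 +/*
 + * Indirection for low-level APIC register access, letting the MMIO-based
 + * xAPIC, the MSR-based x2APIC and paravirt guests each plug in their own
 + * accessors.
 + */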
 +struct apic_ops {
 +      u32 (*read)(u32 reg);
 +      void (*write)(u32 reg, u32 v);
 +      u64 (*icr_read)(void);
 +      void (*icr_write)(u32 low, u32 high);
 +      void (*wait_icr_idle)(void);
 +      u32 (*safe_wait_icr_idle)(void);
 +};
 +
 +extern struct apic_ops *apic_ops;
 +
 +#define apic_read (apic_ops->read)
 +#define apic_write (apic_ops->write)
 +#define apic_icr_read (apic_ops->icr_read)
 +#define apic_icr_write (apic_ops->icr_write)
 +#define apic_wait_icr_idle (apic_ops->wait_icr_idle)
 +#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
 +
  extern int get_physical_broadcast(void);
  
 +#ifdef CONFIG_X86_64
 +static inline void ack_x2APIC_irq(void)
 +{
 +      /* Docs say use 0 for future compatibility */
 +      native_apic_msr_write(APIC_EOI, 0);
 +}
 +#endif
 +
  static inline void ack_APIC_irq(void)
  {
        /*
         */
  
        /* Docs say use 0 for future compatibility */
 +#ifdef CONFIG_X86_32
        apic_write(APIC_EOI, 0);
 +#else
 +      native_apic_mem_write(APIC_EOI, 0);
 +#endif
  }
  
  extern int lapic_get_maxlvt(void);
index 08f89e385a929afa43e7b7e8e458d774b204d453,aec9767836b6cb4a9d90cc31a33d87e7ea4fc774..a8b19ec04ac63778d7d8858b8e6585e6c0f39013
@@@ -200,6 -200,12 +200,6 @@@ struct pv_irq_ops 
  
  struct pv_apic_ops {
  #ifdef CONFIG_X86_LOCAL_APIC
 -      /*
 -       * Direct APIC operations, principally for VMI.  Ideally
 -       * these shouldn't be in this interface.
 -       */
 -      void (*apic_write)(unsigned long reg, u32 v);
 -      u32 (*apic_read)(unsigned long reg);
        void (*setup_boot_clock)(void);
        void (*setup_secondary_clock)(void);
  
@@@ -319,6 -325,15 +319,15 @@@ struct pv_mmu_ops 
                           unsigned long phys, pgprot_t flags);
  };
  
+ struct raw_spinlock;
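+ 
+ /* Hooks that let a hypervisor supply its own raw-spinlock
+  * implementation, e.g. one that yields the vCPU instead of spinning. */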
+ struct pv_lock_ops {
+       int (*spin_is_locked)(struct raw_spinlock *lock);
+       int (*spin_is_contended)(struct raw_spinlock *lock);
+       void (*spin_lock)(struct raw_spinlock *lock);
+       int (*spin_trylock)(struct raw_spinlock *lock);
+       void (*spin_unlock)(struct raw_spinlock *lock);
+ };
+ 
  /* This contains all the paravirt structures: we get a convenient
   * number for each function using the offset which we use to indicate
   * what to patch. */
@@@ -329,6 -344,7 +338,7 @@@ struct paravirt_patch_template 
        struct pv_irq_ops pv_irq_ops;
        struct pv_apic_ops pv_apic_ops;
        struct pv_mmu_ops pv_mmu_ops;
+       struct pv_lock_ops pv_lock_ops;
  };
  
  extern struct pv_info pv_info;
@@@ -338,6 -354,7 +348,7 @@@ extern struct pv_cpu_ops pv_cpu_ops
  extern struct pv_irq_ops pv_irq_ops;
  extern struct pv_apic_ops pv_apic_ops;
  extern struct pv_mmu_ops pv_mmu_ops;
+ extern struct pv_lock_ops pv_lock_ops;
  
  #define PARAVIRT_PATCH(x)                                     \
        (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
@@@ -881,6 -898,19 +892,6 @@@ static inline void slow_down_io(void
  }
  
  #ifdef CONFIG_X86_LOCAL_APIC
 -/*
 - * Basic functions accessing APICs.
 - */
 -static inline void apic_write(unsigned long reg, u32 v)
 -{
 -      PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
 -}
 -
 -static inline u32 apic_read(unsigned long reg)
 -{
 -      return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
 -}
 -
  static inline void setup_boot_clock(void)
  {
        PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
@@@ -1349,6 -1379,37 +1360,37 @@@ static inline void __set_fixmap(unsigne
  void _paravirt_nop(void);
  #define paravirt_nop  ((void *)_paravirt_nop)
  
+ void paravirt_use_bytelocks(void);
+ 
+ #ifdef CONFIG_SMP
+ static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
+ {
+       return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
+ }
+ 
+ static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
+ {
+       return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
+ }
+ 
+ static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
+ {
+       PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
+ }
+ 
+ static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
+ {
+       return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
+ }
+ 
+ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+ {
+       PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
+ }
+ #endif
+ 
  /* These all sit in the .parainstructions section to tell us what to patch. */
  struct paravirt_patch_site {
        u8 *instr;              /* original instructions */
@@@ -1371,8 -1432,8 +1413,8 @@@ extern struct paravirt_patch_site __par
   * caller saved registers but the argument parameter */
  #define PV_SAVE_REGS "pushq %%rdi;"
  #define PV_RESTORE_REGS "popq %%rdi;"
- #define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx"
- #define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx"
+ #define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
+ #define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
  #define PV_FLAGS_ARG "D"
  #endif
  
@@@ -1433,6 -1494,7 +1475,7 @@@ static inline unsigned long __raw_local
        return f;
  }
  
  /* Make sure as little as possible of this mess escapes. */
  #undef PARAVIRT_CALL
  #undef __PVOP_CALL
  
  
  #ifdef CONFIG_X86_64
- #define PV_SAVE_REGS   pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx
- #define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
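+ /* Save/restore the full caller-clobbered register set (rax, rcx, rdx,
+  * rsi, rdi, r8-r11) around paravirt calls made from assembly. */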
+ #define PV_SAVE_REGS                          \
+       push %rax;                              \
+       push %rcx;                              \
+       push %rdx;                              \
+       push %rsi;                              \
+       push %rdi;                              \
+       push %r8;                               \
+       push %r9;                               \
+       push %r10;                              \
+       push %r11
+ #define PV_RESTORE_REGS                               \
+       pop %r11;                               \
+       pop %r10;                               \
+       pop %r9;                                \
+       pop %r8;                                \
+       pop %rdi;                               \
+       pop %rsi;                               \
+       pop %rdx;                               \
+       pop %rcx;                               \
+       pop %rax
  #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
  #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
  #define PARA_INDIRECT(addr)   *addr(%rip)
diff --combined include/asm-x86/smp.h
index 1896cdb0076a4f90391c40fac796f826bf5896dc,3c877f74f279454cd579cf71530ef3bd051b75ff..59d6cfcc404bd47fe101a0d11e4152729864014d
@@@ -25,6 -25,8 +25,8 @@@ extern cpumask_t cpu_callin_map
  extern void (*mtrr_hook)(void);
  extern void zap_low_mappings(void);
  
+ extern int __cpuinit get_local_pda(int cpu);
+ 
  extern int smp_num_siblings;
  extern unsigned int num_processors;
  extern cpumask_t cpu_initialized;
@@@ -163,33 -165,30 +165,33 @@@ extern int safe_smp_processor_id(void)
  
  #ifdef CONFIG_X86_LOCAL_APIC
  
 +#ifndef CONFIG_X86_64
  static inline int logical_smp_processor_id(void)
  {
        /* we don't want to mark this access volatile - bad code generation */
        return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
  }
  
 -#ifndef CONFIG_X86_64
 +#include <mach_apicdef.h>
  static inline unsigned int read_apic_id(void)
  {
 -      return *(u32 *)(APIC_BASE + APIC_ID);
 +      unsigned int reg;
 +
 +      reg = *(u32 *)(APIC_BASE + APIC_ID);
 +
 +      return GET_APIC_ID(reg);
  }
 -#else
 -extern unsigned int read_apic_id(void);
  #endif
  
  
 -# ifdef APIC_DEFINITION
 +# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
  extern int hard_smp_processor_id(void);
  # else
 -#  include <mach_apicdef.h>
 +#include <mach_apicdef.h>
  static inline int hard_smp_processor_id(void)
  {
        /* we don't want to mark this access volatile - bad code generation */
 -      return GET_APIC_ID(read_apic_id());
 +      return read_apic_id();
  }
  # endif /* APIC_DEFINITION */