]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'linus' into x86/x2apic
authorIngo Molnar <mingo@elte.hu>
Fri, 18 Jul 2008 20:50:34 +0000 (22:50 +0200)
committerIngo Molnar <mingo@elte.hu>
Fri, 18 Jul 2008 20:50:34 +0000 (22:50 +0200)
52 files changed:
Documentation/kernel-parameters.txt
arch/x86/Kconfig
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/apic_32.c
arch/x86/kernel/apic_64.c
arch/x86/kernel/cpu/common_64.c
arch/x86/kernel/cpu/feature_names.c
arch/x86/kernel/genapic_64.c
arch/x86/kernel/genapic_flat_64.c
arch/x86/kernel/genx2apic_cluster.c [new file with mode: 0644]
arch/x86/kernel/genx2apic_phys.c [new file with mode: 0644]
arch/x86/kernel/genx2apic_uv_x.c
arch/x86/kernel/i8259.c
arch/x86/kernel/io_apic_32.c
arch/x86/kernel/io_apic_64.c
arch/x86/kernel/mpparse.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/vmi_32.c
arch/x86/lguest/boot.c
arch/x86/mach-generic/bigsmp.c
arch/x86/mach-generic/es7000.c
arch/x86/mach-generic/numaq.c
arch/x86/mach-generic/summit.c
arch/x86/xen/enlighten.c
drivers/pci/Makefile
drivers/pci/dma_remapping.h [new file with mode: 0644]
drivers/pci/dmar.c
drivers/pci/intel-iommu.c
drivers/pci/intel-iommu.h
drivers/pci/intr_remapping.c [new file with mode: 0644]
drivers/pci/intr_remapping.h [new file with mode: 0644]
include/asm-x86/apic.h
include/asm-x86/apicdef.h
include/asm-x86/cpufeature.h
include/asm-x86/genapic_64.h
include/asm-x86/hw_irq.h
include/asm-x86/i8259.h
include/asm-x86/io_apic.h
include/asm-x86/ipi.h
include/asm-x86/irq_remapping.h [new file with mode: 0644]
include/asm-x86/mach-default/mach_apic.h
include/asm-x86/mach-default/mach_apicdef.h
include/asm-x86/mach-es7000/mach_apic.h
include/asm-x86/msidef.h
include/asm-x86/paravirt.h
include/asm-x86/smp.h
include/linux/dmar.h
include/linux/irq.h
kernel/irq/manage.c

index 09ad7450647bc81dff32a3eaf7ea3c0858f4a896..556b4187d016bb7eba08de99d4ffa173f44313b1 100644 (file)
@@ -1388,6 +1388,12 @@ and is between 256 and 4096 characters. It is defined in the file
 
        nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
 
+       nox2apic        [X86-64,APIC] Do not enable x2APIC mode.
+
+       x2apic_phys     [X86-64,APIC] Use x2apic physical mode instead of
+                       default x2apic cluster mode on platforms
+                       supporting x2apic.
+
        noltlbs         [PPC] Do not use large page/tlb entries for kernel
                        lowmem mapping on PPC40x.
 
index 96e0c2ebc3885713a5d6290f5e8eb959d0d0d36e..baca5545500548d4f1e65c7083d9f5e0b9f89c4c 100644 (file)
@@ -1650,6 +1650,14 @@ config DMAR_FLOPPY_WA
         workaround will setup a 1:1 mapping for the first
         16M to make floppy (an ISA device) work.
 
+config INTR_REMAP
+       bool "Support for Interrupt Remapping (EXPERIMENTAL)"
+       depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
+       help
+        Supports Interrupt remapping for IO-APIC and MSI devices.
+        To use x2apic mode in the CPU's which support x2APIC enhancements or
+        to support platforms with CPU's having > 8 bit APIC ID, say Y.
+
 source "drivers/pci/pcie/Kconfig"
 
 source "drivers/pci/Kconfig"
index da140611bb57593ed401a5de6ee63a84cc408349..673f1d12b420bfc85a52e42258d44365d6c45fa9 100644 (file)
@@ -102,6 +102,8 @@ obj-$(CONFIG_OLPC)          += olpc.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
         obj-y                          += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
+        obj-y                          += genx2apic_cluster.o
+        obj-y                          += genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER)     += pmtimer_64.o
         obj-$(CONFIG_AUDIT)            += audit_64.o
 
index f489d7a9be92ffd00c662c4b18554637e4311406..b41b27af33e6cb9cf31e8196397f2ab9f17fc1f4 100644 (file)
@@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address)
 
        set_fixmap_nocache(FIX_APIC_BASE, address);
        if (boot_cpu_physical_apicid == -1U) {
-               boot_cpu_physical_apicid  = GET_APIC_ID(read_apic_id());
+               boot_cpu_physical_apicid  = read_apic_id();
 #ifdef CONFIG_X86_32
                apic_version[boot_cpu_physical_apicid] =
                         GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void)
                                acpi_ioapic = 1;
 
                                smp_found_config = 1;
+#ifdef CONFIG_X86_32
                                setup_apic_routing();
+#endif
                        }
                }
                if (error == -EINVAL) {
index a437d027f20b6d8d7ba3dc88400220e796afe41e..34101962fb0ed02828acfdfe3d18a29eddd135e2 100644 (file)
@@ -145,13 +145,18 @@ static int modern_apic(void)
        return lapic_get_version() >= 0x14;
 }
 
-void apic_wait_icr_idle(void)
+/*
+ * Paravirt kernels also might be using these below ops. So we still
+ * use generic apic_read()/apic_write(), which might be pointing to different
+ * ops in PARAVIRT case.
+ */
+void xapic_wait_icr_idle(void)
 {
        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
                cpu_relax();
 }
 
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
 {
        u32 send_status;
        int timeout;
@@ -167,6 +172,35 @@ u32 safe_apic_wait_icr_idle(void)
        return send_status;
 }
 
+void xapic_icr_write(u32 low, u32 id)
+{
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+       apic_write_around(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+       u32 icr1, icr2;
+
+       icr2 = apic_read(APIC_ICR2);
+       icr1 = apic_read(APIC_ICR);
+
+       return icr1 | ((u64)icr2 << 32);
+}
+
+static struct apic_ops xapic_ops = {
+       .read = native_apic_mem_read,
+       .write = native_apic_mem_write,
+       .write_atomic = native_apic_mem_write_atomic,
+       .icr_read = xapic_icr_read,
+       .icr_write = xapic_icr_write,
+       .wait_icr_idle = xapic_wait_icr_idle,
+       .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+EXPORT_SYMBOL_GPL(apic_ops);
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -1201,7 +1235,7 @@ void __init init_apic_mappings(void)
         * default configuration (or the MP table is broken).
         */
        if (boot_cpu_physical_apicid == -1U)
-               boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+               boot_cpu_physical_apicid = read_apic_id();
 
 }
 
@@ -1241,7 +1275,7 @@ int __init APIC_init_uniprocessor(void)
         * might be zero if read from MP tables. Get it from LAPIC.
         */
 #ifdef CONFIG_CRASH_DUMP
-       boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+       boot_cpu_physical_apicid = read_apic_id();
 #endif
        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 
index 1e3d32e27c14c23a8d48d1dc6bcf8b30faeefae7..c75f58a66d8e16b895d22f347965463cb839db3d 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
+#include <linux/dmar.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -39,6 +40,7 @@
 #include <asm/proto.h>
 #include <asm/timex.h>
 #include <asm/apic.h>
+#include <asm/i8259.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
 static int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
 int disable_apic;
+int disable_x2apic;
+int x2apic;
+
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
 
 /* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
@@ -119,13 +126,13 @@ static int modern_apic(void)
        return lapic_get_version() >= 0x14;
 }
 
-void apic_wait_icr_idle(void)
+void xapic_wait_icr_idle(void)
 {
        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
                cpu_relax();
 }
 
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
 {
        u32 send_status;
        int timeout;
@@ -141,6 +148,71 @@ u32 safe_apic_wait_icr_idle(void)
        return send_status;
 }
 
+void xapic_icr_write(u32 low, u32 id)
+{
+       apic_write(APIC_ICR2, id << 24);
+       apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+       u32 icr1, icr2;
+
+       icr2 = apic_read(APIC_ICR2);
+       icr1 = apic_read(APIC_ICR);
+
+       return (icr1 | ((u64)icr2 << 32));
+}
+
+static struct apic_ops xapic_ops = {
+       .read = native_apic_mem_read,
+       .write = native_apic_mem_write,
+       .write_atomic = native_apic_mem_write_atomic,
+       .icr_read = xapic_icr_read,
+       .icr_write = xapic_icr_write,
+       .wait_icr_idle = xapic_wait_icr_idle,
+       .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+
+EXPORT_SYMBOL_GPL(apic_ops);
+
+static void x2apic_wait_icr_idle(void)
+{
+       /* no need to wait for icr idle in x2apic */
+       return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+       /* no need to wait for icr idle in x2apic */
+       return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+       wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+       unsigned long val;
+
+       rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+       return val;
+}
+
+static struct apic_ops x2apic_ops = {
+       .read = native_apic_msr_read,
+       .write = native_apic_msr_write,
+       .write_atomic = native_apic_msr_write,
+       .icr_read = x2apic_icr_read,
+       .icr_write = x2apic_icr_write,
+       .wait_icr_idle = x2apic_wait_icr_idle,
+       .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -626,10 +698,10 @@ int __init verify_local_APIC(void)
        /*
         * The ID register is read/write in a real APIC.
         */
-       reg0 = read_apic_id();
+       reg0 = apic_read(APIC_ID);
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
        apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-       reg1 = read_apic_id();
+       reg1 = apic_read(APIC_ID);
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
        apic_write(APIC_ID, reg0);
        if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -830,6 +902,125 @@ void __cpuinit end_local_APIC_setup(void)
        apic_pm_activate();
 }
 
+void check_x2apic(void)
+{
+       int msr, msr2;
+
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+       if (msr & X2APIC_ENABLE) {
+               printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+               x2apic_preenabled = x2apic = 1;
+               apic_ops = &x2apic_ops;
+       }
+}
+
+void enable_x2apic(void)
+{
+       int msr, msr2;
+
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+       if (!(msr & X2APIC_ENABLE)) {
+               printk("Enabling x2apic\n");
+               wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+       }
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+       int ret;
+       unsigned long flags;
+
+       if (!cpu_has_x2apic)
+               return;
+
+       if (!x2apic_preenabled && disable_x2apic) {
+               printk(KERN_INFO
+                      "Skipped enabling x2apic and Interrupt-remapping "
+                      "because of nox2apic\n");
+               return;
+       }
+
+       if (x2apic_preenabled && disable_x2apic)
+               panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+       if (!x2apic_preenabled && skip_ioapic_setup) {
+               printk(KERN_INFO
+                      "Skipped enabling x2apic and Interrupt-remapping "
+                      "because of skipping io-apic setup\n");
+               return;
+       }
+
+       ret = dmar_table_init();
+       if (ret) {
+               printk(KERN_INFO
+                      "dmar_table_init() failed with %d:\n", ret);
+
+               if (x2apic_preenabled)
+                       panic("x2apic enabled by bios. But IR enabling failed");
+               else
+                       printk(KERN_INFO
+                              "Not enabling x2apic,Intr-remapping\n");
+               return;
+       }
+
+       local_irq_save(flags);
+       mask_8259A();
+       save_mask_IO_APIC_setup();
+
+       ret = enable_intr_remapping(1);
+
+       if (ret && x2apic_preenabled) {
+               local_irq_restore(flags);
+               panic("x2apic enabled by bios. But IR enabling failed");
+       }
+
+       if (ret)
+               goto end;
+
+       if (!x2apic) {
+               x2apic = 1;
+               apic_ops = &x2apic_ops;
+               enable_x2apic();
+       }
+end:
+       if (ret)
+               /*
+                * IR enabling failed
+                */
+               restore_IO_APIC_setup();
+       else
+               reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+       unmask_8259A();
+       local_irq_restore(flags);
+
+       if (!ret) {
+               if (!x2apic_preenabled)
+                       printk(KERN_INFO
+                              "Enabled x2apic and interrupt-remapping\n");
+               else
+                       printk(KERN_INFO
+                              "Enabled Interrupt-remapping\n");
+       } else
+               printk(KERN_ERR
+                      "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+       if (!cpu_has_x2apic)
+               return;
+
+       if (x2apic_preenabled)
+               panic("x2apic enabled prior OS handover,"
+                     " enable CONFIG_INTR_REMAP");
+
+       printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+              " and x2apic\n");
+#endif
+
+       return;
+}
+
 /*
  * Detect and enable local APICs on non-SMP boards.
  * Original code written by Keir Fraser.
@@ -869,7 +1060,7 @@ void __init early_init_lapic_mapping(void)
         * Fetch the APIC ID of the BSP in case we have a
         * default configuration (or the MP table is broken).
         */
-       boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+       boot_cpu_physical_apicid = read_apic_id();
 }
 
 /**
@@ -877,6 +1068,11 @@ void __init early_init_lapic_mapping(void)
  */
 void __init init_apic_mappings(void)
 {
+       if (x2apic) {
+               boot_cpu_physical_apicid = read_apic_id();
+               return;
+       }
+
        /*
         * If no local APIC can be found then set up a fake all
         * zeroes page to simulate the local APIC and another
@@ -896,7 +1092,7 @@ void __init init_apic_mappings(void)
         * Fetch the APIC ID of the BSP in case we have a
         * default configuration (or the MP table is broken).
         */
-       boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+       boot_cpu_physical_apicid = read_apic_id();
 }
 
 /*
@@ -915,6 +1111,9 @@ int __init APIC_init_uniprocessor(void)
                return -1;
        }
 
+       enable_IR_x2apic();
+       setup_apic_routing();
+
        verify_local_APIC();
 
        connect_bsp_APIC();
@@ -1096,6 +1295,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
        cpu_set(cpu, cpu_present_map);
 }
 
+int hard_smp_processor_id(void)
+{
+       return read_apic_id();
+}
+
 /*
  * Power management
  */
@@ -1132,7 +1336,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 
        maxlvt = lapic_get_maxlvt();
 
-       apic_pm_state.apic_id = read_apic_id();
+       apic_pm_state.apic_id = apic_read(APIC_ID);
        apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
        apic_pm_state.apic_ldr = apic_read(APIC_LDR);
        apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1167,10 +1371,14 @@ static int lapic_resume(struct sys_device *dev)
        maxlvt = lapic_get_maxlvt();
 
        local_irq_save(flags);
-       rdmsr(MSR_IA32_APICBASE, l, h);
-       l &= ~MSR_IA32_APICBASE_BASE;
-       l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-       wrmsr(MSR_IA32_APICBASE, l, h);
+       if (!x2apic) {
+               rdmsr(MSR_IA32_APICBASE, l, h);
+               l &= ~MSR_IA32_APICBASE_BASE;
+               l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+               wrmsr(MSR_IA32_APICBASE, l, h);
+       } else
+               enable_x2apic();
+
        apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
        apic_write(APIC_ID, apic_pm_state.apic_id);
        apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1310,6 +1518,15 @@ __cpuinit int apic_is_clustered_box(void)
        return (clusters > 2);
 }
 
+static __init int setup_nox2apic(char *str)
+{
+       disable_x2apic = 1;
+       clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
+       return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+
+
 /*
  * APIC command line parameters
  */
index 7b8cc72feb40e3ed8bfd02437fe3b6324b024f67..c6bee77ca9e6797fbd87f292aefb2f9dc4c0c892 100644 (file)
@@ -608,6 +608,8 @@ void __cpuinit cpu_init(void)
        barrier();
 
        check_efer();
+       if (cpu != 0 && x2apic)
+               enable_x2apic();
 
        /*
         * set up and load the per-CPU TSS
index e43ad4ad4cbae8b75561f67ec1da51628c568491..0bf4d37a04833e5b5532fa497453eb6410938130 100644 (file)
@@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
        /* Intel-defined (#2) */
        "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
        "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
-       NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
+       NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt",
        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 
        /* VIA/Cyrix/Centaur-defined */
index 1fa8be5bd217b5c45a649e6745b847567a9553f6..3940d8161f8bb699f9613a469cfb10ae3e690d78 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/hardirq.h>
+#include <linux/dmar.h>
 
 #include <asm/smp.h>
 #include <asm/ipi.h>
@@ -29,6 +30,15 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
 
 struct genapic __read_mostly *genapic = &apic_flat;
 
+static int x2apic_phys = 0;
+
+static int set_x2apic_phys_mode(char *arg)
+{
+       x2apic_phys = 1;
+       return 0;
+}
+early_param("x2apic_phys", set_x2apic_phys_mode);
+
 static enum uv_system_type uv_system_type;
 
 /*
@@ -38,7 +48,12 @@ void __init setup_apic_routing(void)
 {
        if (uv_system_type == UV_NON_UNIQUE_APIC)
                genapic = &apic_x2apic_uv_x;
-       else
+       else if (cpu_has_x2apic && intr_remapping_enabled) {
+               if (x2apic_phys)
+                       genapic = &apic_x2apic_phys;
+               else
+                       genapic = &apic_x2apic_cluster;
+       } else
 #ifdef CONFIG_ACPI
        /*
         * Quirk: some x86_64 machines can only use physical APIC mode
@@ -61,7 +76,7 @@ void __init setup_apic_routing(void)
 
 /* Same for both flat and physical. */
 
-void send_IPI_self(int vector)
+void apic_send_IPI_self(int vector)
 {
        __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
 }
@@ -79,17 +94,6 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
        return 0;
 }
 
-unsigned int read_apic_id(void)
-{
-       unsigned int id;
-
-       WARN_ON(preemptible() && num_online_cpus() > 1);
-       id = apic_read(APIC_ID);
-       if (uv_system_type >= UV_X2APIC)
-               id  |= __get_cpu_var(x2apic_extra_bits);
-       return id;
-}
-
 enum uv_system_type get_uv_system_type(void)
 {
        return uv_system_type;
index 1a9c68845ee8c8538e6b6e6f1c04d5d4a19fb9be..2c973cbf054fe9e8f2d5c2b28248cd845d562f4a 100644 (file)
 #include <linux/kernel.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
+#include <linux/hardirq.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
+#include <mach_apicdef.h>
 
 static cpumask_t flat_target_cpus(void)
 {
@@ -95,9 +97,33 @@ static void flat_send_IPI_all(int vector)
                __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+       unsigned int id;
+
+       id = (((x)>>24) & 0xFFu);
+       return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+       unsigned long x;
+
+       x = ((id & 0xFFu)<<24);
+       return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+       unsigned int id;
+
+       id = get_apic_id(apic_read(APIC_ID));
+       return id;
+}
+
 static int flat_apic_id_registered(void)
 {
-       return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
+       return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
 static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -121,8 +147,12 @@ struct genapic apic_flat =  {
        .send_IPI_all = flat_send_IPI_all,
        .send_IPI_allbutself = flat_send_IPI_allbutself,
        .send_IPI_mask = flat_send_IPI_mask,
+       .send_IPI_self = apic_send_IPI_self,
        .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
        .phys_pkg_id = phys_pkg_id,
+       .get_apic_id = get_apic_id,
+       .set_apic_id = set_apic_id,
+       .apic_id_mask = (0xFFu<<24),
 };
 
 /*
@@ -185,6 +215,10 @@ struct genapic apic_physflat =  {
        .send_IPI_all = physflat_send_IPI_all,
        .send_IPI_allbutself = physflat_send_IPI_allbutself,
        .send_IPI_mask = physflat_send_IPI_mask,
+       .send_IPI_self = apic_send_IPI_self,
        .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
        .phys_pkg_id = phys_pkg_id,
+       .get_apic_id = get_apic_id,
+       .set_apic_id = set_apic_id,
+       .apic_id_mask = (0xFFu<<24),
 };
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644 (file)
index 0000000..40bc014
--- /dev/null
@@ -0,0 +1,153 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+       return cpumask_of_cpu(0);
+}
+
+/*
+ * for now each logical cpu is in its own vector allocation domain.
+ */
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+       cpumask_t domain = CPU_MASK_NONE;
+       cpu_set(cpu, domain);
+       return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+                                  unsigned int dest)
+{
+       unsigned long cfg;
+
+       cfg = __prepare_ICR(0, vector, dest);
+
+       /*
+        * send the IPI.
+        */
+       x2apic_icr_write(cfg, apicid);
+}
+
+/*
+ * for now, we send the IPI's one by one in the cpumask.
+ * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
+ * at once. We have 16 cpu's in a cluster. This will minimize IPI register
+ * writes.
+ */
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+       unsigned long flags;
+       unsigned long query_cpu;
+
+       local_irq_save(flags);
+       for_each_cpu_mask(query_cpu, mask) {
+               __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+                                      vector, APIC_DEST_LOGICAL);
+       }
+       local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+       cpumask_t mask = cpu_online_map;
+
+       cpu_clear(smp_processor_id(), mask);
+
+       if (!cpus_empty(mask))
+               x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+       x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+       return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+       int cpu;
+
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       cpu = first_cpu(cpumask);
+       if ((unsigned)cpu < NR_CPUS)
+               return per_cpu(x86_cpu_to_logical_apicid, cpu);
+       else
+               return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+       unsigned int id;
+
+       id = x;
+       return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+       unsigned long x;
+
+       x = id;
+       return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+       return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+       return x2apic_read_id() >> index_msb;
+}
+
+static void x2apic_send_IPI_self(int vector)
+{
+       apic_write(APIC_SELF_IPI, vector);
+}
+
+static void init_x2apic_ldr(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
+       return;
+}
+
+struct genapic apic_x2apic_cluster = {
+       .name = "cluster x2apic",
+       .int_delivery_mode = dest_LowestPrio,
+       .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+       .target_cpus = x2apic_target_cpus,
+       .vector_allocation_domain = x2apic_vector_allocation_domain,
+       .apic_id_registered = x2apic_apic_id_registered,
+       .init_apic_ldr = init_x2apic_ldr,
+       .send_IPI_all = x2apic_send_IPI_all,
+       .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+       .send_IPI_mask = x2apic_send_IPI_mask,
+       .send_IPI_self = x2apic_send_IPI_self,
+       .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+       .phys_pkg_id = phys_pkg_id,
+       .get_apic_id = get_apic_id,
+       .set_apic_id = set_apic_id,
+       .apic_id_mask = (0xFFFFFFFFu),
+};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644 (file)
index 0000000..2f3c6ca
--- /dev/null
@@ -0,0 +1,140 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+       return cpumask_of_cpu(0);
+}
+
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+       cpumask_t domain = CPU_MASK_NONE;
+       cpu_set(cpu, domain);
+       return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+                                  unsigned int dest)
+{
+       unsigned long cfg;
+
+       cfg = __prepare_ICR(0, vector, dest);
+
+       /*
+        * send the IPI.
+        */
+       x2apic_icr_write(cfg, apicid);
+}
+
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+       unsigned long flags;
+       unsigned long query_cpu;
+
+       local_irq_save(flags);
+       for_each_cpu_mask(query_cpu, mask) {
+               __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+                                      vector, APIC_DEST_PHYSICAL);
+       }
+       local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+       cpumask_t mask = cpu_online_map;
+
+       cpu_clear(smp_processor_id(), mask);
+
+       if (!cpus_empty(mask))
+               x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+       x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+       return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+       int cpu;
+
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       cpu = first_cpu(cpumask);
+       if ((unsigned)cpu < NR_CPUS)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+       else
+               return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+       unsigned int id;
+
+       id = x;
+       return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+       unsigned long x;
+
+       x = id;
+       return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+       return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+       return x2apic_read_id() >> index_msb;
+}
+
+void x2apic_send_IPI_self(int vector)
+{
+       apic_write(APIC_SELF_IPI, vector);
+}
+
+void init_x2apic_ldr(void)
+{
+       return;
+}
+
+struct genapic apic_x2apic_phys = {
+       .name = "physical x2apic",
+       .int_delivery_mode = dest_Fixed,
+       .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+       .target_cpus = x2apic_target_cpus,
+       .vector_allocation_domain = x2apic_vector_allocation_domain,
+       .apic_id_registered = x2apic_apic_id_registered,
+       .init_apic_ldr = init_x2apic_ldr,
+       .send_IPI_all = x2apic_send_IPI_all,
+       .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+       .send_IPI_mask = x2apic_send_IPI_mask,
+       .send_IPI_self = x2apic_send_IPI_self,
+       .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+       .phys_pkg_id = phys_pkg_id,
+       .get_apic_id = get_apic_id,
+       .set_apic_id = set_apic_id,
+       .apic_id_mask = (0xFFFFFFFFu),
+};
index 711f11c30b060dd17bc86be21b439d2068040489..3ca29cd8c23c916c37d805b34142886d13e77d55 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/sched.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
+#include <linux/hardirq.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
@@ -119,6 +120,10 @@ static int uv_apic_id_registered(void)
        return 1;
 }
 
+static void uv_init_apic_ldr(void)
+{
+}
+
 static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
 {
        int cpu;
@@ -134,9 +139,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
                return BAD_APICID;
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+       unsigned int id;
+
+       WARN_ON(preemptible() && num_online_cpus() > 1);
+       id = x | __get_cpu_var(x2apic_extra_bits);
+
+       return id;
+}
+
+static long set_apic_id(unsigned int id)
+{
+       unsigned long x;
+
+       /* maskout x2apic_extra_bits ? */
+       x = id;
+       return x;
+}
+
+static unsigned int uv_read_apic_id(void)
+{
+
+       return get_apic_id(apic_read(APIC_ID));
+}
+
 static unsigned int phys_pkg_id(int index_msb)
 {
-       return GET_APIC_ID(read_apic_id()) >> index_msb;
+       return uv_read_apic_id() >> index_msb;
 }
 
 #ifdef ZZZ             /* Needs x2apic patch */
@@ -153,12 +183,16 @@ struct genapic apic_x2apic_uv_x = {
        .target_cpus = uv_target_cpus,
        .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
        .apic_id_registered = uv_apic_id_registered,
+       .init_apic_ldr = uv_init_apic_ldr,
        .send_IPI_all = uv_send_IPI_all,
        .send_IPI_allbutself = uv_send_IPI_allbutself,
        .send_IPI_mask = uv_send_IPI_mask,
        /* ZZZ.send_IPI_self = uv_send_IPI_self, */
        .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
        .phys_pkg_id = phys_pkg_id,     /* Fixme ZZZ */
+       .get_apic_id = get_apic_id,
+       .set_apic_id = set_apic_id,
+       .apic_id_mask = (0xFFFFFFFFu),
 };
 
 static __cpuinit void set_x2apic_extra_bits(int pnode)
index dc92b49d9204d59d181bcaf01c434446d55de39a..4b8a53d841f7a1c71e75eb85a9fe4ecacb17d4b3 100644 (file)
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
 
 device_initcall(i8259A_init_sysfs);
 
+void mask_8259A(void)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+
+       outb(0xff, PIC_MASTER_IMR);     /* mask all of 8259A-1 */
+       outb(0xff, PIC_SLAVE_IMR);      /* mask all of 8259A-2 */
+
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void unmask_8259A(void)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+
+       outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+       outb(cached_slave_mask, PIC_SLAVE_IMR);   /* restore slave IRQ mask */
+
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
 void init_8259A(int auto_eoi)
 {
        unsigned long flags;
index 558abf4c796afa0d7dd7ad2622e3bd42f28e8d39..a82065b0699ec167cc8b75c72e58ffa65f41b94d 100644 (file)
@@ -1494,7 +1494,7 @@ void /*__init*/ print_local_APIC(void *dummy)
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
        printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
-                       GET_APIC_ID(read_apic_id()));
+                       GET_APIC_ID(v));
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
@@ -1702,8 +1702,7 @@ void disable_IO_APIC(void)
                entry.dest_mode       = 0; /* Physical */
                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                entry.vector          = 0;
-               entry.dest.physical.physical_dest =
-                                       GET_APIC_ID(read_apic_id());
+               entry.dest.physical.physical_dest = read_apic_id();
 
                /*
                 * Add it to the IO-APIC irq-routing table:
index 6510cde36b3549149eabefa4aaf724c72e5a0959..39f0be37e9a16b7f0b10df3af743b6338f0532fa 100644 (file)
@@ -37,6 +37,7 @@
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
+#include <linux/dmar.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -48,6 +49,7 @@
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -107,6 +109,9 @@ DEFINE_SPINLOCK(vector_lock);
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
+/* I/O APIC RTE contents at the OS boot up */
+struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /* I/O APIC entries */
 struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
@@ -302,7 +307,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
                pin = entry->pin;
                if (pin == -1)
                        break;
-               io_apic_write(apic, 0x11 + pin*2, dest);
+               /*
+                * With interrupt-remapping, destination information comes
+                * from interrupt-remapping table entry.
+                */
+               if (!irq_remapped(irq))
+                       io_apic_write(apic, 0x11 + pin*2, dest);
                reg = io_apic_read(apic, 0x10 + pin*2);
                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
                reg |= vector;
@@ -439,6 +449,69 @@ static void clear_IO_APIC (void)
                        clear_IO_APIC_pin(apic, pin);
 }
 
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+       union IO_APIC_reg_01 reg_01;
+       unsigned long flags;
+       int apic, pin;
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_01.raw = io_apic_read(apic, 1);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               early_ioapic_entries[apic] =
+                       kzalloc(sizeof(struct IO_APIC_route_entry) *
+                               nr_ioapic_registers[apic], GFP_KERNEL);
+               if (!early_ioapic_entries[apic])
+                       return -ENOMEM;
+       }
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+
+                       entry = early_ioapic_entries[apic][pin] =
+                               ioapic_read_entry(apic, pin);
+                       if (!entry.mask) {
+                               entry.mask = 1;
+                               ioapic_write_entry(apic, pin, entry);
+                       }
+               }
+       return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+       int apic, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+                       ioapic_write_entry(apic, pin,
+                                          early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+       /*
+        * for now plain restore of previous settings.
+        * TBD: In the case of OS enabling interrupt-remapping,
+        * IO-APIC RTE's need to be setup to point to interrupt-remapping
+        * table entries. for now, do a plain restore, and wait for
+        * the setup_IO_APIC_irqs() to do proper initialization.
+        */
+       restore_IO_APIC_setup();
+}
+
 int skip_ioapic_setup;
 int ioapic_force;
 
@@ -833,18 +906,98 @@ void setup_vector_irq(int cpu)
 
 
 static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-       if (trigger) {
+       if (trigger)
                irq_desc[irq].status |= IRQ_LEVEL;
-               set_irq_chip_and_handler_name(irq, &ioapic_chip,
-                                             handle_fasteoi_irq, "fasteoi");
-       } else {
+       else
                irq_desc[irq].status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+               if (trigger)
+                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+                                                     handle_fasteoi_irq,
+                                                    "fasteoi");
+               else
+                       set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+                                                     handle_edge_irq, "edge");
+               return;
+       }
+#endif
+       if (trigger)
+               set_irq_chip_and_handler_name(irq, &ioapic_chip,
+                                             handle_fasteoi_irq,
+                                             "fasteoi");
+       else
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                              handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+                             struct IO_APIC_route_entry *entry,
+                             unsigned int destination, int trigger,
+                             int polarity, int vector)
+{
+       /*
+        * add it to the IO-APIC irq-routing table:
+        */
+       memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+       if (intr_remapping_enabled) {
+               struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+               struct irte irte;
+               struct IR_IO_APIC_route_entry *ir_entry =
+                       (struct IR_IO_APIC_route_entry *) entry;
+               int index;
+
+               if (!iommu)
+                       panic("No mapping iommu for ioapic %d\n", apic);
+
+               index = alloc_irte(iommu, irq, 1);
+               if (index < 0)
+                       panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+               memset(&irte, 0, sizeof(irte));
+
+               irte.present = 1;
+               irte.dst_mode = INT_DEST_MODE;
+               irte.trigger_mode = trigger;
+               irte.dlvry_mode = INT_DELIVERY_MODE;
+               irte.vector = vector;
+               irte.dest_id = IRTE_DEST(destination);
+
+               modify_irte(irq, &irte);
+
+               ir_entry->index2 = (index >> 15) & 0x1;
+               ir_entry->zero = 0;
+               ir_entry->format = 1;
+               ir_entry->index = (index & 0x7fff);
+       } else
+#endif
+       {
+               entry->delivery_mode = INT_DELIVERY_MODE;
+               entry->dest_mode = INT_DEST_MODE;
+               entry->dest = destination;
        }
+
+       entry->mask = 0;                                /* enable IRQ */
+       entry->trigger = trigger;
+       entry->polarity = polarity;
+       entry->vector = vector;
+
+       /* Mask level triggered irqs.
+        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+        */
+       if (trigger)
+               entry->mask = 1;
+       return 0;
 }
 
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -869,24 +1022,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
                    irq, trigger, polarity);
 
-       /*
-        * add it to the IO-APIC irq-routing table:
-        */
-       memset(&entry,0,sizeof(entry));
 
-       entry.delivery_mode = INT_DELIVERY_MODE;
-       entry.dest_mode = INT_DEST_MODE;
-       entry.dest = cpu_mask_to_apicid(mask);
-       entry.mask = 0;                         /* enable IRQ */
-       entry.trigger = trigger;
-       entry.polarity = polarity;
-       entry.vector = cfg->vector;
-
-       /* Mask level triggered irqs.
-        * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-        */
-       if (trigger)
-               entry.mask = 1;
+       if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+                              cpu_mask_to_apicid(mask), trigger, polarity,
+                              cfg->vector)) {
+               printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+                      mp_ioapics[apic].mp_apicid, pin);
+               __clear_irq_vector(irq);
+               return;
+       }
 
        ioapic_register_intr(irq, trigger);
        if (irq < 16)
@@ -938,6 +1082,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
        struct IO_APIC_route_entry entry;
 
+       if (intr_remapping_enabled)
+               return;
+
        memset(&entry, 0, sizeof(entry));
 
        /*
@@ -1084,6 +1231,7 @@ static __apicdebuginit void print_APIC_bitfield (int base)
 void __apicdebuginit print_local_APIC(void * dummy)
 {
        unsigned int v, ver, maxlvt;
+       unsigned long icr;
 
        if (apic_verbosity == APIC_QUIET)
                return;
@@ -1091,7 +1239,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
        printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
-       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
@@ -1127,10 +1275,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
        v = apic_read(APIC_ESR);
        printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 
-       v = apic_read(APIC_ICR);
-       printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
-       v = apic_read(APIC_ICR2);
-       printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+       icr = apic_icr_read();
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
 
        v = apic_read(APIC_LVTT);
        printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1285,7 +1432,7 @@ void disable_IO_APIC(void)
                entry.dest_mode       = 0; /* Physical */
                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                entry.vector          = 0;
-               entry.dest          = GET_APIC_ID(read_apic_id());
+               entry.dest            = read_apic_id();
 
                /*
                 * Add it to the IO-APIC irq-routing table:
@@ -1393,6 +1540,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE aswell with the update
+ * vector information, along with modifying IRTE with vector and destination.
+ * So irq migration for level triggered is little  bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_desc *desc = irq_desc + irq;
+       cpumask_t tmp, cleanup_mask;
+       struct irte irte;
+       int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+       unsigned int dest;
+       unsigned long flags;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               return;
+
+       if (get_irte(irq, &irte))
+               return;
+
+       if (assign_irq_vector(irq, mask))
+               return;
+
+       cpus_and(tmp, cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       if (modify_ioapic_rte) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               __target_IO_APIC_irq(irq, dest, cfg->vector);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+
+       irte.vector = cfg->vector;
+       irte.dest_id = IRTE_DEST(dest);
+
+       /*
+        * Modified the IRTE and flushes the Interrupt entry cache.
+        */
+       modify_irte(irq, &irte);
+
+       if (cfg->move_in_progress) {
+               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
+
+       irq_desc[irq].affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+       int ret = -1;
+
+       mask_IO_APIC_irq(irq);
+
+       if (io_apic_level_ack_pending(irq)) {
+               /*
+                * Interrupt in progress. Migrating irq now will change the
+                * vector information in the IO-APIC RTE and that will confuse
+                * the EOI broadcast performed by cpu.
+                * So, delay the irq migration to the next instance.
+                */
+               schedule_delayed_work(&ir_migration_work, 1);
+               goto unmask;
+       }
+
+       /* everthing is clear. we have right of way */
+       migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+
+       ret = 0;
+       irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
+       cpus_clear(irq_desc[irq].pending_mask);
+
+unmask:
+       unmask_IO_APIC_irq(irq);
+       return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+       int irq;
+
+       for (irq = 0; irq < NR_IRQS; irq++) {
+               struct irq_desc *desc = irq_desc + irq;
+               if (desc->status & IRQ_MOVE_PENDING) {
+                       unsigned long flags;
+
+                       spin_lock_irqsave(&desc->lock, flags);
+                       if (!desc->chip->set_affinity ||
+                           !(desc->status & IRQ_MOVE_PENDING)) {
+                               desc->status &= ~IRQ_MOVE_PENDING;
+                               spin_unlock_irqrestore(&desc->lock, flags);
+                               continue;
+                       }
+
+                       desc->chip->set_affinity(irq,
+                                                irq_desc[irq].pending_mask);
+                       spin_unlock_irqrestore(&desc->lock, flags);
+               }
+       }
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+       if (irq_desc[irq].status & IRQ_LEVEL) {
+               irq_desc[irq].status |= IRQ_MOVE_PENDING;
+               irq_desc[irq].pending_mask = mask;
+               migrate_irq_remapped_level(irq);
+               return;
+       }
+
+       migrate_ioapic_irq(irq, mask);
+}
+#endif
+
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
        unsigned vector, me;
@@ -1449,6 +1737,17 @@ static void irq_complete_move(unsigned int irq)
 #else
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+       ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+       ack_x2APIC_irq();
+}
+#endif
 
 static void ack_apic_edge(unsigned int irq)
 {
@@ -1523,6 +1822,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
        .retrigger      = ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+       .name           = "IR-IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_x2apic_edge,
+       .eoi            = ack_x2apic_level,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ir_ioapic_affinity_irq,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+#endif
+
 static inline void init_IO_APIC_traps(void)
 {
        int irq;
@@ -1707,6 +2021,8 @@ static inline void __init check_timer(void)
         * 8259A.
         */
        if (pin1 == -1) {
+               if (intr_remapping_enabled)
+                       panic("BIOS bug: timer not connected to IO-APIC");
                pin1 = pin2;
                apic1 = apic2;
                no_pin1 = 1;
@@ -1733,6 +2049,8 @@ static inline void __init check_timer(void)
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
                }
+               if (intr_remapping_enabled)
+                       panic("timer doesn't work through Interrupt-remapped IO-APIC");
                clear_IO_APIC_pin(apic1, pin1);
                if (!no_pin1)
                        apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
@@ -1969,6 +2287,9 @@ void destroy_irq(unsigned int irq)
 
        dynamic_irq_cleanup(irq);
 
+#ifdef CONFIG_INTR_REMAP
+       free_irte(irq);
+#endif
        spin_lock_irqsave(&vector_lock, flags);
        __clear_irq_vector(irq);
        spin_unlock_irqrestore(&vector_lock, flags);
@@ -1987,10 +2308,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 
        tmp = TARGET_CPUS;
        err = assign_irq_vector(irq, tmp);
-       if (!err) {
-               cpus_and(tmp, cfg->domain, tmp);
-               dest = cpu_mask_to_apicid(tmp);
+       if (err)
+               return err;
+
+       cpus_and(tmp, cfg->domain, tmp);
+       dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               struct irte irte;
+               int ir_index;
+               u16 sub_handle;
+
+               ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+               BUG_ON(ir_index == -1);
+
+               memset (&irte, 0, sizeof(irte));
+
+               irte.present = 1;
+               irte.dst_mode = INT_DEST_MODE;
+               irte.trigger_mode = 0; /* edge */
+               irte.dlvry_mode = INT_DELIVERY_MODE;
+               irte.vector = cfg->vector;
+               irte.dest_id = IRTE_DEST(dest);
+
+               modify_irte(irq, &irte);
 
+               msg->address_hi = MSI_ADDR_BASE_HI;
+               msg->data = sub_handle;
+               msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+                                 MSI_ADDR_IR_SHV |
+                                 MSI_ADDR_IR_INDEX1(ir_index) |
+                                 MSI_ADDR_IR_INDEX2(ir_index);
+       } else
+#endif
+       {
                msg->address_hi = MSI_ADDR_BASE_HI;
                msg->address_lo =
                        MSI_ADDR_BASE_LO |
@@ -2041,6 +2393,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        write_msi_msg(irq, &msg);
        irq_desc[irq].affinity = mask;
 }
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       struct irq_cfg *cfg = irq_cfg + irq;
+       unsigned int dest;
+       cpumask_t tmp, cleanup_mask;
+       struct irte irte;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               return;
+
+       if (get_irte(irq, &irte))
+               return;
+
+       if (assign_irq_vector(irq, mask))
+               return;
+
+       cpus_and(tmp, cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       irte.vector = cfg->vector;
+       irte.dest_id = IRTE_DEST(dest);
+
+       /*
+        * atomically update the IRTE with the new destination and vector.
+        */
+       modify_irte(irq, &irte);
+
+       /*
+        * After this point, all the interrupts will start arriving
+        * at the new destination. So, time to cleanup the previous
+        * vector allocation.
+        */
+       if (cfg->move_in_progress) {
+               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
+
+       irq_desc[irq].affinity = mask;
+}
+#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -2058,26 +2459,157 @@ static struct irq_chip msi_chip = {
        .retrigger      = ioapic_retrigger_irq,
 };
 
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+       .name           = "IR-PCI-MSI",
+       .unmask         = unmask_msi_irq,
+       .mask           = mask_msi_irq,
+       .ack            = ack_x2apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity   = ir_set_msi_irq_affinity,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+       struct intel_iommu *iommu;
+       int index;
+
+       iommu = map_dev_to_ir(dev);
+       if (!iommu) {
+               printk(KERN_ERR
+                      "Unable to map PCI %s to iommu\n", pci_name(dev));
+               return -ENOENT;
+       }
+
+       index = alloc_irte(iommu, irq, nvec);
+       if (index < 0) {
+               printk(KERN_ERR
+                      "Unable to allocate %d IRTE for PCI %s\n", nvec,
+                       pci_name(dev));
+               return -ENOSPC;
+       }
+       return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 {
+       int ret;
        struct msi_msg msg;
+
+       ret = msi_compose_msg(dev, irq, &msg);
+       if (ret < 0)
+               return ret;
+
+       set_irq_msi(irq, desc);
+       write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+               struct irq_desc *desc = irq_desc + irq;
+               /*
+                * irq migration in process context
+                */
+               desc->status |= IRQ_MOVE_PCNTXT;
+               set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+       } else
+#endif
+               set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+       return 0;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
        int irq, ret;
+
        irq = create_irq();
        if (irq < 0)
                return irq;
 
-       ret = msi_compose_msg(dev, irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+       if (!intr_remapping_enabled)
+               goto no_ir;
+
+       ret = msi_alloc_irte(dev, irq, 1);
+       if (ret < 0)
+               goto error;
+no_ir:
+#endif
+       ret = setup_msi_irq(dev, desc, irq);
        if (ret < 0) {
                destroy_irq(irq);
                return ret;
        }
+       return 0;
 
-       set_irq_msi(irq, desc);
-       write_msi_msg(irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+error:
+       destroy_irq(irq);
+       return ret;
+#endif
+}
 
-       set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+       int irq, ret, sub_handle;
+       struct msi_desc *desc;
+#ifdef CONFIG_INTR_REMAP
+       struct intel_iommu *iommu = 0;
+       int index = 0;
+#endif
 
+       sub_handle = 0;
+       list_for_each_entry(desc, &dev->msi_list, list) {
+               irq = create_irq();
+               if (irq < 0)
+                       return irq;
+#ifdef CONFIG_INTR_REMAP
+               if (!intr_remapping_enabled)
+                       goto no_ir;
+
+               if (!sub_handle) {
+                       /*
+                        * allocate the consecutive block of IRTE's
+                        * for 'nvec'
+                        */
+                       index = msi_alloc_irte(dev, irq, nvec);
+                       if (index < 0) {
+                               ret = index;
+                               goto error;
+                       }
+               } else {
+                       iommu = map_dev_to_ir(dev);
+                       if (!iommu) {
+                               ret = -ENOENT;
+                               goto error;
+                       }
+                       /*
+                        * setup the mapping between the irq and the IRTE
+                        * base index, the sub_handle pointing to the
+                        * appropriate interrupt remap table entry.
+                        */
+                       set_irte_irq(irq, iommu, index, sub_handle);
+               }
+no_ir:
+#endif
+               ret = setup_msi_irq(dev, desc, irq);
+               if (ret < 0)
+                       goto error;
+               sub_handle++;
+       }
        return 0;
+
+error:
+       destroy_irq(irq);
+       return ret;
 }
 
 void arch_teardown_msi_irq(unsigned int irq)
@@ -2325,6 +2857,10 @@ void __init setup_ioapic_dest(void)
                                setup_IO_APIC_irq(ioapic, pin, irq,
                                                  irq_trigger(irq_entry),
                                                  irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+                       else if (intr_remapping_enabled)
+                               set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
                        else
                                set_ioapic_affinity_irq(irq, TARGET_CPUS);
                }
index 3b25e49380c6eed8ff660d409afb2deda8c428cd..70e1f3e287fbeff08ddf497565570192b9d892f5 100644 (file)
@@ -545,7 +545,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
        generic_bigsmp_probe();
 #endif
 
+#ifdef CONFIG_X86_32
        setup_apic_routing();
+#endif
        if (!num_processors)
                printk(KERN_ERR "MPTABLE: no processors registered!\n");
        return num_processors;
index e0f571d58c19c0bfa4eeee39d89972ed2239f55f..e0f139106c7e9288202c9aec1dc48344ebae07c7 100644 (file)
@@ -360,9 +360,6 @@ struct pv_cpu_ops pv_cpu_ops = {
 
 struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
-       .apic_write = native_apic_write,
-       .apic_write_atomic = native_apic_write_atomic,
-       .apic_read = native_apic_read,
        .setup_boot_clock = setup_boot_APIC_clock,
        .setup_secondary_clock = setup_secondary_APIC_clock,
        .startup_ipi_hook = paravirt_nop,
index 531b55b8e81a1de1827eac5691d5f8aef1d8d10c..6121ffd46b9eb587ebbd7e380eadb29f698ec638 100644 (file)
@@ -735,6 +735,8 @@ void __init setup_arch(char **cmdline_p)
        num_physpages = max_pfn;
 
        check_efer();
+       if (cpu_has_x2apic)
+               check_x2apic();
 
        /* How many end-of-memory variables you have, grandma! */
        /* need this before calling reserve_initrd */
index 687376ab07e82ece4ab1eeb609d738d76245d226..23c3b3d1f4ccf55fcd7c4aa6c087b88565a3331d 100644 (file)
@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 static atomic_t init_deasserted;
 
-static int boot_cpu_logical_apicid;
 
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
 #endif
 
 #ifdef CONFIG_X86_32
+static int boot_cpu_logical_apicid;
+
 u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
                                        { [0 ... NR_CPUS-1] = BAD_APICID };
 
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
        /*
         * (This works even if the APIC is not enabled.)
         */
-       phys_id = GET_APIC_ID(read_apic_id());
+       phys_id = read_apic_id();
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -546,8 +547,7 @@ static inline void __inquire_remote_apic(int apicid)
                        printk(KERN_CONT
                               "a previous APIC delivery may have failed\n");
 
-               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
-               apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+               apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
 
                timeout = 0;
                do {
@@ -579,11 +579,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
        int maxlvt;
 
        /* Target chip */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
        /* Boot on the stack */
        /* Kick the second */
-       apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+       apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
 
        Dprintk("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
@@ -639,13 +637,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
        /*
         * Turn INIT on target chip
         */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
        /*
         * Send IPI
         */
-       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
-                               | APIC_DM_INIT);
+       apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
+                      phys_apicid);
 
        Dprintk("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
@@ -655,10 +651,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
        Dprintk("Deasserting INIT.\n");
 
        /* Target chip */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
        /* Send IPI */
-       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+       apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
 
        Dprintk("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();
@@ -703,12 +697,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
                 */
 
                /* Target chip */
-               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
                /* Boot on the stack */
                /* Kick the second */
-               apic_write_around(APIC_ICR, APIC_DM_STARTUP
-                                       | (start_eip >> 12));
+               apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
+                              phys_apicid);
 
                /*
                 * Give the other CPU some time to accept the IPI.
@@ -1147,10 +1139,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
         * Setup boot CPU information
         */
        smp_store_cpu_info(0); /* Final full version of the data */
+#ifdef CONFIG_X86_32
        boot_cpu_logical_apicid = logical_smp_processor_id();
+#endif
        current_thread_info()->cpu = 0;  /* needed? */
        set_cpu_sibling_map(0);
 
+#ifdef CONFIG_X86_64
+       enable_IR_x2apic();
+       setup_apic_routing();
+#endif
+
        if (smp_sanity_check(max_cpus) < 0) {
                printk(KERN_INFO "SMP disabled\n");
                disable_smp();
@@ -1158,9 +1157,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
        }
 
        preempt_disable();
-       if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
+       if (read_apic_id() != boot_cpu_physical_apicid) {
                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
-                    GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
+                    read_apic_id(), boot_cpu_physical_apicid);
                /* Or can we switch back to PIC here? */
        }
        preempt_enable();
index b15346092b7b72aa66e75621f4e31f72698ccbce..237082833c14afc142aa3ce1786f5f5ec1c47163 100644 (file)
@@ -904,9 +904,9 @@ static inline int __init activate_vmi(void)
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-       para_fill(pv_apic_ops.apic_read, APICRead);
-       para_fill(pv_apic_ops.apic_write, APICWrite);
-       para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
+       para_fill(apic_ops->read, APICRead);
+       para_fill(apic_ops->write, APICWrite);
+       para_fill(apic_ops->write_atomic, APICWrite);
 #endif
 
        /*
index 50dad44fb54234e2725c833fafd15b59b3db0283..675ee7a6475e72589682233c89d1492d18416991 100644 (file)
@@ -783,14 +783,45 @@ static void lguest_wbinvd(void)
  * code qualifies for Advanced.  It will also never interrupt anything.  It
  * does, however, allow us to get through the Linux boot code. */
 #ifdef CONFIG_X86_LOCAL_APIC
-static void lguest_apic_write(unsigned long reg, u32 v)
+static void lguest_apic_write(u32 reg, u32 v)
 {
 }
 
-static u32 lguest_apic_read(unsigned long reg)
+static u32 lguest_apic_read(u32 reg)
 {
        return 0;
 }
+
+static u64 lguest_apic_icr_read(void)
+{
+       return 0;
+}
+
+static void lguest_apic_icr_write(u32 low, u32 id)
+{
+       /* Warn to see if there's any stray references */
+       WARN_ON(1);
+}
+
+static void lguest_apic_wait_icr_idle(void)
+{
+       return;
+}
+
+static u32 lguest_apic_safe_wait_icr_idle(void)
+{
+       return 0;
+}
+
+static struct apic_ops lguest_basic_apic_ops = {
+       .read = lguest_apic_read,
+       .write = lguest_apic_write,
+       .write_atomic = lguest_apic_write,
+       .icr_read = lguest_apic_icr_read,
+       .icr_write = lguest_apic_icr_write,
+       .wait_icr_idle = lguest_apic_wait_icr_idle,
+       .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
+};
 #endif
 
 /* STOP!  Until an interrupt comes in. */
@@ -990,9 +1021,7 @@ __init void lguest_init(void)
 
 #ifdef CONFIG_X86_LOCAL_APIC
        /* apic read/write intercepts */
-       pv_apic_ops.apic_write = lguest_apic_write;
-       pv_apic_ops.apic_write_atomic = lguest_apic_write;
-       pv_apic_ops.apic_read = lguest_apic_read;
+       apic_ops = &lguest_basic_apic_ops;
 #endif
 
        /* time operations */
index 59d7717145590b4f70f7e75daad74bff953d8b3a..b31f2800638e34efc29d78712a7221967eafc77c 100644 (file)
@@ -5,17 +5,16 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
-#include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
-#include <asm/mach-bigsmp/mach_apic.h>
 #include <asm/mach-bigsmp/mach_apicdef.h>
+#include <linux/smp.h>
+#include <asm/mach-bigsmp/mach_apic.h>
 #include <asm/mach-bigsmp/mach_ipi.h>
 #include <asm/mach-default/mach_mpparse.h>
 
index 4742626f08c4c7abb5f815248c541d03362b602f..9b30547d746ef41be04d461a532683b375a51633 100644 (file)
@@ -4,16 +4,15 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/smp.h>
 #include <linux/init.h>
 #include <asm/mach-es7000/mach_apicdef.h>
+#include <linux/smp.h>
 #include <asm/mach-es7000/mach_apic.h>
 #include <asm/mach-es7000/mach_ipi.h>
 #include <asm/mach-es7000/mach_mpparse.h>
index 8091e68764c4b623b4f4523cc4bb9324630f9f20..95c07efff6b7f4c8829b68636c32c1ce7fb12153 100644 (file)
@@ -4,7 +4,6 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <linux/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
@@ -12,8 +11,9 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <asm/mach-numaq/mach_apic.h>
 #include <asm/mach-numaq/mach_apicdef.h>
+#include <linux/smp.h>
+#include <asm/mach-numaq/mach_apic.h>
 #include <asm/mach-numaq/mach_ipi.h>
 #include <asm/mach-numaq/mach_mpparse.h>
 #include <asm/mach-numaq/mach_wakecpu.h>
index a97ea0f35b1ee821a96d101d27eb50f6f6e7faef..752edd96b1bf0abc9549b73ba5897de7eae94146 100644 (file)
@@ -4,17 +4,16 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/smp.h>
 #include <linux/init.h>
-#include <asm/mach-summit/mach_apic.h>
 #include <asm/mach-summit/mach_apicdef.h>
+#include <linux/smp.h>
+#include <asm/mach-summit/mach_apic.h>
 #include <asm/mach-summit/mach_ipi.h>
 #include <asm/mach-summit/mach_mpparse.h>
 
index bb508456ef523e1fa50f77a2993bf481b06f03f0..402f3e2c7bee727ebdea7ce8d03309b708cdda43 100644 (file)
@@ -548,16 +548,48 @@ static void xen_io_delay(void)
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-static u32 xen_apic_read(unsigned long reg)
+static u32 xen_apic_read(u32 reg)
 {
        return 0;
 }
 
-static void xen_apic_write(unsigned long reg, u32 val)
+static void xen_apic_write(u32 reg, u32 val)
 {
        /* Warn to see if there's any stray references */
        WARN_ON(1);
 }
+
+static u64 xen_apic_icr_read(void)
+{
+       return 0;
+}
+
+static void xen_apic_icr_write(u32 low, u32 id)
+{
+       /* Warn to see if there's any stray references */
+       WARN_ON(1);
+}
+
+static void xen_apic_wait_icr_idle(void)
+{
+        return;
+}
+
+static u32 xen_safe_apic_wait_icr_idle(void)
+{
+        return 0;
+}
+
+static struct apic_ops xen_basic_apic_ops = {
+       .read = xen_apic_read,
+       .write = xen_apic_write,
+       .write_atomic = xen_apic_write,
+       .icr_read = xen_apic_icr_read,
+       .icr_write = xen_apic_icr_write,
+       .wait_icr_idle = xen_apic_wait_icr_idle,
+       .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
+};
+
 #endif
 
 static void xen_flush_tlb(void)
@@ -1130,9 +1162,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
-       .apic_write = xen_apic_write,
-       .apic_write_atomic = xen_apic_write,
-       .apic_read = xen_apic_read,
        .setup_boot_clock = paravirt_nop,
        .setup_secondary_clock = paravirt_nop,
        .startup_ipi_hook = paravirt_nop,
@@ -1294,6 +1323,13 @@ asmlinkage void __init xen_start_kernel(void)
        pv_apic_ops = xen_apic_ops;
        pv_mmu_ops = xen_mmu_ops;
 
+#ifdef CONFIG_X86_LOCAL_APIC
+       /*
+        * set up the basic apic ops.
+        */
+       apic_ops = &xen_basic_apic_ops;
+#endif
+
        if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
                pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
                pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
index 7d63f8ced24b12568fc49ebfa06fd588e49b6fc9..4b47f4ece5b70904c9ef68c06f468489ec70939a 100644 (file)
@@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
 # Build Intel IOMMU support
 obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
 
+obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
+
 #
 # Some architectures use the generic PCI setup functions
 #
diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h
new file mode 100644 (file)
index 0000000..bff5c65
--- /dev/null
@@ -0,0 +1,157 @@
+#ifndef _DMA_REMAPPING_H
+#define _DMA_REMAPPING_H
+
+/*
+ * We need a fixed PAGE_SIZE of 4K irrespective of
+ * arch PAGE_SIZE for IOMMU page tables.
+ */
+#define PAGE_SHIFT_4K          (12)
+#define PAGE_SIZE_4K           (1UL << PAGE_SHIFT_4K)
+#define PAGE_MASK_4K           (((u64)-1) << PAGE_SHIFT_4K)
+#define PAGE_ALIGN_4K(addr)    (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+
+#define IOVA_PFN(addr)         ((addr) >> PAGE_SHIFT_4K)
+#define DMA_32BIT_PFN          IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN          IOVA_PFN(DMA_64BIT_MASK)
+
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+       u64     val;
+       u64     rsvd1;
+};
+#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+       return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+       root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+       root->val |= value & PAGE_MASK_4K;
+}
+
+struct context_entry;
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+       return (struct context_entry *)
+               (root_present(root)?phys_to_virt(
+               root->val & PAGE_MASK_4K):
+               NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+       u64 lo;
+       u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_width(c) ((c).hi &  7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+       do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+       do { \
+               (c).lo &= (((u64)-1) << 4) | 3; \
+               (c).lo |= ((val) & 3) << 2; \
+       } while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+       do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+       do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physcial address
+ */
+struct dma_pte {
+       u64 val;
+};
+#define dma_clear_pte(p)       do {(p).val = 0;} while (0)
+
+#define DMA_PTE_READ (1)
+#define DMA_PTE_WRITE (2)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+               do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_set_pte_addr(p, addr) do {\
+               (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
+struct intel_iommu;
+
+struct dmar_domain {
+       int     id;                     /* domain id */
+       struct intel_iommu *iommu;      /* back pointer to owning iommu */
+
+       struct list_head devices;       /* all devices' list */
+       struct iova_domain iovad;       /* iova's that belong to this domain */
+
+       struct dma_pte  *pgd;           /* virtual address */
+       spinlock_t      mapping_lock;   /* page table lock */
+       int             gaw;            /* max guest address width */
+
+       /* adjusted guest address width, 0 is level 2 30-bit */
+       int             agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+       int             flags;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+       struct list_head link;  /* link to domain siblings */
+       struct list_head global; /* link to global list */
+       u8 bus;                 /* PCI bus numer */
+       u8 devfn;               /* PCI devfn number */
+       struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+       struct dmar_domain *domain; /* pointer to domain */
+};
+
+extern int init_dmars(void);
+extern void free_dmar_iommu(struct intel_iommu *iommu);
+
+extern int dmar_disabled;
+
+#ifndef CONFIG_DMAR_GFX_WA
+static inline void iommu_prepare_gfx_mapping(void)
+{
+       return;
+}
+#endif /* !CONFIG_DMAR_GFX_WA */
+
+#endif
index f941f609dbf399edfd389b4ace85a07816a5e633..bd2c01674f5ec1cd07063ce499e38e5faf805995 100644 (file)
  * Author: Shaohua Li <shaohua.li@intel.com>
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  *
- * This file implements early detection/parsing of DMA Remapping Devices
+ * This file implements early detection/parsing of Remapping Devices
  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
  * tables.
+ *
+ * These routines are used by both DMA-remapping and Interrupt-remapping
  */
 
 #include <linux/pci.h>
 #include <linux/dmar.h>
+#include <linux/timer.h>
 #include "iova.h"
 #include "intel-iommu.h"
 
@@ -37,7 +40,6 @@
  * these units are not supported by the architecture.
  */
 LIST_HEAD(dmar_drhd_units);
-LIST_HEAD(dmar_rmrr_units);
 
 static struct acpi_table_header * __initdata dmar_tbl;
 
@@ -53,11 +55,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
                list_add(&drhd->list, &dmar_drhd_units);
 }
 
-static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
-{
-       list_add(&rmrr->list, &dmar_rmrr_units);
-}
-
 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
                                           struct pci_dev **dev, u16 segment)
 {
@@ -172,19 +169,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
        struct acpi_dmar_hardware_unit *drhd;
        struct dmar_drhd_unit *dmaru;
        int ret = 0;
-       static int include_all;
 
        dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
        if (!dmaru)
                return -ENOMEM;
 
+       dmaru->hdr = header;
        drhd = (struct acpi_dmar_hardware_unit *)header;
        dmaru->reg_base_addr = drhd->address;
        dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
 
+       ret = alloc_iommu(dmaru);
+       if (ret) {
+               kfree(dmaru);
+               return ret;
+       }
+       dmar_register_drhd_unit(dmaru);
+       return 0;
+}
+
+static int __init
+dmar_parse_dev(struct dmar_drhd_unit *dmaru)
+{
+       struct acpi_dmar_hardware_unit *drhd;
+       static int include_all;
+       int ret;
+
+       drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
+
        if (!dmaru->include_all)
                ret = dmar_parse_dev_scope((void *)(drhd + 1),
-                               ((void *)drhd) + header->length,
+                               ((void *)drhd) + drhd->header.length,
                                &dmaru->devices_cnt, &dmaru->devices,
                                drhd->segment);
        else {
@@ -197,37 +212,59 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
                include_all = 1;
        }
 
-       if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all))
+       if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
+               list_del(&dmaru->list);
                kfree(dmaru);
-       else
-               dmar_register_drhd_unit(dmaru);
+       }
        return ret;
 }
 
+#ifdef CONFIG_DMAR
+LIST_HEAD(dmar_rmrr_units);
+
+static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
+{
+       list_add(&rmrr->list, &dmar_rmrr_units);
+}
+
+
 static int __init
 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
 {
        struct acpi_dmar_reserved_memory *rmrr;
        struct dmar_rmrr_unit *rmrru;
-       int ret = 0;
 
        rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
        if (!rmrru)
                return -ENOMEM;
 
+       rmrru->hdr = header;
        rmrr = (struct acpi_dmar_reserved_memory *)header;
        rmrru->base_address = rmrr->base_address;
        rmrru->end_address = rmrr->end_address;
+
+       dmar_register_rmrr_unit(rmrru);
+       return 0;
+}
+
+static int __init
+rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
+{
+       struct acpi_dmar_reserved_memory *rmrr;
+       int ret;
+
+       rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
        ret = dmar_parse_dev_scope((void *)(rmrr + 1),
-               ((void *)rmrr) + header->length,
+               ((void *)rmrr) + rmrr->header.length,
                &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
 
-       if (ret || (rmrru->devices_cnt == 0))
+       if (ret || (rmrru->devices_cnt == 0)) {
+               list_del(&rmrru->list);
                kfree(rmrru);
-       else
-               dmar_register_rmrr_unit(rmrru);
+       }
        return ret;
 }
+#endif
 
 static void __init
 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
@@ -252,6 +289,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
        }
 }
 
+
 /**
  * parse_dmar_table - parses the DMA reporting table
  */
@@ -284,7 +322,9 @@ parse_dmar_table(void)
                        ret = dmar_parse_one_drhd(entry_header);
                        break;
                case ACPI_DMAR_TYPE_RESERVED_MEMORY:
+#ifdef CONFIG_DMAR
                        ret = dmar_parse_one_rmrr(entry_header);
+#endif
                        break;
                default:
                        printk(KERN_WARNING PREFIX
@@ -300,15 +340,77 @@ parse_dmar_table(void)
        return ret;
 }
 
+int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
+                         struct pci_dev *dev)
+{
+       int index;
+
+       while (dev) {
+               for (index = 0; index < cnt; index++)
+                       if (dev == devices[index])
+                               return 1;
 
-int __init dmar_table_init(void)
+               /* Check our parent */
+               dev = dev->bus->self;
+       }
+
+       return 0;
+}
+
+struct dmar_drhd_unit *
+dmar_find_matched_drhd_unit(struct pci_dev *dev)
 {
+       struct dmar_drhd_unit *drhd = NULL;
 
+       list_for_each_entry(drhd, &dmar_drhd_units, list) {
+               if (drhd->include_all || dmar_pci_device_match(drhd->devices,
+                                               drhd->devices_cnt, dev))
+                       return drhd;
+       }
+
+       return NULL;
+}
+
+int __init dmar_dev_scope_init(void)
+{
+       struct dmar_drhd_unit *drhd;
+       int ret = -ENODEV;
+
+       for_each_drhd_unit(drhd) {
+               ret = dmar_parse_dev(drhd);
+               if (ret)
+                       return ret;
+       }
+
+#ifdef CONFIG_DMAR
+       {
+               struct dmar_rmrr_unit *rmrr;
+               for_each_rmrr_units(rmrr) {
+                       ret = rmrr_parse_dev(rmrr);
+                       if (ret)
+                               return ret;
+               }
+       }
+#endif
+
+       return ret;
+}
+
+
+int __init dmar_table_init(void)
+{
+       static int dmar_table_initialized;
        int ret;
 
+       if (dmar_table_initialized)
+               return 0;
+
+       dmar_table_initialized = 1;
+
        ret = parse_dmar_table();
        if (ret) {
-               printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
+               if (ret != -ENODEV)
+                       printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
                return ret;
        }
 
@@ -317,11 +419,14 @@ int __init dmar_table_init(void)
                return -ENODEV;
        }
 
-       if (list_empty(&dmar_rmrr_units)) {
+#ifdef CONFIG_DMAR
+       if (list_empty(&dmar_rmrr_units))
                printk(KERN_INFO PREFIX "No RMRR found\n");
-               return -ENODEV;
-       }
+#endif
 
+#ifdef CONFIG_INTR_REMAP
+       parse_ioapics_under_ir();
+#endif
        return 0;
 }
 
@@ -343,3 +448,255 @@ int __init early_dmar_detect(void)
 
        return (ACPI_SUCCESS(status) ? 1 : 0);
 }
+
+void __init detect_intel_iommu(void)
+{
+       int ret;
+
+       ret = early_dmar_detect();
+
+#ifdef CONFIG_DMAR
+       {
+               struct acpi_table_dmar *dmar;
+               /*
+                * for now we will disable dma-remapping when interrupt
+                * remapping is enabled.
+                * When support for queued invalidation for IOTLB invalidation
+                * is added, we will not need this any more.
+                */
+               dmar = (struct acpi_table_dmar *) dmar_tbl;
+               if (ret && cpu_has_x2apic && dmar->flags & 0x1) {
+                       printk(KERN_INFO
+                              "Queued invalidation will be enabled to support "
+                              "x2apic and Intr-remapping.\n");
+                       printk(KERN_INFO
+                              "Disabling IOMMU detection, because of missing "
+                              "queued invalidation support for IOTLB "
+                              "invalidation\n");
+                       printk(KERN_INFO
+                              "Use \"nox2apic\", if you want to use Intel "
+                              " IOMMU for DMA-remapping and don't care about "
+                              " x2apic support\n");
+
+                       dmar_disabled = 1;
+                       return;
+               }
+
+               if (ret && !no_iommu && !iommu_detected && !swiotlb &&
+                   !dmar_disabled)
+                       iommu_detected = 1;
+       }
+#endif
+}
+
+
+int alloc_iommu(struct dmar_drhd_unit *drhd)
+{
+       struct intel_iommu *iommu;
+       int map_size;
+       u32 ver;
+       static int iommu_allocated = 0;
+
+       iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+       if (!iommu)
+               return -ENOMEM;
+
+       iommu->seq_id = iommu_allocated++;
+
+       iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
+       if (!iommu->reg) {
+               printk(KERN_ERR "IOMMU: can't map the region\n");
+               goto error;
+       }
+       iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
+       iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+       /* the registers might be more than one page */
+       map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+               cap_max_fault_reg_offset(iommu->cap));
+       map_size = PAGE_ALIGN_4K(map_size);
+       if (map_size > PAGE_SIZE_4K) {
+               iounmap(iommu->reg);
+               iommu->reg = ioremap(drhd->reg_base_addr, map_size);
+               if (!iommu->reg) {
+                       printk(KERN_ERR "IOMMU: can't map the region\n");
+                       goto error;
+               }
+       }
+
+       ver = readl(iommu->reg + DMAR_VER_REG);
+       pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
+               drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
+               iommu->cap, iommu->ecap);
+
+       spin_lock_init(&iommu->register_lock);
+
+       drhd->iommu = iommu;
+       return 0;
+error:
+       kfree(iommu);
+       return -1;
+}
+
+void free_iommu(struct intel_iommu *iommu)
+{
+       if (!iommu)
+               return;
+
+#ifdef CONFIG_DMAR
+       free_dmar_iommu(iommu);
+#endif
+
+       if (iommu->reg)
+               iounmap(iommu->reg);
+       kfree(iommu);
+}
+
+/*
+ * Reclaim all the submitted descriptors which have completed its work.
+ */
+static inline void reclaim_free_desc(struct q_inval *qi)
+{
+       while (qi->desc_status[qi->free_tail] == QI_DONE) {
+               qi->desc_status[qi->free_tail] = QI_FREE;
+               qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
+               qi->free_cnt++;
+       }
+}
+
+/*
+ * Submit the queued invalidation descriptor to the remapping
+ * hardware unit and wait for its completion.
+ */
+void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
+{
+       struct q_inval *qi = iommu->qi;
+       struct qi_desc *hw, wait_desc;
+       int wait_index, index;
+       unsigned long flags;
+
+       if (!qi)
+               return;
+
+       hw = qi->desc;
+
+       spin_lock(&qi->q_lock);
+       while (qi->free_cnt < 3) {
+               spin_unlock(&qi->q_lock);
+               cpu_relax();
+               spin_lock(&qi->q_lock);
+       }
+
+       index = qi->free_head;
+       wait_index = (index + 1) % QI_LENGTH;
+
+       qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
+
+       hw[index] = *desc;
+
+       wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
+       wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
+
+       hw[wait_index] = wait_desc;
+
+       __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
+       __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
+
+       qi->free_head = (qi->free_head + 2) % QI_LENGTH;
+       qi->free_cnt -= 2;
+
+       spin_lock_irqsave(&iommu->register_lock, flags);
+       /*
+        * update the HW tail register indicating the presence of
+        * new descriptors.
+        */
+       writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
+       spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+       while (qi->desc_status[wait_index] != QI_DONE) {
+               spin_unlock(&qi->q_lock);
+               cpu_relax();
+               spin_lock(&qi->q_lock);
+       }
+
+       qi->desc_status[index] = QI_DONE;
+
+       reclaim_free_desc(qi);
+       spin_unlock(&qi->q_lock);
+}
+
+/*
+ * Flush the global interrupt entry cache.
+ */
+void qi_global_iec(struct intel_iommu *iommu)
+{
+       struct qi_desc desc;
+
+       desc.low = QI_IEC_TYPE;
+       desc.high = 0;
+
+       qi_submit_sync(&desc, iommu);
+}
+
+/*
+ * Enable Queued Invalidation interface. This is a must to support
+ * interrupt-remapping. Also used by DMA-remapping, which replaces
+ * register based IOTLB invalidation.
+ */
+int dmar_enable_qi(struct intel_iommu *iommu)
+{
+       u32 cmd, sts;
+       unsigned long flags;
+       struct q_inval *qi;
+
+       if (!ecap_qis(iommu->ecap))
+               return -ENOENT;
+
+       /*
+        * queued invalidation is already setup and enabled.
+        */
+       if (iommu->qi)
+               return 0;
+
+       iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
+       if (!iommu->qi)
+               return -ENOMEM;
+
+       qi = iommu->qi;
+
+       qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
+       if (!qi->desc) {
+               kfree(qi);
+               iommu->qi = 0;
+               return -ENOMEM;
+       }
+
+       qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
+       if (!qi->desc_status) {
+               free_page((unsigned long) qi->desc);
+               kfree(qi);
+               iommu->qi = 0;
+               return -ENOMEM;
+       }
+
+       qi->free_head = qi->free_tail = 0;
+       qi->free_cnt = QI_LENGTH;
+
+       spin_lock_init(&qi->q_lock);
+
+       spin_lock_irqsave(&iommu->register_lock, flags);
+       /* write zero to the tail reg */
+       writel(0, iommu->reg + DMAR_IQT_REG);
+
+       dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+
+       cmd = iommu->gcmd | DMA_GCMD_QIE;
+       iommu->gcmd |= DMA_GCMD_QIE;
+       writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+       /* Make sure hardware complete it */
+       IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
+       spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+       return 0;
+}
index 3f7b81c065d25188e17d82665b63575a91c92e19..ffccf2341b98e6bff0f4b6750de185a1b0287c99 100644 (file)
@@ -49,8 +49,6 @@
 
 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
 
-#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
-
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 
 
@@ -58,8 +56,6 @@ static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
 
-static struct intel_iommu *g_iommus;
-
 #define HIGH_WATER_MARK 250
 struct deferred_flush_tables {
        int next;
@@ -80,7 +76,7 @@ static long list_size;
 
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
-static int dmar_disabled;
+int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
@@ -185,13 +181,6 @@ void free_iova_mem(struct iova *iova)
        kmem_cache_free(iommu_iova_cache, iova);
 }
 
-static inline void __iommu_flush_cache(
-       struct intel_iommu *iommu, void *addr, int size)
-{
-       if (!ecap_coherent(iommu->ecap))
-               clflush_cache_range(addr, size);
-}
-
 /* Gets context entry for a given bus and devfn */
 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
                u8 bus, u8 devfn)
@@ -488,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
        return 0;
 }
 
-#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
-{\
-       cycles_t start_time = get_cycles();\
-       while (1) {\
-               sts = op (iommu->reg + offset);\
-               if (cond)\
-                       break;\
-               if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
-                       panic("DMAR hardware is malfunctioning\n");\
-               cpu_relax();\
-       }\
-}
-
 static void iommu_set_root_entry(struct intel_iommu *iommu)
 {
        void *addr;
@@ -990,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
                return -ENOMEM;
        }
 
+       spin_lock_init(&iommu->lock);
+
        /*
         * if Caching mode is set, then invalid translations are tagged
         * with domainid 0. Hence we need to pre-allocate it.
@@ -998,62 +976,15 @@ static int iommu_init_domains(struct intel_iommu *iommu)
                set_bit(0, iommu->domain_ids);
        return 0;
 }
-static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
-                                       struct dmar_drhd_unit *drhd)
-{
-       int ret;
-       int map_size;
-       u32 ver;
-
-       iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
-       if (!iommu->reg) {
-               printk(KERN_ERR "IOMMU: can't map the region\n");
-               goto error;
-       }
-       iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
-       iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
-
-       /* the registers might be more than one page */
-       map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
-               cap_max_fault_reg_offset(iommu->cap));
-       map_size = PAGE_ALIGN_4K(map_size);
-       if (map_size > PAGE_SIZE_4K) {
-               iounmap(iommu->reg);
-               iommu->reg = ioremap(drhd->reg_base_addr, map_size);
-               if (!iommu->reg) {
-                       printk(KERN_ERR "IOMMU: can't map the region\n");
-                       goto error;
-               }
-       }
-
-       ver = readl(iommu->reg + DMAR_VER_REG);
-       pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
-               drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
-               iommu->cap, iommu->ecap);
-       ret = iommu_init_domains(iommu);
-       if (ret)
-               goto error_unmap;
-       spin_lock_init(&iommu->lock);
-       spin_lock_init(&iommu->register_lock);
 
-       drhd->iommu = iommu;
-       return iommu;
-error_unmap:
-       iounmap(iommu->reg);
-error:
-       kfree(iommu);
-       return NULL;
-}
 
 static void domain_exit(struct dmar_domain *domain);
-static void free_iommu(struct intel_iommu *iommu)
+
+void free_dmar_iommu(struct intel_iommu *iommu)
 {
        struct dmar_domain *domain;
        int i;
 
-       if (!iommu)
-               return;
-
        i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
        for (; i < cap_ndoms(iommu->cap); ) {
                domain = iommu->domains[i];
@@ -1078,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu)
 
        /* free context mapping */
        free_context_table(iommu);
-
-       if (iommu->reg)
-               iounmap(iommu->reg);
-       kfree(iommu);
 }
 
 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@ -1426,37 +1353,6 @@ find_domain(struct pci_dev *pdev)
        return NULL;
 }
 
-static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
-     struct pci_dev *dev)
-{
-       int index;
-
-       while (dev) {
-               for (index = 0; index < cnt; index++)
-                       if (dev == devices[index])
-                               return 1;
-
-               /* Check our parent */
-               dev = dev->bus->self;
-       }
-
-       return 0;
-}
-
-static struct dmar_drhd_unit *
-dmar_find_matched_drhd_unit(struct pci_dev *dev)
-{
-       struct dmar_drhd_unit *drhd = NULL;
-
-       list_for_each_entry(drhd, &dmar_drhd_units, list) {
-               if (drhd->include_all || dmar_pci_device_match(drhd->devices,
-                                               drhd->devices_cnt, dev))
-                       return drhd;
-       }
-
-       return NULL;
-}
-
 /* domain is initialized */
 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 {
@@ -1729,8 +1625,6 @@ int __init init_dmars(void)
         * endfor
         */
        for_each_drhd_unit(drhd) {
-               if (drhd->ignored)
-                       continue;
                g_num_of_iommus++;
                /*
                 * lock not needed as this is only incremented in the single
@@ -1739,12 +1633,6 @@ int __init init_dmars(void)
                 */
        }
 
-       g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
-       if (!g_iommus) {
-               ret = -ENOMEM;
-               goto error;
-       }
-
        deferred_flush = kzalloc(g_num_of_iommus *
                sizeof(struct deferred_flush_tables), GFP_KERNEL);
        if (!deferred_flush) {
@@ -1752,16 +1640,15 @@ int __init init_dmars(void)
                goto error;
        }
 
-       i = 0;
        for_each_drhd_unit(drhd) {
                if (drhd->ignored)
                        continue;
-               iommu = alloc_iommu(&g_iommus[i], drhd);
-               i++;
-               if (!iommu) {
-                       ret = -ENOMEM;
+
+               iommu = drhd->iommu;
+
+               ret = iommu_init_domains(iommu);
+               if (ret)
                        goto error;
-               }
 
                /*
                 * TBD:
@@ -1845,7 +1732,6 @@ error:
                iommu = drhd->iommu;
                free_iommu(iommu);
        }
-       kfree(g_iommus);
        return ret;
 }
 
@@ -2002,7 +1888,10 @@ static void flush_unmaps(void)
        /* just flush them all */
        for (i = 0; i < g_num_of_iommus; i++) {
                if (deferred_flush[i].next) {
-                       iommu_flush_iotlb_global(&g_iommus[i], 0);
+                       struct intel_iommu *iommu =
+                               deferred_flush[i].domain[0]->iommu;
+
+                       iommu_flush_iotlb_global(iommu, 0);
                        for (j = 0; j < deferred_flush[i].next; j++) {
                                __free_iova(&deferred_flush[i].domain[j]->iovad,
                                                deferred_flush[i].iova[j]);
@@ -2032,7 +1921,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
        if (list_size == HIGH_WATER_MARK)
                flush_unmaps();
 
-       iommu_id = dom->iommu - g_iommus;
+       iommu_id = dom->iommu->seq_id;
+
        next = deferred_flush[iommu_id].next;
        deferred_flush[iommu_id].domain[next] = dom;
        deferred_flush[iommu_id].iova[next] = iova;
@@ -2348,15 +2238,6 @@ static void __init iommu_exit_mempool(void)
 
 }
 
-void __init detect_intel_iommu(void)
-{
-       if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
-               return;
-       if (early_dmar_detect()) {
-               iommu_detected = 1;
-       }
-}
-
 static void __init init_no_remapping_devices(void)
 {
        struct dmar_drhd_unit *drhd;
@@ -2403,12 +2284,19 @@ int __init intel_iommu_init(void)
 {
        int ret = 0;
 
-       if (no_iommu || swiotlb || dmar_disabled)
-               return -ENODEV;
-
        if (dmar_table_init())
                return  -ENODEV;
 
+       if (dmar_dev_scope_init())
+               return  -ENODEV;
+
+       /*
+        * Check the need for DMA-remapping initialization now.
+        * Above initialization will also be used by Interrupt-remapping.
+        */
+       if (no_iommu || swiotlb || dmar_disabled)
+               return -ENODEV;
+
        iommu_init_mempool();
        dmar_init_reserved_ranges();
 
index afc0ad96122e85ffc52d9570c497de7e304b37db..2142c01e0143c947e353e8ce26b29eff090f3d7b 100644 (file)
 #include <linux/sysdev.h>
 #include "iova.h"
 #include <linux/io.h>
-
-/*
- * We need a fixed PAGE_SIZE of 4K irrespective of
- * arch PAGE_SIZE for IOMMU page tables.
- */
-#define PAGE_SHIFT_4K          (12)
-#define PAGE_SIZE_4K           (1UL << PAGE_SHIFT_4K)
-#define PAGE_MASK_4K           (((u64)-1) << PAGE_SHIFT_4K)
-#define PAGE_ALIGN_4K(addr)    (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
-
-#define IOVA_PFN(addr)         ((addr) >> PAGE_SHIFT_4K)
-#define DMA_32BIT_PFN          IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN          IOVA_PFN(DMA_64BIT_MASK)
+#include <asm/cacheflush.h>
+#include "dma_remapping.h"
 
 /*
  * Intel IOMMU register specification per version 1.0 public spec.
 #define        DMAR_PLMLIMIT_REG 0x6c  /* PMRR low limit */
 #define        DMAR_PHMBASE_REG 0x70   /* pmrr high base addr */
 #define        DMAR_PHMLIMIT_REG 0x78  /* pmrr high limit */
+#define DMAR_IQH_REG   0x80    /* Invalidation queue head register */
+#define DMAR_IQT_REG   0x88    /* Invalidation queue tail register */
+#define DMAR_IQA_REG   0x90    /* Invalidation queue addr register */
+#define DMAR_ICS_REG   0x98    /* Invalidation complete status register */
+#define DMAR_IRTA_REG  0xb8    /* Interrupt remapping table addr register */
 
 #define OFFSET_STRIDE          (9)
 /*
@@ -126,6 +120,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define ecap_max_iotlb_offset(e) \
        (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
 #define ecap_coherent(e)       ((e) & 0x1)
+#define ecap_qis(e)            ((e) & 0x2)
+#define ecap_eim_support(e)    ((e >> 4) & 0x1)
+#define ecap_ir_support(e)     ((e >> 3) & 0x1)
+#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
 
 
 /* IOTLB_REG */
@@ -141,6 +139,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
 #define DMA_TLB_MAX_SIZE (0x3f)
 
+/* INVALID_DESC */
+#define DMA_ID_TLB_GLOBAL_FLUSH        (((u64)1) << 3)
+#define DMA_ID_TLB_DSI_FLUSH   (((u64)2) << 3)
+#define DMA_ID_TLB_PSI_FLUSH   (((u64)3) << 3)
+#define DMA_ID_TLB_READ_DRAIN  (((u64)1) << 7)
+#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
+#define DMA_ID_TLB_DID(id)     (((u64)((id & 0xffff) << 16)))
+#define DMA_ID_TLB_IH_NONLEAF  (((u64)1) << 6)
+#define DMA_ID_TLB_ADDR(addr)  (addr)
+#define DMA_ID_TLB_ADDR_MASK(mask)     (mask)
+
 /* PMEN_REG */
 #define DMA_PMEN_EPM (((u32)1)<<31)
 #define DMA_PMEN_PRS (((u32)1)<<0)
@@ -151,6 +160,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_GCMD_SFL (((u32)1) << 29)
 #define DMA_GCMD_EAFL (((u32)1) << 28)
 #define DMA_GCMD_WBF (((u32)1) << 27)
+#define DMA_GCMD_QIE (((u32)1) << 26)
+#define DMA_GCMD_SIRTP (((u32)1) << 24)
+#define DMA_GCMD_IRE (((u32) 1) << 25)
 
 /* GSTS_REG */
 #define DMA_GSTS_TES (((u32)1) << 31)
@@ -158,6 +170,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_GSTS_FLS (((u32)1) << 29)
 #define DMA_GSTS_AFLS (((u32)1) << 28)
 #define DMA_GSTS_WBFS (((u32)1) << 27)
+#define DMA_GSTS_QIES (((u32)1) << 26)
+#define DMA_GSTS_IRTPS (((u32)1) << 24)
+#define DMA_GSTS_IRES (((u32)1) << 25)
 
 /* CCMD_REG */
 #define DMA_CCMD_ICC (((u64)1) << 63)
@@ -187,158 +202,106 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define dma_frcd_source_id(c) (c & 0xffff)
 #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
 
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-       u64     val;
-       u64     rsvd1;
-};
-#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-       return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-       root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-       root->val |= value & PAGE_MASK_4K;
+#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
+
+#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+{\
+       cycles_t start_time = get_cycles();\
+       while (1) {\
+               sts = op (iommu->reg + offset);\
+               if (cond)\
+                       break;\
+               if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
+                       panic("DMAR hardware is malfunctioning\n");\
+               cpu_relax();\
+       }\
 }
 
-struct context_entry;
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-       return (struct context_entry *)
-               (root_present(root)?phys_to_virt(
-               root->val & PAGE_MASK_4K):
-               NULL);
-}
-
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-       u64 lo;
-       u64 hi;
-};
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
-#define context_address_width(c) ((c).hi &  7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
-       do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
-       do { \
-               (c).lo &= (((u64)-1) << 4) | 3; \
-               (c).lo |= ((val) & 3) << 2; \
-       } while (0)
-#define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
-       do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
-       do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+#define QI_LENGTH      256     /* queue length */
 
-/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-11: available
- * 12-63: Host physcial address
- */
-struct dma_pte {
-       u64 val;
+enum {
+       QI_FREE,
+       QI_IN_USE,
+       QI_DONE
 };
-#define dma_clear_pte(p)       do {(p).val = 0;} while (0)
-
-#define DMA_PTE_READ (1)
-#define DMA_PTE_WRITE (2)
 
-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
-               do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
-#define dma_set_pte_addr(p, addr) do {\
-               (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
+#define QI_CC_TYPE             0x1
+#define QI_IOTLB_TYPE          0x2
+#define QI_DIOTLB_TYPE         0x3
+#define QI_IEC_TYPE            0x4
+#define QI_IWD_TYPE            0x5
 
-struct intel_iommu;
+#define QI_IEC_SELECTIVE       (((u64)1) << 4)
+#define QI_IEC_IIDEX(idx)      (((u64)(idx & 0xffff) << 32))
+#define QI_IEC_IM(m)           (((u64)(m & 0x1f) << 27))
 
-struct dmar_domain {
-       int     id;                     /* domain id */
-       struct intel_iommu *iommu;      /* back pointer to owning iommu */
+#define QI_IWD_STATUS_DATA(d)  (((u64)d) << 32)
+#define QI_IWD_STATUS_WRITE    (((u64)1) << 5)
 
-       struct list_head devices;       /* all devices' list */
-       struct iova_domain iovad;       /* iova's that belong to this domain */
+struct qi_desc {
+       u64 low, high;
+};
 
-       struct dma_pte  *pgd;           /* virtual address */
-       spinlock_t      mapping_lock;   /* page table lock */
-       int             gaw;            /* max guest address width */
+struct q_inval {
+       spinlock_t      q_lock;
+       struct qi_desc  *desc;          /* invalidation queue */
+       int             *desc_status;   /* desc status */
+       int             free_head;      /* first free entry */
+       int             free_tail;      /* last free entry */
+       int             free_cnt;
+};
 
-       /* adjusted guest address width, 0 is level 2 30-bit */
-       int             agaw;
+#ifdef CONFIG_INTR_REMAP
+/* 1MB - maximum possible interrupt remapping table size */
+#define INTR_REMAP_PAGE_ORDER  8
+#define INTR_REMAP_TABLE_REG_SIZE      0xf
 
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
-       int             flags;
-};
+#define INTR_REMAP_TABLE_ENTRIES       65536
 
-/* PCI domain-device relationship */
-struct device_domain_info {
-       struct list_head link;  /* link to domain siblings */
-       struct list_head global; /* link to global list */
-       u8 bus;                 /* PCI bus numer */
-       u8 devfn;               /* PCI devfn number */
-       struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
-       struct dmar_domain *domain; /* pointer to domain */
+struct ir_table {
+       struct irte *base;
 };
-
-extern int init_dmars(void);
+#endif
 
 struct intel_iommu {
        void __iomem    *reg; /* Pointer to hardware regs, virtual addr */
        u64             cap;
        u64             ecap;
-       unsigned long   *domain_ids; /* bitmap of domains */
-       struct dmar_domain **domains; /* ptr to domains */
        int             seg;
        u32             gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
-       spinlock_t      lock; /* protect context, domain ids */
        spinlock_t      register_lock; /* protect register handling */
+       int             seq_id; /* sequence id of the iommu */
+
+#ifdef CONFIG_DMAR
+       unsigned long   *domain_ids; /* bitmap of domains */
+       struct dmar_domain **domains; /* ptr to domains */
+       spinlock_t      lock; /* protect context, domain ids */
        struct root_entry *root_entry; /* virtual address */
 
        unsigned int irq;
        unsigned char name[7];    /* Device Name */
        struct msi_msg saved_msg;
        struct sys_device sysdev;
+#endif
+       struct q_inval  *qi;            /* Queued invalidation info */
+#ifdef CONFIG_INTR_REMAP
+       struct ir_table *ir_table;      /* Interrupt remapping info */
+#endif
 };
 
-#ifndef CONFIG_DMAR_GFX_WA
-static inline void iommu_prepare_gfx_mapping(void)
+static inline void __iommu_flush_cache(
+       struct intel_iommu *iommu, void *addr, int size)
 {
-       return;
+       if (!ecap_coherent(iommu->ecap))
+               clflush_cache_range(addr, size);
 }
-#endif /* !CONFIG_DMAR_GFX_WA */
 
+extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+
+extern int alloc_iommu(struct dmar_drhd_unit *drhd);
+extern void free_iommu(struct intel_iommu *iommu);
+extern int dmar_enable_qi(struct intel_iommu *iommu);
+extern void qi_global_iec(struct intel_iommu *iommu);
+
+extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 #endif
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
new file mode 100644 (file)
index 0000000..bb642cc
--- /dev/null
@@ -0,0 +1,471 @@
+#include <linux/dmar.h>
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <asm/io_apic.h>
+#include "intel-iommu.h"
+#include "intr_remapping.h"
+
+static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
+static int ir_ioapic_num;
+int intr_remapping_enabled;
+
+static struct {
+       struct intel_iommu *iommu;
+       u16 irte_index;
+       u16 sub_handle;
+       u8  irte_mask;
+} irq_2_iommu[NR_IRQS];
+
+static DEFINE_SPINLOCK(irq_2_ir_lock);
+
+int irq_remapped(int irq)
+{
+       if (irq > NR_IRQS)
+               return 0;
+
+       if (!irq_2_iommu[irq].iommu)
+               return 0;
+
+       return 1;
+}
+
+int get_irte(int irq, struct irte *entry)
+{
+       int index;
+
+       if (!entry || irq > NR_IRQS)
+               return -1;
+
+       spin_lock(&irq_2_ir_lock);
+       if (!irq_2_iommu[irq].iommu) {
+               spin_unlock(&irq_2_ir_lock);
+               return -1;
+       }
+
+       index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+       *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
+
+       spin_unlock(&irq_2_ir_lock);
+       return 0;
+}
+
+int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
+{
+       struct ir_table *table = iommu->ir_table;
+       u16 index, start_index;
+       unsigned int mask = 0;
+       int i;
+
+       if (!count)
+               return -1;
+
+       /*
+        * start the IRTE search from index 0.
+        */
+       index = start_index = 0;
+
+       if (count > 1) {
+               count = __roundup_pow_of_two(count);
+               mask = ilog2(count);
+       }
+
+       if (mask > ecap_max_handle_mask(iommu->ecap)) {
+               printk(KERN_ERR
+                      "Requested mask %x exceeds the max invalidation handle"
+                      " mask value %Lx\n", mask,
+                      ecap_max_handle_mask(iommu->ecap));
+               return -1;
+       }
+
+       spin_lock(&irq_2_ir_lock);
+       do {
+               for (i = index; i < index + count; i++)
+                       if  (table->base[i].present)
+                               break;
+               /* empty index found */
+               if (i == index + count)
+                       break;
+
+               index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
+
+               if (index == start_index) {
+                       spin_unlock(&irq_2_ir_lock);
+                       printk(KERN_ERR "can't allocate an IRTE\n");
+                       return -1;
+               }
+       } while (1);
+
+       for (i = index; i < index + count; i++)
+               table->base[i].present = 1;
+
+       irq_2_iommu[irq].iommu = iommu;
+       irq_2_iommu[irq].irte_index =  index;
+       irq_2_iommu[irq].sub_handle = 0;
+       irq_2_iommu[irq].irte_mask = mask;
+
+       spin_unlock(&irq_2_ir_lock);
+
+       return index;
+}
+
+static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
+{
+       struct qi_desc desc;
+
+       desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
+                  | QI_IEC_SELECTIVE;
+       desc.high = 0;
+
+       qi_submit_sync(&desc, iommu);
+}
+
+int map_irq_to_irte_handle(int irq, u16 *sub_handle)
+{
+       int index;
+
+       spin_lock(&irq_2_ir_lock);
+       if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+               spin_unlock(&irq_2_ir_lock);
+               return -1;
+       }
+
+       *sub_handle = irq_2_iommu[irq].sub_handle;
+       index = irq_2_iommu[irq].irte_index;
+       spin_unlock(&irq_2_ir_lock);
+       return index;
+}
+
+int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
+{
+       spin_lock(&irq_2_ir_lock);
+       if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
+               spin_unlock(&irq_2_ir_lock);
+               return -1;
+       }
+
+       irq_2_iommu[irq].iommu = iommu;
+       irq_2_iommu[irq].irte_index = index;
+       irq_2_iommu[irq].sub_handle = subhandle;
+       irq_2_iommu[irq].irte_mask = 0;
+
+       spin_unlock(&irq_2_ir_lock);
+
+       return 0;
+}
+
+int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
+{
+       spin_lock(&irq_2_ir_lock);
+       if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+               spin_unlock(&irq_2_ir_lock);
+               return -1;
+       }
+
+       irq_2_iommu[irq].iommu = NULL;
+       irq_2_iommu[irq].irte_index = 0;
+       irq_2_iommu[irq].sub_handle = 0;
+       irq_2_iommu[irq].irte_mask = 0;
+
+       spin_unlock(&irq_2_ir_lock);
+
+       return 0;
+}
+
+int modify_irte(int irq, struct irte *irte_modified)
+{
+       int index;
+       struct irte *irte;
+       struct intel_iommu *iommu;
+
+       spin_lock(&irq_2_ir_lock);
+       if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+               spin_unlock(&irq_2_ir_lock);
+               return -1;
+       }
+
+       iommu = irq_2_iommu[irq].iommu;
+
+       index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+       irte = &iommu->ir_table->base[index];
+
+       set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
+       __iommu_flush_cache(iommu, irte, sizeof(*irte));
+
+       qi_flush_iec(iommu, index, 0);
+
+       spin_unlock(&irq_2_ir_lock);
+       return 0;
+}
+
+int flush_irte(int irq)
+{
+       int index;
+       struct intel_iommu *iommu;
+
+       spin_lock(&irq_2_ir_lock);
+       if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+               spin_unlock(&irq_2_ir_lock);
+               return -1;
+       }
+
+       iommu = irq_2_iommu[irq].iommu;
+
+       index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+
+       qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+       spin_unlock(&irq_2_ir_lock);
+
+       return 0;
+}
+
+struct intel_iommu *map_ioapic_to_ir(int apic)
+{
+       int i;
+
+       for (i = 0; i < MAX_IO_APICS; i++)
+               if (ir_ioapic[i].id == apic)
+                       return ir_ioapic[i].iommu;
+       return NULL;
+}
+
+struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
+{
+       struct dmar_drhd_unit *drhd;
+
+       drhd = dmar_find_matched_drhd_unit(dev);
+       if (!drhd)
+               return NULL;
+
+       return drhd->iommu;
+}
+
+int free_irte(int irq)
+{
+       int index, i;
+       struct irte *irte;
+       struct intel_iommu *iommu;
+
+       spin_lock(&irq_2_ir_lock);
+       if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
+               spin_unlock(&irq_2_ir_lock);
+               return -1;
+       }
+
+       iommu = irq_2_iommu[irq].iommu;
+
+       index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
+       irte = &iommu->ir_table->base[index];
+
+       if (!irq_2_iommu[irq].sub_handle) {
+               for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
+                       set_64bit((unsigned long *)irte, 0);
+               qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
+       }
+
+       irq_2_iommu[irq].iommu = NULL;
+       irq_2_iommu[irq].irte_index = 0;
+       irq_2_iommu[irq].sub_handle = 0;
+       irq_2_iommu[irq].irte_mask = 0;
+
+       spin_unlock(&irq_2_ir_lock);
+
+       return 0;
+}
+
+static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
+{
+       u64 addr;
+       u32 cmd, sts;
+       unsigned long flags;
+
+       addr = virt_to_phys((void *)iommu->ir_table->base);
+
+       spin_lock_irqsave(&iommu->register_lock, flags);
+
+       dmar_writeq(iommu->reg + DMAR_IRTA_REG,
+                   (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
+
+       /* Set interrupt-remapping table pointer */
+       cmd = iommu->gcmd | DMA_GCMD_SIRTP;
+       writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+       IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+                     readl, (sts & DMA_GSTS_IRTPS), sts);
+       spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+       /*
+        * global invalidation of interrupt entry cache before enabling
+        * interrupt-remapping.
+        */
+       qi_global_iec(iommu);
+
+       spin_lock_irqsave(&iommu->register_lock, flags);
+
+       /* Enable interrupt-remapping */
+       cmd = iommu->gcmd | DMA_GCMD_IRE;
+       iommu->gcmd |= DMA_GCMD_IRE;
+       writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+       IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+                     readl, (sts & DMA_GSTS_IRES), sts);
+
+       spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+
+static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
+{
+       struct ir_table *ir_table;
+       struct page *pages;
+
+       ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
+                                            GFP_KERNEL);
+
+       if (!iommu->ir_table)
+               return -ENOMEM;
+
+       pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
+
+       if (!pages) {
+               printk(KERN_ERR "failed to allocate pages of order %d\n",
+                      INTR_REMAP_PAGE_ORDER);
+               kfree(iommu->ir_table);
+               return -ENOMEM;
+       }
+
+       ir_table->base = page_address(pages);
+
+       iommu_set_intr_remapping(iommu, mode);
+       return 0;
+}
+
+int __init enable_intr_remapping(int eim)
+{
+       struct dmar_drhd_unit *drhd;
+       int setup = 0;
+
+       /*
+        * check for the Interrupt-remapping support
+        */
+       for_each_drhd_unit(drhd) {
+               struct intel_iommu *iommu = drhd->iommu;
+
+               if (!ecap_ir_support(iommu->ecap))
+                       continue;
+
+               if (eim && !ecap_eim_support(iommu->ecap)) {
+                       printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
+                              " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
+                       return -1;
+               }
+       }
+
+       /*
+        * Enable queued invalidation for all the DRHD's.
+        */
+       for_each_drhd_unit(drhd) {
+               int ret;
+               struct intel_iommu *iommu = drhd->iommu;
+               ret = dmar_enable_qi(iommu);
+
+               if (ret) {
+                       printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
+                              " invalidation, ecap %Lx, ret %d\n",
+                              drhd->reg_base_addr, iommu->ecap, ret);
+                       return -1;
+               }
+       }
+
+       /*
+        * Setup Interrupt-remapping for all the DRHD's now.
+        */
+       for_each_drhd_unit(drhd) {
+               struct intel_iommu *iommu = drhd->iommu;
+
+               if (!ecap_ir_support(iommu->ecap))
+                       continue;
+
+               if (setup_intr_remapping(iommu, eim))
+                       goto error;
+
+               setup = 1;
+       }
+
+       if (!setup)
+               goto error;
+
+       intr_remapping_enabled = 1;
+
+       return 0;
+
+error:
+       /*
+        * handle error condition gracefully here!
+        */
+       return -1;
+}
+
+static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
+                                struct intel_iommu *iommu)
+{
+       struct acpi_dmar_hardware_unit *drhd;
+       struct acpi_dmar_device_scope *scope;
+       void *start, *end;
+
+       drhd = (struct acpi_dmar_hardware_unit *)header;
+
+       start = (void *)(drhd + 1);
+       end = ((void *)drhd) + header->length;
+
+       while (start < end) {
+               scope = start;
+               if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
+                       if (ir_ioapic_num == MAX_IO_APICS) {
+                               printk(KERN_WARNING "Exceeded Max IO APICS\n");
+                               return -1;
+                       }
+
+                       printk(KERN_INFO "IOAPIC id %d under DRHD base"
+                              " 0x%Lx\n", scope->enumeration_id,
+                              drhd->address);
+
+                       ir_ioapic[ir_ioapic_num].iommu = iommu;
+                       ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
+                       ir_ioapic_num++;
+               }
+               start += scope->length;
+       }
+
+       return 0;
+}
+
+/*
+ * Finds the assocaition between IOAPIC's and its Interrupt-remapping
+ * hardware unit.
+ */
+int __init parse_ioapics_under_ir(void)
+{
+       struct dmar_drhd_unit *drhd;
+       int ir_supported = 0;
+
+       for_each_drhd_unit(drhd) {
+               struct intel_iommu *iommu = drhd->iommu;
+
+               if (ecap_ir_support(iommu->ecap)) {
+                       if (ir_parse_ioapic_scope(drhd->hdr, iommu))
+                               return -1;
+
+                       ir_supported = 1;
+               }
+       }
+
+       if (ir_supported && ir_ioapic_num != nr_ioapics) {
+               printk(KERN_WARNING
+                      "Not all IO-APIC's listed under remapping hardware\n");
+               return -1;
+       }
+
+       return ir_supported;
+}
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
new file mode 100644 (file)
index 0000000..05f2635
--- /dev/null
@@ -0,0 +1,8 @@
+#include "intel-iommu.h"
+
+struct ioapic_scope {
+       struct intel_iommu *iommu;
+       unsigned int id;
+};
+
+#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
index 4e2c1e517f0652fb1877c378f760fdb03fe6e8bb..fcd2f01277b623fcd7ef5e0aa25b6fb7cca17404 100644 (file)
@@ -7,6 +7,8 @@
 #include <asm/apicdef.h>
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/cpufeature.h>
+#include <asm/msr.h>
 
 #define ARCH_APICTIMER_STOPS_ON_C3     1
 
@@ -47,32 +49,75 @@ extern int disable_apic;
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
-#define apic_write native_apic_write
-#define apic_write_atomic native_apic_write_atomic
-#define apic_read native_apic_read
 #define setup_boot_clock setup_boot_APIC_clock
 #define setup_secondary_clock setup_secondary_APIC_clock
 #endif
 
 extern int is_vsmp_box(void);
 
-static inline void native_apic_write(unsigned long reg, u32 v)
+static inline void native_apic_mem_write(u32 reg, u32 v)
 {
        *((volatile u32 *)(APIC_BASE + reg)) = v;
 }
 
-static inline void native_apic_write_atomic(unsigned long reg, u32 v)
+static inline void native_apic_mem_write_atomic(u32 reg, u32 v)
 {
        (void)xchg((u32 *)(APIC_BASE + reg), v);
 }
 
-static inline u32 native_apic_read(unsigned long reg)
+static inline u32 native_apic_mem_read(u32 reg)
 {
        return *((volatile u32 *)(APIC_BASE + reg));
 }
 
-extern void apic_wait_icr_idle(void);
-extern u32 safe_apic_wait_icr_idle(void);
+static inline void native_apic_msr_write(u32 reg, u32 v)
+{
+       if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
+           reg == APIC_LVR)
+               return;
+
+       wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
+}
+
+static inline u32 native_apic_msr_read(u32 reg)
+{
+       u32 low, high;
+
+       if (reg == APIC_DFR)
+               return -1;
+
+       rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
+       return low;
+}
+
+#ifndef CONFIG_X86_32
+extern int x2apic, x2apic_preenabled;
+extern void check_x2apic(void);
+extern void enable_x2apic(void);
+extern void enable_IR_x2apic(void);
+extern void x2apic_icr_write(u32 low, u32 id);
+#endif
+
+struct apic_ops {
+       u32 (*read)(u32 reg);
+       void (*write)(u32 reg, u32 v);
+       void (*write_atomic)(u32 reg, u32 v);
+       u64 (*icr_read)(void);
+       void (*icr_write)(u32 low, u32 high);
+       void (*wait_icr_idle)(void);
+       u32 (*safe_wait_icr_idle)(void);
+};
+
+extern struct apic_ops *apic_ops;
+
+#define apic_read (apic_ops->read)
+#define apic_write (apic_ops->write)
+#define apic_write_atomic (apic_ops->write_atomic)
+#define apic_icr_read (apic_ops->icr_read)
+#define apic_icr_write (apic_ops->icr_write)
+#define apic_wait_icr_idle (apic_ops->wait_icr_idle)
+#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
+
 extern int get_physical_broadcast(void);
 
 #ifdef CONFIG_X86_GOOD_APIC
@@ -85,6 +130,15 @@ extern int get_physical_broadcast(void);
 # define apic_write_around(x, y) apic_write_atomic((x), (y))
 #endif
 
+#ifdef CONFIG_X86_64
+static inline void ack_x2APIC_irq(void)
+{
+       /* Docs say use 0 for future compatibility */
+       native_apic_msr_write(APIC_EOI, 0);
+}
+#endif
+
+
 static inline void ack_APIC_irq(void)
 {
        /*
@@ -95,7 +149,11 @@ static inline void ack_APIC_irq(void)
         */
 
        /* Docs say use 0 for future compatibility */
+#ifdef CONFIG_X86_32
        apic_write_around(APIC_EOI, 0);
+#else
+       native_apic_mem_write(APIC_EOI, 0);
+#endif
 }
 
 extern int lapic_get_maxlvt(void);
index 6b9008c787319d1040c9520c137a454f24afc06a..bcae297b30b2fc370d4c73e7da508a64dc924def 100644 (file)
 #define        APIC_TMICT      0x380
 #define        APIC_TMCCT      0x390
 #define        APIC_TDCR       0x3E0
+#define APIC_SELF_IPI  0x3F0
 #define                APIC_TDR_DIV_TMBASE     (1 << 2)
 #define                APIC_TDR_DIV_1          0xB
 #define                APIC_TDR_DIV_2          0x0
 #define        APIC_EILVT3     0x530
 
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+#define APIC_BASE_MSR  0x800
+#define X2APIC_ENABLE  (1UL << 10)
 
 #ifdef CONFIG_X86_32
 # define MAX_IO_APICS 64
index 75ef959db32921922db8f38bac44f8ac960cbc4a..5be9510ee0123afc8ce50fd6e26f3df62f2d9f38 100644 (file)
@@ -90,6 +90,7 @@
 #define X86_FEATURE_CX16       (4*32+13) /* CMPXCHG16B */
 #define X86_FEATURE_XTPR       (4*32+14) /* Send Task Priority Messages */
 #define X86_FEATURE_DCA                (4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_X2APIC     (4*32+21) /* x2APIC */
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
 #define X86_FEATURE_XSTORE     (5*32+ 2) /* on-CPU RNG present (xstore insn) */
@@ -188,6 +189,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_gbpages                boot_cpu_has(X86_FEATURE_GBPAGES)
 #define cpu_has_arch_perfmon   boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_pat            boot_cpu_has(X86_FEATURE_PAT)
+#define cpu_has_x2apic         boot_cpu_has(X86_FEATURE_X2APIC)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg                1
index 0f8504627c41ce8c6d9f168e8ff038a8d15c3d12..2871b3fccb21333590c601e0a312cbd67c859be8 100644 (file)
@@ -24,17 +24,24 @@ struct genapic {
        void (*send_IPI_mask)(cpumask_t mask, int vector);
        void (*send_IPI_allbutself)(int vector);
        void (*send_IPI_all)(int vector);
+       void (*send_IPI_self)(int vector);
        /* */
        unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
        unsigned int (*phys_pkg_id)(int index_msb);
+       unsigned int (*get_apic_id)(unsigned long x);
+       unsigned long (*set_apic_id)(unsigned int id);
+       unsigned long apic_id_mask;
 };
 
 extern struct genapic *genapic;
 
 extern struct genapic apic_flat;
 extern struct genapic apic_physflat;
+extern struct genapic apic_x2apic_cluster;
+extern struct genapic apic_x2apic_phys;
 extern int acpi_madt_oem_check(char *, char *);
 
+extern void apic_send_IPI_self(int vector);
 enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
 extern enum uv_system_type get_uv_system_type(void);
 extern int is_uv_system(void);
index 77ba51df56680fcd9e28b4529eb647c0eea07292..ef7a995ee81fe320a650bd32cddcd5e522d581c5 100644 (file)
@@ -73,7 +73,9 @@ extern void enable_IO_APIC(void);
 #endif
 
 /* IPI functions */
+#ifdef CONFIG_X86_32
 extern void send_IPI_self(int vector);
+#endif
 extern void send_IPI(int dest, int vector);
 
 /* Statistics */
index 2f98df91f1f2bef9e7fc7bc47b3a42bbe14403d3..31112b6c595bd1576fadf0a306c7dd8703baab46 100644 (file)
@@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port)
 
 extern struct irq_chip i8259A_chip;
 
+extern void mask_8259A(void);
+extern void unmask_8259A(void);
+
 #endif /* __ASM_I8259_H__ */
index 14f82bbcb5fd6ee5d09f05be363651258ff4e9bb..8dc2622714c84272a2a1d0dd33b18c73c6c2ca45 100644 (file)
@@ -107,6 +107,20 @@ struct IO_APIC_route_entry {
 
 } __attribute__ ((packed));
 
+struct IR_IO_APIC_route_entry {
+       __u64   vector          : 8,
+               zero            : 3,
+               index2          : 1,
+               delivery_status : 1,
+               polarity        : 1,
+               irr             : 1,
+               trigger         : 1,
+               mask            : 1,
+               reserved        : 31,
+               format          : 1,
+               index           : 15;
+} __attribute__ ((packed));
+
 #ifdef CONFIG_X86_IO_APIC
 
 /*
@@ -183,6 +197,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 extern void ioapic_init_mappings(void);
 
+#ifdef CONFIG_X86_64
+extern int save_mask_IO_APIC_setup(void);
+extern void restore_IO_APIC_setup(void);
+extern void reinit_intr_remapped_IO_APIC(int);
+#endif
+
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
 static const int timer_through_8259 = 0;
index 196d63c28aa44aa3365977da4b729d59444fcb69..3d8d6a6c1f8e5f8435db4e6be54b0608d0872ace 100644 (file)
@@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask)
        return SET_APIC_DEST_FIELD(mask);
 }
 
+static inline void __xapic_wait_icr_idle(void)
+{
+       while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
+               cpu_relax();
+}
+
 static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
                                       unsigned int dest)
 {
@@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
        /*
         * Wait for idle.
         */
-       apic_wait_icr_idle();
+       __xapic_wait_icr_idle();
 
        /*
         * No need to touch the target chip field
@@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
        /*
         * Send the IPI. The write to APIC_ICR fires this off.
         */
-       apic_write(APIC_ICR, cfg);
+       native_apic_mem_write(APIC_ICR, cfg);
 }
 
 /*
@@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
        if (unlikely(vector == NMI_VECTOR))
                safe_apic_wait_icr_idle();
        else
-               apic_wait_icr_idle();
+               __xapic_wait_icr_idle();
 
        /*
         * prepare target chip field
         */
        cfg = __prepare_ICR2(mask);
-       apic_write(APIC_ICR2, cfg);
+       native_apic_mem_write(APIC_ICR2, cfg);
 
        /*
         * program the ICR
@@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
        /*
         * Send the IPI. The write to APIC_ICR fires this off.
         */
-       apic_write(APIC_ICR, cfg);
+       native_apic_mem_write(APIC_ICR, cfg);
 }
 
 static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h
new file mode 100644 (file)
index 0000000..78242c6
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef _ASM_IRQ_REMAPPING_H
+#define _ASM_IRQ_REMAPPING_H
+
+extern int x2apic;
+
+#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
+
+#endif
index 0b2cde5e1b74b38641c4cea560784be0d7716a9b..3d2b455581ec4f903f2ca5ee65df43efa8bf58e0 100644 (file)
@@ -30,6 +30,8 @@ static inline cpumask_t target_cpus(void)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
 #define phys_pkg_id    (genapic->phys_pkg_id)
 #define vector_allocation_domain    (genapic->vector_allocation_domain)
+#define read_apic_id()  (GET_APIC_ID(apic_read(APIC_ID)))
+#define send_IPI_self (genapic->send_IPI_self)
 extern void setup_apic_routing(void);
 #else
 #define INT_DELIVERY_MODE dest_LowestPrio
@@ -54,7 +56,7 @@ static inline void init_apic_ldr(void)
 
 static inline int apic_id_registered(void)
 {
-       return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
+       return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
 
 static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
index e4b29ba37de604894bae11681a9236bd64c041e7..a55518aa5a2dc1b24b7a4a198071384f7a58a911 100644 (file)
@@ -4,9 +4,9 @@
 #include <asm/apic.h>
 
 #ifdef CONFIG_X86_64
-#define        APIC_ID_MASK            (0xFFu<<24)
-#define GET_APIC_ID(x)          (((x)>>24)&0xFFu)
-#define        SET_APIC_ID(x)          (((x)<<24))
+#define        APIC_ID_MASK            (genapic->apic_id_mask)
+#define GET_APIC_ID(x)         (genapic->get_apic_id(x))
+#define        SET_APIC_ID(x)          (genapic->set_apic_id(x))
 #else
 #define                APIC_ID_MASK            (0xF<<24)
 static inline unsigned get_apic_id(unsigned long x) 
index fbc8ad256f5aadda25deaeb7d7564515ec1e12ca..b3556ec3bca50e9bfcd28326423fd5652e6c2117 100644 (file)
@@ -141,7 +141,7 @@ static inline void setup_portio_remap(void)
 extern unsigned int boot_cpu_physical_apicid;
 static inline int check_phys_apicid_present(int cpu_physical_apicid)
 {
-       boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+       boot_cpu_physical_apicid = read_apic_id();
        return (1);
 }
 
index 296f29ce426d4e75a3c9451d1e16bb7d8aed3f71..57fd85935e5a0f5fc9cd5468c6304dd11570d6c6 100644 (file)
@@ -48,4 +48,8 @@
 #define  MSI_ADDR_DEST_ID(dest)                (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
                                         MSI_ADDR_DEST_ID_MASK)
 
+#define MSI_ADDR_IR_EXT_INT            (1 << 4)
+#define MSI_ADDR_IR_SHV                        (1 << 3)
+#define MSI_ADDR_IR_INDEX1(index)      ((index & 0x8000) >> 13)
+#define MSI_ADDR_IR_INDEX2(index)      ((index & 0x7fff) << 5)
 #endif /* ASM_MSIDEF_H */
index ef5e8ec6a6ab7431f0c799a2b56a4cd5f18fe5e9..08f89e385a929afa43e7b7e8e458d774b204d453 100644 (file)
@@ -200,13 +200,6 @@ struct pv_irq_ops {
 
 struct pv_apic_ops {
 #ifdef CONFIG_X86_LOCAL_APIC
-       /*
-        * Direct APIC operations, principally for VMI.  Ideally
-        * these shouldn't be in this interface.
-        */
-       void (*apic_write)(unsigned long reg, u32 v);
-       void (*apic_write_atomic)(unsigned long reg, u32 v);
-       u32 (*apic_read)(unsigned long reg);
        void (*setup_boot_clock)(void);
        void (*setup_secondary_clock)(void);
 
@@ -888,24 +881,6 @@ static inline void slow_down_io(void)
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-/*
- * Basic functions accessing APICs.
- */
-static inline void apic_write(unsigned long reg, u32 v)
-{
-       PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
-}
-
-static inline void apic_write_atomic(unsigned long reg, u32 v)
-{
-       PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
-}
-
-static inline u32 apic_read(unsigned long reg)
-{
-       return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
-}
-
 static inline void setup_boot_clock(void)
 {
        PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
index c2784b3e0b77e23269a1c61407aaeb2db39cdc9d..1896cdb0076a4f90391c40fac796f826bf5896dc 100644 (file)
@@ -163,30 +163,33 @@ extern int safe_smp_processor_id(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
+#ifndef CONFIG_X86_64
 static inline int logical_smp_processor_id(void)
 {
        /* we don't want to mark this access volatile - bad code generation */
        return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
 }
 
-#ifndef CONFIG_X86_64
+#include <mach_apicdef.h>
 static inline unsigned int read_apic_id(void)
 {
-       return *(u32 *)(APIC_BASE + APIC_ID);
+       unsigned int reg;
+
+       reg = *(u32 *)(APIC_BASE + APIC_ID);
+
+       return GET_APIC_ID(reg);
 }
-#else
-extern unsigned int read_apic_id(void);
 #endif
 
 
-# ifdef APIC_DEFINITION
+# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
 extern int hard_smp_processor_id(void);
 # else
-#  include <mach_apicdef.h>
+#include <mach_apicdef.h>
 static inline int hard_smp_processor_id(void)
 {
        /* we don't want to mark this access volatile - bad code generation */
-       return GET_APIC_ID(read_apic_id());
+       return read_apic_id();
 }
 # endif /* APIC_DEFINITION */
 
index 56c73b8475519cc7ee5db5886c92aa6d8f5f2d15..c360c558e59eb5b58952955e3172658ed86f8848 100644 (file)
 #include <linux/types.h>
 #include <linux/msi.h>
 
-#ifdef CONFIG_DMAR
+#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
 struct intel_iommu;
 
+struct dmar_drhd_unit {
+       struct list_head list;          /* list of drhd units   */
+       struct  acpi_dmar_header *hdr;  /* ACPI header          */
+       u64     reg_base_addr;          /* register base address*/
+       struct  pci_dev **devices;      /* target device array  */
+       int     devices_cnt;            /* target device count  */
+       u8      ignored:1;              /* ignore drhd          */
+       u8      include_all:1;
+       struct intel_iommu *iommu;
+};
+
+extern struct list_head dmar_drhd_units;
+
+#define for_each_drhd_unit(drhd) \
+       list_for_each_entry(drhd, &dmar_drhd_units, list)
+
+extern int dmar_table_init(void);
+extern int early_dmar_detect(void);
+extern int dmar_dev_scope_init(void);
+
+/* Intel IOMMU detection */
+extern void detect_intel_iommu(void);
+
+
+extern int parse_ioapics_under_ir(void);
+extern int alloc_iommu(struct dmar_drhd_unit *);
+#else
+static inline void detect_intel_iommu(void)
+{
+       return;
+}
+
+static inline int dmar_table_init(void)
+{
+       return -ENODEV;
+}
+#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */
+
+#ifdef CONFIG_INTR_REMAP
+extern int intr_remapping_enabled;
+extern int enable_intr_remapping(int);
+
+struct irte {
+       union {
+               struct {
+                       __u64   present         : 1,
+                               fpd             : 1,
+                               dst_mode        : 1,
+                               redir_hint      : 1,
+                               trigger_mode    : 1,
+                               dlvry_mode      : 3,
+                               avail           : 4,
+                               __reserved_1    : 4,
+                               vector          : 8,
+                               __reserved_2    : 8,
+                               dest_id         : 32;
+               };
+               __u64 low;
+       };
+
+       union {
+               struct {
+                       __u64   sid             : 16,
+                               sq              : 2,
+                               svt             : 2,
+                               __reserved_3    : 44;
+               };
+               __u64 high;
+       };
+};
+extern int get_irte(int irq, struct irte *entry);
+extern int modify_irte(int irq, struct irte *irte_modified);
+extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
+extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
+                       u16 sub_handle);
+extern int map_irq_to_irte_handle(int irq, u16 *sub_handle);
+extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index);
+extern int flush_irte(int irq);
+extern int free_irte(int irq);
+
+extern int irq_remapped(int irq);
+extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
+extern struct intel_iommu *map_ioapic_to_ir(int apic);
+#else
+#define irq_remapped(irq)              (0)
+#define enable_intr_remapping(mode)    (-1)
+#define intr_remapping_enabled         (0)
+#endif
+
+#ifdef CONFIG_DMAR
 extern const char *dmar_get_fault_reason(u8 fault_reason);
 
 /* Can't use the common MSI interrupt functions
@@ -40,47 +130,30 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg);
 extern int dmar_set_interrupt(struct intel_iommu *iommu);
 extern int arch_setup_dmar_msi(unsigned int irq);
 
-/* Intel IOMMU detection and initialization functions */
-extern void detect_intel_iommu(void);
-extern int intel_iommu_init(void);
-
-extern int dmar_table_init(void);
-extern int early_dmar_detect(void);
-
-extern struct list_head dmar_drhd_units;
+extern int iommu_detected, no_iommu;
 extern struct list_head dmar_rmrr_units;
-
-struct dmar_drhd_unit {
-       struct list_head list;          /* list of drhd units   */
-       u64     reg_base_addr;          /* register base address*/
-       struct  pci_dev **devices;      /* target device array  */
-       int     devices_cnt;            /* target device count  */
-       u8      ignored:1;              /* ignore drhd          */
-       u8      include_all:1;
-       struct intel_iommu *iommu;
-};
-
 struct dmar_rmrr_unit {
        struct list_head list;          /* list of rmrr units   */
+       struct acpi_dmar_header *hdr;   /* ACPI header          */
        u64     base_address;           /* reserved base address*/
        u64     end_address;            /* reserved end address */
        struct pci_dev **devices;       /* target devices */
        int     devices_cnt;            /* target device count */
 };
 
-#define for_each_drhd_unit(drhd) \
-       list_for_each_entry(drhd, &dmar_drhd_units, list)
 #define for_each_rmrr_units(rmrr) \
        list_for_each_entry(rmrr, &dmar_rmrr_units, list)
+/* Intel DMAR  initialization functions */
+extern int intel_iommu_init(void);
+extern int dmar_disabled;
 #else
-static inline void detect_intel_iommu(void)
-{
-       return;
-}
 static inline int intel_iommu_init(void)
 {
+#ifdef CONFIG_INTR_REMAP
+       return dmar_dev_scope_init();
+#else
        return -ENODEV;
+#endif
 }
-
 #endif /* !CONFIG_DMAR */
 #endif /* __DMAR_H__ */
index 8ccb462ea42c4cb3c4813c51ad285fb168efc2f9..8d9411bc60f6f9356e0237cc601330576283d22a 100644 (file)
@@ -62,6 +62,7 @@ typedef       void (*irq_flow_handler_t)(unsigned int irq,
 #define IRQ_MOVE_PENDING       0x00200000      /* need to re-target IRQ destination */
 #define IRQ_NO_BALANCING       0x00400000      /* IRQ is excluded from balancing */
 #define IRQ_SPURIOUS_DISABLED  0x00800000      /* IRQ was disabled by the spurious trap */
+#define IRQ_MOVE_PCNTXT        0x01000000      /* IRQ migration from process context */
 
 #ifdef CONFIG_IRQ_PER_CPU
 # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
index 77a51be360103c98c8ce3a165668ea423f69d119..909b2231fa93cd82e69f4432e1eb2f99c5146a99 100644 (file)
@@ -89,7 +89,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
        set_balance_irq_affinity(irq, cpumask);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-       set_pending_irq(irq, cpumask);
+       if (desc->status & IRQ_MOVE_PCNTXT) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&desc->lock, flags);
+               desc->chip->set_affinity(irq, cpumask);
+               spin_unlock_irqrestore(&desc->lock, flags);
+       } else
+               set_pending_irq(irq, cpumask);
 #else
        desc->affinity = cpumask;
        desc->chip->set_affinity(irq, cpumask);