return lapic_get_version() >= 0x14;
}
-void apic_wait_icr_idle(void)
+/*
+ * Paravirt kernels also might be using these below ops. So we still
+ * use generic apic_read()/apic_write(), which might be pointing to different
+ * ops in PARAVIRT case.
+ */
+void xapic_wait_icr_idle(void)
{
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax();
}
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
{
u32 send_status;
int timeout;
return send_status;
}
+void xapic_icr_write(u32 low, u32 id)
+{
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+ apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+ u32 icr1, icr2;
+
+ icr2 = apic_read(APIC_ICR2);
+ icr1 = apic_read(APIC_ICR);
+
+ return icr1 | ((u64)icr2 << 32);
+}
+
+static struct apic_ops xapic_ops = {
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = xapic_icr_read,
+ .icr_write = xapic_icr_write,
+ .wait_icr_idle = xapic_wait_icr_idle,
+ .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+EXPORT_SYMBOL_GPL(apic_ops);
+
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
void __cpuinit enable_NMI_through_LVT0(void)
{
- unsigned int v = APIC_DM_NMI;
+ unsigned int v;
- /* Level triggered for 82489DX */
+ /* unmask and set to NMI */
+ v = APIC_DM_NMI;
+
+ /* Level triggered for 82489DX (32bit mode) */
if (!lapic_is_integrated())
v |= APIC_LVT_LEVEL_TRIGGER;
+
apic_write(APIC_LVT0, v);
}
*/
int lapic_get_maxlvt(void)
{
- unsigned int v = apic_read(APIC_LVR);
+ unsigned int v;
- /* 82489DXs do not report # of LVT entries. */
+ v = apic_read(APIC_LVR);
+ /*
+ * - we always have APIC integrated on 64bit mode
+ * - 82489DXs do not report # of LVT entries
+ */
return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
}
* default configuration (or the MP table is broken).
*/
if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
}
* might be zero if read from MP tables. Get it from LAPIC.
*/
#ifdef CONFIG_CRASH_DUMP
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
#endif
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
irq_exit();
}
-#ifdef CONFIG_SMP
-void __init smp_intr_init(void)
-{
- /*
- * IRQ0 must be given a fixed assignment and initialized,
- * because it's used before the IO-APIC is set up.
- */
- set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
-
- /*
- * The reschedule interrupt is a CPU-to-CPU reschedule-helper
- * IPI, driven by wakeup.
- */
- alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
- /* IPI for invalidation */
- alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
-
- /* IPI for generic function call */
- alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
- /* IPI for single call function */
- set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
- call_function_single_interrupt);
-}
-#endif
-
-/*
- * Initialize APIC interrupts
- */
-void __init apic_intr_init(void)
-{
-#ifdef CONFIG_SMP
- smp_intr_init();
-#endif
- /* self generated IPI for local APIC timer */
- alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
- /* IPI vectors for APIC spurious and error interrupts */
- alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
- alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-
- /* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
- alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
-}
-
/**
* connect_bsp_APIC - attach the APIC to the interrupt system
*/
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-static int __init apic_set_verbosity(char *str)
+static int __init apic_set_verbosity(char *arg)
{
- if (strcmp("debug", str) == 0)
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp(arg, "debug") == 0)
apic_verbosity = APIC_DEBUG;
- else if (strcmp("verbose", str) == 0)
+ else if (strcmp(arg, "verbose") == 0)
apic_verbosity = APIC_VERBOSE;
- return 1;
+
+ return 0;
}
-__setup("apic=", apic_set_verbosity);
+early_param("apic", apic_set_verbosity);
static int __init lapic_insert_resource(void)
{
#include <linux/clockchips.h>
#include <linux/acpi_pmtmr.h>
#include <linux/module.h>
+#include <linux/dmar.h>
#include <asm/atomic.h>
#include <asm/smp.h>
#include <asm/proto.h>
#include <asm/timex.h>
#include <asm/apic.h>
+#include <asm/i8259.h>
#include <mach_ipi.h>
#include <mach_apic.h>
static int disable_apic_timer __cpuinitdata;
static int apic_calibrate_pmtmr __initdata;
int disable_apic;
+int disable_x2apic;
+int x2apic;
+
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok;
return lapic_get_version() >= 0x14;
}
-void apic_wait_icr_idle(void)
+void xapic_wait_icr_idle(void)
{
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax();
}
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
{
u32 send_status;
int timeout;
return send_status;
}
+void xapic_icr_write(u32 low, u32 id)
+{
+ apic_write(APIC_ICR2, id << 24);
+ apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+ u32 icr1, icr2;
+
+ icr2 = apic_read(APIC_ICR2);
+ icr1 = apic_read(APIC_ICR);
+
+ return (icr1 | ((u64)icr2 << 32));
+}
+
+static struct apic_ops xapic_ops = {
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = xapic_icr_read,
+ .icr_write = xapic_icr_write,
+ .wait_icr_idle = xapic_wait_icr_idle,
+ .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+
+EXPORT_SYMBOL_GPL(apic_ops);
+
+static void x2apic_wait_icr_idle(void)
+{
+ /* no need to wait for icr idle in x2apic */
+ return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+ /* no need to wait for icr idle in x2apic */
+ return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+ wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+ unsigned long val;
+
+ rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+ return val;
+}
+
+static struct apic_ops x2apic_ops = {
+ .read = native_apic_msr_read,
+ .write = native_apic_msr_write,
+ .icr_read = x2apic_icr_read,
+ .icr_write = x2apic_icr_write,
+ .wait_icr_idle = x2apic_wait_icr_idle,
+ .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
/* unmask and set to NMI */
v = APIC_DM_NMI;
+
+ /* Level triggered for 82489DX (32bit mode) */
+ if (!lapic_is_integrated())
+ v |= APIC_LVT_LEVEL_TRIGGER;
+
apic_write(APIC_LVT0, v);
}
*/
int lapic_get_maxlvt(void)
{
- unsigned int v, maxlvt;
+ unsigned int v;
v = apic_read(APIC_LVR);
- maxlvt = GET_APIC_MAXLVT(v);
- return maxlvt;
+ /*
+ * - we always have APIC integrated on 64bit mode
+ * - 82489DXs do not report # of LVT entries
+ */
+ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
}
/*
/*
* The ID register is read/write in a real APIC.
*/
- reg0 = read_apic_id();
+ reg0 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
- reg1 = read_apic_id();
+ reg1 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
apic_write(APIC_ID, reg0);
if (reg1 != (reg0 ^ APIC_ID_MASK))
apic_pm_activate();
}
+void check_x2apic(void)
+{
+ int msr, msr2;
+
+ rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+ if (msr & X2APIC_ENABLE) {
+ printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+ x2apic_preenabled = x2apic = 1;
+ apic_ops = &x2apic_ops;
+ }
+}
+
+void enable_x2apic(void)
+{
+ int msr, msr2;
+
+ rdmsr(MSR_IA32_APICBASE, msr, msr2);
+ if (!(msr & X2APIC_ENABLE)) {
+ printk("Enabling x2apic\n");
+ wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+ }
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+ int ret;
+ unsigned long flags;
+
+ if (!cpu_has_x2apic)
+ return;
+
+ if (!x2apic_preenabled && disable_x2apic) {
+ printk(KERN_INFO
+ "Skipped enabling x2apic and Interrupt-remapping "
+ "because of nox2apic\n");
+ return;
+ }
+
+ if (x2apic_preenabled && disable_x2apic)
+ panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+ if (!x2apic_preenabled && skip_ioapic_setup) {
+ printk(KERN_INFO
+ "Skipped enabling x2apic and Interrupt-remapping "
+ "because of skipping io-apic setup\n");
+ return;
+ }
+
+ ret = dmar_table_init();
+ if (ret) {
+ printk(KERN_INFO
+ "dmar_table_init() failed with %d:\n", ret);
+
+ if (x2apic_preenabled)
+ panic("x2apic enabled by bios. But IR enabling failed");
+ else
+ printk(KERN_INFO
+ "Not enabling x2apic,Intr-remapping\n");
+ return;
+ }
+
+ local_irq_save(flags);
+ mask_8259A();
+ save_mask_IO_APIC_setup();
+
+ ret = enable_intr_remapping(1);
+
+ if (ret && x2apic_preenabled) {
+ local_irq_restore(flags);
+ panic("x2apic enabled by bios. But IR enabling failed");
+ }
+
+ if (ret)
+ goto end;
+
+ if (!x2apic) {
+ x2apic = 1;
+ apic_ops = &x2apic_ops;
+ enable_x2apic();
+ }
+end:
+ if (ret)
+ /*
+ * IR enabling failed
+ */
+ restore_IO_APIC_setup();
+ else
+ reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+ unmask_8259A();
+ local_irq_restore(flags);
+
+ if (!ret) {
+ if (!x2apic_preenabled)
+ printk(KERN_INFO
+ "Enabled x2apic and interrupt-remapping\n");
+ else
+ printk(KERN_INFO
+ "Enabled Interrupt-remapping\n");
+ } else
+ printk(KERN_ERR
+ "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+ if (!cpu_has_x2apic)
+ return;
+
+ if (x2apic_preenabled)
+ panic("x2apic enabled prior OS handover,"
+ " enable CONFIG_INTR_REMAP");
+
+ printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+ " and x2apic\n");
+#endif
+
+ return;
+}
+
/*
* Detect and enable local APICs on non-SMP boards.
* Original code written by Keir Fraser.
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
}
/**
*/
void __init init_apic_mappings(void)
{
+ if (x2apic) {
+ boot_cpu_physical_apicid = read_apic_id();
+ return;
+ }
+
/*
* If no local APIC can be found then set up a fake all
* zeroes page to simulate the local APIC and another
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
}
/*
return -1;
}
+ enable_IR_x2apic();
+ setup_apic_routing();
+
verify_local_APIC();
connect_bsp_APIC();
cpu_set(cpu, cpu_present_map);
}
+int hard_smp_processor_id(void)
+{
+ return read_apic_id();
+}
+
/*
* Power management
*/
maxlvt = lapic_get_maxlvt();
- apic_pm_state.apic_id = read_apic_id();
+ apic_pm_state.apic_id = apic_read(APIC_ID);
apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
apic_pm_state.apic_ldr = apic_read(APIC_LDR);
apic_pm_state.apic_dfr = apic_read(APIC_DFR);
maxlvt = lapic_get_maxlvt();
local_irq_save(flags);
- rdmsr(MSR_IA32_APICBASE, l, h);
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
- wrmsr(MSR_IA32_APICBASE, l, h);
+ if (!x2apic) {
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ } else
+ enable_x2apic();
+
apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
apic_write(APIC_ID, apic_pm_state.apic_id);
apic_write(APIC_DFR, apic_pm_state.apic_dfr);
return (clusters > 2);
}
+static __init int setup_nox2apic(char *str)
+{
+ disable_x2apic = 1;
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
+ return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+
+
/*
* APIC command line parameters
*/
#include <acpi/acpi_bus.h>
#endif
#include <linux/bootmem.h>
+#include <linux/dmar.h>
#include <asm/idle.h>
#include <asm/io.h>
#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
+#include <asm/irq_remapping.h>
#include <mach_ipi.h>
#include <mach_apic.h>
+ #define __apicdebuginit(type) static type __init
+
struct irq_cfg {
cpumask_t domain;
cpumask_t old_domain;
char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
- #define __apicdebuginit __init
-
int sis_apic_bug; /* not actually supported, dummy for compile */
static int no_timer_check;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
+static DEFINE_SPINLOCK(vector_lock);
/*
* # of IRQ routing registers
*/
int nr_ioapic_registers[MAX_IO_APICS];
+/* I/O APIC RTE contents at the OS boot up */
+struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
/* I/O APIC entries */
struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;
pin = entry->pin;
if (pin == -1)
break;
- io_apic_write(apic, 0x11 + pin*2, dest);
+ /*
+ * With interrupt-remapping, destination information comes
+ * from interrupt-remapping table entry.
+ */
+ if (!irq_remapped(irq))
+ io_apic_write(apic, 0x11 + pin*2, dest);
reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
clear_IO_APIC_pin(apic, pin);
}
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+ int apic, pin;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(apic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+ }
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ early_ioapic_entries[apic] =
+ kzalloc(sizeof(struct IO_APIC_route_entry) *
+ nr_ioapic_registers[apic], GFP_KERNEL);
+ if (!early_ioapic_entries[apic])
+ return -ENOMEM;
+ }
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ struct IO_APIC_route_entry entry;
+
+ entry = early_ioapic_entries[apic][pin] =
+ ioapic_read_entry(apic, pin);
+ if (!entry.mask) {
+ entry.mask = 1;
+ ioapic_write_entry(apic, pin, entry);
+ }
+ }
+ return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ ioapic_write_entry(apic, pin,
+ early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+ /*
+ * for now plain restore of previous settings.
+ * TBD: In the case of OS enabling interrupt-remapping,
+ * IO-APIC RTE's need to be setup to point to interrupt-remapping
+ * table entries. for now, do a plain restore, and wait for
+ * the setup_IO_APIC_irqs() to do proper initialization.
+ */
+ restore_IO_APIC_setup();
+}
+
int skip_ioapic_setup;
int ioapic_force;
return irq;
}
+void lock_vector_lock(void)
+{
+ /* Used to the online set of cpus does not change
+ * during assign_irq_vector.
+ */
+ spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+ spin_unlock(&vector_lock);
+}
+
static int __assign_irq_vector(int irq, cpumask_t mask)
{
/*
cpus_clear(cfg->domain);
}
-static void __setup_vector_irq(int cpu)
+void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
/* This function must be called with vector_lock held */
}
}
-void setup_vector_irq(int cpu)
-{
- spin_lock(&vector_lock);
- __setup_vector_irq(smp_processor_id());
- spin_unlock(&vector_lock);
-}
-
-
static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
static void ioapic_register_intr(int irq, unsigned long trigger)
{
- if (trigger) {
+ if (trigger)
irq_desc[irq].status |= IRQ_LEVEL;
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_fasteoi_irq, "fasteoi");
- } else {
+ else
irq_desc[irq].status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+ if (trigger)
+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+ handle_fasteoi_irq,
+ "fasteoi");
+ else
+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+ handle_edge_irq, "edge");
+ return;
+ }
+#endif
+ if (trigger)
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
+ handle_fasteoi_irq,
+ "fasteoi");
+ else
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination, int trigger,
+ int polarity, int vector)
+{
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled) {
+ struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+ struct irte irte;
+ struct IR_IO_APIC_route_entry *ir_entry =
+ (struct IR_IO_APIC_route_entry *) entry;
+ int index;
+
+ if (!iommu)
+ panic("No mapping iommu for ioapic %d\n", apic);
+
+ index = alloc_irte(iommu, irq, 1);
+ if (index < 0)
+ panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+ memset(&irte, 0, sizeof(irte));
+
+ irte.present = 1;
+ irte.dst_mode = INT_DEST_MODE;
+ irte.trigger_mode = trigger;
+ irte.dlvry_mode = INT_DELIVERY_MODE;
+ irte.vector = vector;
+ irte.dest_id = IRTE_DEST(destination);
+
+ modify_irte(irq, &irte);
+
+ ir_entry->index2 = (index >> 15) & 0x1;
+ ir_entry->zero = 0;
+ ir_entry->format = 1;
+ ir_entry->index = (index & 0x7fff);
+ } else
+#endif
+ {
+ entry->delivery_mode = INT_DELIVERY_MODE;
+ entry->dest_mode = INT_DEST_MODE;
+ entry->dest = destination;
}
+
+ entry->mask = 0; /* enable IRQ */
+ entry->trigger = trigger;
+ entry->polarity = polarity;
+ entry->vector = vector;
+
+ /* Mask level triggered irqs.
+ * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+ */
+ if (trigger)
+ entry->mask = 1;
+ return 0;
}
static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
irq, trigger, polarity);
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry,0,sizeof(entry));
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.dest = cpu_mask_to_apicid(mask);
- entry.mask = 0; /* enable IRQ */
- entry.trigger = trigger;
- entry.polarity = polarity;
- entry.vector = cfg->vector;
-
- /* Mask level triggered irqs.
- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
- */
- if (trigger)
- entry.mask = 1;
+ if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+ cpu_mask_to_apicid(mask), trigger, polarity,
+ cfg->vector)) {
+ printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ mp_ioapics[apic].mp_apicid, pin);
+ __clear_irq_vector(irq);
+ return;
+ }
ioapic_register_intr(irq, trigger);
if (irq < 16)
{
struct IO_APIC_route_entry entry;
+ if (intr_remapping_enabled)
+ return;
+
memset(&entry, 0, sizeof(entry));
/*
ioapic_write_entry(apic, pin, entry);
}
- void __apicdebuginit print_IO_APIC(void)
+
+ __apicdebuginit(void) print_IO_APIC(void)
{
int apic, i;
union IO_APIC_reg_00 reg_00;
return;
}
- #if 0
-
- static __apicdebuginit void print_APIC_bitfield (int base)
+ __apicdebuginit(void) print_APIC_bitfield(int base)
{
unsigned int v;
int i, j;
}
}
- void __apicdebuginit print_local_APIC(void * dummy)
+ __apicdebuginit(void) print_local_APIC(void *dummy)
{
unsigned int v, ver, maxlvt;
+ unsigned long icr;
if (apic_verbosity == APIC_QUIET)
return;
printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
v = apic_read(APIC_LVR);
printk(KERN_INFO "... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v);
v = apic_read(APIC_ESR);
printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
- v = apic_read(APIC_ICR);
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
- v = apic_read(APIC_ICR2);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+ icr = apic_icr_read();
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
v = apic_read(APIC_LVTT);
printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
printk("\n");
}
- void print_all_local_APICs (void)
+ __apicdebuginit(void) print_all_local_APICs(void)
{
on_each_cpu(print_local_APIC, NULL, 1);
}
- void __apicdebuginit print_PIC(void)
+ __apicdebuginit(void) print_PIC(void)
{
unsigned int v;
unsigned long flags;
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}
- #endif /* 0 */
+ __apicdebuginit(int) print_all_ICs(void)
+ {
+ print_PIC();
+ print_all_local_APICs();
+ print_IO_APIC();
+
+ return 0;
+ }
+
+ fs_initcall(print_all_ICs);
+
void __init enable_IO_APIC(void)
{
entry.dest_mode = 0; /* Physical */
entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0;
- entry.dest = GET_APIC_ID(read_apic_id());
+ entry.dest = read_apic_id();
/*
* Add it to the IO-APIC irq-routing table:
*/
#ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE aswell with the update
+ * vector information, along with modifying IRTE with vector and destination.
+ * So irq migration for level triggered is little bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+ struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_desc *desc = irq_desc + irq;
+ cpumask_t tmp, cleanup_mask;
+ struct irte irte;
+ int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+ unsigned int dest;
+ unsigned long flags;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ if (get_irte(irq, &irte))
+ return;
+
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
+
+ if (modify_ioapic_rte) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __target_IO_APIC_irq(irq, dest, cfg->vector);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ /*
+ * Modified the IRTE and flushes the Interrupt entry cache.
+ */
+ modify_irte(irq, &irte);
+
+ if (cfg->move_in_progress) {
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+
+ irq_desc[irq].affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+ int ret = -1;
+
+ mask_IO_APIC_irq(irq);
+
+ if (io_apic_level_ack_pending(irq)) {
+ /*
+ * Interrupt in progress. Migrating irq now will change the
+ * vector information in the IO-APIC RTE and that will confuse
+ * the EOI broadcast performed by cpu.
+ * So, delay the irq migration to the next instance.
+ */
+ schedule_delayed_work(&ir_migration_work, 1);
+ goto unmask;
+ }
+
+ /* everthing is clear. we have right of way */
+ migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+
+ ret = 0;
+ irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
+ cpus_clear(irq_desc[irq].pending_mask);
+
+unmask:
+ unmask_IO_APIC_irq(irq);
+ return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+ int irq;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ struct irq_desc *desc = irq_desc + irq;
+ if (desc->status & IRQ_MOVE_PENDING) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if (!desc->chip->set_affinity ||
+ !(desc->status & IRQ_MOVE_PENDING)) {
+ desc->status &= ~IRQ_MOVE_PENDING;
+ spin_unlock_irqrestore(&desc->lock, flags);
+ continue;
+ }
+
+ desc->chip->set_affinity(irq,
+ irq_desc[irq].pending_mask);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+ }
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+ if (irq_desc[irq].status & IRQ_LEVEL) {
+ irq_desc[irq].status |= IRQ_MOVE_PENDING;
+ irq_desc[irq].pending_mask = mask;
+ migrate_irq_remapped_level(irq);
+ return;
+ }
+
+ migrate_ioapic_irq(irq, mask);
+}
+#endif
+
asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
#else
static inline void irq_complete_move(unsigned int irq) {}
#endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+ ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+ ack_x2APIC_irq();
+}
+#endif
static void ack_apic_edge(unsigned int irq)
{
.retrigger = ioapic_retrigger_irq,
};
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+ .name = "IR-IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_x2apic_edge,
+ .eoi = ack_x2apic_level,
+#ifdef CONFIG_SMP
+ .set_affinity = set_ir_ioapic_affinity_irq,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+#endif
+
static inline void init_IO_APIC_traps(void)
{
int irq;
* 8259A.
*/
if (pin1 == -1) {
+ if (intr_remapping_enabled)
+ panic("BIOS bug: timer not connected to IO-APIC");
pin1 = pin2;
apic1 = apic2;
no_pin1 = 1;
clear_IO_APIC_pin(0, pin1);
goto out;
}
+ if (intr_remapping_enabled)
+ panic("timer doesn't work through Interrupt-remapped IO-APIC");
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
setup_IO_APIC_irqs();
init_IO_APIC_traps();
check_timer();
- if (!acpi_ioapic)
- print_IO_APIC();
}
struct sysfs_ioapic_data {
dynamic_irq_cleanup(irq);
+#ifdef CONFIG_INTR_REMAP
+ free_irte(irq);
+#endif
spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq);
spin_unlock_irqrestore(&vector_lock, flags);
tmp = TARGET_CPUS;
err = assign_irq_vector(irq, tmp);
- if (!err) {
- cpus_and(tmp, cfg->domain, tmp);
- dest = cpu_mask_to_apicid(tmp);
+ if (err)
+ return err;
+
+ cpus_and(tmp, cfg->domain, tmp);
+ dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ struct irte irte;
+ int ir_index;
+ u16 sub_handle;
+
+ ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+ BUG_ON(ir_index == -1);
+
+ memset (&irte, 0, sizeof(irte));
+
+ irte.present = 1;
+ irte.dst_mode = INT_DEST_MODE;
+ irte.trigger_mode = 0; /* edge */
+ irte.dlvry_mode = INT_DELIVERY_MODE;
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ modify_irte(irq, &irte);
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->data = sub_handle;
+ msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+ MSI_ADDR_IR_SHV |
+ MSI_ADDR_IR_INDEX1(ir_index) |
+ MSI_ADDR_IR_INDEX2(ir_index);
+ } else
+#endif
+ {
msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo =
MSI_ADDR_BASE_LO |
write_msi_msg(irq, &msg);
irq_desc[irq].affinity = mask;
}
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+ struct irq_cfg *cfg = irq_cfg + irq;
+ unsigned int dest;
+ cpumask_t tmp, cleanup_mask;
+ struct irte irte;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ if (get_irte(irq, &irte))
+ return;
+
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
+
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ /*
+ * atomically update the IRTE with the new destination and vector.
+ */
+ modify_irte(irq, &irte);
+
+ /*
+ * After this point, all the interrupts will start arriving
+ * at the new destination. So, time to cleanup the previous
+ * vector allocation.
+ */
+ if (cfg->move_in_progress) {
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+
+ irq_desc[irq].affinity = mask;
+}
+#endif
#endif /* CONFIG_SMP */
/*
.retrigger = ioapic_retrigger_irq,
};
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+ .name = "IR-PCI-MSI",
+ .unmask = unmask_msi_irq,
+ .mask = mask_msi_irq,
+ .ack = ack_x2apic_edge,
+#ifdef CONFIG_SMP
+ .set_affinity = ir_set_msi_irq_affinity,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+ struct intel_iommu *iommu;
+ int index;
+
+ iommu = map_dev_to_ir(dev);
+ if (!iommu) {
+ printk(KERN_ERR
+ "Unable to map PCI %s to iommu\n", pci_name(dev));
+ return -ENOENT;
+ }
+
+ index = alloc_irte(iommu, irq, nvec);
+ if (index < 0) {
+ printk(KERN_ERR
+ "Unable to allocate %d IRTE for PCI %s\n", nvec,
+ pci_name(dev));
+ return -ENOSPC;
+ }
+ return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
{
+ int ret;
struct msi_msg msg;
+
+ ret = msi_compose_msg(dev, irq, &msg);
+ if (ret < 0)
+ return ret;
+
+ set_irq_msi(irq, desc);
+ write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ struct irq_desc *desc = irq_desc + irq;
+ /*
+ * irq migration in process context
+ */
+ desc->status |= IRQ_MOVE_PCNTXT;
+ set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+ } else
+#endif
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+ return 0;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
int irq, ret;
+
irq = create_irq();
if (irq < 0)
return irq;
- ret = msi_compose_msg(dev, irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+ if (!intr_remapping_enabled)
+ goto no_ir;
+
+ ret = msi_alloc_irte(dev, irq, 1);
+ if (ret < 0)
+ goto error;
+no_ir:
+#endif
+ ret = setup_msi_irq(dev, desc, irq);
if (ret < 0) {
destroy_irq(irq);
return ret;
}
+ return 0;
- set_irq_msi(irq, desc);
- write_msi_msg(irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+error:
+ destroy_irq(irq);
+ return ret;
+#endif
+}
- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ int irq, ret, sub_handle;
+ struct msi_desc *desc;
+#ifdef CONFIG_INTR_REMAP
+ struct intel_iommu *iommu = 0;
+ int index = 0;
+#endif
+ sub_handle = 0;
+ list_for_each_entry(desc, &dev->msi_list, list) {
+ irq = create_irq();
+ if (irq < 0)
+ return irq;
+#ifdef CONFIG_INTR_REMAP
+ if (!intr_remapping_enabled)
+ goto no_ir;
+
+ if (!sub_handle) {
+ /*
+ * allocate the consecutive block of IRTE's
+ * for 'nvec'
+ */
+ index = msi_alloc_irte(dev, irq, nvec);
+ if (index < 0) {
+ ret = index;
+ goto error;
+ }
+ } else {
+ iommu = map_dev_to_ir(dev);
+ if (!iommu) {
+ ret = -ENOENT;
+ goto error;
+ }
+ /*
+ * setup the mapping between the irq and the IRTE
+ * base index, the sub_handle pointing to the
+ * appropriate interrupt remap table entry.
+ */
+ set_irte_irq(irq, iommu, index, sub_handle);
+ }
+no_ir:
+#endif
+ ret = setup_msi_irq(dev, desc, irq);
+ if (ret < 0)
+ goto error;
+ sub_handle++;
+ }
return 0;
+
+error:
+ destroy_irq(irq);
+ return ret;
}
void arch_teardown_msi_irq(unsigned int irq)
setup_IO_APIC_irq(ioapic, pin, irq,
irq_trigger(irq_entry),
irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+ else if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
else
set_ioapic_affinity_irq(irq, TARGET_CPUS);
}