From: Rusty Russell Date: Wed, 31 Dec 2008 12:35:57 +0000 (+1030) Subject: Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 X-Git-Tag: v2.6.29-rc1~521^2~11^2~33 X-Git-Url: http://www.pilppa.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=2ca1a615835d9f4990f42102ab1f2ef434e7e89c;hp=-c;p=linux-2.6-omap-h63xx.git Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 Conflicts: arch/x86/kernel/io_apic.c --- 2ca1a615835d9f4990f42102ab1f2ef434e7e89c diff --combined arch/arm/kernel/smp.c index bd905c0a736,019237d2162..55fa7ff96a3 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@@ -33,6 -33,16 +33,6 @@@ #include #include -/* - * bitmask of present and online CPUs. - * The present bitmask indicates that the CPU is physically present. - * The online bitmask indicates that the CPU is up and running. - */ -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); -cpumask_t cpu_online_map; -EXPORT_SYMBOL(cpu_online_map); - /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core @@@ -171,7 -181,7 +171,7 @@@ int __cpuexit __cpu_disable(void /* * Stop the local timer for this CPU. */ - local_timer_stop(cpu); + local_timer_stop(); /* * Flush user cache and TLB mappings, and then remove this CPU @@@ -274,7 -284,7 +274,7 @@@ asmlinkage void __cpuinit secondary_sta /* * Setup local timer for this CPU. */ - local_timer_setup(cpu); + local_timer_setup(); calibrate_delay(); diff --combined arch/arm/mach-at91/at91rm9200_time.c index 72f51d39202,d140eae53de..1ff1bda0a89 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@@ -141,6 -141,15 +141,15 @@@ clkevt32k_next_event(unsigned long delt /* Use "raw" primitives so we behave correctly on RT kernels. */ raw_local_irq_save(flags); + /* + * According to Thomas Gleixner irqs are already disabled here. Simply + * removing raw_local_irq_save above (and the matching + * raw_local_irq_restore) was not accepted. See + * http://thread.gmane.org/gmane.linux.ports.arm.kernel/41174 + * So for now (2008-11-20) just warn once if irqs were not disabled ... + */ + WARN_ON_ONCE(!raw_irqs_disabled_flags(flags)); + /* The alarm IRQ uses absolute time (now+delta), not the relative * time (delta) in our calling convention. Like all clockevents * using such "match" hardware, we have a race to defend against. @@@ -169,6 -178,7 +178,6 @@@ static struct clock_event_device clkev .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 150, - .cpumask = CPU_MASK_CPU0, .set_next_event = clkevt32k_next_event, .set_mode = clkevt32k_mode, }; @@@ -196,7 -206,7 +205,7 @@@ void __init at91rm9200_timer_init(void clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift); clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt); clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1; - clkevt.cpumask = cpumask_of_cpu(0); + clkevt.cpumask = cpumask_of(0); clockevents_register_device(&clkevt); /* register clocksource */ diff --combined arch/arm/mach-pxa/time.c index bf3c9a4aad5,00162415851..95656a72268 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@@ -22,8 -22,8 +22,8 @@@ #include #include #include + #include #include - #include /* * This is PXA's sched_clock implementation. 
This has a resolution @@@ -122,6 -122,7 +122,6 @@@ static struct clock_event_device ckevt_ .features = CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 200, - .cpumask = CPU_MASK_CPU0, .set_next_event = pxa_osmr0_set_next_event, .set_mode = pxa_osmr0_set_mode, }; @@@ -149,18 -150,11 +149,11 @@@ static struct irqaction pxa_ost0_irq = static void __init pxa_timer_init(void) { - unsigned long clock_tick_rate; + unsigned long clock_tick_rate = get_clock_tick_rate(); OIER = 0; OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3; - if (cpu_is_pxa25x()) - clock_tick_rate = 3686400; - else if (machine_is_mainstone()) - clock_tick_rate = 3249600; - else - clock_tick_rate = 3250000; - set_oscr2ns_scale(clock_tick_rate); ckevt_pxa_osmr0.mult = @@@ -169,7 -163,6 +162,7 @@@ clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0); ckevt_pxa_osmr0.min_delta_ns = clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1; + ckevt_pxa_osmr0.cpumask = cpumask_of(0); cksrc_pxa_oscr0.mult = clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift); diff --combined arch/arm/mach-realview/core.c index b07cb9b7adb,5f1d55963ce..bd2aa4f1614 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@@ -28,11 -28,14 +28,14 @@@ #include #include #include + #include + #include #include #include #include #include + #include #include #include @@@ -49,7 -52,7 +52,7 @@@ #define REALVIEW_REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET) - /* used by entry-macro.S */ + /* used by entry-macro.S and platsmp.c */ void __iomem *gic_cpu_base_addr; /* @@@ -124,6 -127,29 +127,29 @@@ int realview_flash_register(struct reso return platform_device_register(&realview_flash_device); } + static struct smc911x_platdata realview_smc911x_platdata = { + .flags = SMC911X_USE_32BIT, + .irq_flags = IRQF_SHARED, + .irq_polarity = 1, + }; + + static struct platform_device realview_eth_device = { + .name = "smc911x", + .id = 0, + .num_resources = 2, + }; + + int realview_eth_register(const char *name, struct resource *res) + { + if (name) + realview_eth_device.name = name; + realview_eth_device.resource = res; + if (strcmp(realview_eth_device.name, "smc911x") == 0) + realview_eth_device.dev.platform_data = &realview_smc911x_platdata; + + return platform_device_register(&realview_eth_device); + } + static struct resource realview_i2c_resource = { .start = REALVIEW_I2C_BASE, .end = REALVIEW_I2C_BASE + SZ_4K - 1, @@@ -177,9 -203,14 +203,14 @@@ static const struct icst307_params real static void realview_oscvco_set(struct clk *clk, struct icst307_vco vco) { void __iomem *sys_lock = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_LOCK_OFFSET; - void __iomem *sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC4_OFFSET; + void __iomem *sys_osc; u32 val; + if (machine_is_realview_pb1176()) + sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC0_OFFSET; + else + sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC4_OFFSET; + val = readl(sys_osc) & ~0x7ffff; val |= vco.v | (vco.r << 9) | (vco.s << 16); @@@ -188,12 -219,59 +219,59 @@@ writel(0, sys_lock); } - struct clk realview_clcd_clk = { - .name = "CLCDCLK", + static struct clk oscvco_clk = { .params = &realview_oscvco_params, .setvco = realview_oscvco_set, }; + /* + * These are fixed clocks. 
+ */ + static struct clk ref24_clk = { + .rate = 24000000, + }; + + static struct clk_lookup lookups[] = { + { /* UART0 */ + .dev_id = "dev:f1", + .clk = &ref24_clk, + }, { /* UART1 */ + .dev_id = "dev:f2", + .clk = &ref24_clk, + }, { /* UART2 */ + .dev_id = "dev:f3", + .clk = &ref24_clk, + }, { /* UART3 */ + .dev_id = "fpga:09", + .clk = &ref24_clk, + }, { /* KMI0 */ + .dev_id = "fpga:06", + .clk = &ref24_clk, + }, { /* KMI1 */ + .dev_id = "fpga:07", + .clk = &ref24_clk, + }, { /* MMC0 */ + .dev_id = "fpga:05", + .clk = &ref24_clk, + }, { /* EB:CLCD */ + .dev_id = "dev:20", + .clk = &oscvco_clk, + }, { /* PB:CLCD */ + .dev_id = "issp:20", + .clk = &oscvco_clk, + } + }; + + static int __init clk_init(void) + { + int i; + + for (i = 0; i < ARRAY_SIZE(lookups); i++) + clkdev_add(&lookups[i]); + return 0; + } + arch_initcall(clk_init); + /* * CLCD support. */ @@@ -226,7 -304,30 +304,30 @@@ static struct clcd_panel vga = .width = -1, .height = -1, .tim2 = TIM2_BCD | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .bpp = 16, + }; + + static struct clcd_panel xvga = { + .mode = { + .name = "XVGA", + .refresh = 60, + .xres = 1024, + .yres = 768, + .pixclock = 15748, + .left_margin = 152, + .right_margin = 48, + .upper_margin = 23, + .lower_margin = 3, + .hsync_len = 104, + .vsync_len = 4, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), .bpp = 16, }; @@@ -249,7 -350,7 +350,7 @@@ static struct clcd_panel sanyo_3_8_in .width = -1, .height = -1, .tim2 = TIM2_BCD, - .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), .bpp = 16, }; @@@ -272,7 -373,7 +373,7 @@@ static struct clcd_panel sanyo_2_5_in .width = -1, .height = -1, .tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), .bpp = 16, }; @@@ -295,7 -396,7 +396,7 @@@ static struct clcd_panel epson_2_2_in .width = -1, .height = -1, .tim2 = TIM2_BCD | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), .bpp = 16, }; @@@ -308,9 -409,15 +409,15 @@@ static struct clcd_panel *realview_clcd_panel(void) { void __iomem *sys_clcd = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_CLCD_OFFSET; - struct clcd_panel *panel = &vga; + struct clcd_panel *vga_panel; + struct clcd_panel *panel; u32 val; + if (machine_is_realview_eb()) + vga_panel = &vga; + else + vga_panel = &xvga; + val = readl(sys_clcd) & SYS_CLCD_ID_MASK; if (val == SYS_CLCD_ID_SANYO_3_8) panel = &sanyo_3_8_in; @@@ -319,11 -426,11 +426,11 @@@ else if (val == SYS_CLCD_ID_EPSON_2_2) panel = &epson_2_2_in; else if (val == SYS_CLCD_ID_VGA) - panel = &vga; + panel = vga_panel; else { printk(KERN_ERR "CLCD: unknown LCD panel ID 0x%08x, using VGA\n", val); - panel = &vga; + panel = vga_panel; } return panel; @@@ -358,12 -465,18 +465,18 @@@ static void realview_clcd_enable(struc writel(val, sys_clcd); } - static unsigned long framesize = SZ_1M; - static int realview_clcd_setup(struct clcd_fb *fb) { + unsigned long framesize; dma_addr_t dma; + if (machine_is_realview_eb()) + /* VGA, 16bpp */ + framesize = 640 * 480 * 2; + else + /* XVGA, 16bpp */ + framesize = 1024 * 768 * 2; + fb->panel = realview_clcd_panel(); fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize, @@@ -511,7 -624,7 +624,7 @@@ static struct clock_event_device timer0 .set_mode = 
timer_set_mode, .set_next_event = timer_set_next_event, .rating = 300, - .cpumask = CPU_MASK_ALL, + .cpumask = cpu_all_mask, }; static void __init realview_clockevents_init(unsigned int timer_irq) @@@ -588,7 -701,7 +701,7 @@@ void __init realview_timer_init(unsigne * The dummy clock device has to be registered before the main device * so that the latter will broadcast the clock events */ - local_timer_setup(smp_processor_id()); + local_timer_setup(); #endif /* diff --combined arch/arm/mach-realview/localtimer.c index 504961ef343,9019ef2e561..67d6d9cc68b --- a/arch/arm/mach-realview/localtimer.c +++ b/arch/arm/mach-realview/localtimer.c @@@ -38,18 -38,14 +38,14 @@@ void local_timer_interrupt(void #ifdef CONFIG_LOCAL_TIMERS - #define TWD_BASE(cpu) (twd_base_addr + (cpu) * twd_size) - /* set up by the platform code */ - void __iomem *twd_base_addr; - unsigned int twd_size; + void __iomem *twd_base; static unsigned long mpcore_timer_rate; static void local_timer_set_mode(enum clock_event_mode mode, struct clock_event_device *clk) { - void __iomem *base = TWD_BASE(smp_processor_id()); unsigned long ctrl; switch(mode) { @@@ -68,17 -64,16 +64,16 @@@ ctrl = 0; } - __raw_writel(ctrl, base + TWD_TIMER_CONTROL); + __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); } static int local_timer_set_next_event(unsigned long evt, struct clock_event_device *unused) { - void __iomem *base = TWD_BASE(smp_processor_id()); - unsigned long ctrl = __raw_readl(base + TWD_TIMER_CONTROL); + unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL); - __raw_writel(evt, base + TWD_TIMER_COUNTER); - __raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, base + TWD_TIMER_CONTROL); + __raw_writel(evt, twd_base + TWD_TIMER_COUNTER); + __raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, twd_base + TWD_TIMER_CONTROL); return 0; } @@@ -91,19 -86,16 +86,16 @@@ */ int local_timer_ack(void) { - void __iomem *base = TWD_BASE(smp_processor_id()); - - if (__raw_readl(base + TWD_TIMER_INTSTAT)) { - __raw_writel(1, base + TWD_TIMER_INTSTAT); + if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) { + __raw_writel(1, twd_base + TWD_TIMER_INTSTAT); return 1; } return 0; } - static void __cpuinit twd_calibrate_rate(unsigned int cpu) + static void __cpuinit twd_calibrate_rate(void) { - void __iomem *base = TWD_BASE(cpu); unsigned long load, count; u64 waitjiffies; @@@ -124,15 -116,15 +116,15 @@@ waitjiffies += 5; /* enable, no interrupt or reload */ - __raw_writel(0x1, base + TWD_TIMER_CONTROL); + __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL); /* maximum value */ - __raw_writel(0xFFFFFFFFU, base + TWD_TIMER_COUNTER); + __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); while (get_jiffies_64() < waitjiffies) udelay(10); - count = __raw_readl(base + TWD_TIMER_COUNTER); + count = __raw_readl(twd_base + TWD_TIMER_COUNTER); mpcore_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5); @@@ -142,18 -134,19 +134,19 @@@ load = mpcore_timer_rate / HZ; - __raw_writel(load, base + TWD_TIMER_LOAD); + __raw_writel(load, twd_base + TWD_TIMER_LOAD); } /* * Setup the local clock events for a CPU. 
*/ - void __cpuinit local_timer_setup(unsigned int cpu) + void __cpuinit local_timer_setup(void) { + unsigned int cpu = smp_processor_id(); struct clock_event_device *clk = &per_cpu(local_clockevent, cpu); unsigned long flags; - twd_calibrate_rate(cpu); + twd_calibrate_rate(); clk->name = "local_timer"; clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; @@@ -161,7 -154,7 +154,7 @@@ clk->set_mode = local_timer_set_mode; clk->set_next_event = local_timer_set_next_event; clk->irq = IRQ_LOCALTIMER; - clk->cpumask = cpumask_of_cpu(cpu); + clk->cpumask = cpumask_of(cpu); clk->shift = 20; clk->mult = div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift); clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk); @@@ -178,9 -171,9 +171,9 @@@ /* * take a local timer down */ - void __cpuexit local_timer_stop(unsigned int cpu) + void __cpuexit local_timer_stop(void) { - __raw_writel(0, TWD_BASE(cpu) + TWD_TIMER_CONTROL); + __raw_writel(0, twd_base + TWD_TIMER_CONTROL); } #else /* CONFIG_LOCAL_TIMERS */ @@@ -190,8 -183,9 +183,9 @@@ static void dummy_timer_set_mode(enum c { } - void __cpuinit local_timer_setup(unsigned int cpu) + void __cpuinit local_timer_setup(void) { + unsigned int cpu = smp_processor_id(); struct clock_event_device *clk = &per_cpu(local_clockevent, cpu); clk->name = "dummy_timer"; @@@ -199,7 -193,7 +193,7 @@@ clk->rating = 200; clk->set_mode = dummy_timer_set_mode; clk->broadcast = smp_timer_broadcast; - clk->cpumask = cpumask_of_cpu(cpu); + clk->cpumask = cpumask_of(cpu); clockevents_register_device(clk); } diff --combined arch/arm/mach-sa1100/time.c index 1cac4ac0b4b,8c5e727f3b7..711c0295c66 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@@ -2,8 -2,8 +2,8 @@@ * linux/arch/arm/mach-sa1100/time.c * * Copyright (C) 1998 Deborah Wallach. - * Twiddles (C) 1999 Hugo Fiennes - * + * Twiddles (C) 1999 Hugo Fiennes + * * 2000/03/29 (C) Nicolas Pitre * Rewritten: big cleanup, much simpler, better HZ accuracy. 
* @@@ -73,6 -73,7 +73,6 @@@ static struct clock_event_device ckevt_ .features = CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 200, - .cpumask = CPU_MASK_CPU0, .set_next_event = sa1100_osmr0_set_next_event, .set_mode = sa1100_osmr0_set_mode, }; @@@ -109,7 -110,6 +109,7 @@@ static void __init sa1100_timer_init(vo clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0); ckevt_sa1100_osmr0.min_delta_ns = clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1; + ckevt_sa1100_osmr0.cpumask = cpumask_of(0); cksrc_sa1100_oscr.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift); diff --combined arch/arm/mach-versatile/core.c index a3f1933434e,df25aa13850..1c43494f5c4 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@@ -31,6 -31,7 +31,7 @@@ #include #include + #include #include #include #include @@@ -373,22 -374,60 +374,60 @@@ static const struct icst307_params vers static void versatile_oscvco_set(struct clk *clk, struct icst307_vco vco) { - void __iomem *sys_lock = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_LOCK_OFFSET; - void __iomem *sys_osc = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_OSCCLCD_OFFSET; + void __iomem *sys = __io_address(VERSATILE_SYS_BASE); + void __iomem *sys_lock = sys + VERSATILE_SYS_LOCK_OFFSET; u32 val; - val = readl(sys_osc) & ~0x7ffff; + val = readl(sys + clk->oscoff) & ~0x7ffff; val |= vco.v | (vco.r << 9) | (vco.s << 16); writel(0xa05f, sys_lock); - writel(val, sys_osc); + writel(val, sys + clk->oscoff); writel(0, sys_lock); } - static struct clk versatile_clcd_clk = { - .name = "CLCDCLK", + static struct clk osc4_clk = { .params = &versatile_oscvco_params, - .setvco = versatile_oscvco_set, + .oscoff = VERSATILE_SYS_OSCCLCD_OFFSET, + .setvco = versatile_oscvco_set, + }; + + /* + * These are fixed clocks. 
+ */ + static struct clk ref24_clk = { + .rate = 24000000, + }; + + static struct clk_lookup lookups[] __initdata = { + { /* UART0 */ + .dev_id = "dev:f1", + .clk = &ref24_clk, + }, { /* UART1 */ + .dev_id = "dev:f2", + .clk = &ref24_clk, + }, { /* UART2 */ + .dev_id = "dev:f3", + .clk = &ref24_clk, + }, { /* UART3 */ + .dev_id = "fpga:09", + .clk = &ref24_clk, + }, { /* KMI0 */ + .dev_id = "fpga:06", + .clk = &ref24_clk, + }, { /* KMI1 */ + .dev_id = "fpga:07", + .clk = &ref24_clk, + }, { /* MMC0 */ + .dev_id = "fpga:05", + .clk = &ref24_clk, + }, { /* MMC1 */ + .dev_id = "fpga:0b", + .clk = &ref24_clk, + }, { /* CLCD */ + .dev_id = "dev:20", + .clk = &osc4_clk, + } }; /* @@@ -786,7 -825,8 +825,8 @@@ void __init versatile_init(void { int i; - clk_register(&versatile_clcd_clk); + for (i = 0; i < ARRAY_SIZE(lookups); i++) + clkdev_add(&lookups[i]); platform_device_register(&versatile_flash_device); platform_device_register(&versatile_i2c_device); @@@ -965,7 -1005,7 +1005,7 @@@ static void __init versatile_timer_init timer0_clockevent.min_delta_ns = clockevent_delta2ns(0xf, &timer0_clockevent); - timer0_clockevent.cpumask = cpumask_of_cpu(0); + timer0_clockevent.cpumask = cpumask_of(0); clockevents_register_device(&timer0_clockevent); } diff --combined arch/sparc/kernel/irq_64.c index 4aaf18e83c8,a3ea2bcb95d..cab8e028687 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@@ -312,8 -312,7 +312,8 @@@ static void sun4u_irq_enable(unsigned i } } -static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4u_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { sun4u_irq_enable(virt_irq); } @@@ -363,8 -362,7 +363,8 @@@ static void sun4v_irq_enable(unsigned i ino, err); } -static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4v_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { unsigned int ino = virt_irq_table[virt_irq].dev_ino; unsigned long cpuid = irq_choose_cpu(virt_irq); @@@ -431,8 -429,7 +431,8 @@@ static void sun4v_virq_enable(unsigned dev_handle, dev_ino, err); } -static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4v_virt_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { unsigned long cpuid, dev_handle, dev_ino; int err; @@@ -778,6 -775,69 +778,69 @@@ void do_softirq(void local_irq_restore(flags); } + static void unhandled_perf_irq(struct pt_regs *regs) + { + unsigned long pcr, pic; + + read_pcr(pcr); + read_pic(pic); + + write_pcr(0); + + printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n", + smp_processor_id()); + printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n", + smp_processor_id(), pcr, pic); + } + + /* Almost a direct copy of the powerpc PMC code. */ + static DEFINE_SPINLOCK(perf_irq_lock); + static void *perf_irq_owner_caller; /* mostly for debugging */ + static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq; + + /* Invoked from level 15 PIL handler in trap table. 
*/ + void perfctr_irq(int irq, struct pt_regs *regs) + { + clear_softint(1 << irq); + perf_irq(regs); + } + + int register_perfctr_intr(void (*handler)(struct pt_regs *)) + { + int ret; + + if (!handler) + return -EINVAL; + + spin_lock(&perf_irq_lock); + if (perf_irq != unhandled_perf_irq) { + printk(KERN_WARNING "register_perfctr_intr: " + "perf IRQ busy (reserved by caller %p)\n", + perf_irq_owner_caller); + ret = -EBUSY; + goto out; + } + + perf_irq_owner_caller = __builtin_return_address(0); + perf_irq = handler; + + ret = 0; + out: + spin_unlock(&perf_irq_lock); + + return ret; + } + EXPORT_SYMBOL_GPL(register_perfctr_intr); + + void release_perfctr_intr(void (*handler)(struct pt_regs *)) + { + spin_lock(&perf_irq_lock); + perf_irq_owner_caller = NULL; + perf_irq = unhandled_perf_irq; + spin_unlock(&perf_irq_lock); + } + EXPORT_SYMBOL_GPL(release_perfctr_intr); + #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) { @@@ -791,7 -851,7 +854,7 @@@ !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, - irq_desc[irq].affinity); + &irq_desc[irq].affinity); } spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } diff --combined arch/sparc/kernel/of_device_64.c index 4f6098d318e,46e231f7c5c..4873f28905b --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@@ -778,9 -778,9 +778,9 @@@ static unsigned int __init build_one_de out: nid = of_node_to_nid(dp); if (nid != -1) { - cpumask_t numa_mask = node_to_cpumask(nid); + cpumask_t numa_mask = *cpumask_of_node(nid); - irq_set_affinity(irq, numa_mask); + irq_set_affinity(irq, &numa_mask); } return irq; @@@ -811,20 -811,20 +811,20 @@@ static struct of_device * __init scan_o irq = of_get_property(dp, "interrupts", &len); if (irq) { - memcpy(op->irqs, irq, len); op->num_irqs = len / 4; + + /* Prevent overrunning the op->irqs[] array. */ + if (op->num_irqs > PROMINTR_MAX) { + printk(KERN_WARNING "%s: Too many irqs (%d), " + "limiting to %d.\n", + dp->full_name, op->num_irqs, PROMINTR_MAX); + op->num_irqs = PROMINTR_MAX; + } + memcpy(op->irqs, irq, op->num_irqs * 4); } else { op->num_irqs = 0; } - /* Prevent overrunning the op->irqs[] array. 
*/ - if (op->num_irqs > PROMINTR_MAX) { - printk(KERN_WARNING "%s: Too many irqs (%d), " - "limiting to %d.\n", - dp->full_name, op->num_irqs, PROMINTR_MAX); - op->num_irqs = PROMINTR_MAX; - } - build_device_resources(op, parent); for (i = 0; i < op->num_irqs; i++) op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]); diff --combined arch/sparc/kernel/pci_msi.c index 4ef282e8191,2e680f34f72..4ef282e8191 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@@ -286,9 -286,9 +286,9 @@@ static int bringup_one_msi_queue(struc nid = pbm->numa_node; if (nid != -1) { - cpumask_t numa_mask = node_to_cpumask(nid); + cpumask_t numa_mask = *cpumask_of_node(nid); - irq_set_affinity(irq, numa_mask); + irq_set_affinity(irq, &numa_mask); } err = request_irq(irq, sparc64_msiq_interrupt, 0, "MSIQ", diff --combined arch/sparc/kernel/smp_32.c index 1e5ac4e282e,e396c1f17a9..1e5ac4e282e --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@@ -39,6 -39,8 +39,6 @@@ volatile unsigned long cpu_callin_map[N unsigned char boot_cpu_id = 0; unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* The only guaranteed locking primitive available on all Sparc @@@ -332,7 -334,7 +332,7 @@@ void __init smp_setup_cpu_possible_map( instance = 0; while (!cpu_find_by_instance(instance, NULL, &mid)) { if (mid < NR_CPUS) { - cpu_set(mid, phys_cpu_present_map); + cpu_set(mid, cpu_possible_map); cpu_set(mid, cpu_present_map); } instance++; @@@ -352,7 -354,7 +352,7 @@@ void __init smp_prepare_boot_cpu(void current_thread_info()->cpu = cpuid; cpu_set(cpuid, cpu_online_map); - cpu_set(cpuid, phys_cpu_present_map); + cpu_set(cpuid, cpu_possible_map); } int __cpuinit __cpu_up(unsigned int cpu) diff --combined arch/sparc/kernel/smp_64.c index a97b8822c22,bfe99d82d45..46329799f34 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@@ -49,10 -49,14 +49,10 @@@ int sparc64_multi_core __read_mostly; -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; -cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; -EXPORT_SYMBOL(cpu_possible_map); -EXPORT_SYMBOL(cpu_online_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); @@@ -159,7 -163,7 +159,7 @@@ static inline long get_delta (long *rt for (i = 0; i < NUM_ITERS; i++) { t0 = tick_ops->get_tick(); go[MASTER] = 1; - membar_storeload(); + membar_safe("#StoreLoad"); while (!(tm = go[SLAVE])) rmb(); go[SLAVE] = 0; @@@ -253,7 -257,7 +253,7 @@@ static void smp_synchronize_one_tick(in /* now let the client proceed into his loop */ go[MASTER] = 0; - membar_storeload(); + membar_safe("#StoreLoad"); spin_lock_irqsave(&itc_sync_lock, flags); { @@@ -263,7 -267,7 +263,7 @@@ go[MASTER] = 0; wmb(); go[SLAVE] = tick_ops->get_tick(); - membar_storeload(); + membar_safe("#StoreLoad"); } } spin_unlock_irqrestore(&itc_sync_lock, flags); @@@ -769,7 -773,7 +769,7 @@@ static void xcall_deliver(u64 data0, u6 /* Setup the initial cpu list. 
*/ cnt = 0; - for_each_cpu_mask_nr(i, *mask) { + for_each_cpu(i, mask) { if (i == this_cpu || !cpu_online(i)) continue; cpu_list[cnt++] = i; @@@ -1118,7 -1122,6 +1118,6 @@@ void smp_capture(void smp_processor_id()); #endif penguins_are_doing_time = 1; - membar_storestore_loadstore(); atomic_inc(&smp_capture_registry); smp_cross_call(&xcall_capture, 0, 0, 0); while (atomic_read(&smp_capture_registry) != ncpus) @@@ -1138,13 -1141,13 +1137,13 @@@ void smp_release(void smp_processor_id()); #endif penguins_are_doing_time = 0; - membar_storeload_storestore(); + membar_safe("#StoreLoad"); atomic_dec(&smp_capture_registry); } } - /* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they - * can service tlb flush xcalls... + /* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE + * set, so they can service tlb flush xcalls... */ extern void prom_world(int); @@@ -1157,7 -1160,7 +1156,7 @@@ void smp_penguin_jailcell(int irq, stru __asm__ __volatile__("flushw"); prom_world(1); atomic_inc(&smp_capture_registry); - membar_storeload_storestore(); + membar_safe("#StoreLoad"); while (penguins_are_doing_time) rmb(); atomic_dec(&smp_capture_registry); diff --combined arch/sparc/kernel/sparc_ksyms_32.c index 32d11a5fe3a,a4d45fc29b2..e1e97639231 --- a/arch/sparc/kernel/sparc_ksyms_32.c +++ b/arch/sparc/kernel/sparc_ksyms_32.c @@@ -61,7 -61,6 +61,6 @@@ extern void (*bzero_1page)(void *) extern void *__bzero(void *, size_t); extern void *__memscan_zero(void *, size_t); extern void *__memscan_generic(void *, int, size_t); - extern int __memcmp(const void *, const void *, __kernel_size_t); extern int __strncmp(const char *, const char *, __kernel_size_t); extern int __ashrdi3(int, int); @@@ -113,15 -112,17 +112,13 @@@ EXPORT_PER_CPU_SYMBOL(__cpu_data) #ifdef CONFIG_SMP /* IRQ implementation. */ EXPORT_SYMBOL(synchronize_irq); - -/* CPU online map and active count. 
*/ -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(phys_cpu_present_map); #endif EXPORT_SYMBOL(__udelay); EXPORT_SYMBOL(__ndelay); EXPORT_SYMBOL(rtc_lock); - #ifdef CONFIG_SUN_AUXIO EXPORT_SYMBOL(set_auxio); EXPORT_SYMBOL(get_auxio); - #endif EXPORT_SYMBOL(io_remap_pfn_range); #ifndef CONFIG_SMP @@@ -209,7 -210,6 +206,6 @@@ EXPORT_SYMBOL(bzero_1page) EXPORT_SYMBOL(__bzero); EXPORT_SYMBOL(__memscan_zero); EXPORT_SYMBOL(__memscan_generic); - EXPORT_SYMBOL(__memcmp); EXPORT_SYMBOL(__strncmp); EXPORT_SYMBOL(__memmove); diff --combined arch/sparc/kernel/time_64.c index 9df8f095a8b,141da375909..9df8f095a8b --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@@ -763,7 -763,7 +763,7 @@@ void __devinit setup_sparc64_timer(void sevt = &__get_cpu_var(sparc64_events); memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); - sevt->cpumask = cpumask_of_cpu(smp_processor_id()); + sevt->cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(sevt); } diff --combined arch/x86/include/asm/pci.h index f8959c7a985,66834c41c04..a977de23cb4 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@@ -84,6 -84,8 +84,8 @@@ static inline void pci_dma_burst_advice static inline void early_quirks(void) { } #endif + extern void pci_iommu_alloc(void); + #endif /* __KERNEL__ */ #ifdef CONFIG_X86_32 @@@ -100,9 -102,9 +102,9 @@@ #ifdef CONFIG_NUMA /* Returns the node based on pci bus */ -static inline int __pcibus_to_node(struct pci_bus *bus) +static inline int __pcibus_to_node(const struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; + const struct pci_sysdata *sd = bus->sysdata; return sd->node; } @@@ -111,12 -113,6 +113,12 @@@ static inline cpumask_t __pcibus_to_cpu { return node_to_cpumask(__pcibus_to_node(bus)); } + +static inline const struct cpumask * +cpumask_of_pcibus(const struct pci_bus *bus) +{ + return cpumask_of_node(__pcibus_to_node(bus)); +} #endif #endif /* _ASM_X86_PCI_H */ diff --combined arch/x86/kernel/hpet.c index b5310ff1259,845ea097383..cd759ad9069 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@@ -248,7 -248,7 +248,7 @@@ static void hpet_legacy_clockevent_regi * Start hpet with the boot cpu mask and make it * global after the IO_APIC has been initialized. */ - hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); + hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(&hpet_clockevent); global_clock_event = &hpet_clockevent; printk(KERN_DEBUG "hpet clockevent registered\n"); @@@ -303,7 -303,7 +303,7 @@@ static void hpet_set_mode(enum clock_ev struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); hpet_setup_msi_irq(hdev->irq); disable_irq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); + irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } break; @@@ -451,7 -451,7 +451,7 @@@ static int hpet_setup_irq(struct hpet_d return -1; disable_irq(dev->irq); - irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); + irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); enable_irq(dev->irq); printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", @@@ -502,7 -502,7 +502,7 @@@ static void init_one_hpet_msi_clockeven /* 5 usec minimum reprogramming delta. 
*/ evt->min_delta_ns = 5000; - evt->cpumask = cpumask_of_cpu(hdev->cpu); + evt->cpumask = cpumask_of(hdev->cpu); clockevents_register_device(evt); } @@@ -813,7 -813,7 +813,7 @@@ int __init hpet_enable(void out_nohpet: hpet_clear_mapping(); - boot_hpet_disable = 1; + hpet_address = 0; return 0; } @@@ -836,10 -836,11 +836,11 @@@ static __init int hpet_late_init(void hpet_address = force_hpet_address; hpet_enable(); - if (!hpet_virt_address) - return -ENODEV; } + if (!hpet_virt_address) + return -ENODEV; + hpet_reserve_platform_timers(hpet_readl(HPET_ID)); for_each_online_cpu(cpu) { diff --combined arch/x86/kernel/io_apic.c index 6dbf427175f,f6ea94b74da..e7745961ed3 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@@ -108,94 -108,253 +108,253 @@@ static int __init parse_noapic(char *st early_param("noapic", parse_noapic); struct irq_pin_list; + + /* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ + + struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; + }; + + static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) + { + struct irq_pin_list *pin; + int node; + + node = cpu_to_node(cpu); + + pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node); + + return pin; + } + struct irq_cfg { - unsigned int irq; struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + u8 move_desc_pending : 1; + #endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ + #ifdef CONFIG_SPARSE_IRQ + static struct irq_cfg irq_cfgx[] = { + #else static struct irq_cfg irq_cfgx[NR_IRQS] = { - [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, + #endif + [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .domain = CPU_MASK_ALL, .vector = 
IRQ9_VECTOR, }, + [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; - #define for_each_irq_cfg(irq, cfg) \ - for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) + void __init arch_early_irq_init(void) + { + struct irq_cfg *cfg; + struct irq_desc *desc; + int count; + int i; + + cfg = irq_cfgx; + count = ARRAY_SIZE(irq_cfgx); + for (i = 0; i < count; i++) { + desc = irq_to_desc(i); + desc->chip_data = &cfg[i]; + } + } + + #ifdef CONFIG_SPARSE_IRQ static struct irq_cfg *irq_cfg(unsigned int irq) { - return irq < nr_irqs ? irq_cfgx + irq : NULL; + struct irq_cfg *cfg = NULL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + if (desc) + cfg = desc->chip_data; + + return cfg; } - static struct irq_cfg *irq_cfg_alloc(unsigned int irq) + static struct irq_cfg *get_one_free_irq_cfg(int cpu) { - return irq_cfg(irq); + struct irq_cfg *cfg; + int node; + + node = cpu_to_node(cpu); + + cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); + + return cfg; } - /* - * Rough estimation of how many shared IRQs there are, can be changed - * anytime. - */ - #define MAX_PLUS_SHARED_IRQS NR_IRQS - #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + void arch_init_chip_data(struct irq_desc *desc, int cpu) + { + struct irq_cfg *cfg; - /* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. 
- */ + cfg = desc->chip_data; + if (!cfg) { + desc->chip_data = get_one_free_irq_cfg(cpu); + if (!desc->chip_data) { + printk(KERN_ERR "can not alloc irq_cfg\n"); + BUG_ON(1); + } + } + } - struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; - }; + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + + static void + init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) + { + struct irq_pin_list *old_entry, *head, *tail, *entry; + + cfg->irq_2_pin = NULL; + old_entry = old_cfg->irq_2_pin; + if (!old_entry) + return; + + entry = get_one_free_irq_2_pin(cpu); + if (!entry) + return; + + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + head = entry; + tail = entry; + old_entry = old_entry->next; + while (old_entry) { + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + entry = head; + while (entry) { + head = entry->next; + kfree(entry); + entry = head; + } + /* still use the old one */ + return; + } + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + tail->next = entry; + tail = entry; + old_entry = old_entry->next; + } - static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; - static struct irq_pin_list *irq_2_pin_ptr; + tail->next = NULL; + cfg->irq_2_pin = head; + } - static void __init irq_2_pin_init(void) + static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) { - struct irq_pin_list *pin = irq_2_pin_head; - int i; + struct irq_pin_list *entry, *next; + + if (old_cfg->irq_2_pin == cfg->irq_2_pin) + return; - for (i = 1; i < PIN_MAP_SIZE; i++) - pin[i-1].next = &pin[i]; + entry = old_cfg->irq_2_pin; - irq_2_pin_ptr = &pin[0]; + while (entry) { + next = entry->next; + kfree(entry); + entry = next; + } + old_cfg->irq_2_pin = NULL; } - static struct irq_pin_list *get_one_free_irq_2_pin(void) + void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) { - struct irq_pin_list *pin = irq_2_pin_ptr; + struct irq_cfg *cfg; + struct irq_cfg *old_cfg; - if (!pin) - panic("can not get more irq_2_pin\n"); + cfg = get_one_free_irq_cfg(cpu); - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; + if (!cfg) + return; + + desc->chip_data = cfg; + + old_cfg = old_desc->chip_data; + + memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + + init_copy_irq_2_pin(old_cfg, cfg, cpu); + } + + static void free_irq_cfg(struct irq_cfg *old_cfg) + { + kfree(old_cfg); + } + + void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) + { + struct irq_cfg *old_cfg, *cfg; + + old_cfg = old_desc->chip_data; + cfg = desc->chip_data; + + if (old_cfg == cfg) + return; + + if (old_cfg) { + free_irq_2_pin(old_cfg, cfg); + free_irq_cfg(old_cfg); + old_desc->chip_data = NULL; + } } + static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) + { + struct irq_cfg *cfg = desc->chip_data; + + if (!cfg->move_in_progress) { + /* it means that domain is not changed */ + if (!cpus_intersects(desc->affinity, mask)) + cfg->move_desc_pending = 1; + } + } + #endif + + #else + static struct irq_cfg *irq_cfg(unsigned int irq) + { + return irq < nr_irqs ? 
irq_cfgx + irq : NULL; + } + + #endif + + #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC + static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) + { + } + #endif + struct io_apic { unsigned int index; unsigned int unused[3]; @@@ -237,11 -396,10 +396,10 @@@ static inline void io_apic_modify(unsig writel(value, &io_apic->data); } - static bool io_apic_level_ack_pending(unsigned int irq) + static bool io_apic_level_ack_pending(struct irq_cfg *cfg) { struct irq_pin_list *entry; unsigned long flags; - struct irq_cfg *cfg = irq_cfg(irq); spin_lock_irqsave(&ioapic_lock, flags); entry = cfg->irq_2_pin; @@@ -323,13 -481,12 +481,12 @@@ static void ioapic_mask_entry(int apic } #ifdef CONFIG_SMP - static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) + static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; - struct irq_cfg *cfg; struct irq_pin_list *entry; + u8 vector = cfg->vector; - cfg = irq_cfg(irq); entry = cfg->irq_2_pin; for (;;) { unsigned int reg; @@@ -359,37 -516,48 +516,49 @@@ } } - static int assign_irq_vector(int irq, cpumask_t mask); + static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); - static void set_ioapic_affinity_irq(unsigned int irq, - const struct cpumask *mask) + static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; unsigned long flags; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; + unsigned int irq; - if (!cpumask_intersects(mask, cpu_online_mask)) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; - cfg = irq_cfg(irq); - if (assign_irq_vector(irq, *mask)) + irq = desc->irq; + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cpumask_and(&tmp, &cfg->domain, mask); + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); /* * Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); - desc = irq_to_desc(irq); spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - cpumask_copy(&desc->affinity, mask); + __target_IO_APIC_irq(irq, dest, cfg); + desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } + -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) ++static void set_ioapic_affinity_irq(unsigned int irq, ++ const struct cpumask *mask) + { + struct irq_desc *desc; + + desc = irq_to_desc(irq); + - set_ioapic_affinity_irq_desc(desc, mask); ++ set_ioapic_affinity_irq_desc(desc, *mask); + } #endif /* CONFIG_SMP */ /* @@@ -397,16 -565,18 +566,18 @@@ * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. 
*/ - static void add_pin_to_irq(unsigned int irq, int apic, int pin) + static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) { - struct irq_cfg *cfg; struct irq_pin_list *entry; - /* first time to refer irq_cfg, so with new */ - cfg = irq_cfg_alloc(irq); entry = cfg->irq_2_pin; if (!entry) { - entry = get_one_free_irq_2_pin(); + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", + apic, pin); + return; + } cfg->irq_2_pin = entry; entry->apic = apic; entry->pin = pin; @@@ -421,7 -591,7 +592,7 @@@ entry = entry->next; } - entry->next = get_one_free_irq_2_pin(); + entry->next = get_one_free_irq_2_pin(cpu); entry = entry->next; entry->apic = apic; entry->pin = pin; @@@ -430,11 -600,10 +601,10 @@@ /* * Reroute an IRQ to a different pin. */ - static void __init replace_pin_at_irq(unsigned int irq, + static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_cfg *cfg = irq_cfg(irq); struct irq_pin_list *entry = cfg->irq_2_pin; int replaced = 0; @@@ -451,18 -620,16 +621,16 @@@ /* why? call replace before add? */ if (!replaced) - add_pin_to_irq(irq, newapic, newpin); + add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); } - static inline void io_apic_modify_irq(unsigned int irq, + static inline void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { int pin; - struct irq_cfg *cfg; struct irq_pin_list *entry; - cfg = irq_cfg(irq); for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { unsigned int reg; pin = entry->pin; @@@ -475,9 -642,9 +643,9 @@@ } } - static void __unmask_IO_APIC_irq(unsigned int irq) + static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); } #ifdef CONFIG_X86_64 @@@ -492,47 -659,64 +660,64 @@@ void io_apic_sync(struct irq_pin_list * readl(&io_apic->data); } - static void __mask_IO_APIC_irq(unsigned int irq) + static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } #else /* CONFIG_X86_32 */ - static void __mask_IO_APIC_irq(unsigned int irq) + static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); + io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); } - static void __mask_and_edge_IO_APIC_irq(unsigned int irq) + static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, IO_APIC_REDIR_MASKED, NULL); } - static void __unmask_and_level_IO_APIC_irq(unsigned int irq) + static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } #endif /* CONFIG_X86_32 */ - static void mask_IO_APIC_irq (unsigned int irq) + static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { + struct irq_cfg *cfg = desc->chip_data; unsigned long flags; + BUG_ON(!cfg); + spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); + __mask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } - static void unmask_IO_APIC_irq (unsigned int irq) + static void unmask_IO_APIC_irq_desc(struct irq_desc 
*desc) { + struct irq_cfg *cfg = desc->chip_data; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); + __unmask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } + static void mask_IO_APIC_irq(unsigned int irq) + { + struct irq_desc *desc = irq_to_desc(irq); + + mask_IO_APIC_irq_desc(desc); + } + static void unmask_IO_APIC_irq(unsigned int irq) + { + struct irq_desc *desc = irq_to_desc(irq); + + unmask_IO_APIC_irq_desc(desc); + } + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@@ -809,7 -993,7 +994,7 @@@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vecto */ static int EISA_ELCR(unsigned int irq) { - if (irq < 16) { + if (irq < NR_IRQS_LEGACY) { unsigned int port = 0x4d0 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } @@@ -1034,7 -1218,7 +1219,7 @@@ void unlock_vector_lock(void spin_unlock(&vector_lock); } - static int __assign_irq_vector(int irq, cpumask_t mask) + static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { /* * NOTE! The local APIC isn't very good at handling @@@ -1050,16 -1234,13 +1235,13 @@@ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; int cpu; - struct irq_cfg *cfg; - cfg = irq_cfg(irq); + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; /* Only try and allocate irqs on cpus that are present */ cpus_and(mask, mask, cpu_online_map); - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; - old_vector = cfg->vector; if (old_vector) { cpumask_t tmp; @@@ -1113,24 -1294,22 +1295,22 @@@ next return -ENOSPC; } - static int assign_irq_vector(int irq, cpumask_t mask) + static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { int err; unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, mask); + err = __assign_irq_vector(irq, cfg, mask); spin_unlock_irqrestore(&vector_lock, flags); return err; } - static void __clear_irq_vector(int irq) + static void __clear_irq_vector(int irq, struct irq_cfg *cfg) { - struct irq_cfg *cfg; cpumask_t mask; int cpu, vector; - cfg = irq_cfg(irq); BUG_ON(!cfg->vector); vector = cfg->vector; @@@ -1162,9 -1341,13 +1342,13 @@@ void __setup_vector_irq(int cpu /* This function must be called with vector_lock held */ int irq, vector; struct irq_cfg *cfg; + struct irq_desc *desc; /* Mark the inuse vectors */ - for_each_irq_cfg(irq, cfg) { + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + cfg = desc->chip_data; if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; @@@ -1215,11 -1398,8 +1399,8 @@@ static inline int IO_APIC_irq_trigger(i } #endif - static void ioapic_register_intr(int irq, unsigned long trigger) + static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) @@@ -1311,7 -1491,7 +1492,7 @@@ static int setup_ioapic_entry(int apic return 0; } - static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, + static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, int trigger, int polarity) { struct irq_cfg *cfg; @@@ -1321,10 -1501,10 +1502,10 @@@ if (!IO_APIC_IRQ(irq)) return; - cfg = irq_cfg(irq); + cfg = desc->chip_data; mask = TARGET_CPUS; - if (assign_irq_vector(irq, mask)) + if (assign_irq_vector(irq, cfg, mask)) return; cpus_and(mask, 
cfg->domain, mask); @@@ -1341,12 -1521,12 +1522,12 @@@ cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq); + __clear_irq_vector(irq, cfg); return; } - ioapic_register_intr(irq, trigger); - if (irq < 16) + ioapic_register_intr(irq, desc, trigger); + if (irq < NR_IRQS_LEGACY) disable_8259A_irq(irq); ioapic_write_entry(apic, pin, entry); @@@ -1356,6 -1536,9 +1537,9 @@@ static void __init setup_IO_APIC_irqs(v { int apic, pin, idx, irq; int notcon = 0; + struct irq_desc *desc; + struct irq_cfg *cfg; + int cpu = boot_cpu_id; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@@ -1387,9 -1570,15 +1571,15 @@@ if (multi_timer_check(apic, irq)) continue; #endif - add_pin_to_irq(irq, apic, pin); + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + continue; + } + cfg = desc->chip_data; + add_pin_to_irq_cpu(cfg, cpu, apic, pin); - setup_IO_APIC_irq(apic, pin, irq, + setup_IO_APIC_irq(apic, pin, irq, desc, irq_trigger(idx), irq_polarity(idx)); } } @@@ -1448,6 -1637,7 +1638,7 @@@ __apicdebuginit(void) print_IO_APIC(voi union IO_APIC_reg_03 reg_03; unsigned long flags; struct irq_cfg *cfg; + struct irq_desc *desc; unsigned int irq; if (apic_verbosity == APIC_QUIET) @@@ -1537,8 -1727,13 +1728,13 @@@ } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(irq, cfg) { - struct irq_pin_list *entry = cfg->irq_2_pin; + for_each_irq_desc(irq, desc) { + struct irq_pin_list *entry; + + if (!desc) + continue; + cfg = desc->chip_data; + entry = cfg->irq_2_pin; if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", irq); @@@ -2022,14 -2217,16 +2218,16 @@@ static unsigned int startup_ioapic_irq( { int was_pending = 0; unsigned long flags; + struct irq_cfg *cfg; spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { + if (irq < NR_IRQS_LEGACY) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) was_pending = 1; } - __unmask_IO_APIC_irq(irq); + cfg = irq_cfg(irq); + __unmask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; @@@ -2092,35 -2289,37 +2290,37 @@@ static DECLARE_DELAYED_WORK(ir_migratio * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to IO-APIC RTE. 
*/ - static void migrate_ioapic_irq(int irq, cpumask_t mask) + static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; - struct irq_desc *desc; cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; + unsigned int irq; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; + irq = desc->irq; if (get_irte(irq, &irte)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - desc = irq_to_desc(irq); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); + __target_IO_APIC_irq(irq, dest, cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } @@@ -2142,14 -2341,14 +2342,14 @@@ desc->affinity = mask; } - static int migrate_irq_remapped_level(int irq) + static int migrate_irq_remapped_level_desc(struct irq_desc *desc) { int ret = -1; - struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; - mask_IO_APIC_irq(irq); + mask_IO_APIC_irq_desc(desc); - if (io_apic_level_ack_pending(irq)) { + if (io_apic_level_ack_pending(cfg)) { /* * Interrupt in progress. Migrating irq now will change the * vector information in the IO-APIC RTE and that will confuse @@@ -2161,14 -2360,15 +2361,15 @@@ } /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; cpus_clear(desc->pending_mask); unmask: - unmask_IO_APIC_irq(irq); + unmask_IO_APIC_irq_desc(desc); + return ret; } @@@ -2178,6 -2378,9 +2379,9 @@@ static void ir_irq_migration(struct wor struct irq_desc *desc; for_each_irq_desc(irq, desc) { + if (!desc) + continue; + if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@@ -2189,7 -2392,7 +2393,7 @@@ continue; } - desc->chip->set_affinity(irq, desc->pending_mask); + desc->chip->set_affinity(irq, &desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@@ -2198,19 -2401,22 +2402,23 @@@ /* * Migrates the IRQ destination in the process context. 
*/ - static void set_ir_ioapic_affinity_irq(unsigned int irq, - const struct cpumask *mask) + static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { - struct irq_desc *desc = irq_to_desc(irq); - if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, mask); - migrate_irq_remapped_level(irq); + desc->pending_mask = mask; + migrate_irq_remapped_level_desc(desc); return; } - migrate_ioapic_irq(irq, *mask); + migrate_ioapic_irq_desc(desc, mask); + } -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) ++static void set_ir_ioapic_affinity_irq(unsigned int irq, ++ const struct cpumask *mask) + { + struct irq_desc *desc = irq_to_desc(irq); + - set_ir_ioapic_affinity_irq_desc(desc, mask); ++ set_ir_ioapic_affinity_irq_desc(desc, *mask); } #endif @@@ -2229,6 -2435,9 +2437,9 @@@ asmlinkage void smp_irq_move_cleanup_in struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; + if (irq == -1) + continue; + desc = irq_to_desc(irq); if (!desc) continue; @@@ -2250,19 -2459,40 +2461,40 @@@ unlock irq_exit(); } - static void irq_complete_move(unsigned int irq) + static void irq_complete_move(struct irq_desc **descp) { - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_desc *desc = *descp; + struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) + if (likely(!cfg->move_in_progress)) { + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + if (likely(!cfg->move_desc_pending)) + return; + + /* domain has not changed, but affinity did */ + me = smp_processor_id(); + if (cpu_isset(me, desc->affinity)) { + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; + cfg->move_desc_pending = 0; + } + #endif return; + } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { cpumask_t cleanup_mask; + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; + #endif + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); cfg->move_cleanup_count = cpus_weight(cleanup_mask); send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); @@@ -2270,8 -2500,9 +2502,9 @@@ } } #else - static inline void irq_complete_move(unsigned int irq) {} + static inline void irq_complete_move(struct irq_desc **descp) {} #endif + #ifdef CONFIG_INTR_REMAP static void ack_x2apic_level(unsigned int irq) { @@@ -2282,11 -2513,14 +2515,14 @@@ static void ack_x2apic_edge(unsigned in { ack_x2APIC_irq(); } + #endif static void ack_apic_edge(unsigned int irq) { - irq_complete_move(irq); + struct irq_desc *desc = irq_to_desc(irq); + + irq_complete_move(&desc); move_native_irq(irq); ack_APIC_irq(); } @@@ -2295,18 -2529,21 +2531,21 @@@ atomic_t irq_mis_count static void ack_apic_level(unsigned int irq) { + struct irq_desc *desc = irq_to_desc(irq); + #ifdef CONFIG_X86_32 unsigned long v; int i; #endif + struct irq_cfg *cfg; int do_unmask_irq = 0; - irq_complete_move(irq); + irq_complete_move(&desc); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { + if (unlikely(desc->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; - mask_IO_APIC_irq(irq); + mask_IO_APIC_irq_desc(desc); } #endif @@@ -2330,7 -2567,8 +2569,8 @@@ * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. 
--macro */ - i = irq_cfg(irq)->vector; + cfg = desc->chip_data; + i = cfg->vector; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); #endif @@@ -2369,17 -2607,18 +2609,18 @@@ * accurate and is causing problems then it is a hardware bug * and you can go talk to the chipset vendor about it. */ - if (!io_apic_level_ack_pending(irq)) + cfg = desc->chip_data; + if (!io_apic_level_ack_pending(cfg)) move_masked_irq(irq); - unmask_IO_APIC_irq(irq); + unmask_IO_APIC_irq_desc(desc); } #ifdef CONFIG_X86_32 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); spin_unlock(&ioapic_lock); } #endif @@@ -2430,20 -2669,22 +2671,22 @@@ static inline void init_IO_APIC_traps(v * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for_each_irq_cfg(irq, cfg) { - if (IO_APIC_IRQ(irq) && !cfg->vector) { + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + + cfg = desc->chip_data; + if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (irq < 16) + if (irq < NR_IRQS_LEGACY) make_8259A_irq(irq); - else { - desc = irq_to_desc(irq); + else /* Strange. Oh, well.. */ desc->chip = &no_irq_chip; - } } } } @@@ -2468,7 -2709,7 +2711,7 @@@ static void unmask_lapic_irq(unsigned i apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } - static void ack_lapic_irq (unsigned int irq) + static void ack_lapic_irq(unsigned int irq) { ack_APIC_irq(); } @@@ -2480,11 -2721,8 +2723,8 @@@ static struct irq_chip lapic_chip __rea .ack = ack_lapic_irq, }; - static void lapic_register_intr(int irq) + static void lapic_register_intr(int irq, struct irq_desc *desc) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); @@@ -2588,7 -2826,9 +2828,9 @@@ int timer_through_8259 __initdata */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg(0); + struct irq_desc *desc = irq_to_desc(0); + struct irq_cfg *cfg = desc->chip_data; + int cpu = boot_cpu_id; int apic1, pin1, apic2, pin2; unsigned long flags; unsigned int ver; @@@ -2603,7 -2843,7 +2845,7 @@@ * get/set the timer IRQ vector: */ disable_8259A_irq(0); - assign_irq_vector(0, TARGET_CPUS); + assign_irq_vector(0, cfg, TARGET_CPUS); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@@ -2654,10 -2894,10 +2896,10 @@@ * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq(0, apic1, pin1); + add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } - unmask_IO_APIC_irq(0); + unmask_IO_APIC_irq_desc(desc); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@@ -2683,9 -2923,9 +2925,9 @@@ /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq(0); + unmask_IO_APIC_irq_desc(desc); enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... 
works.\n"); @@@ -2717,7 -2957,7 +2959,7 @@@ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); - lapic_register_intr(0); + lapic_register_intr(0, desc); apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); @@@ -2902,22 -3142,26 +3144,26 @@@ unsigned int create_irq_nr(unsigned in unsigned int irq; unsigned int new; unsigned long flags; - struct irq_cfg *cfg_new; - - irq_want = nr_irqs - 1; + struct irq_cfg *cfg_new = NULL; + int cpu = boot_cpu_id; + struct irq_desc *desc_new = NULL; irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new > 0; new--) { + for (new = irq_want; new < NR_IRQS; new++) { if (platform_legacy_irq(new)) continue; - cfg_new = irq_cfg(new); - if (cfg_new && cfg_new->vector != 0) + + desc_new = irq_to_desc_alloc_cpu(new, cpu); + if (!desc_new) { + printk(KERN_INFO "can not get irq_desc for %d\n", new); continue; - /* check if need to create one */ - if (!cfg_new) - cfg_new = irq_cfg_alloc(new); - if (__assign_irq_vector(new, TARGET_CPUS) == 0) + } + cfg_new = desc_new->chip_data; + + if (cfg_new->vector != 0) + continue; + if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) irq = new; break; } @@@ -2925,15 -3169,21 +3171,21 @@@ if (irq > 0) { dynamic_irq_init(irq); + /* restore it, in case dynamic_irq_init clear it */ + if (desc_new) + desc_new->chip_data = cfg_new; } return irq; } + static int nr_irqs_gsi = NR_IRQS_LEGACY; int create_irq(void) { + unsigned int irq_want; int irq; - irq = create_irq_nr(nr_irqs - 1); + irq_want = nr_irqs_gsi; + irq = create_irq_nr(irq_want); if (irq == 0) irq = -1; @@@ -2944,14 -3194,22 +3196,22 @@@ void destroy_irq(unsigned int irq) { unsigned long flags; + struct irq_cfg *cfg; + struct irq_desc *desc; + /* store it, in case dynamic_irq_cleanup clear it */ + desc = irq_to_desc(irq); + cfg = desc->chip_data; dynamic_irq_cleanup(irq); + /* connect back irq_cfg */ + if (desc) + desc->chip_data = cfg; #ifdef CONFIG_INTR_REMAP free_irte(irq); #endif spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); + __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); } @@@ -2966,12 -3224,12 +3226,12 @@@ static int msi_compose_msg(struct pci_d unsigned dest; cpumask_t tmp; + cfg = irq_cfg(irq); tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); + err = assign_irq_vector(irq, cfg, tmp); if (err) return err; - cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@@ -3027,61 -3285,64 +3287,63 @@@ } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; - if (assign_irq_vector(irq, *mask)) + cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) ++ if (assign_irq_vector(irq, cfg, *mask)) return; - cfg = irq_cfg(irq); - set_extra_move_desc(desc, mask); ++ set_extra_move_desc(desc, *mask); + - cpus_and(tmp, cfg->domain, mask); + cpumask_and(&tmp, &cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - read_msi_msg(irq, &msg); + read_msi_msg_desc(desc, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= 
MSI_ADDR_DEST_ID(dest); - write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); + write_msi_msg_desc(desc, &msg); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } - #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. */ -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +static void ir_set_msi_irq_affinity(unsigned int irq, + const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp, cleanup_mask; struct irte irte; - struct irq_desc *desc; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; if (get_irte(irq, &irte)) return; - if (assign_irq_vector(irq, *mask)) + cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) ++ if (assign_irq_vector(irq, cfg, *mask)) return; - cfg = irq_cfg(irq); - set_extra_move_desc(desc, mask); ++ set_extra_move_desc(desc, *mask); + - cpus_and(tmp, cfg->domain, mask); + cpumask_and(&tmp, &cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); irte.vector = cfg->vector; @@@ -3104,9 -3365,9 +3366,9 @@@ cfg->move_in_progress = 0; } - desc = irq_to_desc(irq); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } + #endif #endif /* CONFIG_SMP */ @@@ -3165,7 -3426,7 +3427,7 @@@ static int msi_alloc_irte(struct pci_de } #endif - static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) + static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { int ret; struct msi_msg msg; @@@ -3174,7 -3435,7 +3436,7 @@@ if (ret < 0) return ret; - set_irq_msi(irq, desc); + set_irq_msi(irq, msidesc); write_msi_msg(irq, &msg); #ifdef CONFIG_INTR_REMAP @@@ -3194,26 -3455,13 +3456,13 @@@ return 0; } - static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) - { - unsigned int irq; - - irq = dev->bus->number; - irq <<= 8; - irq |= dev->devfn; - irq <<= 12; - - return irq; - } - - int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) + int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) { unsigned int irq; int ret; unsigned int irq_want; - irq_want = build_irq_for_pci_dev(dev) + 0x100; - + irq_want = nr_irqs_gsi; irq = create_irq_nr(irq_want); if (irq == 0) return -1; @@@ -3227,7 -3475,7 +3476,7 @@@ goto error; no_ir: #endif - ret = setup_msi_irq(dev, desc, irq); + ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) { destroy_irq(irq); return ret; @@@ -3245,7 -3493,7 +3494,7 @@@ int arch_setup_msi_irqs(struct pci_dev { unsigned int irq; int ret, sub_handle; - struct msi_desc *desc; + struct msi_desc *msidesc; unsigned int irq_want; #ifdef CONFIG_INTR_REMAP @@@ -3253,10 -3501,11 +3502,11 @@@ int index = 0; #endif - irq_want = build_irq_for_pci_dev(dev) + 0x100; + irq_want = nr_irqs_gsi; sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want); + irq_want++; if (irq == 0) return -1; #ifdef CONFIG_INTR_REMAP @@@ -3288,7 -3537,7 +3538,7 @@@ } no_ir: #endif - ret = setup_msi_irq(dev, desc, irq); + ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) goto error; sub_handle++; @@@ -3307,22 -3556,25 +3557,24 @@@ void arch_teardown_msi_irq(unsigned in #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +static void dmar_msi_set_affinity(unsigned 
int irq, const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; - if (assign_irq_vector(irq, *mask)) + cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) ++ if (assign_irq_vector(irq, cfg, *mask)) return; - cfg = irq_cfg(irq); - set_extra_move_desc(desc, mask); ++ set_extra_move_desc(desc, *mask); + - cpus_and(tmp, cfg->domain, mask); + cpumask_and(&tmp, &cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); dmar_msi_read(irq, &msg); @@@ -3333,9 -3585,9 +3585,9 @@@ msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } + #endif /* CONFIG_SMP */ struct irq_chip dmar_msi_type = { @@@ -3367,22 -3619,25 +3619,24 @@@ int arch_setup_dmar_msi(unsigned int ir #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) +static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; - struct irq_desc *desc; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; - if (assign_irq_vector(irq, *mask)) + cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) ++ if (assign_irq_vector(irq, cfg, *mask)) return; - cfg = irq_cfg(irq); - set_extra_move_desc(desc, mask); ++ set_extra_move_desc(desc, *mask); + - cpus_and(tmp, cfg->domain, mask); + cpumask_and(&tmp, &cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); hpet_msi_read(irq, &msg); @@@ -3393,9 -3648,9 +3647,9 @@@ msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } + #endif /* CONFIG_SMP */ struct irq_chip hpet_msi_type = { @@@ -3448,27 -3703,30 +3702,29 @@@ static void target_ht_irq(unsigned int write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) +static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; - if (assign_irq_vector(irq, *mask)) + cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) ++ if (assign_irq_vector(irq, cfg, *mask)) return; - cfg = irq_cfg(irq); - set_extra_move_desc(desc, mask); ++ set_extra_move_desc(desc, *mask); + - cpus_and(tmp, cfg->domain, mask); + cpumask_and(&tmp, &cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - desc = irq_to_desc(irq); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } + #endif static struct irq_chip ht_irq_chip = { @@@ -3488,13 -3746,13 +3744,13 @@@ int arch_setup_ht_irq(unsigned int irq int err; cpumask_t tmp; + cfg = irq_cfg(irq); tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); + err = assign_irq_vector(irq, cfg, tmp); if (!err) { struct ht_irq_msg msg; unsigned dest; - cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@@ -3540,7 -3798,9 +3796,9 @@@ int 
arch_enable_uv_irq(char *irq_name, unsigned long flags; int err; - err = assign_irq_vector(irq, *eligible_cpu); + cfg = irq_cfg(irq); + + err = assign_irq_vector(irq, cfg, *eligible_cpu); if (err != 0) return err; @@@ -3549,8 -3809,6 +3807,6 @@@ irq_name); spin_unlock_irqrestore(&vector_lock, flags); - cfg = irq_cfg(irq); - mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@@ -3602,9 -3860,16 +3858,16 @@@ int __init io_apic_get_redir_entries (i return reg_01.bits.entries; } - int __init probe_nr_irqs(void) + void __init probe_nr_irqs_gsi(void) { - return NR_IRQS; + int idx; + int nr = 0; + + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; } /* -------------------------------------------------------------------------- @@@ -3703,19 -3968,31 +3966,31 @@@ int __init io_apic_get_version(int ioap int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { + struct irq_desc *desc; + struct irq_cfg *cfg; + int cpu = boot_cpu_id; + if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); return -EINVAL; } + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc %d\n", irq); + return 0; + } + /* * IRQs < 16 are already in the irq_2_pin[] map */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); + if (irq >= NR_IRQS_LEGACY) { + cfg = desc->chip_data; + add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); + } - setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); + setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); return 0; } @@@ -3769,9 -4046,10 +4044,10 @@@ void __init setup_ioapic_dest(void * when you have too many devices, because at that time only boot * cpu is online. 
*/ - cfg = irq_cfg(irq); + desc = irq_to_desc(irq); + cfg = desc->chip_data; if (!cfg->vector) { - setup_IO_APIC_irq(ioapic, pin, irq, + setup_IO_APIC_irq(ioapic, pin, irq, desc, irq_trigger(irq_entry), irq_polarity(irq_entry)); continue; @@@ -3781,7 -4059,6 +4057,6 @@@ /* * Honour affinities which have been set in early boot */ - desc = irq_to_desc(irq); if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) mask = desc->affinity; @@@ -3790,10 -4067,10 +4065,10 @@@ #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, &mask); + set_ir_ioapic_affinity_irq_desc(desc, mask); else #endif - set_ioapic_affinity_irq(irq, &mask); + set_ioapic_affinity_irq_desc(desc, mask); } } @@@ -3842,7 -4119,6 +4117,6 @@@ void __init ioapic_init_mappings(void struct resource *ioapic_res; int i; - irq_2_pin_init(); ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { diff --combined arch/x86/kernel/irq_32.c index 87870a49be4,119fc9c8ff7..9cf9cbbf7a0 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@@ -242,6 -242,8 +242,8 @@@ void fixup_irqs(cpumask_t map for_each_irq_desc(irq, desc) { cpumask_t mask; + if (!desc) + continue; if (irq == 2) continue; @@@ -251,7 -253,7 +253,7 @@@ mask = map; } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, mask); + desc->chip->set_affinity(irq, &mask); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --combined arch/x86/kernel/irq_64.c index 8cbd069e5b4,a174a217eb1..54c69d47a77 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@@ -91,6 -91,8 +91,8 @@@ void fixup_irqs(cpumask_t map int break_affinity = 0; int set_affinity = 1; + if (!desc) + continue; if (irq == 2) continue; @@@ -113,7 -115,7 +115,7 @@@ desc->chip->mask(irq); if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, mask); + desc->chip->set_affinity(irq, &mask); else if (!(warned++)) set_affinity = 0; diff --combined drivers/xen/events.c index eba5ec5b020,46625cd3874..add640ff5c6 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@@ -141,8 -141,12 +141,12 @@@ static void init_evtchn_cpu_bindings(vo int i; /* By default all event channels notify CPU#0. */ - for_each_irq_desc(i, desc) + for_each_irq_desc(i, desc) { + if (!desc) + continue; + desc->affinity = cpumask_of_cpu(0); + } #endif memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); @@@ -229,15 -233,20 +233,20 @@@ static void unmask_evtchn(int port static int find_unbound_irq(void) { int irq; + struct irq_desc *desc; /* Only allocate from dynirq range */ - for_each_irq_nr(irq) + for (irq = 0; irq < nr_irqs; irq++) if (irq_bindcount[irq] == 0) break; if (irq == nr_irqs) panic("No available IRQ to bind to: increase nr_irqs!\n"); + desc = irq_to_desc_alloc_cpu(irq, 0); + if (WARN_ON(desc == NULL)) + return -1; + return irq; } @@@ -579,7 -588,7 +588,7 @@@ void rebind_evtchn_irq(int evtchn, int spin_unlock(&irq_mapping_update_lock); /* new event channels are always bound to cpu 0 */ - irq_set_affinity(irq, cpumask_of_cpu(0)); + irq_set_affinity(irq, cpumask_of(0)); /* Unmask the event channel. 
*/ enable_irq(irq); @@@ -608,9 -617,9 +617,9 @@@ static void rebind_irq_to_cpu(unsigned } -static void set_affinity_irq(unsigned irq, cpumask_t dest) +static void set_affinity_irq(unsigned irq, const struct cpumask *dest) { - unsigned tcpu = first_cpu(dest); + unsigned tcpu = cpumask_first(dest); rebind_irq_to_cpu(irq, tcpu); } @@@ -792,7 -801,7 +801,7 @@@ void xen_irq_resume(void mask_evtchn(evtchn); /* No IRQ <-> event-channel mappings. */ - for_each_irq_nr(irq) + for (irq = 0; irq < nr_irqs; irq++) irq_info[irq].evtchn = 0; /* zap event-channel binding */ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) @@@ -824,7 -833,7 +833,7 @@@ void __init xen_init_IRQ(void mask_evtchn(i); /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - for_each_irq_nr(i) + for (i = 0; i < nr_irqs; i++) irq_bindcount[i] = 0; irq_ctx_init(smp_processor_id()); diff --combined include/linux/interrupt.h index 48e63934fab,be3c484b524..dfaee6bd265 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@@ -14,6 -14,8 +14,8 @@@ #include #include #include + #include + #include #include #include @@@ -109,13 -111,13 +111,13 @@@ extern void enable_irq(unsigned int irq extern cpumask_t irq_default_affinity; -extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); +extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); #else /* CONFIG_SMP */ -static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask) +static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) { return -EINVAL; } @@@ -251,9 -253,6 +253,6 @@@ enu BLOCK_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, - #ifdef CONFIG_HIGH_RES_TIMERS - HRTIMER_SOFTIRQ, - #endif RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS diff --combined include/linux/irq.h index ab70fd604d3,98564dc6447..5845bdc1ac0 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@@ -113,8 -113,7 +113,8 @@@ struct irq_chip void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, cpumask_t dest); + void (*set_affinity)(unsigned int irq, + const struct cpumask *dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); int (*set_wake)(unsigned int irq, unsigned int on); @@@ -130,9 -129,14 +130,14 @@@ const char *typename; }; + struct timer_rand_state; + struct irq_2_iommu; /** * struct irq_desc - interrupt descriptor * @irq: interrupt number for this descriptor + * @timer_rand_state: pointer to timer rand state struct + * @kstat_irqs: irq stats per cpu + * @irq_2_iommu: iommu with this irq * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] * @chip: low level interrupt hardware access * @msi_desc: MSI descriptor @@@ -144,8 -148,8 +149,8 @@@ * @depth: disable-depth, for nested irq_disable() calls * @wake_depth: enable depth, for multiple set_irq_wake() callers * @irq_count: stats field to detect stalled irqs - * @irqs_unhandled: stats field for spurious unhandled interrupts * @last_unhandled: aging timer for unhandled count + * @irqs_unhandled: stats field for spurious unhandled interrupts * @lock: locking for SMP * @affinity: IRQ affinity on SMP * @cpu: cpu index useful for balancing @@@ -155,6 -159,13 +160,13 @@@ */ struct irq_desc { unsigned int irq; + #ifdef CONFIG_SPARSE_IRQ + struct timer_rand_state *timer_rand_state; + unsigned int *kstat_irqs; + # ifdef CONFIG_INTR_REMAP + struct 
irq_2_iommu *irq_2_iommu; + # endif + #endif irq_flow_handler_t handle_irq; struct irq_chip *chip; struct msi_desc *msi_desc; @@@ -166,8 -177,8 +178,8 @@@ unsigned int depth; /* nested irq disables */ unsigned int wake_depth; /* nested wake enables */ unsigned int irq_count; /* For detecting broken IRQs */ - unsigned int irqs_unhandled; unsigned long last_unhandled; /* Aging timer for unhandled count */ + unsigned int irqs_unhandled; spinlock_t lock; #ifdef CONFIG_SMP cpumask_t affinity; @@@ -182,12 -193,51 +194,51 @@@ const char *name; } ____cacheline_internodealigned_in_smp; + extern void early_irq_init(void); + extern void arch_early_irq_init(void); + extern void arch_init_chip_data(struct irq_desc *desc, int cpu); + extern void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu); + extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); + #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; static inline struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < nr_irqs) ? irq_desc + irq : NULL; + return (irq < NR_IRQS) ? irq_desc + irq : NULL; + } + static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) + { + return irq_to_desc(irq); + } + + #else + + extern struct irq_desc *irq_to_desc(unsigned int irq); + extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); + extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); + + # define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq)) + # define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq)) + + #define kstat_irqs_this_cpu(DESC) \ + ((DESC)->kstat_irqs[smp_processor_id()]) + #define kstat_incr_irqs_this_cpu(irqno, DESC) \ + ((DESC)->kstat_irqs[smp_processor_id()]++) + + #endif + + static inline struct irq_desc * + irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) + { + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + return irq_to_desc(irq); + #else + return desc; + #endif } /* @@@ -381,6 -431,11 +432,11 @@@ extern int set_irq_msi(unsigned int irq #define get_irq_data(irq) (irq_to_desc(irq)->handler_data) #define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc) + #define get_irq_desc_chip(desc) ((desc)->chip) + #define get_irq_desc_chip_data(desc) ((desc)->chip_data) + #define get_irq_desc_data(desc) ((desc)->handler_data) + #define get_irq_desc_msi(desc) ((desc)->msi_desc) + #endif /* CONFIG_GENERIC_HARDIRQS */ #endif /* !CONFIG_S390 */ diff --combined init/Kconfig index 8e9904fc302,13627191a60..f6281711166 --- a/init/Kconfig +++ b/init/Kconfig @@@ -924,15 -924,6 +924,15 @@@ config KMO endif # MODULES +config INIT_ALL_POSSIBLE + bool + help + Back when each arch used to define their own cpu_online_map and + cpu_possible_map, some of them chose to initialize cpu_possible_map + with all 1s, and others with all 0s. When they were centralised, + it was better to provide this option than to break all the archs + and have several arch maintainers persuing me down dark alleys. + config STOP_MACHINE bool default y @@@ -945,10 -936,90 +945,90 @@@ source "block/Kconfig config PREEMPT_NOTIFIERS bool + choice + prompt "RCU Implementation" + default CLASSIC_RCU + config CLASSIC_RCU - def_bool !PREEMPT_RCU + bool "Classic RCU" help This option selects the classic RCU implementation that is designed for best read-side performance on non-realtime - systems. 
Classic RCU is the default. Note that the - PREEMPT_RCU symbol is used to select/deselect this option. + systems. + + Select this option if you are unsure. + + config TREE_RCU + bool "Tree-based hierarchical RCU" + help + This option selects the RCU implementation that is + designed for very large SMP system with hundreds or + thousands of CPUs. + + config PREEMPT_RCU + bool "Preemptible RCU" + depends on PREEMPT + help + This option reduces the latency of the kernel by making certain + RCU sections preemptible. Normally RCU code is non-preemptible, if + this option is selected then read-only RCU sections become + preemptible. This helps latency, but may expose bugs due to + now-naive assumptions about each RCU read-side critical section + remaining on a given CPU through its execution. + + endchoice + + config RCU_TRACE + bool "Enable tracing for RCU" + depends on TREE_RCU || PREEMPT_RCU + help + This option provides tracing in RCU which presents stats + in debugfs for debugging RCU implementation. + + Say Y here if you want to enable RCU tracing + Say N if you are unsure. + + config RCU_FANOUT + int "Tree-based hierarchical RCU fanout value" + range 2 64 if 64BIT + range 2 32 if !64BIT + depends on TREE_RCU + default 64 if 64BIT + default 32 if !64BIT + help + This option controls the fanout of hierarchical implementations + of RCU, allowing RCU to work efficiently on machines with + large numbers of CPUs. This value must be at least the cube + root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit + systems and up to 262,144 for 64-bit systems. + + Select a specific number if testing RCU itself. + Take the default if unsure. + + config RCU_FANOUT_EXACT + bool "Disable tree-based hierarchical RCU auto-balancing" + depends on TREE_RCU + default n + help + This option forces use of the exact RCU_FANOUT value specified, + regardless of imbalances in the hierarchy. This is useful for + testing RCU itself, and might one day be useful on systems with + strong NUMA behavior. + + Without RCU_FANOUT_EXACT, the code will balance the hierarchy. + + Say N if unsure. + + config TREE_RCU_TRACE + def_bool RCU_TRACE && TREE_RCU + select DEBUG_FS + help + This option provides tracing for the TREE_RCU implementation, + permitting Makefile to trivially select kernel/rcutree_trace.c. + + config PREEMPT_RCU_TRACE + def_bool RCU_TRACE && PREEMPT_RCU + select DEBUG_FS + help + This option provides tracing for the PREEMPT_RCU implementation, + permitting Makefile to trivially select kernel/rcupreempt_trace.c. 
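(Side note on the RCU_FANOUT help text added above; this is an illustrative
user-space sketch only, not part of the patch, and the helper name
min_fanout() is made up for the example. The help text says the fan-out must
be at least the cube root of NR_CPUS, i.e. a three-level tree with fan-out F
covers at most F*F*F CPUs, which gives the 32,768 / 262,144 limits quoted.)

#include <stdio.h>

/* Smallest fan-out F (>= 2, matching the Kconfig range) with F*F*F >= nr_cpus. */
static int min_fanout(long nr_cpus)
{
	int f = 2;

	while ((long)f * f * f < nr_cpus)
		f++;
	return f;
}

int main(void)
{
	/* Limits quoted in the RCU_FANOUT help text above. */
	printf("fanout 32 covers up to %ld CPUs (32-bit)\n", 32L * 32 * 32); /* 32768 */
	printf("fanout 64 covers up to %ld CPUs (64-bit)\n", 64L * 64 * 64); /* 262144 */
	printf("NR_CPUS=4096 needs fanout >= %d\n", min_fanout(4096));       /* 16 */
	return 0;
}
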
diff --combined kernel/irq/chip.c index 58d8e31daa4,6eb3c7952b6..f63c706d25e --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@@ -24,9 -24,10 +24,10 @@@ */ void dynamic_irq_init(unsigned int irq) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc; unsigned long flags; + desc = irq_to_desc(irq); if (!desc) { WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); return; @@@ -45,7 -46,7 +46,7 @@@ desc->irq_count = 0; desc->irqs_unhandled = 0; #ifdef CONFIG_SMP - cpus_setall(desc->affinity); + cpumask_setall(&desc->affinity); #endif spin_unlock_irqrestore(&desc->lock, flags); } @@@ -124,6 -125,7 +125,7 @@@ int set_irq_type(unsigned int irq, unsi return -ENODEV; } + type &= IRQ_TYPE_SENSE_MASK; if (type == IRQ_TYPE_NONE) return 0; @@@ -352,6 -354,7 +354,7 @@@ handle_level_irq(unsigned int irq, stru spin_lock(&desc->lock); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@@ -429,6 -432,7 +432,7 @@@ handle_fasteoi_irq(unsigned int irq, st desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@@ -465,12 -469,14 +469,14 @@@ handle_edge_irq(unsigned int irq, struc !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@@ -531,8 -537,10 +537,10 @@@ handle_percpu_irq(unsigned int irq, str if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) + if (desc->chip->eoi) { desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); + } } void @@@ -567,8 -575,10 +575,10 @@@ __set_irq_handler(unsigned int irq, irq /* Uninstall? */ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) + if (desc->chip != &no_irq_chip) { mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); + } desc->status |= IRQ_DISABLED; desc->depth = 1; } diff --combined kernel/irq/manage.c index 10ad2f87ed9,540f6c49f3f..61c4a9b6216 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@@ -79,7 -79,7 +79,7 @@@ int irq_can_set_affinity(unsigned int i * @cpumask: cpumask * */ -int irq_set_affinity(unsigned int irq, cpumask_t cpumask) +int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@@ -91,14 -91,14 +91,14 @@@ #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { - desc->affinity = cpumask; + cpumask_copy(&desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); } else { desc->status |= IRQ_MOVE_PENDING; - desc->pending_mask = cpumask; + cpumask_copy(&desc->pending_mask, cpumask); } #else - desc->affinity = cpumask; + cpumask_copy(&desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); #endif desc->status |= IRQ_AFFINITY_SET; @@@ -112,24 -112,26 +112,24 @@@ */ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) { - cpumask_t mask; - if (!irq_can_set_affinity(irq)) return 0; - cpus_and(mask, cpu_online_map, irq_default_affinity); - /* * Preserve an userspace affinity setup, but make sure that * one of the targets is online. 
*/ if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { - if (cpus_intersects(desc->affinity, cpu_online_map)) - mask = desc->affinity; + if (cpumask_any_and(&desc->affinity, cpu_online_mask) + < nr_cpu_ids) + goto set_affinity; else desc->status &= ~IRQ_AFFINITY_SET; } - desc->affinity = mask; - desc->chip->set_affinity(irq, mask); + cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity); +set_affinity: + desc->chip->set_affinity(irq, &desc->affinity); return 0; } @@@ -368,16 -370,18 +368,18 @@@ int __irq_set_trigger(struct irq_desc * return 0; } - ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK); + /* caller masked out all except trigger mode flags */ + ret = chip->set_type(irq, flags); if (ret) pr_err("setting trigger mode %d for irq %u failed (%pF)\n", - (int)(flags & IRQF_TRIGGER_MASK), - irq, chip->set_type); + (int)flags, irq, chip->set_type); else { + if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) + flags |= IRQ_LEVEL; /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */ - desc->status &= ~IRQ_TYPE_SENSE_MASK; - desc->status |= flags & IRQ_TYPE_SENSE_MASK; + desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); + desc->status |= flags; } return ret; @@@ -457,7 -461,8 +459,8 @@@ __setup_irq(unsigned int irq, struct ir /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { - ret = __irq_set_trigger(desc, irq, new->flags); + ret = __irq_set_trigger(desc, irq, + new->flags & IRQF_TRIGGER_MASK); if (ret) { spin_unlock_irqrestore(&desc->lock, flags); @@@ -671,6 -676,18 +674,18 @@@ int request_irq(unsigned int irq, irq_h struct irq_desc *desc; int retval; + /* + * handle_IRQ_event() always ignores IRQF_DISABLED except for + * the _first_ irqaction (sigh). That can cause oopsing, but + * the behavior is classified as "will not fix" so we need to + * start nudging drivers away from using that idiom. + */ + if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) + == (IRQF_SHARED|IRQF_DISABLED)) + pr_warning("IRQ %d/%s: IRQF_DISABLED is not " + "guaranteed on shared IRQs\n", + irq, devname); + #ifdef CONFIG_LOCKDEP /* * Lockdep wants atomic interrupt handlers: diff --combined kernel/irq/proc.c index 8e91c976252,f6b3440f05b..d2c0e5ee53c --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@@ -40,42 -40,33 +40,42 @@@ static ssize_t irq_affinity_proc_write( const char __user *buffer, size_t count, loff_t *pos) { unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; - cpumask_t new_value; + cpumask_var_t new_value; int err; if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity || irq_balancing_disabled(irq)) return -EIO; + if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) + return -ENOMEM; + err = cpumask_parse_user(buffer, count, new_value); if (err) - return err; + goto free_cpumask; - if (!is_affinity_mask_valid(new_value)) - return -EINVAL; + if (!is_affinity_mask_valid(*new_value)) { + err = -EINVAL; + goto free_cpumask; + } /* * Do not allow disabling IRQs completely - it's a too easy * way to make the system unusable accidentally :-) At least * one online CPU still has to be targeted. */ - if (!cpus_intersects(new_value, cpu_online_map)) + if (!cpumask_intersects(new_value, cpu_online_mask)) { /* Special case for empty set - allow the architecture code to set default SMP affinity. */ - return irq_select_affinity_usr(irq) ? -EINVAL : count; - - irq_set_affinity(irq, new_value); - - return count; + err = irq_select_affinity_usr(irq) ? 
-EINVAL : count; + } else { + irq_set_affinity(irq, new_value); + err = count; + } + +free_cpumask: + free_cpumask_var(new_value); + return err; } static int irq_affinity_proc_open(struct inode *inode, struct file *file) @@@ -104,7 -95,7 +104,7 @@@ static ssize_t default_affinity_write(s cpumask_t new_value; int err; - err = cpumask_parse_user(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, &new_value); if (err) return err; @@@ -252,7 -243,11 +252,11 @@@ void init_irq_proc(void /* * Create entries for all existing IRQs. */ - for_each_irq_desc(irq, desc) + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + register_irq_proc(irq, desc); + } } diff --combined kernel/sched.c index bdd180a0c6b,fff1c4a20b6..f2095660efe --- a/kernel/sched.c +++ b/kernel/sched.c @@@ -209,7 -209,6 +209,6 @@@ void init_rt_bandwidth(struct rt_bandwi hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rt_b->rt_period_timer.function = sched_rt_period_timer; - rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; } static inline int rt_bandwidth_enabled(void) @@@ -1139,7 -1138,6 +1138,6 @@@ static void init_rq_hrtick(struct rq *r hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rq->hrtick_timer.function = hrtick; - rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; } #else /* CONFIG_SCHED_HRTICK */ static inline void hrtick_clear(struct rq *rq) @@@ -4192,7 -4190,6 +4190,6 @@@ void account_steal_time(struct task_str if (p == rq->idle) { p->stime = cputime_add(p->stime, steal); - account_group_system_time(p, steal); if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else @@@ -4328,7 -4325,7 +4325,7 @@@ void __kprobes sub_preempt_count(int va /* * Underflow? */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) + if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) return; /* * Is the spinlock portion underflowing? 
@@@ -6647,7 -6644,7 +6644,7 @@@ static int sched_domain_debug_one(struc struct sched_group *group = sd->groups; char str[256]; - cpulist_scnprintf(str, sizeof(str), sd->span); + cpulist_scnprintf(str, sizeof(str), &sd->span); cpus_clear(*groupmask); printk(KERN_DEBUG "%*s domain %d: ", level, "", level); @@@ -6700,7 -6697,7 +6697,7 @@@ cpus_or(*groupmask, *groupmask, group->cpumask); - cpulist_scnprintf(str, sizeof(str), group->cpumask); + cpulist_scnprintf(str, sizeof(str), &group->cpumask); printk(KERN_CONT " %s", str); group = group->next; @@@ -7101,7 -7098,7 +7098,7 @@@ cpu_to_phys_group(int cpu, const cpumas { int group; #ifdef CONFIG_SCHED_MC - *mask = cpu_coregroup_map(cpu); + *mask = *cpu_coregroup_mask(cpu); cpus_and(*mask, *mask, *cpu_map); group = first_cpu(*mask); #elif defined(CONFIG_SCHED_SMT) @@@ -7474,7 -7471,7 +7471,7 @@@ static int __build_sched_domains(const sd = &per_cpu(core_domains, i); SD_INIT(sd, MC); set_domain_attribute(sd, attr); - sd->span = cpu_coregroup_map(i); + sd->span = *cpu_coregroup_mask(i); cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; @@@ -7517,7 -7514,7 +7514,7 @@@ SCHED_CPUMASK_VAR(this_core_map, allmasks); SCHED_CPUMASK_VAR(send_covered, allmasks); - *this_core_map = cpu_coregroup_map(i); + *this_core_map = *cpu_coregroup_mask(i); cpus_and(*this_core_map, *this_core_map, *cpu_map); if (i != first_cpu(*this_core_map)) continue; diff --combined kernel/trace/trace.c index c8760ec0e46,4185d522163..0e91f43b6ba --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@@ -30,7 -30,6 +30,6 @@@ #include #include #include - #include #include #include @@@ -1310,7 -1309,7 +1309,7 @@@ enum trace_file_type TRACE_FILE_ANNOTATE = 2, }; - static void trace_iterator_increment(struct trace_iterator *iter, int cpu) + static void trace_iterator_increment(struct trace_iterator *iter) { /* Don't allow ftrace to trace into the ring buffers */ ftrace_disable_cpu(); @@@ -1389,7 -1388,7 +1388,7 @@@ static void *find_next_entry_inc(struc iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); if (iter->ent) - trace_iterator_increment(iter, iter->cpu); + trace_iterator_increment(iter); return iter->ent ? iter : NULL; } @@@ -2675,7 -2674,7 +2674,7 @@@ tracing_cpumask_read(struct file *filp mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, tracing_cpumask); + len = cpumask_scnprintf(mask_str, count, &tracing_cpumask); if (count - len < 2) { count = -EINVAL; goto out_err; @@@ -2696,7 -2695,7 +2695,7 @@@ tracing_cpumask_write(struct file *filp int err, cpu; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); + err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new); if (err) goto err_unlock; diff --combined mm/slub.c index 8e516e29f98,6cb7ad10785..0d861c3154b --- a/mm/slub.c +++ b/mm/slub.c @@@ -24,6 -24,7 +24,7 @@@ #include #include #include + #include /* * Lock order: @@@ -153,6 -154,10 +154,10 @@@ #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif + #define OO_SHIFT 16 + #define OO_MASK ((1 << OO_SHIFT) - 1) + #define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */ + /* Internal SLUB flags */ #define __OBJECT_POISON 0x80000000 /* Poison object */ #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ @@@ -178,7 -183,7 +183,7 @@@ static LIST_HEAD(slab_caches) * Tracking user of a slab. 
*/ struct track { - void *addr; /* Called from address */ + unsigned long addr; /* Called from address */ int cpu; /* Was running on cpu */ int pid; /* Pid context */ unsigned long when; /* When did the operation occur */ @@@ -290,7 -295,7 +295,7 @@@ static inline struct kmem_cache_order_o unsigned long size) { struct kmem_cache_order_objects x = { - (order << 16) + (PAGE_SIZE << order) / size + (order << OO_SHIFT) + (PAGE_SIZE << order) / size }; return x; @@@ -298,12 -303,12 +303,12 @@@ static inline int oo_order(struct kmem_cache_order_objects x) { - return x.x >> 16; + return x.x >> OO_SHIFT; } static inline int oo_objects(struct kmem_cache_order_objects x) { - return x.x & ((1 << 16) - 1); + return x.x & OO_MASK; } #ifdef CONFIG_SLUB_DEBUG @@@ -367,7 -372,7 +372,7 @@@ static struct track *get_track(struct k } static void set_track(struct kmem_cache *s, void *object, - enum track_item alloc, void *addr) + enum track_item alloc, unsigned long addr) { struct track *p; @@@ -391,8 -396,8 +396,8 @@@ static void init_tracking(struct kmem_c if (!(s->flags & SLAB_STORE_USER)) return; - set_track(s, object, TRACK_FREE, NULL); - set_track(s, object, TRACK_ALLOC, NULL); + set_track(s, object, TRACK_FREE, 0UL); + set_track(s, object, TRACK_ALLOC, 0UL); } static void print_track(const char *s, struct track *t) @@@ -401,7 -406,7 +406,7 @@@ return; printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", - s, t->addr, jiffies - t->when, t->cpu, t->pid); + s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); } static void print_tracking(struct kmem_cache *s, void *object) @@@ -692,7 -697,7 +697,7 @@@ static int check_object(struct kmem_cac if (!check_valid_pointer(s, page, get_freepointer(s, p))) { object_err(s, page, p, "Freepointer corrupt"); /* - * No choice but to zap it and thus loose the remainder + * No choice but to zap it and thus lose the remainder * of the free objects in this slab. May cause * another error because the object count is now wrong. */ @@@ -764,8 -769,8 +769,8 @@@ static int on_freelist(struct kmem_cach } max_objects = (PAGE_SIZE << compound_order(page)) / s->size; - if (max_objects > 65535) - max_objects = 65535; + if (max_objects > MAX_OBJS_PER_PAGE) + max_objects = MAX_OBJS_PER_PAGE; if (page->objects != max_objects) { slab_err(s, page, "Wrong number of objects. Found %d but " @@@ -866,7 -871,7 +871,7 @@@ static void setup_object_debug(struct k } static int alloc_debug_processing(struct kmem_cache *s, struct page *page, - void *object, void *addr) + void *object, unsigned long addr) { if (!check_slab(s, page)) goto bad; @@@ -906,7 -911,7 +911,7 @@@ bad } static int free_debug_processing(struct kmem_cache *s, struct page *page, - void *object, void *addr) + void *object, unsigned long addr) { if (!check_slab(s, page)) goto fail; @@@ -1029,10 -1034,10 +1034,10 @@@ static inline void setup_object_debug(s struct page *page, void *object) {} static inline int alloc_debug_processing(struct kmem_cache *s, - struct page *page, void *object, void *addr) { return 0; } + struct page *page, void *object, unsigned long addr) { return 0; } static inline int free_debug_processing(struct kmem_cache *s, - struct page *page, void *object, void *addr) { return 0; } + struct page *page, void *object, unsigned long addr) { return 0; } static inline int slab_pad_check(struct kmem_cache *s, struct page *page) { return 1; } @@@ -1499,8 -1504,8 +1504,8 @@@ static inline int node_match(struct kme * we need to allocate a new slab. 
This is the slowest path since it involves * a call to the page allocator and the setup of a new slab. */ - static void *__slab_alloc(struct kmem_cache *s, - gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) + static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + unsigned long addr, struct kmem_cache_cpu *c) { void **object; struct page *new; @@@ -1584,13 -1589,18 +1589,18 @@@ debug * Otherwise we can simply pick the next object from the lockless free list. */ static __always_inline void *slab_alloc(struct kmem_cache *s, - gfp_t gfpflags, int node, void *addr) + gfp_t gfpflags, int node, unsigned long addr) { void **object; struct kmem_cache_cpu *c; unsigned long flags; unsigned int objsize; + might_sleep_if(gfpflags & __GFP_WAIT); + + if (should_failslab(s->objsize, gfpflags)) + return NULL; + local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); objsize = c->objsize; @@@ -1613,14 -1623,14 +1623,14 @@@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) { - return slab_alloc(s, gfpflags, -1, __builtin_return_address(0)); + return slab_alloc(s, gfpflags, -1, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_alloc); #ifdef CONFIG_NUMA void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { - return slab_alloc(s, gfpflags, node, __builtin_return_address(0)); + return slab_alloc(s, gfpflags, node, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_alloc_node); #endif @@@ -1634,7 -1644,7 +1644,7 @@@ * handling required then we can return immediately. */ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, void *addr, unsigned int offset) + void *x, unsigned long addr, unsigned int offset) { void *prior; void **object = (void *)x; @@@ -1704,7 -1714,7 +1714,7 @@@ debug * with all sorts of special processing. */ static __always_inline void slab_free(struct kmem_cache *s, - struct page *page, void *x, void *addr) + struct page *page, void *x, unsigned long addr) { void **object = (void *)x; struct kmem_cache_cpu *c; @@@ -1731,11 -1741,11 +1741,11 @@@ void kmem_cache_free(struct kmem_cache page = virt_to_head_page(x); - slab_free(s, page, x, __builtin_return_address(0)); + slab_free(s, page, x, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_free); - /* Figure out on which slab object the object resides */ + /* Figure out on which slab page the object resides */ static struct page *get_object_page(const void *x) { struct page *page = virt_to_head_page(x); @@@ -1807,8 -1817,8 +1817,8 @@@ static inline int slab_order(int size, int rem; int min_order = slub_min_order; - if ((PAGE_SIZE << min_order) / size > 65535) - return get_order(size * 65535) - 1; + if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE) + return get_order(size * MAX_OBJS_PER_PAGE) - 1; for (order = max(min_order, fls(min_objects * size - 1) - PAGE_SHIFT); @@@ -2073,8 -2083,7 +2083,7 @@@ static inline int alloc_kmem_cache_cpus * when allocating for the kmalloc_node_cache. This is used for bootstrapping * memory on a fresh node that has no slab structures yet. 
*/ - static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, - int node) + static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node) { struct page *page; struct kmem_cache_node *n; @@@ -2112,7 -2121,6 +2121,6 @@@ local_irq_save(flags); add_partial(n, page, 0); local_irq_restore(flags); - return n; } static void free_kmem_cache_nodes(struct kmem_cache *s) @@@ -2144,8 -2152,7 +2152,7 @@@ static int init_kmem_cache_nodes(struc n = &s->local_node; else { if (slab_state == DOWN) { - n = early_kmem_cache_node_alloc(gfpflags, - node); + early_kmem_cache_node_alloc(gfpflags, node); continue; } n = kmem_cache_alloc_node(kmalloc_caches, @@@ -2659,7 -2666,7 +2666,7 @@@ void *__kmalloc(size_t size, gfp_t flag if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, flags, -1, __builtin_return_address(0)); + return slab_alloc(s, flags, -1, _RET_IP_); } EXPORT_SYMBOL(__kmalloc); @@@ -2687,7 -2694,7 +2694,7 @@@ void *__kmalloc_node(size_t size, gfp_ if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, flags, node, __builtin_return_address(0)); + return slab_alloc(s, flags, node, _RET_IP_); } EXPORT_SYMBOL(__kmalloc_node); #endif @@@ -2744,7 -2751,7 +2751,7 @@@ void kfree(const void *x put_page(page); return; } - slab_free(page->slab, page, object, __builtin_return_address(0)); + slab_free(page->slab, page, object, _RET_IP_); } EXPORT_SYMBOL(kfree); @@@ -3123,8 -3130,12 +3130,12 @@@ struct kmem_cache *kmem_cache_create(co s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); - if (sysfs_slab_alias(s, name)) + if (sysfs_slab_alias(s, name)) { + down_write(&slub_lock); + s->refcount--; + up_write(&slub_lock); goto err; + } return s; } @@@ -3134,8 -3145,13 +3145,13 @@@ size, align, flags, ctor)) { list_add(&s->list, &slab_caches); up_write(&slub_lock); - if (sysfs_slab_add(s)) + if (sysfs_slab_add(s)) { + down_write(&slub_lock); + list_del(&s->list); + up_write(&slub_lock); + kfree(s); goto err; + } return s; } kfree(s); @@@ -3202,7 -3218,7 +3218,7 @@@ static struct notifier_block __cpuinitd #endif - void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) + void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) { struct kmem_cache *s; @@@ -3218,7 -3234,7 +3234,7 @@@ } void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, - int node, void *caller) + int node, unsigned long caller) { struct kmem_cache *s; @@@ -3429,7 -3445,7 +3445,7 @@@ static void resiliency_test(void) {} struct location { unsigned long count; - void *addr; + unsigned long addr; long long sum_time; long min_time; long max_time; @@@ -3477,7 -3493,7 +3493,7 @@@ static int add_location(struct loc_trac { long start, end, pos; struct location *l; - void *caddr; + unsigned long caddr; unsigned long age = jiffies - track->when; start = -1; @@@ -3626,7 -3642,7 +3642,7 @@@ static int list_locations(struct kmem_c len < PAGE_SIZE - 60) { len += sprintf(buf + len, " cpus="); len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, - l->cpus); + &l->cpus); } if (num_online_nodes() > 1 && !nodes_empty(l->nodes) && @@@ -4345,7 -4361,7 +4361,7 @@@ static void sysfs_slab_remove(struct km /* * Need to buffer aliases during bootup until sysfs becomes - * available lest we loose that information. + * available lest we lose that information. */ struct saved_alias { struct kmem_cache *s;