From: Rusty Russell Date: Wed, 31 Dec 2008 12:35:57 +0000 (+1030) Subject: Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 X-Git-Tag: v2.6.29-rc1~521^2~11^2~33 X-Git-Url: http://www.pilppa.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=2ca1a615835d9f4990f42102ab1f2ef434e7e89c;hp=-c;p=linux-2.6-omap-h63xx.git Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 Conflicts: arch/x86/kernel/io_apic.c --- 2ca1a615835d9f4990f42102ab1f2ef434e7e89c diff --combined arch/arm/kernel/smp.c index bd905c0a736,019237d2162..55fa7ff96a3 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@@ -33,6 -33,16 +33,6 @@@ #include #include -/* - * bitmask of present and online CPUs. - * The present bitmask indicates that the CPU is physically present. - * The online bitmask indicates that the CPU is up and running. - */ -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); -cpumask_t cpu_online_map; -EXPORT_SYMBOL(cpu_online_map); - /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core @@@ -171,7 -181,7 +171,7 @@@ int __cpuexit __cpu_disable(void /* * Stop the local timer for this CPU. */ - local_timer_stop(cpu); + local_timer_stop(); /* * Flush user cache and TLB mappings, and then remove this CPU @@@ -274,7 -284,7 +274,7 @@@ asmlinkage void __cpuinit secondary_sta /* * Setup local timer for this CPU. */ - local_timer_setup(cpu); + local_timer_setup(); calibrate_delay(); diff --combined arch/arm/mach-at91/at91rm9200_time.c index 72f51d39202,d140eae53de..1ff1bda0a89 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@@ -141,6 -141,15 +141,15 @@@ clkevt32k_next_event(unsigned long delt /* Use "raw" primitives so we behave correctly on RT kernels. */ raw_local_irq_save(flags); + /* + * According to Thomas Gleixner irqs are already disabled here. Simply + * removing raw_local_irq_save above (and the matching + * raw_local_irq_restore) was not accepted. See + * http://thread.gmane.org/gmane.linux.ports.arm.kernel/41174 + * So for now (2008-11-20) just warn once if irqs were not disabled ... + */ + WARN_ON_ONCE(!raw_irqs_disabled_flags(flags)); + /* The alarm IRQ uses absolute time (now+delta), not the relative * time (delta) in our calling convention. Like all clockevents * using such "match" hardware, we have a race to defend against. @@@ -169,6 -178,7 +178,6 @@@ static struct clock_event_device clkev .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 150, - .cpumask = CPU_MASK_CPU0, .set_next_event = clkevt32k_next_event, .set_mode = clkevt32k_mode, }; @@@ -196,7 -206,7 +205,7 @@@ void __init at91rm9200_timer_init(void clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift); clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt); clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1; - clkevt.cpumask = cpumask_of_cpu(0); + clkevt.cpumask = cpumask_of(0); clockevents_register_device(&clkevt); /* register clocksource */ diff --combined arch/arm/mach-pxa/time.c index bf3c9a4aad5,00162415851..95656a72268 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@@ -22,8 -22,8 +22,8 @@@ #include #include #include + #include #include - #include /* * This is PXA's sched_clock implementation. 
This has a resolution @@@ -122,6 -122,7 +122,6 @@@ static struct clock_event_device ckevt_ .features = CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 200, - .cpumask = CPU_MASK_CPU0, .set_next_event = pxa_osmr0_set_next_event, .set_mode = pxa_osmr0_set_mode, }; @@@ -149,18 -150,11 +149,11 @@@ static struct irqaction pxa_ost0_irq = static void __init pxa_timer_init(void) { - unsigned long clock_tick_rate; + unsigned long clock_tick_rate = get_clock_tick_rate(); OIER = 0; OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3; - if (cpu_is_pxa25x()) - clock_tick_rate = 3686400; - else if (machine_is_mainstone()) - clock_tick_rate = 3249600; - else - clock_tick_rate = 3250000; - set_oscr2ns_scale(clock_tick_rate); ckevt_pxa_osmr0.mult = @@@ -169,7 -163,6 +162,7 @@@ clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0); ckevt_pxa_osmr0.min_delta_ns = clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1; + ckevt_pxa_osmr0.cpumask = cpumask_of(0); cksrc_pxa_oscr0.mult = clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift); diff --combined arch/arm/mach-realview/core.c index b07cb9b7adb,5f1d55963ce..bd2aa4f1614 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@@ -28,11 -28,14 +28,14 @@@ #include #include #include + #include + #include #include #include #include #include + #include #include #include @@@ -49,7 -52,7 +52,7 @@@ #define REALVIEW_REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET) - /* used by entry-macro.S */ + /* used by entry-macro.S and platsmp.c */ void __iomem *gic_cpu_base_addr; /* @@@ -124,6 -127,29 +127,29 @@@ int realview_flash_register(struct reso return platform_device_register(&realview_flash_device); } + static struct smc911x_platdata realview_smc911x_platdata = { + .flags = SMC911X_USE_32BIT, + .irq_flags = IRQF_SHARED, + .irq_polarity = 1, + }; + + static struct platform_device realview_eth_device = { + .name = "smc911x", + .id = 0, + .num_resources = 2, + }; + + int realview_eth_register(const char *name, struct resource *res) + { + if (name) + realview_eth_device.name = name; + realview_eth_device.resource = res; + if (strcmp(realview_eth_device.name, "smc911x") == 0) + realview_eth_device.dev.platform_data = &realview_smc911x_platdata; + + return platform_device_register(&realview_eth_device); + } + static struct resource realview_i2c_resource = { .start = REALVIEW_I2C_BASE, .end = REALVIEW_I2C_BASE + SZ_4K - 1, @@@ -177,9 -203,14 +203,14 @@@ static const struct icst307_params real static void realview_oscvco_set(struct clk *clk, struct icst307_vco vco) { void __iomem *sys_lock = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_LOCK_OFFSET; - void __iomem *sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC4_OFFSET; + void __iomem *sys_osc; u32 val; + if (machine_is_realview_pb1176()) + sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC0_OFFSET; + else + sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC4_OFFSET; + val = readl(sys_osc) & ~0x7ffff; val |= vco.v | (vco.r << 9) | (vco.s << 16); @@@ -188,12 -219,59 +219,59 @@@ writel(0, sys_lock); } - struct clk realview_clcd_clk = { - .name = "CLCDCLK", + static struct clk oscvco_clk = { .params = &realview_oscvco_params, .setvco = realview_oscvco_set, }; + /* + * These are fixed clocks. 
+ */ + static struct clk ref24_clk = { + .rate = 24000000, + }; + + static struct clk_lookup lookups[] = { + { /* UART0 */ + .dev_id = "dev:f1", + .clk = &ref24_clk, + }, { /* UART1 */ + .dev_id = "dev:f2", + .clk = &ref24_clk, + }, { /* UART2 */ + .dev_id = "dev:f3", + .clk = &ref24_clk, + }, { /* UART3 */ + .dev_id = "fpga:09", + .clk = &ref24_clk, + }, { /* KMI0 */ + .dev_id = "fpga:06", + .clk = &ref24_clk, + }, { /* KMI1 */ + .dev_id = "fpga:07", + .clk = &ref24_clk, + }, { /* MMC0 */ + .dev_id = "fpga:05", + .clk = &ref24_clk, + }, { /* EB:CLCD */ + .dev_id = "dev:20", + .clk = &oscvco_clk, + }, { /* PB:CLCD */ + .dev_id = "issp:20", + .clk = &oscvco_clk, + } + }; + + static int __init clk_init(void) + { + int i; + + for (i = 0; i < ARRAY_SIZE(lookups); i++) + clkdev_add(&lookups[i]); + return 0; + } + arch_initcall(clk_init); + /* * CLCD support. */ @@@ -226,7 -304,30 +304,30 @@@ static struct clcd_panel vga = .width = -1, .height = -1, .tim2 = TIM2_BCD | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), + .bpp = 16, + }; + + static struct clcd_panel xvga = { + .mode = { + .name = "XVGA", + .refresh = 60, + .xres = 1024, + .yres = 768, + .pixclock = 15748, + .left_margin = 152, + .right_margin = 48, + .upper_margin = 23, + .lower_margin = 3, + .hsync_len = 104, + .vsync_len = 4, + .sync = 0, + .vmode = FB_VMODE_NONINTERLACED, + }, + .width = -1, + .height = -1, + .tim2 = TIM2_BCD | TIM2_IPC, + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), .bpp = 16, }; @@@ -249,7 -350,7 +350,7 @@@ static struct clcd_panel sanyo_3_8_in .width = -1, .height = -1, .tim2 = TIM2_BCD, - .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), .bpp = 16, }; @@@ -272,7 -373,7 +373,7 @@@ static struct clcd_panel sanyo_2_5_in .width = -1, .height = -1, .tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), .bpp = 16, }; @@@ -295,7 -396,7 +396,7 @@@ static struct clcd_panel epson_2_2_in .width = -1, .height = -1, .tim2 = TIM2_BCD | TIM2_IPC, - .cntl = CNTL_LCDTFT | CNTL_LCDVCOMP(1), + .cntl = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1), .bpp = 16, }; @@@ -308,9 -409,15 +409,15 @@@ static struct clcd_panel *realview_clcd_panel(void) { void __iomem *sys_clcd = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_CLCD_OFFSET; - struct clcd_panel *panel = &vga; + struct clcd_panel *vga_panel; + struct clcd_panel *panel; u32 val; + if (machine_is_realview_eb()) + vga_panel = &vga; + else + vga_panel = &xvga; + val = readl(sys_clcd) & SYS_CLCD_ID_MASK; if (val == SYS_CLCD_ID_SANYO_3_8) panel = &sanyo_3_8_in; @@@ -319,11 -426,11 +426,11 @@@ else if (val == SYS_CLCD_ID_EPSON_2_2) panel = &epson_2_2_in; else if (val == SYS_CLCD_ID_VGA) - panel = &vga; + panel = vga_panel; else { printk(KERN_ERR "CLCD: unknown LCD panel ID 0x%08x, using VGA\n", val); - panel = &vga; + panel = vga_panel; } return panel; @@@ -358,12 -465,18 +465,18 @@@ static void realview_clcd_enable(struc writel(val, sys_clcd); } - static unsigned long framesize = SZ_1M; - static int realview_clcd_setup(struct clcd_fb *fb) { + unsigned long framesize; dma_addr_t dma; + if (machine_is_realview_eb()) + /* VGA, 16bpp */ + framesize = 640 * 480 * 2; + else + /* XVGA, 16bpp */ + framesize = 1024 * 768 * 2; + fb->panel = realview_clcd_panel(); fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize, @@@ -511,7 -624,7 +624,7 @@@ static struct clock_event_device timer0 .set_mode = 
timer_set_mode, .set_next_event = timer_set_next_event, .rating = 300, - .cpumask = CPU_MASK_ALL, + .cpumask = cpu_all_mask, }; static void __init realview_clockevents_init(unsigned int timer_irq) @@@ -588,7 -701,7 +701,7 @@@ void __init realview_timer_init(unsigne * The dummy clock device has to be registered before the main device * so that the latter will broadcast the clock events */ - local_timer_setup(smp_processor_id()); + local_timer_setup(); #endif /* diff --combined arch/arm/mach-realview/localtimer.c index 504961ef343,9019ef2e561..67d6d9cc68b --- a/arch/arm/mach-realview/localtimer.c +++ b/arch/arm/mach-realview/localtimer.c @@@ -38,18 -38,14 +38,14 @@@ void local_timer_interrupt(void #ifdef CONFIG_LOCAL_TIMERS - #define TWD_BASE(cpu) (twd_base_addr + (cpu) * twd_size) - /* set up by the platform code */ - void __iomem *twd_base_addr; - unsigned int twd_size; + void __iomem *twd_base; static unsigned long mpcore_timer_rate; static void local_timer_set_mode(enum clock_event_mode mode, struct clock_event_device *clk) { - void __iomem *base = TWD_BASE(smp_processor_id()); unsigned long ctrl; switch(mode) { @@@ -68,17 -64,16 +64,16 @@@ ctrl = 0; } - __raw_writel(ctrl, base + TWD_TIMER_CONTROL); + __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); } static int local_timer_set_next_event(unsigned long evt, struct clock_event_device *unused) { - void __iomem *base = TWD_BASE(smp_processor_id()); - unsigned long ctrl = __raw_readl(base + TWD_TIMER_CONTROL); + unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL); - __raw_writel(evt, base + TWD_TIMER_COUNTER); - __raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, base + TWD_TIMER_CONTROL); + __raw_writel(evt, twd_base + TWD_TIMER_COUNTER); + __raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, twd_base + TWD_TIMER_CONTROL); return 0; } @@@ -91,19 -86,16 +86,16 @@@ */ int local_timer_ack(void) { - void __iomem *base = TWD_BASE(smp_processor_id()); - - if (__raw_readl(base + TWD_TIMER_INTSTAT)) { - __raw_writel(1, base + TWD_TIMER_INTSTAT); + if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) { + __raw_writel(1, twd_base + TWD_TIMER_INTSTAT); return 1; } return 0; } - static void __cpuinit twd_calibrate_rate(unsigned int cpu) + static void __cpuinit twd_calibrate_rate(void) { - void __iomem *base = TWD_BASE(cpu); unsigned long load, count; u64 waitjiffies; @@@ -124,15 -116,15 +116,15 @@@ waitjiffies += 5; /* enable, no interrupt or reload */ - __raw_writel(0x1, base + TWD_TIMER_CONTROL); + __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL); /* maximum value */ - __raw_writel(0xFFFFFFFFU, base + TWD_TIMER_COUNTER); + __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); while (get_jiffies_64() < waitjiffies) udelay(10); - count = __raw_readl(base + TWD_TIMER_COUNTER); + count = __raw_readl(twd_base + TWD_TIMER_COUNTER); mpcore_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5); @@@ -142,18 -134,19 +134,19 @@@ load = mpcore_timer_rate / HZ; - __raw_writel(load, base + TWD_TIMER_LOAD); + __raw_writel(load, twd_base + TWD_TIMER_LOAD); } /* * Setup the local clock events for a CPU. 
*/ - void __cpuinit local_timer_setup(unsigned int cpu) + void __cpuinit local_timer_setup(void) { + unsigned int cpu = smp_processor_id(); struct clock_event_device *clk = &per_cpu(local_clockevent, cpu); unsigned long flags; - twd_calibrate_rate(cpu); + twd_calibrate_rate(); clk->name = "local_timer"; clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; @@@ -161,7 -154,7 +154,7 @@@ clk->set_mode = local_timer_set_mode; clk->set_next_event = local_timer_set_next_event; clk->irq = IRQ_LOCALTIMER; - clk->cpumask = cpumask_of_cpu(cpu); + clk->cpumask = cpumask_of(cpu); clk->shift = 20; clk->mult = div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift); clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk); @@@ -178,9 -171,9 +171,9 @@@ /* * take a local timer down */ - void __cpuexit local_timer_stop(unsigned int cpu) + void __cpuexit local_timer_stop(void) { - __raw_writel(0, TWD_BASE(cpu) + TWD_TIMER_CONTROL); + __raw_writel(0, twd_base + TWD_TIMER_CONTROL); } #else /* CONFIG_LOCAL_TIMERS */ @@@ -190,8 -183,9 +183,9 @@@ static void dummy_timer_set_mode(enum c { } - void __cpuinit local_timer_setup(unsigned int cpu) + void __cpuinit local_timer_setup(void) { + unsigned int cpu = smp_processor_id(); struct clock_event_device *clk = &per_cpu(local_clockevent, cpu); clk->name = "dummy_timer"; @@@ -199,7 -193,7 +193,7 @@@ clk->rating = 200; clk->set_mode = dummy_timer_set_mode; clk->broadcast = smp_timer_broadcast; - clk->cpumask = cpumask_of_cpu(cpu); + clk->cpumask = cpumask_of(cpu); clockevents_register_device(clk); } diff --combined arch/arm/mach-sa1100/time.c index 1cac4ac0b4b,8c5e727f3b7..711c0295c66 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@@ -2,8 -2,8 +2,8 @@@ * linux/arch/arm/mach-sa1100/time.c * * Copyright (C) 1998 Deborah Wallach. - * Twiddles (C) 1999 Hugo Fiennes - * + * Twiddles (C) 1999 Hugo Fiennes + * * 2000/03/29 (C) Nicolas Pitre * Rewritten: big cleanup, much simpler, better HZ accuracy. 
* @@@ -73,6 -73,7 +73,6 @@@ static struct clock_event_device ckevt_ .features = CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 200, - .cpumask = CPU_MASK_CPU0, .set_next_event = sa1100_osmr0_set_next_event, .set_mode = sa1100_osmr0_set_mode, }; @@@ -109,7 -110,6 +109,7 @@@ static void __init sa1100_timer_init(vo clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0); ckevt_sa1100_osmr0.min_delta_ns = clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1; + ckevt_sa1100_osmr0.cpumask = cpumask_of(0); cksrc_sa1100_oscr.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift); diff --combined arch/arm/mach-versatile/core.c index a3f1933434e,df25aa13850..1c43494f5c4 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@@ -31,6 -31,7 +31,7 @@@ #include #include + #include #include #include #include @@@ -373,22 -374,60 +374,60 @@@ static const struct icst307_params vers static void versatile_oscvco_set(struct clk *clk, struct icst307_vco vco) { - void __iomem *sys_lock = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_LOCK_OFFSET; - void __iomem *sys_osc = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_OSCCLCD_OFFSET; + void __iomem *sys = __io_address(VERSATILE_SYS_BASE); + void __iomem *sys_lock = sys + VERSATILE_SYS_LOCK_OFFSET; u32 val; - val = readl(sys_osc) & ~0x7ffff; + val = readl(sys + clk->oscoff) & ~0x7ffff; val |= vco.v | (vco.r << 9) | (vco.s << 16); writel(0xa05f, sys_lock); - writel(val, sys_osc); + writel(val, sys + clk->oscoff); writel(0, sys_lock); } - static struct clk versatile_clcd_clk = { - .name = "CLCDCLK", + static struct clk osc4_clk = { .params = &versatile_oscvco_params, - .setvco = versatile_oscvco_set, + .oscoff = VERSATILE_SYS_OSCCLCD_OFFSET, + .setvco = versatile_oscvco_set, + }; + + /* + * These are fixed clocks. 
+ */ + static struct clk ref24_clk = { + .rate = 24000000, + }; + + static struct clk_lookup lookups[] __initdata = { + { /* UART0 */ + .dev_id = "dev:f1", + .clk = &ref24_clk, + }, { /* UART1 */ + .dev_id = "dev:f2", + .clk = &ref24_clk, + }, { /* UART2 */ + .dev_id = "dev:f3", + .clk = &ref24_clk, + }, { /* UART3 */ + .dev_id = "fpga:09", + .clk = &ref24_clk, + }, { /* KMI0 */ + .dev_id = "fpga:06", + .clk = &ref24_clk, + }, { /* KMI1 */ + .dev_id = "fpga:07", + .clk = &ref24_clk, + }, { /* MMC0 */ + .dev_id = "fpga:05", + .clk = &ref24_clk, + }, { /* MMC1 */ + .dev_id = "fpga:0b", + .clk = &ref24_clk, + }, { /* CLCD */ + .dev_id = "dev:20", + .clk = &osc4_clk, + } }; /* @@@ -786,7 -825,8 +825,8 @@@ void __init versatile_init(void { int i; - clk_register(&versatile_clcd_clk); + for (i = 0; i < ARRAY_SIZE(lookups); i++) + clkdev_add(&lookups[i]); platform_device_register(&versatile_flash_device); platform_device_register(&versatile_i2c_device); @@@ -965,7 -1005,7 +1005,7 @@@ static void __init versatile_timer_init timer0_clockevent.min_delta_ns = clockevent_delta2ns(0xf, &timer0_clockevent); - timer0_clockevent.cpumask = cpumask_of_cpu(0); + timer0_clockevent.cpumask = cpumask_of(0); clockevents_register_device(&timer0_clockevent); } diff --combined arch/sparc/kernel/irq_64.c index 4aaf18e83c8,a3ea2bcb95d..cab8e028687 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@@ -312,8 -312,7 +312,8 @@@ static void sun4u_irq_enable(unsigned i } } -static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4u_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { sun4u_irq_enable(virt_irq); } @@@ -363,8 -362,7 +363,8 @@@ static void sun4v_irq_enable(unsigned i ino, err); } -static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4v_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { unsigned int ino = virt_irq_table[virt_irq].dev_ino; unsigned long cpuid = irq_choose_cpu(virt_irq); @@@ -431,8 -429,7 +431,8 @@@ static void sun4v_virq_enable(unsigned dev_handle, dev_ino, err); } -static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4v_virt_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { unsigned long cpuid, dev_handle, dev_ino; int err; @@@ -778,6 -775,69 +778,69 @@@ void do_softirq(void local_irq_restore(flags); } + static void unhandled_perf_irq(struct pt_regs *regs) + { + unsigned long pcr, pic; + + read_pcr(pcr); + read_pic(pic); + + write_pcr(0); + + printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n", + smp_processor_id()); + printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n", + smp_processor_id(), pcr, pic); + } + + /* Almost a direct copy of the powerpc PMC code. */ + static DEFINE_SPINLOCK(perf_irq_lock); + static void *perf_irq_owner_caller; /* mostly for debugging */ + static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq; + + /* Invoked from level 15 PIL handler in trap table. 
*/ + void perfctr_irq(int irq, struct pt_regs *regs) + { + clear_softint(1 << irq); + perf_irq(regs); + } + + int register_perfctr_intr(void (*handler)(struct pt_regs *)) + { + int ret; + + if (!handler) + return -EINVAL; + + spin_lock(&perf_irq_lock); + if (perf_irq != unhandled_perf_irq) { + printk(KERN_WARNING "register_perfctr_intr: " + "perf IRQ busy (reserved by caller %p)\n", + perf_irq_owner_caller); + ret = -EBUSY; + goto out; + } + + perf_irq_owner_caller = __builtin_return_address(0); + perf_irq = handler; + + ret = 0; + out: + spin_unlock(&perf_irq_lock); + + return ret; + } + EXPORT_SYMBOL_GPL(register_perfctr_intr); + + void release_perfctr_intr(void (*handler)(struct pt_regs *)) + { + spin_lock(&perf_irq_lock); + perf_irq_owner_caller = NULL; + perf_irq = unhandled_perf_irq; + spin_unlock(&perf_irq_lock); + } + EXPORT_SYMBOL_GPL(release_perfctr_intr); + #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) { @@@ -791,7 -851,7 +854,7 @@@ !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, - irq_desc[irq].affinity); + &irq_desc[irq].affinity); } spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } diff --combined arch/sparc/kernel/of_device_64.c index 4f6098d318e,46e231f7c5c..4873f28905b --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@@ -778,9 -778,9 +778,9 @@@ static unsigned int __init build_one_de out: nid = of_node_to_nid(dp); if (nid != -1) { - cpumask_t numa_mask = node_to_cpumask(nid); + cpumask_t numa_mask = *cpumask_of_node(nid); - irq_set_affinity(irq, numa_mask); + irq_set_affinity(irq, &numa_mask); } return irq; @@@ -811,20 -811,20 +811,20 @@@ static struct of_device * __init scan_o irq = of_get_property(dp, "interrupts", &len); if (irq) { - memcpy(op->irqs, irq, len); op->num_irqs = len / 4; + + /* Prevent overrunning the op->irqs[] array. */ + if (op->num_irqs > PROMINTR_MAX) { + printk(KERN_WARNING "%s: Too many irqs (%d), " + "limiting to %d.\n", + dp->full_name, op->num_irqs, PROMINTR_MAX); + op->num_irqs = PROMINTR_MAX; + } + memcpy(op->irqs, irq, op->num_irqs * 4); } else { op->num_irqs = 0; } - /* Prevent overrunning the op->irqs[] array. 
*/ - if (op->num_irqs > PROMINTR_MAX) { - printk(KERN_WARNING "%s: Too many irqs (%d), " - "limiting to %d.\n", - dp->full_name, op->num_irqs, PROMINTR_MAX); - op->num_irqs = PROMINTR_MAX; - } - build_device_resources(op, parent); for (i = 0; i < op->num_irqs; i++) op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]); diff --combined arch/sparc/kernel/pci_msi.c index 4ef282e8191,2e680f34f72..4ef282e8191 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@@ -286,9 -286,9 +286,9 @@@ static int bringup_one_msi_queue(struc nid = pbm->numa_node; if (nid != -1) { - cpumask_t numa_mask = node_to_cpumask(nid); + cpumask_t numa_mask = *cpumask_of_node(nid); - irq_set_affinity(irq, numa_mask); + irq_set_affinity(irq, &numa_mask); } err = request_irq(irq, sparc64_msiq_interrupt, 0, "MSIQ", diff --combined arch/sparc/kernel/smp_32.c index 1e5ac4e282e,e396c1f17a9..1e5ac4e282e --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@@ -39,6 -39,8 +39,6 @@@ volatile unsigned long cpu_callin_map[N unsigned char boot_cpu_id = 0; unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* The only guaranteed locking primitive available on all Sparc @@@ -332,7 -334,7 +332,7 @@@ void __init smp_setup_cpu_possible_map( instance = 0; while (!cpu_find_by_instance(instance, NULL, &mid)) { if (mid < NR_CPUS) { - cpu_set(mid, phys_cpu_present_map); + cpu_set(mid, cpu_possible_map); cpu_set(mid, cpu_present_map); } instance++; @@@ -352,7 -354,7 +352,7 @@@ void __init smp_prepare_boot_cpu(void current_thread_info()->cpu = cpuid; cpu_set(cpuid, cpu_online_map); - cpu_set(cpuid, phys_cpu_present_map); + cpu_set(cpuid, cpu_possible_map); } int __cpuinit __cpu_up(unsigned int cpu) diff --combined arch/sparc/kernel/smp_64.c index a97b8822c22,bfe99d82d45..46329799f34 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@@ -49,10 -49,14 +49,10 @@@ int sparc64_multi_core __read_mostly; -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; -cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; -EXPORT_SYMBOL(cpu_possible_map); -EXPORT_SYMBOL(cpu_online_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); @@@ -159,7 -163,7 +159,7 @@@ static inline long get_delta (long *rt for (i = 0; i < NUM_ITERS; i++) { t0 = tick_ops->get_tick(); go[MASTER] = 1; - membar_storeload(); + membar_safe("#StoreLoad"); while (!(tm = go[SLAVE])) rmb(); go[SLAVE] = 0; @@@ -253,7 -257,7 +253,7 @@@ static void smp_synchronize_one_tick(in /* now let the client proceed into his loop */ go[MASTER] = 0; - membar_storeload(); + membar_safe("#StoreLoad"); spin_lock_irqsave(&itc_sync_lock, flags); { @@@ -263,7 -267,7 +263,7 @@@ go[MASTER] = 0; wmb(); go[SLAVE] = tick_ops->get_tick(); - membar_storeload(); + membar_safe("#StoreLoad"); } } spin_unlock_irqrestore(&itc_sync_lock, flags); @@@ -769,7 -773,7 +769,7 @@@ static void xcall_deliver(u64 data0, u6 /* Setup the initial cpu list. 
*/ cnt = 0; - for_each_cpu_mask_nr(i, *mask) { + for_each_cpu(i, mask) { if (i == this_cpu || !cpu_online(i)) continue; cpu_list[cnt++] = i; @@@ -1118,7 -1122,6 +1118,6 @@@ void smp_capture(void smp_processor_id()); #endif penguins_are_doing_time = 1; - membar_storestore_loadstore(); atomic_inc(&smp_capture_registry); smp_cross_call(&xcall_capture, 0, 0, 0); while (atomic_read(&smp_capture_registry) != ncpus) @@@ -1138,13 -1141,13 +1137,13 @@@ void smp_release(void smp_processor_id()); #endif penguins_are_doing_time = 0; - membar_storeload_storestore(); + membar_safe("#StoreLoad"); atomic_dec(&smp_capture_registry); } } - /* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they - * can service tlb flush xcalls... + /* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE + * set, so they can service tlb flush xcalls... */ extern void prom_world(int); @@@ -1157,7 -1160,7 +1156,7 @@@ void smp_penguin_jailcell(int irq, stru __asm__ __volatile__("flushw"); prom_world(1); atomic_inc(&smp_capture_registry); - membar_storeload_storestore(); + membar_safe("#StoreLoad"); while (penguins_are_doing_time) rmb(); atomic_dec(&smp_capture_registry); diff --combined arch/sparc/kernel/sparc_ksyms_32.c index 32d11a5fe3a,a4d45fc29b2..e1e97639231 --- a/arch/sparc/kernel/sparc_ksyms_32.c +++ b/arch/sparc/kernel/sparc_ksyms_32.c @@@ -61,7 -61,6 +61,6 @@@ extern void (*bzero_1page)(void *) extern void *__bzero(void *, size_t); extern void *__memscan_zero(void *, size_t); extern void *__memscan_generic(void *, int, size_t); - extern int __memcmp(const void *, const void *, __kernel_size_t); extern int __strncmp(const char *, const char *, __kernel_size_t); extern int __ashrdi3(int, int); @@@ -113,15 -112,17 +112,13 @@@ EXPORT_PER_CPU_SYMBOL(__cpu_data) #ifdef CONFIG_SMP /* IRQ implementation. */ EXPORT_SYMBOL(synchronize_irq); - -/* CPU online map and active count. 
*/ -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(phys_cpu_present_map); #endif EXPORT_SYMBOL(__udelay); EXPORT_SYMBOL(__ndelay); EXPORT_SYMBOL(rtc_lock); - #ifdef CONFIG_SUN_AUXIO EXPORT_SYMBOL(set_auxio); EXPORT_SYMBOL(get_auxio); - #endif EXPORT_SYMBOL(io_remap_pfn_range); #ifndef CONFIG_SMP @@@ -209,7 -210,6 +206,6 @@@ EXPORT_SYMBOL(bzero_1page) EXPORT_SYMBOL(__bzero); EXPORT_SYMBOL(__memscan_zero); EXPORT_SYMBOL(__memscan_generic); - EXPORT_SYMBOL(__memcmp); EXPORT_SYMBOL(__strncmp); EXPORT_SYMBOL(__memmove); diff --combined arch/sparc/kernel/time_64.c index 9df8f095a8b,141da375909..9df8f095a8b --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@@ -763,7 -763,7 +763,7 @@@ void __devinit setup_sparc64_timer(void sevt = &__get_cpu_var(sparc64_events); memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); - sevt->cpumask = cpumask_of_cpu(smp_processor_id()); + sevt->cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(sevt); } diff --combined arch/x86/include/asm/pci.h index f8959c7a985,66834c41c04..a977de23cb4 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@@ -84,6 -84,8 +84,8 @@@ static inline void pci_dma_burst_advice static inline void early_quirks(void) { } #endif + extern void pci_iommu_alloc(void); + #endif /* __KERNEL__ */ #ifdef CONFIG_X86_32 @@@ -100,9 -102,9 +102,9 @@@ #ifdef CONFIG_NUMA /* Returns the node based on pci bus */ -static inline int __pcibus_to_node(struct pci_bus *bus) +static inline int __pcibus_to_node(const struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; + const struct pci_sysdata *sd = bus->sysdata; return sd->node; } @@@ -111,12 -113,6 +113,12 @@@ static inline cpumask_t __pcibus_to_cpu { return node_to_cpumask(__pcibus_to_node(bus)); } + +static inline const struct cpumask * +cpumask_of_pcibus(const struct pci_bus *bus) +{ + return cpumask_of_node(__pcibus_to_node(bus)); +} #endif #endif /* _ASM_X86_PCI_H */ diff --combined arch/x86/kernel/hpet.c index b5310ff1259,845ea097383..cd759ad9069 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@@ -248,7 -248,7 +248,7 @@@ static void hpet_legacy_clockevent_regi * Start hpet with the boot cpu mask and make it * global after the IO_APIC has been initialized. */ - hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); + hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(&hpet_clockevent); global_clock_event = &hpet_clockevent; printk(KERN_DEBUG "hpet clockevent registered\n"); @@@ -303,7 -303,7 +303,7 @@@ static void hpet_set_mode(enum clock_ev struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); hpet_setup_msi_irq(hdev->irq); disable_irq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); + irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } break; @@@ -451,7 -451,7 +451,7 @@@ static int hpet_setup_irq(struct hpet_d return -1; disable_irq(dev->irq); - irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); + irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); enable_irq(dev->irq); printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", @@@ -502,7 -502,7 +502,7 @@@ static void init_one_hpet_msi_clockeven /* 5 usec minimum reprogramming delta. 
*/ evt->min_delta_ns = 5000; - evt->cpumask = cpumask_of_cpu(hdev->cpu); + evt->cpumask = cpumask_of(hdev->cpu); clockevents_register_device(evt); } @@@ -813,7 -813,7 +813,7 @@@ int __init hpet_enable(void out_nohpet: hpet_clear_mapping(); - boot_hpet_disable = 1; + hpet_address = 0; return 0; } @@@ -836,10 -836,11 +836,11 @@@ static __init int hpet_late_init(void hpet_address = force_hpet_address; hpet_enable(); - if (!hpet_virt_address) - return -ENODEV; } + if (!hpet_virt_address) + return -ENODEV; + hpet_reserve_platform_timers(hpet_readl(HPET_ID)); for_each_online_cpu(cpu) { diff --combined arch/x86/kernel/io_apic.c index 6dbf427175f,f6ea94b74da..e7745961ed3 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@@ -108,94 -108,253 +108,253 @@@ static int __init parse_noapic(char *st early_param("noapic", parse_noapic); struct irq_pin_list; + + /* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ + + struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; + }; + + static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) + { + struct irq_pin_list *pin; + int node; + + node = cpu_to_node(cpu); + + pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node); + + return pin; + } + struct irq_cfg { - unsigned int irq; struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + u8 move_desc_pending : 1; + #endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ + #ifdef CONFIG_SPARSE_IRQ + static struct irq_cfg irq_cfgx[] = { + #else static struct irq_cfg irq_cfgx[NR_IRQS] = { - [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, + #endif + [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .domain = CPU_MASK_ALL, .vector = 
IRQ9_VECTOR, }, + [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; - #define for_each_irq_cfg(irq, cfg) \ - for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) + void __init arch_early_irq_init(void) + { + struct irq_cfg *cfg; + struct irq_desc *desc; + int count; + int i; + + cfg = irq_cfgx; + count = ARRAY_SIZE(irq_cfgx); + for (i = 0; i < count; i++) { + desc = irq_to_desc(i); + desc->chip_data = &cfg[i]; + } + } + + #ifdef CONFIG_SPARSE_IRQ static struct irq_cfg *irq_cfg(unsigned int irq) { - return irq < nr_irqs ? irq_cfgx + irq : NULL; + struct irq_cfg *cfg = NULL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + if (desc) + cfg = desc->chip_data; + + return cfg; } - static struct irq_cfg *irq_cfg_alloc(unsigned int irq) + static struct irq_cfg *get_one_free_irq_cfg(int cpu) { - return irq_cfg(irq); + struct irq_cfg *cfg; + int node; + + node = cpu_to_node(cpu); + + cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); + + return cfg; } - /* - * Rough estimation of how many shared IRQs there are, can be changed - * anytime. - */ - #define MAX_PLUS_SHARED_IRQS NR_IRQS - #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + void arch_init_chip_data(struct irq_desc *desc, int cpu) + { + struct irq_cfg *cfg; - /* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. 
- */ + cfg = desc->chip_data; + if (!cfg) { + desc->chip_data = get_one_free_irq_cfg(cpu); + if (!desc->chip_data) { + printk(KERN_ERR "can not alloc irq_cfg\n"); + BUG_ON(1); + } + } + } - struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; - }; + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + + static void + init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) + { + struct irq_pin_list *old_entry, *head, *tail, *entry; + + cfg->irq_2_pin = NULL; + old_entry = old_cfg->irq_2_pin; + if (!old_entry) + return; + + entry = get_one_free_irq_2_pin(cpu); + if (!entry) + return; + + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + head = entry; + tail = entry; + old_entry = old_entry->next; + while (old_entry) { + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + entry = head; + while (entry) { + head = entry->next; + kfree(entry); + entry = head; + } + /* still use the old one */ + return; + } + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + tail->next = entry; + tail = entry; + old_entry = old_entry->next; + } - static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; - static struct irq_pin_list *irq_2_pin_ptr; + tail->next = NULL; + cfg->irq_2_pin = head; + } - static void __init irq_2_pin_init(void) + static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) { - struct irq_pin_list *pin = irq_2_pin_head; - int i; + struct irq_pin_list *entry, *next; + + if (old_cfg->irq_2_pin == cfg->irq_2_pin) + return; - for (i = 1; i < PIN_MAP_SIZE; i++) - pin[i-1].next = &pin[i]; + entry = old_cfg->irq_2_pin; - irq_2_pin_ptr = &pin[0]; + while (entry) { + next = entry->next; + kfree(entry); + entry = next; + } + old_cfg->irq_2_pin = NULL; } - static struct irq_pin_list *get_one_free_irq_2_pin(void) + void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) { - struct irq_pin_list *pin = irq_2_pin_ptr; + struct irq_cfg *cfg; + struct irq_cfg *old_cfg; - if (!pin) - panic("can not get more irq_2_pin\n"); + cfg = get_one_free_irq_cfg(cpu); - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; + if (!cfg) + return; + + desc->chip_data = cfg; + + old_cfg = old_desc->chip_data; + + memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + + init_copy_irq_2_pin(old_cfg, cfg, cpu); + } + + static void free_irq_cfg(struct irq_cfg *old_cfg) + { + kfree(old_cfg); + } + + void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) + { + struct irq_cfg *old_cfg, *cfg; + + old_cfg = old_desc->chip_data; + cfg = desc->chip_data; + + if (old_cfg == cfg) + return; + + if (old_cfg) { + free_irq_2_pin(old_cfg, cfg); + free_irq_cfg(old_cfg); + old_desc->chip_data = NULL; + } } + static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) + { + struct irq_cfg *cfg = desc->chip_data; + + if (!cfg->move_in_progress) { + /* it means that domain is not changed */ + if (!cpus_intersects(desc->affinity, mask)) + cfg->move_desc_pending = 1; + } + } + #endif + + #else + static struct irq_cfg *irq_cfg(unsigned int irq) + { + return irq < nr_irqs ? 
irq_cfgx + irq : NULL; + } + + #endif + + #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC + static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) + { + } + #endif + struct io_apic { unsigned int index; unsigned int unused[3]; @@@ -237,11 -396,10 +396,10 @@@ static inline void io_apic_modify(unsig writel(value, &io_apic->data); } - static bool io_apic_level_ack_pending(unsigned int irq) + static bool io_apic_level_ack_pending(struct irq_cfg *cfg) { struct irq_pin_list *entry; unsigned long flags; - struct irq_cfg *cfg = irq_cfg(irq); spin_lock_irqsave(&ioapic_lock, flags); entry = cfg->irq_2_pin; @@@ -323,13 -481,12 +481,12 @@@ static void ioapic_mask_entry(int apic } #ifdef CONFIG_SMP - static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) + static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; - struct irq_cfg *cfg; struct irq_pin_list *entry; + u8 vector = cfg->vector; - cfg = irq_cfg(irq); entry = cfg->irq_2_pin; for (;;) { unsigned int reg; @@@ -359,37 -516,48 +516,49 @@@ } } - static int assign_irq_vector(int irq, cpumask_t mask); + static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); - static void set_ioapic_affinity_irq(unsigned int irq, - const struct cpumask *mask) + static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; unsigned long flags; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; + unsigned int irq; - if (!cpumask_intersects(mask, cpu_online_mask)) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; - cfg = irq_cfg(irq); - if (assign_irq_vector(irq, *mask)) + irq = desc->irq; + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cpumask_and(&tmp, &cfg->domain, mask); + set_extra_move_desc(desc, mask); + + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); /* * Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); - desc = irq_to_desc(irq); spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - cpumask_copy(&desc->affinity, mask); + __target_IO_APIC_irq(irq, dest, cfg); + desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } + -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) ++static void set_ioapic_affinity_irq(unsigned int irq, ++ const struct cpumask *mask) + { + struct irq_desc *desc; + + desc = irq_to_desc(irq); + - set_ioapic_affinity_irq_desc(desc, mask); ++ set_ioapic_affinity_irq_desc(desc, *mask); + } #endif /* CONFIG_SMP */ /* @@@ -397,16 -565,18 +566,18 @@@ * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. 
*/ - static void add_pin_to_irq(unsigned int irq, int apic, int pin) + static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) { - struct irq_cfg *cfg; struct irq_pin_list *entry; - /* first time to refer irq_cfg, so with new */ - cfg = irq_cfg_alloc(irq); entry = cfg->irq_2_pin; if (!entry) { - entry = get_one_free_irq_2_pin(); + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", + apic, pin); + return; + } cfg->irq_2_pin = entry; entry->apic = apic; entry->pin = pin; @@@ -421,7 -591,7 +592,7 @@@ entry = entry->next; } - entry->next = get_one_free_irq_2_pin(); + entry->next = get_one_free_irq_2_pin(cpu); entry = entry->next; entry->apic = apic; entry->pin = pin; @@@ -430,11 -600,10 +601,10 @@@ /* * Reroute an IRQ to a different pin. */ - static void __init replace_pin_at_irq(unsigned int irq, + static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_cfg *cfg = irq_cfg(irq); struct irq_pin_list *entry = cfg->irq_2_pin; int replaced = 0; @@@ -451,18 -620,16 +621,16 @@@ /* why? call replace before add? */ if (!replaced) - add_pin_to_irq(irq, newapic, newpin); + add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); } - static inline void io_apic_modify_irq(unsigned int irq, + static inline void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { int pin; - struct irq_cfg *cfg; struct irq_pin_list *entry; - cfg = irq_cfg(irq); for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { unsigned int reg; pin = entry->pin; @@@ -475,9 -642,9 +643,9 @@@ } } - static void __unmask_IO_APIC_irq(unsigned int irq) + static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); } #ifdef CONFIG_X86_64 @@@ -492,47 -659,64 +660,64 @@@ void io_apic_sync(struct irq_pin_list * readl(&io_apic->data); } - static void __mask_IO_APIC_irq(unsigned int irq) + static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } #else /* CONFIG_X86_32 */ - static void __mask_IO_APIC_irq(unsigned int irq) + static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); + io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); } - static void __mask_and_edge_IO_APIC_irq(unsigned int irq) + static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, IO_APIC_REDIR_MASKED, NULL); } - static void __unmask_and_level_IO_APIC_irq(unsigned int irq) + static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } #endif /* CONFIG_X86_32 */ - static void mask_IO_APIC_irq (unsigned int irq) + static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { + struct irq_cfg *cfg = desc->chip_data; unsigned long flags; + BUG_ON(!cfg); + spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); + __mask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } - static void unmask_IO_APIC_irq (unsigned int irq) + static void unmask_IO_APIC_irq_desc(struct irq_desc 
*desc) { + struct irq_cfg *cfg = desc->chip_data; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); + __unmask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } + static void mask_IO_APIC_irq(unsigned int irq) + { + struct irq_desc *desc = irq_to_desc(irq); + + mask_IO_APIC_irq_desc(desc); + } + static void unmask_IO_APIC_irq(unsigned int irq) + { + struct irq_desc *desc = irq_to_desc(irq); + + unmask_IO_APIC_irq_desc(desc); + } + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@@ -809,7 -993,7 +994,7 @@@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vecto */ static int EISA_ELCR(unsigned int irq) { - if (irq < 16) { + if (irq < NR_IRQS_LEGACY) { unsigned int port = 0x4d0 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } @@@ -1034,7 -1218,7 +1219,7 @@@ void unlock_vector_lock(void spin_unlock(&vector_lock); } - static int __assign_irq_vector(int irq, cpumask_t mask) + static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { /* * NOTE! The local APIC isn't very good at handling @@@ -1050,16 -1234,13 +1235,13 @@@ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; int cpu; - struct irq_cfg *cfg; - cfg = irq_cfg(irq); + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; /* Only try and allocate irqs on cpus that are present */ cpus_and(mask, mask, cpu_online_map); - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; - old_vector = cfg->vector; if (old_vector) { cpumask_t tmp; @@@ -1113,24 -1294,22 +1295,22 @@@ next return -ENOSPC; } - static int assign_irq_vector(int irq, cpumask_t mask) + static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { int err; unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, mask); + err = __assign_irq_vector(irq, cfg, mask); spin_unlock_irqrestore(&vector_lock, flags); return err; } - static void __clear_irq_vector(int irq) + static void __clear_irq_vector(int irq, struct irq_cfg *cfg) { - struct irq_cfg *cfg; cpumask_t mask; int cpu, vector; - cfg = irq_cfg(irq); BUG_ON(!cfg->vector); vector = cfg->vector; @@@ -1162,9 -1341,13 +1342,13 @@@ void __setup_vector_irq(int cpu /* This function must be called with vector_lock held */ int irq, vector; struct irq_cfg *cfg; + struct irq_desc *desc; /* Mark the inuse vectors */ - for_each_irq_cfg(irq, cfg) { + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + cfg = desc->chip_data; if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; @@@ -1215,11 -1398,8 +1399,8 @@@ static inline int IO_APIC_irq_trigger(i } #endif - static void ioapic_register_intr(int irq, unsigned long trigger) + static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) @@@ -1311,7 -1491,7 +1492,7 @@@ static int setup_ioapic_entry(int apic return 0; } - static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, + static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, int trigger, int polarity) { struct irq_cfg *cfg; @@@ -1321,10 -1501,10 +1502,10 @@@ if (!IO_APIC_IRQ(irq)) return; - cfg = irq_cfg(irq); + cfg = desc->chip_data; mask = TARGET_CPUS; - if (assign_irq_vector(irq, mask)) + if (assign_irq_vector(irq, cfg, mask)) return; cpus_and(mask, 
cfg->domain, mask); @@@ -1341,12 -1521,12 +1522,12 @@@ cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq); + __clear_irq_vector(irq, cfg); return; } - ioapic_register_intr(irq, trigger); - if (irq < 16) + ioapic_register_intr(irq, desc, trigger); + if (irq < NR_IRQS_LEGACY) disable_8259A_irq(irq); ioapic_write_entry(apic, pin, entry); @@@ -1356,6 -1536,9 +1537,9 @@@ static void __init setup_IO_APIC_irqs(v { int apic, pin, idx, irq; int notcon = 0; + struct irq_desc *desc; + struct irq_cfg *cfg; + int cpu = boot_cpu_id; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@@ -1387,9 -1570,15 +1571,15 @@@ if (multi_timer_check(apic, irq)) continue; #endif - add_pin_to_irq(irq, apic, pin); + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + continue; + } + cfg = desc->chip_data; + add_pin_to_irq_cpu(cfg, cpu, apic, pin); - setup_IO_APIC_irq(apic, pin, irq, + setup_IO_APIC_irq(apic, pin, irq, desc, irq_trigger(idx), irq_polarity(idx)); } } @@@ -1448,6 -1637,7 +1638,7 @@@ __apicdebuginit(void) print_IO_APIC(voi union IO_APIC_reg_03 reg_03; unsigned long flags; struct irq_cfg *cfg; + struct irq_desc *desc; unsigned int irq; if (apic_verbosity == APIC_QUIET) @@@ -1537,8 -1727,13 +1728,13 @@@ } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(irq, cfg) { - struct irq_pin_list *entry = cfg->irq_2_pin; + for_each_irq_desc(irq, desc) { + struct irq_pin_list *entry; + + if (!desc) + continue; + cfg = desc->chip_data; + entry = cfg->irq_2_pin; if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", irq); @@@ -2022,14 -2217,16 +2218,16 @@@ static unsigned int startup_ioapic_irq( { int was_pending = 0; unsigned long flags; + struct irq_cfg *cfg; spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { + if (irq < NR_IRQS_LEGACY) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) was_pending = 1; } - __unmask_IO_APIC_irq(irq); + cfg = irq_cfg(irq); + __unmask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; @@@ -2092,35 -2289,37 +2290,37 @@@ static DECLARE_DELAYED_WORK(ir_migratio * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to IO-APIC RTE. 
*/ - static void migrate_ioapic_irq(int irq, cpumask_t mask) + static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; - struct irq_desc *desc; cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; + unsigned int irq; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; + irq = desc->irq; if (get_irte(irq, &irte)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - desc = irq_to_desc(irq); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); + __target_IO_APIC_irq(irq, dest, cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } @@@ -2142,14 -2341,14 +2342,14 @@@ desc->affinity = mask; } - static int migrate_irq_remapped_level(int irq) + static int migrate_irq_remapped_level_desc(struct irq_desc *desc) { int ret = -1; - struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; - mask_IO_APIC_irq(irq); + mask_IO_APIC_irq_desc(desc); - if (io_apic_level_ack_pending(irq)) { + if (io_apic_level_ack_pending(cfg)) { /* * Interrupt in progress. Migrating irq now will change the * vector information in the IO-APIC RTE and that will confuse @@@ -2161,14 -2360,15 +2361,15 @@@ } /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; cpus_clear(desc->pending_mask); unmask: - unmask_IO_APIC_irq(irq); + unmask_IO_APIC_irq_desc(desc); + return ret; } @@@ -2178,6 -2378,9 +2379,9 @@@ static void ir_irq_migration(struct wor struct irq_desc *desc; for_each_irq_desc(irq, desc) { + if (!desc) + continue; + if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@@ -2189,7 -2392,7 +2393,7 @@@ continue; } - desc->chip->set_affinity(irq, desc->pending_mask); + desc->chip->set_affinity(irq, &desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@@ -2198,19 -2401,22 +2402,23 @@@ /* * Migrates the IRQ destination in the process context. 
*/ - static void set_ir_ioapic_affinity_irq(unsigned int irq, - const struct cpumask *mask) + static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { - struct irq_desc *desc = irq_to_desc(irq); - if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, mask); - migrate_irq_remapped_level(irq); + desc->pending_mask = mask; + migrate_irq_remapped_level_desc(desc); return; } - migrate_ioapic_irq(irq, *mask); + migrate_ioapic_irq_desc(desc, mask); + } -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) ++static void set_ir_ioapic_affinity_irq(unsigned int irq, ++ const struct cpumask *mask) + { + struct irq_desc *desc = irq_to_desc(irq); + - set_ir_ioapic_affinity_irq_desc(desc, mask); ++ set_ir_ioapic_affinity_irq_desc(desc, *mask); } #endif @@@ -2229,6 -2435,9 +2437,9 @@@ asmlinkage void smp_irq_move_cleanup_in struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; + if (irq == -1) + continue; + desc = irq_to_desc(irq); if (!desc) continue; @@@ -2250,19 -2459,40 +2461,40 @@@ unlock irq_exit(); } - static void irq_complete_move(unsigned int irq) + static void irq_complete_move(struct irq_desc **descp) { - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_desc *desc = *descp; + struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) + if (likely(!cfg->move_in_progress)) { + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + if (likely(!cfg->move_desc_pending)) + return; + + /* domain has not changed, but affinity did */ + me = smp_processor_id(); + if (cpu_isset(me, desc->affinity)) { + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; + cfg->move_desc_pending = 0; + } + #endif return; + } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { cpumask_t cleanup_mask; + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; + #endif + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); cfg->move_cleanup_count = cpus_weight(cleanup_mask); send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); @@@ -2270,8 -2500,9 +2502,9 @@@ } } #else - static inline void irq_complete_move(unsigned int irq) {} + static inline void irq_complete_move(struct irq_desc **descp) {} #endif + #ifdef CONFIG_INTR_REMAP static void ack_x2apic_level(unsigned int irq) { @@@ -2282,11 -2513,14 +2515,14 @@@ static void ack_x2apic_edge(unsigned in { ack_x2APIC_irq(); } + #endif static void ack_apic_edge(unsigned int irq) { - irq_complete_move(irq); + struct irq_desc *desc = irq_to_desc(irq); + + irq_complete_move(&desc); move_native_irq(irq); ack_APIC_irq(); } @@@ -2295,18 -2529,21 +2531,21 @@@ atomic_t irq_mis_count static void ack_apic_level(unsigned int irq) { + struct irq_desc *desc = irq_to_desc(irq); + #ifdef CONFIG_X86_32 unsigned long v; int i; #endif + struct irq_cfg *cfg; int do_unmask_irq = 0; - irq_complete_move(irq); + irq_complete_move(&desc); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { + if (unlikely(desc->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; - mask_IO_APIC_irq(irq); + mask_IO_APIC_irq_desc(desc); } #endif @@@ -2330,7 -2567,8 +2569,8 @@@ * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. 
--macro */ - i = irq_cfg(irq)->vector; + cfg = desc->chip_data; + i = cfg->vector; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); #endif @@@ -2369,17 -2607,18 +2609,18 @@@ * accurate and is causing problems then it is a hardware bug * and you can go talk to the chipset vendor about it. */ - if (!io_apic_level_ack_pending(irq)) + cfg = desc->chip_data; + if (!io_apic_level_ack_pending(cfg)) move_masked_irq(irq); - unmask_IO_APIC_irq(irq); + unmask_IO_APIC_irq_desc(desc); } #ifdef CONFIG_X86_32 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); spin_unlock(&ioapic_lock); } #endif @@@ -2430,20 -2669,22 +2671,22 @@@ static inline void init_IO_APIC_traps(v * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for_each_irq_cfg(irq, cfg) { - if (IO_APIC_IRQ(irq) && !cfg->vector) { + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + + cfg = desc->chip_data; + if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (irq < 16) + if (irq < NR_IRQS_LEGACY) make_8259A_irq(irq); - else { - desc = irq_to_desc(irq); + else /* Strange. Oh, well.. */ desc->chip = &no_irq_chip; - } } } } @@@ -2468,7 -2709,7 +2711,7 @@@ static void unmask_lapic_irq(unsigned i apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } - static void ack_lapic_irq (unsigned int irq) + static void ack_lapic_irq(unsigned int irq) { ack_APIC_irq(); } @@@ -2480,11 -2721,8 +2723,8 @@@ static struct irq_chip lapic_chip __rea .ack = ack_lapic_irq, }; - static void lapic_register_intr(int irq) + static void lapic_register_intr(int irq, struct irq_desc *desc) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); @@@ -2588,7 -2826,9 +2828,9 @@@ int timer_through_8259 __initdata */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg(0); + struct irq_desc *desc = irq_to_desc(0); + struct irq_cfg *cfg = desc->chip_data; + int cpu = boot_cpu_id; int apic1, pin1, apic2, pin2; unsigned long flags; unsigned int ver; @@@ -2603,7 -2843,7 +2845,7 @@@ * get/set the timer IRQ vector: */ disable_8259A_irq(0); - assign_irq_vector(0, TARGET_CPUS); + assign_irq_vector(0, cfg, TARGET_CPUS); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@@ -2654,10 -2894,10 +2896,10 @@@ * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq(0, apic1, pin1); + add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } - unmask_IO_APIC_irq(0); + unmask_IO_APIC_irq_desc(desc); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@@ -2683,9 -2923,9 +2925,9 @@@ /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq(0); + unmask_IO_APIC_irq_desc(desc); enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... 
works.\n"); @@@ -2717,7 -2957,7 +2959,7 @@@ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); - lapic_register_intr(0); + lapic_register_intr(0, desc); apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); @@@ -2902,22 -3142,26 +3144,26 @@@ unsigned int create_irq_nr(unsigned in unsigned int irq; unsigned int new; unsigned long flags; - struct irq_cfg *cfg_new; - - irq_want = nr_irqs - 1; + struct irq_cfg *cfg_new = NULL; + int cpu = boot_cpu_id; + struct irq_desc *desc_new = NULL; irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new > 0; new--) { + for (new = irq_want; new < NR_IRQS; new++) { if (platform_legacy_irq(new)) continue; - cfg_new = irq_cfg(new); - if (cfg_new && cfg_new->vector != 0) + + desc_new = irq_to_desc_alloc_cpu(new, cpu); + if (!desc_new) { + printk(KERN_INFO "can not get irq_desc for %d\n", new); continue; - /* check if need to create one */ - if (!cfg_new) - cfg_new = irq_cfg_alloc(new); - if (__assign_irq_vector(new, TARGET_CPUS) == 0) + } + cfg_new = desc_new->chip_data; + + if (cfg_new->vector != 0) + continue; + if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) irq = new; break; } @@@ -2925,15 -3169,21 +3171,21 @@@ if (irq > 0) { dynamic_irq_init(irq); + /* restore it, in case dynamic_irq_init clear it */ + if (desc_new) + desc_new->chip_data = cfg_new; } return irq; } + static int nr_irqs_gsi = NR_IRQS_LEGACY; int create_irq(void) { + unsigned int irq_want; int irq; - irq = create_irq_nr(nr_irqs - 1); + irq_want = nr_irqs_gsi; + irq = create_irq_nr(irq_want); if (irq == 0) irq = -1; @@@ -2944,14 -3194,22 +3196,22 @@@ void destroy_irq(unsigned int irq) { unsigned long flags; + struct irq_cfg *cfg; + struct irq_desc *desc; + /* store it, in case dynamic_irq_cleanup clear it */ + desc = irq_to_desc(irq); + cfg = desc->chip_data; dynamic_irq_cleanup(irq); + /* connect back irq_cfg */ + if (desc) + desc->chip_data = cfg; #ifdef CONFIG_INTR_REMAP free_irte(irq); #endif spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); + __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); } @@@ -2966,12 -3224,12 +3226,12 @@@ static int msi_compose_msg(struct pci_d unsigned dest; cpumask_t tmp; + cfg = irq_cfg(irq); tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); + err = assign_irq_vector(irq, cfg, tmp); if (err) return err; - cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@@ -3027,61 -3285,64 +3287,63 @@@ } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; - if (assign_irq_vector(irq, *mask)) + cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) ++ if (assign_irq_vector(irq, cfg, *mask)) return; - cfg = irq_cfg(irq); - set_extra_move_desc(desc, mask); ++ set_extra_move_desc(desc, *mask); + - cpus_and(tmp, cfg->domain, mask); + cpumask_and(&tmp, &cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - read_msi_msg(irq, &msg); + read_msi_msg_desc(desc, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= 
MSI_ADDR_DEST_ID(dest); - write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); + write_msi_msg_desc(desc, &msg); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } - #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. */ -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +static void ir_set_msi_irq_affinity(unsigned int irq, + const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp, cleanup_mask; struct irte irte; - struct irq_desc *desc; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; if (get_irte(irq, &irte)) return; - if (assign_irq_vector(irq, *mask)) + cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) ++ if (assign_irq_vector(irq, cfg, *mask)) return; - cfg = irq_cfg(irq); - set_extra_move_desc(desc, mask); ++ set_extra_move_desc(desc, *mask); + - cpus_and(tmp, cfg->domain, mask); + cpumask_and(&tmp, &cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); irte.vector = cfg->vector; @@@ -3104,9 -3365,9 +3366,9 @@@ cfg->move_in_progress = 0; } - desc = irq_to_desc(irq); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } + #endif #endif /* CONFIG_SMP */ @@@ -3165,7 -3426,7 +3427,7 @@@ static int msi_alloc_irte(struct pci_de } #endif - static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) + static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { int ret; struct msi_msg msg; @@@ -3174,7 -3435,7 +3436,7 @@@ if (ret < 0) return ret; - set_irq_msi(irq, desc); + set_irq_msi(irq, msidesc); write_msi_msg(irq, &msg); #ifdef CONFIG_INTR_REMAP @@@ -3194,26 -3455,13 +3456,13 @@@ return 0; } - static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) - { - unsigned int irq; - - irq = dev->bus->number; - irq <<= 8; - irq |= dev->devfn; - irq <<= 12; - - return irq; - } - - int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) + int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) { unsigned int irq; int ret; unsigned int irq_want; - irq_want = build_irq_for_pci_dev(dev) + 0x100; - + irq_want = nr_irqs_gsi; irq = create_irq_nr(irq_want); if (irq == 0) return -1; @@@ -3227,7 -3475,7 +3476,7 @@@ goto error; no_ir: #endif - ret = setup_msi_irq(dev, desc, irq); + ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) { destroy_irq(irq); return ret; @@@ -3245,7 -3493,7 +3494,7 @@@ int arch_setup_msi_irqs(struct pci_dev { unsigned int irq; int ret, sub_handle; - struct msi_desc *desc; + struct msi_desc *msidesc; unsigned int irq_want; #ifdef CONFIG_INTR_REMAP @@@ -3253,10 -3501,11 +3502,11 @@@ int index = 0; #endif - irq_want = build_irq_for_pci_dev(dev) + 0x100; + irq_want = nr_irqs_gsi; sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want); + irq_want++; if (irq == 0) return -1; #ifdef CONFIG_INTR_REMAP @@@ -3288,7 -3537,7 +3538,7 @@@ } no_ir: #endif - ret = setup_msi_irq(dev, desc, irq); + ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) goto error; sub_handle++; @@@ -3307,22 -3556,25 +3557,24 @@@ void arch_teardown_msi_irq(unsigned in #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +static void dmar_msi_set_affinity(unsigned 
int irq, const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; - if (assign_irq_vector(irq, *mask)) + cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) ++ if (assign_irq_vector(irq, cfg, *mask)) return; - cfg = irq_cfg(irq); - set_extra_move_desc(desc, mask); ++ set_extra_move_desc(desc, *mask); + - cpus_and(tmp, cfg->domain, mask); + cpumask_and(&tmp, &cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); dmar_msi_read(irq, &msg); @@@ -3333,9 -3585,9 +3585,9 @@@ msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } + #endif /* CONFIG_SMP */ struct irq_chip dmar_msi_type = { @@@ -3367,22 -3619,25 +3619,24 @@@ int arch_setup_dmar_msi(unsigned int ir #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) +static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; - struct irq_desc *desc; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; - if (assign_irq_vector(irq, *mask)) + cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) ++ if (assign_irq_vector(irq, cfg, *mask)) return; - cfg = irq_cfg(irq); - set_extra_move_desc(desc, mask); ++ set_extra_move_desc(desc, *mask); + - cpus_and(tmp, cfg->domain, mask); + cpumask_and(&tmp, &cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); hpet_msi_read(irq, &msg); @@@ -3393,9 -3648,9 +3647,9 @@@ msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } + #endif /* CONFIG_SMP */ struct irq_chip hpet_msi_type = { @@@ -3448,27 -3703,30 +3702,29 @@@ static void target_ht_irq(unsigned int write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) +static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; - if (assign_irq_vector(irq, *mask)) + cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) ++ if (assign_irq_vector(irq, cfg, *mask)) return; - cfg = irq_cfg(irq); - set_extra_move_desc(desc, mask); ++ set_extra_move_desc(desc, *mask); + - cpus_and(tmp, cfg->domain, mask); + cpumask_and(&tmp, &cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - desc = irq_to_desc(irq); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } + #endif static struct irq_chip ht_irq_chip = { @@@ -3488,13 -3746,13 +3744,13 @@@ int arch_setup_ht_irq(unsigned int irq int err; cpumask_t tmp; + cfg = irq_cfg(irq); tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); + err = assign_irq_vector(irq, cfg, tmp); if (!err) { struct ht_irq_msg msg; unsigned dest; - cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@@ -3540,7 -3798,9 +3796,9 @@@ int 
arch_enable_uv_irq(char *irq_name, unsigned long flags; int err; - err = assign_irq_vector(irq, *eligible_cpu); + cfg = irq_cfg(irq); + + err = assign_irq_vector(irq, cfg, *eligible_cpu); if (err != 0) return err; @@@ -3549,8 -3809,6 +3807,6 @@@ irq_name); spin_unlock_irqrestore(&vector_lock, flags); - cfg = irq_cfg(irq); - mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@@ -3602,9 -3860,16 +3858,16 @@@ int __init io_apic_get_redir_entries (i return reg_01.bits.entries; } - int __init probe_nr_irqs(void) + void __init probe_nr_irqs_gsi(void) { - return NR_IRQS; + int idx; + int nr = 0; + + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; } /* -------------------------------------------------------------------------- @@@ -3703,19 -3968,31 +3966,31 @@@ int __init io_apic_get_version(int ioap int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { + struct irq_desc *desc; + struct irq_cfg *cfg; + int cpu = boot_cpu_id; + if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); return -EINVAL; } + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc %d\n", irq); + return 0; + } + /* * IRQs < 16 are already in the irq_2_pin[] map */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); + if (irq >= NR_IRQS_LEGACY) { + cfg = desc->chip_data; + add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); + } - setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); + setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); return 0; } @@@ -3769,9 -4046,10 +4044,10 @@@ void __init setup_ioapic_dest(void * when you have too many devices, because at that time only boot * cpu is online. 
*/ - cfg = irq_cfg(irq); + desc = irq_to_desc(irq); + cfg = desc->chip_data; if (!cfg->vector) { - setup_IO_APIC_irq(ioapic, pin, irq, + setup_IO_APIC_irq(ioapic, pin, irq, desc, irq_trigger(irq_entry), irq_polarity(irq_entry)); continue; @@@ -3781,7 -4059,6 +4057,6 @@@ /* * Honour affinities which have been set in early boot */ - desc = irq_to_desc(irq); if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) mask = desc->affinity; @@@ -3790,10 -4067,10 +4065,10 @@@ #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, &mask); + set_ir_ioapic_affinity_irq_desc(desc, mask); else #endif - set_ioapic_affinity_irq(irq, &mask); + set_ioapic_affinity_irq_desc(desc, mask); } } @@@ -3842,7 -4119,6 +4117,6 @@@ void __init ioapic_init_mappings(void struct resource *ioapic_res; int i; - irq_2_pin_init(); ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { diff --combined arch/x86/kernel/irq_32.c index 87870a49be4,119fc9c8ff7..9cf9cbbf7a0 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@@ -242,6 -242,8 +242,8 @@@ void fixup_irqs(cpumask_t map for_each_irq_desc(irq, desc) { cpumask_t mask; + if (!desc) + continue; if (irq == 2) continue; @@@ -251,7 -253,7 +253,7 @@@ mask = map; } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, mask); + desc->chip->set_affinity(irq, &mask); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --combined arch/x86/kernel/irq_64.c index 8cbd069e5b4,a174a217eb1..54c69d47a77 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@@ -91,6 -91,8 +91,8 @@@ void fixup_irqs(cpumask_t map int break_affinity = 0; int set_affinity = 1; + if (!desc) + continue; if (irq == 2) continue; @@@ -113,7 -115,7 +115,7 @@@ desc->chip->mask(irq); if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, mask); + desc->chip->set_affinity(irq, &mask); else if (!(warned++)) set_affinity = 0; diff --combined drivers/xen/events.c index eba5ec5b020,46625cd3874..add640ff5c6 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@@ -141,8 -141,12 +141,12 @@@ static void init_evtchn_cpu_bindings(vo int i; /* By default all event channels notify CPU#0. */ - for_each_irq_desc(i, desc) + for_each_irq_desc(i, desc) { + if (!desc) + continue; + desc->affinity = cpumask_of_cpu(0); + } #endif memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); @@@ -229,15 -233,20 +233,20 @@@ static void unmask_evtchn(int port static int find_unbound_irq(void) { int irq; + struct irq_desc *desc; /* Only allocate from dynirq range */ - for_each_irq_nr(irq) + for (irq = 0; irq < nr_irqs; irq++) if (irq_bindcount[irq] == 0) break; if (irq == nr_irqs) panic("No available IRQ to bind to: increase nr_irqs!\n"); + desc = irq_to_desc_alloc_cpu(irq, 0); + if (WARN_ON(desc == NULL)) + return -1; + return irq; } @@@ -579,7 -588,7 +588,7 @@@ void rebind_evtchn_irq(int evtchn, int spin_unlock(&irq_mapping_update_lock); /* new event channels are always bound to cpu 0 */ - irq_set_affinity(irq, cpumask_of_cpu(0)); + irq_set_affinity(irq, cpumask_of(0)); /* Unmask the event channel. 
*/ enable_irq(irq); @@@ -608,9 -617,9 +617,9 @@@ static void rebind_irq_to_cpu(unsigned } -static void set_affinity_irq(unsigned irq, cpumask_t dest) +static void set_affinity_irq(unsigned irq, const struct cpumask *dest) { - unsigned tcpu = first_cpu(dest); + unsigned tcpu = cpumask_first(dest); rebind_irq_to_cpu(irq, tcpu); } @@@ -792,7 -801,7 +801,7 @@@ void xen_irq_resume(void mask_evtchn(evtchn); /* No IRQ <-> event-channel mappings. */ - for_each_irq_nr(irq) + for (irq = 0; irq < nr_irqs; irq++) irq_info[irq].evtchn = 0; /* zap event-channel binding */ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) @@@ -824,7 -833,7 +833,7 @@@ void __init xen_init_IRQ(void mask_evtchn(i); /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - for_each_irq_nr(i) + for (i = 0; i < nr_irqs; i++) irq_bindcount[i] = 0; irq_ctx_init(smp_processor_id()); diff --combined include/linux/interrupt.h index 48e63934fab,be3c484b524..dfaee6bd265 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@@ -14,6 -14,8 +14,8 @@@ #include #include #include + #include + #include #include #include @@@ -109,13 -111,13 +111,13 @@@ extern void enable_irq(unsigned int irq extern cpumask_t irq_default_affinity; -extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); +extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); #else /* CONFIG_SMP */ -static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask) +static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) { return -EINVAL; } @@@ -251,9 -253,6 +253,6 @@@ enu BLOCK_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, - #ifdef CONFIG_HIGH_RES_TIMERS - HRTIMER_SOFTIRQ, - #endif RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS diff --combined include/linux/irq.h index ab70fd604d3,98564dc6447..5845bdc1ac0 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@@ -113,8 -113,7 +113,8 @@@ struct irq_chip void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, cpumask_t dest); + void (*set_affinity)(unsigned int irq, + const struct cpumask *dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); int (*set_wake)(unsigned int irq, unsigned int on); @@@ -130,9 -129,14 +130,14 @@@ const char *typename; }; + struct timer_rand_state; + struct irq_2_iommu; /** * struct irq_desc - interrupt descriptor * @irq: interrupt number for this descriptor + * @timer_rand_state: pointer to timer rand state struct + * @kstat_irqs: irq stats per cpu + * @irq_2_iommu: iommu with this irq * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] * @chip: low level interrupt hardware access * @msi_desc: MSI descriptor @@@ -144,8 -148,8 +149,8 @@@ * @depth: disable-depth, for nested irq_disable() calls * @wake_depth: enable depth, for multiple set_irq_wake() callers * @irq_count: stats field to detect stalled irqs - * @irqs_unhandled: stats field for spurious unhandled interrupts * @last_unhandled: aging timer for unhandled count + * @irqs_unhandled: stats field for spurious unhandled interrupts * @lock: locking for SMP * @affinity: IRQ affinity on SMP * @cpu: cpu index useful for balancing @@@ -155,6 -159,13 +160,13 @@@ */ struct irq_desc { unsigned int irq; + #ifdef CONFIG_SPARSE_IRQ + struct timer_rand_state *timer_rand_state; + unsigned int *kstat_irqs; + # ifdef CONFIG_INTR_REMAP + struct 
irq_2_iommu *irq_2_iommu; + # endif + #endif irq_flow_handler_t handle_irq; struct irq_chip *chip; struct msi_desc *msi_desc; @@@ -166,8 -177,8 +178,8 @@@ unsigned int depth; /* nested irq disables */ unsigned int wake_depth; /* nested wake enables */ unsigned int irq_count; /* For detecting broken IRQs */ - unsigned int irqs_unhandled; unsigned long last_unhandled; /* Aging timer for unhandled count */ + unsigned int irqs_unhandled; spinlock_t lock; #ifdef CONFIG_SMP cpumask_t affinity; @@@ -182,12 -193,51 +194,51 @@@ const char *name; } ____cacheline_internodealigned_in_smp; + extern void early_irq_init(void); + extern void arch_early_irq_init(void); + extern void arch_init_chip_data(struct irq_desc *desc, int cpu); + extern void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu); + extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); + #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; static inline struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < nr_irqs) ? irq_desc + irq : NULL; + return (irq < NR_IRQS) ? irq_desc + irq : NULL; + } + static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) + { + return irq_to_desc(irq); + } + + #else + + extern struct irq_desc *irq_to_desc(unsigned int irq); + extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); + extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); + + # define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq)) + # define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq)) + + #define kstat_irqs_this_cpu(DESC) \ + ((DESC)->kstat_irqs[smp_processor_id()]) + #define kstat_incr_irqs_this_cpu(irqno, DESC) \ + ((DESC)->kstat_irqs[smp_processor_id()]++) + + #endif + + static inline struct irq_desc * + irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) + { + #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + return irq_to_desc(irq); + #else + return desc; + #endif } /* @@@ -381,6 -431,11 +432,11 @@@ extern int set_irq_msi(unsigned int irq #define get_irq_data(irq) (irq_to_desc(irq)->handler_data) #define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc) + #define get_irq_desc_chip(desc) ((desc)->chip) + #define get_irq_desc_chip_data(desc) ((desc)->chip_data) + #define get_irq_desc_data(desc) ((desc)->handler_data) + #define get_irq_desc_msi(desc) ((desc)->msi_desc) + #endif /* CONFIG_GENERIC_HARDIRQS */ #endif /* !CONFIG_S390 */ diff --combined init/Kconfig index 8e9904fc302,13627191a60..f6281711166 --- a/init/Kconfig +++ b/init/Kconfig @@@ -924,15 -924,6 +924,15 @@@ config KMO endif # MODULES +config INIT_ALL_POSSIBLE + bool + help + Back when each arch used to define their own cpu_online_map and + cpu_possible_map, some of them chose to initialize cpu_possible_map + with all 1s, and others with all 0s. When they were centralised, + it was better to provide this option than to break all the archs + and have several arch maintainers persuing me down dark alleys. + config STOP_MACHINE bool default y @@@ -945,10 -936,90 +945,90 @@@ source "block/Kconfig config PREEMPT_NOTIFIERS bool + choice + prompt "RCU Implementation" + default CLASSIC_RCU + config CLASSIC_RCU - def_bool !PREEMPT_RCU + bool "Classic RCU" help This option selects the classic RCU implementation that is designed for best read-side performance on non-realtime - systems. 
Classic RCU is the default. Note that the - PREEMPT_RCU symbol is used to select/deselect this option. + systems. + + Select this option if you are unsure. + + config TREE_RCU + bool "Tree-based hierarchical RCU" + help + This option selects the RCU implementation that is + designed for very large SMP system with hundreds or + thousands of CPUs. + + config PREEMPT_RCU + bool "Preemptible RCU" + depends on PREEMPT + help + This option reduces the latency of the kernel by making certain + RCU sections preemptible. Normally RCU code is non-preemptible, if + this option is selected then read-only RCU sections become + preemptible. This helps latency, but may expose bugs due to + now-naive assumptions about each RCU read-side critical section + remaining on a given CPU through its execution. + + endchoice + + config RCU_TRACE + bool "Enable tracing for RCU" + depends on TREE_RCU || PREEMPT_RCU + help + This option provides tracing in RCU which presents stats + in debugfs for debugging RCU implementation. + + Say Y here if you want to enable RCU tracing + Say N if you are unsure. + + config RCU_FANOUT + int "Tree-based hierarchical RCU fanout value" + range 2 64 if 64BIT + range 2 32 if !64BIT + depends on TREE_RCU + default 64 if 64BIT + default 32 if !64BIT + help + This option controls the fanout of hierarchical implementations + of RCU, allowing RCU to work efficiently on machines with + large numbers of CPUs. This value must be at least the cube + root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit + systems and up to 262,144 for 64-bit systems. + + Select a specific number if testing RCU itself. + Take the default if unsure. + + config RCU_FANOUT_EXACT + bool "Disable tree-based hierarchical RCU auto-balancing" + depends on TREE_RCU + default n + help + This option forces use of the exact RCU_FANOUT value specified, + regardless of imbalances in the hierarchy. This is useful for + testing RCU itself, and might one day be useful on systems with + strong NUMA behavior. + + Without RCU_FANOUT_EXACT, the code will balance the hierarchy. + + Say N if unsure. + + config TREE_RCU_TRACE + def_bool RCU_TRACE && TREE_RCU + select DEBUG_FS + help + This option provides tracing for the TREE_RCU implementation, + permitting Makefile to trivially select kernel/rcutree_trace.c. + + config PREEMPT_RCU_TRACE + def_bool RCU_TRACE && PREEMPT_RCU + select DEBUG_FS + help + This option provides tracing for the PREEMPT_RCU implementation, + permitting Makefile to trivially select kernel/rcupreempt_trace.c. 
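(Side note on the RCU_FANOUT help text added above; this is an illustrative
user-space sketch only, not part of the patch, and the helper name
min_fanout() is made up for the example. The help text says the fan-out must
be at least the cube root of NR_CPUS, i.e. a three-level tree with fan-out F
covers at most F*F*F CPUs, which gives the 32,768 / 262,144 limits quoted.)

#include <stdio.h>

/* Smallest fan-out F (>= 2, matching the Kconfig range) with F*F*F >= nr_cpus. */
static int min_fanout(long nr_cpus)
{
	int f = 2;

	while ((long)f * f * f < nr_cpus)
		f++;
	return f;
}

int main(void)
{
	/* Limits quoted in the RCU_FANOUT help text above. */
	printf("fanout 32 covers up to %ld CPUs (32-bit)\n", 32L * 32 * 32); /* 32768 */
	printf("fanout 64 covers up to %ld CPUs (64-bit)\n", 64L * 64 * 64); /* 262144 */
	printf("NR_CPUS=4096 needs fanout >= %d\n", min_fanout(4096));       /* 16 */
	return 0;
}
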
diff --combined kernel/irq/chip.c index 58d8e31daa4,6eb3c7952b6..f63c706d25e --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@@ -24,9 -24,10 +24,10 @@@ */ void dynamic_irq_init(unsigned int irq) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc; unsigned long flags; + desc = irq_to_desc(irq); if (!desc) { WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); return; @@@ -45,7 -46,7 +46,7 @@@ desc->irq_count = 0; desc->irqs_unhandled = 0; #ifdef CONFIG_SMP - cpus_setall(desc->affinity); + cpumask_setall(&desc->affinity); #endif spin_unlock_irqrestore(&desc->lock, flags); } @@@ -124,6 -125,7 +125,7 @@@ int set_irq_type(unsigned int irq, unsi return -ENODEV; } + type &= IRQ_TYPE_SENSE_MASK; if (type == IRQ_TYPE_NONE) return 0; @@@ -352,6 -354,7 +354,7 @@@ handle_level_irq(unsigned int irq, stru spin_lock(&desc->lock); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@@ -429,6 -432,7 +432,7 @@@ handle_fasteoi_irq(unsigned int irq, st desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@@ -465,12 -469,14 +469,14 @@@ handle_edge_irq(unsigned int irq, struc !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@@ -531,8 -537,10 +537,10 @@@ handle_percpu_irq(unsigned int irq, str if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) + if (desc->chip->eoi) { desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); + } } void @@@ -567,8 -575,10 +575,10 @@@ __set_irq_handler(unsigned int irq, irq /* Uninstall? */ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) + if (desc->chip != &no_irq_chip) { mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); + } desc->status |= IRQ_DISABLED; desc->depth = 1; } diff --combined kernel/irq/manage.c index 10ad2f87ed9,540f6c49f3f..61c4a9b6216 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@@ -79,7 -79,7 +79,7 @@@ int irq_can_set_affinity(unsigned int i * @cpumask: cpumask * */ -int irq_set_affinity(unsigned int irq, cpumask_t cpumask) +int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@@ -91,14 -91,14 +91,14 @@@ #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { - desc->affinity = cpumask; + cpumask_copy(&desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); } else { desc->status |= IRQ_MOVE_PENDING; - desc->pending_mask = cpumask; + cpumask_copy(&desc->pending_mask, cpumask); } #else - desc->affinity = cpumask; + cpumask_copy(&desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); #endif desc->status |= IRQ_AFFINITY_SET; @@@ -112,24 -112,26 +112,24 @@@ */ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) { - cpumask_t mask; - if (!irq_can_set_affinity(irq)) return 0; - cpus_and(mask, cpu_online_map, irq_default_affinity); - /* * Preserve an userspace affinity setup, but make sure that * one of the targets is online. 
*/ if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { - if (cpus_intersects(desc->affinity, cpu_online_map)) - mask = desc->affinity; + if (cpumask_any_and(&desc->affinity, cpu_online_mask) + < nr_cpu_ids) + goto set_affinity; else desc->status &= ~IRQ_AFFINITY_SET; } - desc->affinity = mask; - desc->chip->set_affinity(irq, mask); + cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity); +set_affinity: + desc->chip->set_affinity(irq, &desc->affinity); return 0; } @@@ -368,16 -370,18 +368,18 @@@ int __irq_set_trigger(struct irq_desc * return 0; } - ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK); + /* caller masked out all except trigger mode flags */ + ret = chip->set_type(irq, flags); if (ret) pr_err("setting trigger mode %d for irq %u failed (%pF)\n", - (int)(flags & IRQF_TRIGGER_MASK), - irq, chip->set_type); + (int)flags, irq, chip->set_type); else { + if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) + flags |= IRQ_LEVEL; /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */ - desc->status &= ~IRQ_TYPE_SENSE_MASK; - desc->status |= flags & IRQ_TYPE_SENSE_MASK; + desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); + desc->status |= flags; } return ret; @@@ -457,7 -461,8 +459,8 @@@ __setup_irq(unsigned int irq, struct ir /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { - ret = __irq_set_trigger(desc, irq, new->flags); + ret = __irq_set_trigger(desc, irq, + new->flags & IRQF_TRIGGER_MASK); if (ret) { spin_unlock_irqrestore(&desc->lock, flags); @@@ -671,6 -676,18 +674,18 @@@ int request_irq(unsigned int irq, irq_h struct irq_desc *desc; int retval; + /* + * handle_IRQ_event() always ignores IRQF_DISABLED except for + * the _first_ irqaction (sigh). That can cause oopsing, but + * the behavior is classified as "will not fix" so we need to + * start nudging drivers away from using that idiom. + */ + if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) + == (IRQF_SHARED|IRQF_DISABLED)) + pr_warning("IRQ %d/%s: IRQF_DISABLED is not " + "guaranteed on shared IRQs\n", + irq, devname); + #ifdef CONFIG_LOCKDEP /* * Lockdep wants atomic interrupt handlers: diff --combined kernel/irq/proc.c index 8e91c976252,f6b3440f05b..d2c0e5ee53c --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@@ -40,42 -40,33 +40,42 @@@ static ssize_t irq_affinity_proc_write( const char __user *buffer, size_t count, loff_t *pos) { unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; - cpumask_t new_value; + cpumask_var_t new_value; int err; if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity || irq_balancing_disabled(irq)) return -EIO; + if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) + return -ENOMEM; + err = cpumask_parse_user(buffer, count, new_value); if (err) - return err; + goto free_cpumask; - if (!is_affinity_mask_valid(new_value)) - return -EINVAL; + if (!is_affinity_mask_valid(*new_value)) { + err = -EINVAL; + goto free_cpumask; + } /* * Do not allow disabling IRQs completely - it's a too easy * way to make the system unusable accidentally :-) At least * one online CPU still has to be targeted. */ - if (!cpus_intersects(new_value, cpu_online_map)) + if (!cpumask_intersects(new_value, cpu_online_mask)) { /* Special case for empty set - allow the architecture code to set default SMP affinity. */ - return irq_select_affinity_usr(irq) ? -EINVAL : count; - - irq_set_affinity(irq, new_value); - - return count; + err = irq_select_affinity_usr(irq) ? 
-EINVAL : count; + } else { + irq_set_affinity(irq, new_value); + err = count; + } + +free_cpumask: + free_cpumask_var(new_value); + return err; } static int irq_affinity_proc_open(struct inode *inode, struct file *file) @@@ -104,7 -95,7 +104,7 @@@ static ssize_t default_affinity_write(s cpumask_t new_value; int err; - err = cpumask_parse_user(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, &new_value); if (err) return err; @@@ -252,7 -243,11 +252,11 @@@ void init_irq_proc(void /* * Create entries for all existing IRQs. */ - for_each_irq_desc(irq, desc) + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + register_irq_proc(irq, desc); + } } diff --combined kernel/sched.c index bdd180a0c6b,fff1c4a20b6..f2095660efe --- a/kernel/sched.c +++ b/kernel/sched.c @@@ -209,7 -209,6 +209,6 @@@ void init_rt_bandwidth(struct rt_bandwi hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rt_b->rt_period_timer.function = sched_rt_period_timer; - rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; } static inline int rt_bandwidth_enabled(void) @@@ -1139,7 -1138,6 +1138,6 @@@ static void init_rq_hrtick(struct rq *r hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rq->hrtick_timer.function = hrtick; - rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; } #else /* CONFIG_SCHED_HRTICK */ static inline void hrtick_clear(struct rq *rq) @@@ -4192,7 -4190,6 +4190,6 @@@ void account_steal_time(struct task_str if (p == rq->idle) { p->stime = cputime_add(p->stime, steal); - account_group_system_time(p, steal); if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else @@@ -4328,7 -4325,7 +4325,7 @@@ void __kprobes sub_preempt_count(int va /* * Underflow? */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) + if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) return; /* * Is the spinlock portion underflowing? 
@@@ -6647,7 -6644,7 +6644,7 @@@ static int sched_domain_debug_one(struc struct sched_group *group = sd->groups; char str[256]; - cpulist_scnprintf(str, sizeof(str), sd->span); + cpulist_scnprintf(str, sizeof(str), &sd->span); cpus_clear(*groupmask); printk(KERN_DEBUG "%*s domain %d: ", level, "", level); @@@ -6700,7 -6697,7 +6697,7 @@@ cpus_or(*groupmask, *groupmask, group->cpumask); - cpulist_scnprintf(str, sizeof(str), group->cpumask); + cpulist_scnprintf(str, sizeof(str), &group->cpumask); printk(KERN_CONT " %s", str); group = group->next; @@@ -7101,7 -7098,7 +7098,7 @@@ cpu_to_phys_group(int cpu, const cpumas { int group; #ifdef CONFIG_SCHED_MC - *mask = cpu_coregroup_map(cpu); + *mask = *cpu_coregroup_mask(cpu); cpus_and(*mask, *mask, *cpu_map); group = first_cpu(*mask); #elif defined(CONFIG_SCHED_SMT) @@@ -7474,7 -7471,7 +7471,7 @@@ static int __build_sched_domains(const sd = &per_cpu(core_domains, i); SD_INIT(sd, MC); set_domain_attribute(sd, attr); - sd->span = cpu_coregroup_map(i); + sd->span = *cpu_coregroup_mask(i); cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; p->child = sd; @@@ -7517,7 -7514,7 +7514,7 @@@ SCHED_CPUMASK_VAR(this_core_map, allmasks); SCHED_CPUMASK_VAR(send_covered, allmasks); - *this_core_map = cpu_coregroup_map(i); + *this_core_map = *cpu_coregroup_mask(i); cpus_and(*this_core_map, *this_core_map, *cpu_map); if (i != first_cpu(*this_core_map)) continue; diff --combined kernel/trace/trace.c index c8760ec0e46,4185d522163..0e91f43b6ba --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@@ -30,7 -30,6 +30,6 @@@ #include #include #include - #include #include #include @@@ -1310,7 -1309,7 +1309,7 @@@ enum trace_file_type TRACE_FILE_ANNOTATE = 2, }; - static void trace_iterator_increment(struct trace_iterator *iter, int cpu) + static void trace_iterator_increment(struct trace_iterator *iter) { /* Don't allow ftrace to trace into the ring buffers */ ftrace_disable_cpu(); @@@ -1389,7 -1388,7 +1388,7 @@@ static void *find_next_entry_inc(struc iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); if (iter->ent) - trace_iterator_increment(iter, iter->cpu); + trace_iterator_increment(iter); return iter->ent ? iter : NULL; } @@@ -2675,7 -2674,7 +2674,7 @@@ tracing_cpumask_read(struct file *filp mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, tracing_cpumask); + len = cpumask_scnprintf(mask_str, count, &tracing_cpumask); if (count - len < 2) { count = -EINVAL; goto out_err; @@@ -2696,7 -2695,7 +2695,7 @@@ tracing_cpumask_write(struct file *filp int err, cpu; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); + err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new); if (err) goto err_unlock; diff --combined mm/slub.c index 8e516e29f98,6cb7ad10785..0d861c3154b --- a/mm/slub.c +++ b/mm/slub.c @@@ -24,6 -24,7 +24,7 @@@ #include #include #include + #include /* * Lock order: @@@ -153,6 -154,10 +154,10 @@@ #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif + #define OO_SHIFT 16 + #define OO_MASK ((1 << OO_SHIFT) - 1) + #define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */ + /* Internal SLUB flags */ #define __OBJECT_POISON 0x80000000 /* Poison object */ #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ @@@ -178,7 -183,7 +183,7 @@@ static LIST_HEAD(slab_caches) * Tracking user of a slab. 
*/ struct track { - void *addr; /* Called from address */ + unsigned long addr; /* Called from address */ int cpu; /* Was running on cpu */ int pid; /* Pid context */ unsigned long when; /* When did the operation occur */ @@@ -290,7 -295,7 +295,7 @@@ static inline struct kmem_cache_order_o unsigned long size) { struct kmem_cache_order_objects x = { - (order << 16) + (PAGE_SIZE << order) / size + (order << OO_SHIFT) + (PAGE_SIZE << order) / size }; return x; @@@ -298,12 -303,12 +303,12 @@@ static inline int oo_order(struct kmem_cache_order_objects x) { - return x.x >> 16; + return x.x >> OO_SHIFT; } static inline int oo_objects(struct kmem_cache_order_objects x) { - return x.x & ((1 << 16) - 1); + return x.x & OO_MASK; } #ifdef CONFIG_SLUB_DEBUG @@@ -367,7 -372,7 +372,7 @@@ static struct track *get_track(struct k } static void set_track(struct kmem_cache *s, void *object, - enum track_item alloc, void *addr) + enum track_item alloc, unsigned long addr) { struct track *p; @@@ -391,8 -396,8 +396,8 @@@ static void init_tracking(struct kmem_c if (!(s->flags & SLAB_STORE_USER)) return; - set_track(s, object, TRACK_FREE, NULL); - set_track(s, object, TRACK_ALLOC, NULL); + set_track(s, object, TRACK_FREE, 0UL); + set_track(s, object, TRACK_ALLOC, 0UL); } static void print_track(const char *s, struct track *t) @@@ -401,7 -406,7 +406,7 @@@ return; printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", - s, t->addr, jiffies - t->when, t->cpu, t->pid); + s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); } static void print_tracking(struct kmem_cache *s, void *object) @@@ -692,7 -697,7 +697,7 @@@ static int check_object(struct kmem_cac if (!check_valid_pointer(s, page, get_freepointer(s, p))) { object_err(s, page, p, "Freepointer corrupt"); /* - * No choice but to zap it and thus loose the remainder + * No choice but to zap it and thus lose the remainder * of the free objects in this slab. May cause * another error because the object count is now wrong. */ @@@ -764,8 -769,8 +769,8 @@@ static int on_freelist(struct kmem_cach } max_objects = (PAGE_SIZE << compound_order(page)) / s->size; - if (max_objects > 65535) - max_objects = 65535; + if (max_objects > MAX_OBJS_PER_PAGE) + max_objects = MAX_OBJS_PER_PAGE; if (page->objects != max_objects) { slab_err(s, page, "Wrong number of objects. Found %d but " @@@ -866,7 -871,7 +871,7 @@@ static void setup_object_debug(struct k } static int alloc_debug_processing(struct kmem_cache *s, struct page *page, - void *object, void *addr) + void *object, unsigned long addr) { if (!check_slab(s, page)) goto bad; @@@ -906,7 -911,7 +911,7 @@@ bad } static int free_debug_processing(struct kmem_cache *s, struct page *page, - void *object, void *addr) + void *object, unsigned long addr) { if (!check_slab(s, page)) goto fail; @@@ -1029,10 -1034,10 +1034,10 @@@ static inline void setup_object_debug(s struct page *page, void *object) {} static inline int alloc_debug_processing(struct kmem_cache *s, - struct page *page, void *object, void *addr) { return 0; } + struct page *page, void *object, unsigned long addr) { return 0; } static inline int free_debug_processing(struct kmem_cache *s, - struct page *page, void *object, void *addr) { return 0; } + struct page *page, void *object, unsigned long addr) { return 0; } static inline int slab_pad_check(struct kmem_cache *s, struct page *page) { return 1; } @@@ -1499,8 -1504,8 +1504,8 @@@ static inline int node_match(struct kme * we need to allocate a new slab. 
This is the slowest path since it involves * a call to the page allocator and the setup of a new slab. */ - static void *__slab_alloc(struct kmem_cache *s, - gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) + static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + unsigned long addr, struct kmem_cache_cpu *c) { void **object; struct page *new; @@@ -1584,13 -1589,18 +1589,18 @@@ debug * Otherwise we can simply pick the next object from the lockless free list. */ static __always_inline void *slab_alloc(struct kmem_cache *s, - gfp_t gfpflags, int node, void *addr) + gfp_t gfpflags, int node, unsigned long addr) { void **object; struct kmem_cache_cpu *c; unsigned long flags; unsigned int objsize; + might_sleep_if(gfpflags & __GFP_WAIT); + + if (should_failslab(s->objsize, gfpflags)) + return NULL; + local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); objsize = c->objsize; @@@ -1613,14 -1623,14 +1623,14 @@@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) { - return slab_alloc(s, gfpflags, -1, __builtin_return_address(0)); + return slab_alloc(s, gfpflags, -1, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_alloc); #ifdef CONFIG_NUMA void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { - return slab_alloc(s, gfpflags, node, __builtin_return_address(0)); + return slab_alloc(s, gfpflags, node, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_alloc_node); #endif @@@ -1634,7 -1644,7 +1644,7 @@@ * handling required then we can return immediately. */ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, void *addr, unsigned int offset) + void *x, unsigned long addr, unsigned int offset) { void *prior; void **object = (void *)x; @@@ -1704,7 -1714,7 +1714,7 @@@ debug * with all sorts of special processing. */ static __always_inline void slab_free(struct kmem_cache *s, - struct page *page, void *x, void *addr) + struct page *page, void *x, unsigned long addr) { void **object = (void *)x; struct kmem_cache_cpu *c; @@@ -1731,11 -1741,11 +1741,11 @@@ void kmem_cache_free(struct kmem_cache page = virt_to_head_page(x); - slab_free(s, page, x, __builtin_return_address(0)); + slab_free(s, page, x, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_free); - /* Figure out on which slab object the object resides */ + /* Figure out on which slab page the object resides */ static struct page *get_object_page(const void *x) { struct page *page = virt_to_head_page(x); @@@ -1807,8 -1817,8 +1817,8 @@@ static inline int slab_order(int size, int rem; int min_order = slub_min_order; - if ((PAGE_SIZE << min_order) / size > 65535) - return get_order(size * 65535) - 1; + if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE) + return get_order(size * MAX_OBJS_PER_PAGE) - 1; for (order = max(min_order, fls(min_objects * size - 1) - PAGE_SHIFT); @@@ -2073,8 -2083,7 +2083,7 @@@ static inline int alloc_kmem_cache_cpus * when allocating for the kmalloc_node_cache. This is used for bootstrapping * memory on a fresh node that has no slab structures yet. 
*/ - static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, - int node) + static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node) { struct page *page; struct kmem_cache_node *n; @@@ -2112,7 -2121,6 +2121,6 @@@ local_irq_save(flags); add_partial(n, page, 0); local_irq_restore(flags); - return n; } static void free_kmem_cache_nodes(struct kmem_cache *s) @@@ -2144,8 -2152,7 +2152,7 @@@ static int init_kmem_cache_nodes(struc n = &s->local_node; else { if (slab_state == DOWN) { - n = early_kmem_cache_node_alloc(gfpflags, - node); + early_kmem_cache_node_alloc(gfpflags, node); continue; } n = kmem_cache_alloc_node(kmalloc_caches, @@@ -2659,7 -2666,7 +2666,7 @@@ void *__kmalloc(size_t size, gfp_t flag if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, flags, -1, __builtin_return_address(0)); + return slab_alloc(s, flags, -1, _RET_IP_); } EXPORT_SYMBOL(__kmalloc); @@@ -2687,7 -2694,7 +2694,7 @@@ void *__kmalloc_node(size_t size, gfp_ if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, flags, node, __builtin_return_address(0)); + return slab_alloc(s, flags, node, _RET_IP_); } EXPORT_SYMBOL(__kmalloc_node); #endif @@@ -2744,7 -2751,7 +2751,7 @@@ void kfree(const void *x put_page(page); return; } - slab_free(page->slab, page, object, __builtin_return_address(0)); + slab_free(page->slab, page, object, _RET_IP_); } EXPORT_SYMBOL(kfree); @@@ -3123,8 -3130,12 +3130,12 @@@ struct kmem_cache *kmem_cache_create(co s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); - if (sysfs_slab_alias(s, name)) + if (sysfs_slab_alias(s, name)) { + down_write(&slub_lock); + s->refcount--; + up_write(&slub_lock); goto err; + } return s; } @@@ -3134,8 -3145,13 +3145,13 @@@ size, align, flags, ctor)) { list_add(&s->list, &slab_caches); up_write(&slub_lock); - if (sysfs_slab_add(s)) + if (sysfs_slab_add(s)) { + down_write(&slub_lock); + list_del(&s->list); + up_write(&slub_lock); + kfree(s); goto err; + } return s; } kfree(s); @@@ -3202,7 -3218,7 +3218,7 @@@ static struct notifier_block __cpuinitd #endif - void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) + void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) { struct kmem_cache *s; @@@ -3218,7 -3234,7 +3234,7 @@@ } void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, - int node, void *caller) + int node, unsigned long caller) { struct kmem_cache *s; @@@ -3429,7 -3445,7 +3445,7 @@@ static void resiliency_test(void) {} struct location { unsigned long count; - void *addr; + unsigned long addr; long long sum_time; long min_time; long max_time; @@@ -3477,7 -3493,7 +3493,7 @@@ static int add_location(struct loc_trac { long start, end, pos; struct location *l; - void *caddr; + unsigned long caddr; unsigned long age = jiffies - track->when; start = -1; @@@ -3626,7 -3642,7 +3642,7 @@@ static int list_locations(struct kmem_c len < PAGE_SIZE - 60) { len += sprintf(buf + len, " cpus="); len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, - l->cpus); + &l->cpus); } if (num_online_nodes() > 1 && !nodes_empty(l->nodes) && @@@ -4345,7 -4361,7 +4361,7 @@@ static void sysfs_slab_remove(struct km /* * Need to buffer aliases during bootup until sysfs becomes - * available lest we loose that information. + * available lest we lose that information. */ struct saved_alias { struct kmem_cache *s;