www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
author Rusty Russell <rusty@rustcorp.com.au>
	Wed, 31 Dec 2008 12:35:57 +0000 (23:05 +1030)
committer Rusty Russell <rusty@rustcorp.com.au>
	Wed, 31 Dec 2008 12:35:57 +0000 (23:05 +1030)
Conflicts:

arch/x86/kernel/io_apic.c

29 files changed:
arch/arm/kernel/smp.c
arch/arm/mach-at91/at91rm9200_time.c
arch/arm/mach-pxa/time.c
arch/arm/mach-realview/core.c
arch/arm/mach-realview/localtimer.c
arch/arm/mach-sa1100/time.c
arch/arm/mach-versatile/core.c
arch/sparc/kernel/irq_64.c
arch/sparc/kernel/of_device_64.c
arch/sparc/kernel/pci_msi.c
arch/sparc/kernel/smp_32.c
arch/sparc/kernel/smp_64.c
arch/sparc/kernel/sparc_ksyms_32.c
arch/sparc/kernel/time_64.c
arch/x86/include/asm/pci.h
arch/x86/kernel/hpet.c
arch/x86/kernel/io_apic.c
arch/x86/kernel/irq_32.c
arch/x86/kernel/irq_64.c
drivers/xen/events.c
include/linux/interrupt.h
include/linux/irq.h
init/Kconfig
kernel/irq/chip.c
kernel/irq/manage.c
kernel/irq/proc.c
kernel/sched.c
kernel/trace/trace.c
mm/slub.c
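
A recurring change across the arm, sparc, and x86 clockevent hunks below is the cpumask conversion: .cpumask = cpumask_of_cpu(cpu) (or the CPU_MASK_CPU0 initializer), which hands each caller its own cpumask_t copy, becomes .cpumask = cpumask_of(cpu), a const struct cpumask pointer into shared read-only storage. The following is a minimal userspace model of that ownership change; the type and both helpers are local stand-ins, not the kernel definitions from <linux/cpumask.h>.

/*
 * Minimal userspace model of the cpumask_of_cpu() -> cpumask_of()
 * conversion.  Only the ownership change is modelled: a full cpumask_t
 * copied per caller versus a pointer to shared read-only storage.
 */
#include <stdio.h>

#define NR_CPUS 64

struct cpumask { unsigned long bits; };         /* stand-in: one word suffices */

/* old style: every caller receives, and typically stores, a full copy */
static struct cpumask cpumask_of_cpu(int cpu)
{
        struct cpumask m = { 1UL << cpu };
        return m;
}

/* new style: callers share one read-only mask per CPU and keep a pointer */
static const struct cpumask *cpumask_of(int cpu)
{
        static struct cpumask masks[NR_CPUS];

        masks[cpu].bits = 1UL << cpu;
        return &masks[cpu];
}

int main(void)
{
        struct cpumask old = cpumask_of_cpu(3);         /* whole struct copied */
        const struct cpumask *new = cpumask_of(3);      /* one pointer copied */

        printf("old: %#lx  new: %#lx\n", old.bits, new->bits);
        return 0;
}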

diff --combined arch/arm/kernel/smp.c
index bd905c0a73651f85d54fffb25146b50d7d267368,019237d21622ba2cde3669d4915dca02a590ea26..55fa7ff96a3e7aaf654d30c64677b1a4d666ee4b
  #include <asm/tlbflush.h>
  #include <asm/ptrace.h>
  
 -/*
 - * bitmask of present and online CPUs.
 - * The present bitmask indicates that the CPU is physically present.
 - * The online bitmask indicates that the CPU is up and running.
 - */
 -cpumask_t cpu_possible_map;
 -EXPORT_SYMBOL(cpu_possible_map);
 -cpumask_t cpu_online_map;
 -EXPORT_SYMBOL(cpu_online_map);
 -
  /*
   * as from 2.5, kernels no longer have an init_tasks structure
   * so we need some other way of telling a new secondary core
@@@ -171,7 -181,7 +171,7 @@@ int __cpuexit __cpu_disable(void
        /*
         * Stop the local timer for this CPU.
         */
-       local_timer_stop(cpu);
+       local_timer_stop();
  
        /*
         * Flush user cache and TLB mappings, and then remove this CPU
@@@ -274,7 -284,7 +274,7 @@@ asmlinkage void __cpuinit secondary_sta
        /*
         * Setup local timer for this CPU.
         */
-       local_timer_setup(cpu);
+       local_timer_setup();
  
        calibrate_delay();
  
diff --combined arch/arm/mach-at91/at91rm9200_time.c
index 72f51d39202c7a22d49a393657255d283b339889,d140eae53ded281bcc9f33fc6b991b1086c5fd55..1ff1bda0a894a4ce313ecb0896eb1011bd4a2413
@@@ -141,6 -141,15 +141,15 @@@ clkevt32k_next_event(unsigned long delt
        /* Use "raw" primitives so we behave correctly on RT kernels. */
        raw_local_irq_save(flags);
  
+       /*
+        * According to Thomas Gleixner irqs are already disabled here.  Simply
+        * removing raw_local_irq_save above (and the matching
+        * raw_local_irq_restore) was not accepted.  See
+        * http://thread.gmane.org/gmane.linux.ports.arm.kernel/41174
+        * So for now (2008-11-20) just warn once if irqs were not disabled ...
+        */
+       WARN_ON_ONCE(!raw_irqs_disabled_flags(flags));
        /* The alarm IRQ uses absolute time (now+delta), not the relative
         * time (delta) in our calling convention.  Like all clockevents
         * using such "match" hardware, we have a race to defend against.
@@@ -169,6 -178,7 +178,6 @@@ static struct clock_event_device clkev
        .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
        .shift          = 32,
        .rating         = 150,
 -      .cpumask        = CPU_MASK_CPU0,
        .set_next_event = clkevt32k_next_event,
        .set_mode       = clkevt32k_mode,
  };
@@@ -196,7 -206,7 +205,7 @@@ void __init at91rm9200_timer_init(void
        clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift);
        clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt);
        clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1;
 -      clkevt.cpumask = cpumask_of_cpu(0);
 +      clkevt.cpumask = cpumask_of(0);
        clockevents_register_device(&clkevt);
  
        /* register clocksource */
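
The WARN_ON_ONCE() added to clkevt32k_next_event() above reports the violated assumption the first time only, rather than logging on every timer reprogram. Below is a userspace sketch of the warn-once idiom, assuming GCC statement expressions (which the kernel macro also relies on); the per-callsite static flag mirrors the kernel's __warned variable.

#include <stdio.h>

#define WARN_ON_ONCE(cond)                                      \
({                                                              \
        static int __warned;                                    \
        int __ret = !!(cond);                                   \
        if (__ret && !__warned) {                               \
                __warned = 1;                                   \
                fprintf(stderr, "WARNING: %s:%d: %s\n",         \
                        __FILE__, __LINE__, #cond);             \
        }                                                       \
        __ret;                                                  \
})

int main(void)
{
        int i;

        for (i = 0; i < 3; i++)
                WARN_ON_ONCE(i >= 0);   /* condition holds thrice, warns once */
        return 0;
}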
diff --combined arch/arm/mach-pxa/time.c
index bf3c9a4aad509fc8fde9a6858f38e65e14bf60b9,0016241585190e3770de17ac6e02b0db57547a2f..95656a72268dd9f1a1ee7177c4ca5286d9d0a570
@@@ -22,8 -22,8 +22,8 @@@
  #include <asm/div64.h>
  #include <asm/mach/irq.h>
  #include <asm/mach/time.h>
+ #include <mach/hardware.h>
  #include <mach/pxa-regs.h>
- #include <asm/mach-types.h>
  
  /*
   * This is PXA's sched_clock implementation. This has a resolution
@@@ -122,6 -122,7 +122,6 @@@ static struct clock_event_device ckevt_
        .features       = CLOCK_EVT_FEAT_ONESHOT,
        .shift          = 32,
        .rating         = 200,
 -      .cpumask        = CPU_MASK_CPU0,
        .set_next_event = pxa_osmr0_set_next_event,
        .set_mode       = pxa_osmr0_set_mode,
  };
@@@ -149,18 -150,11 +149,11 @@@ static struct irqaction pxa_ost0_irq = 
  
  static void __init pxa_timer_init(void)
  {
-       unsigned long clock_tick_rate;
+       unsigned long clock_tick_rate = get_clock_tick_rate();
  
        OIER = 0;
        OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3;
  
-       if (cpu_is_pxa25x())
-               clock_tick_rate = 3686400;
-       else if (machine_is_mainstone())
-               clock_tick_rate = 3249600;
-       else
-               clock_tick_rate = 3250000;
        set_oscr2ns_scale(clock_tick_rate);
  
        ckevt_pxa_osmr0.mult =
                clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0);
        ckevt_pxa_osmr0.min_delta_ns =
                clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
 +      ckevt_pxa_osmr0.cpumask = cpumask_of(0);
  
        cksrc_pxa_oscr0.mult =
                clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
diff --combined arch/arm/mach-realview/core.c
index b07cb9b7adb15d5f4a6580ecc9315a401184ae25,5f1d55963cedb8e7b4d3cbd0b6e71a24c84b10c9..bd2aa4f16141d72895e655f63efe75cf6ffcd298
  #include <linux/clocksource.h>
  #include <linux/clockchips.h>
  #include <linux/io.h>
+ #include <linux/smc911x.h>
  
+ #include <asm/clkdev.h>
  #include <asm/system.h>
  #include <mach/hardware.h>
  #include <asm/irq.h>
  #include <asm/leds.h>
+ #include <asm/mach-types.h>
  #include <asm/hardware/arm_timer.h>
  #include <asm/hardware/icst307.h>
  
@@@ -49,7 -52,7 +52,7 @@@
  
  #define REALVIEW_REFCOUNTER   (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET)
  
- /* used by entry-macro.S */
+ /* used by entry-macro.S and platsmp.c */
  void __iomem *gic_cpu_base_addr;
  
  /*
@@@ -124,6 -127,29 +127,29 @@@ int realview_flash_register(struct reso
        return platform_device_register(&realview_flash_device);
  }
  
+ static struct smc911x_platdata realview_smc911x_platdata = {
+       .flags          = SMC911X_USE_32BIT,
+       .irq_flags      = IRQF_SHARED,
+       .irq_polarity   = 1,
+ };
+ static struct platform_device realview_eth_device = {
+       .name           = "smc911x",
+       .id             = 0,
+       .num_resources  = 2,
+ };
+ int realview_eth_register(const char *name, struct resource *res)
+ {
+       if (name)
+               realview_eth_device.name = name;
+       realview_eth_device.resource = res;
+       if (strcmp(realview_eth_device.name, "smc911x") == 0)
+               realview_eth_device.dev.platform_data = &realview_smc911x_platdata;
+       return platform_device_register(&realview_eth_device);
+ }
  static struct resource realview_i2c_resource = {
        .start          = REALVIEW_I2C_BASE,
        .end            = REALVIEW_I2C_BASE + SZ_4K - 1,
@@@ -177,9 -203,14 +203,14 @@@ static const struct icst307_params real
  static void realview_oscvco_set(struct clk *clk, struct icst307_vco vco)
  {
        void __iomem *sys_lock = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_LOCK_OFFSET;
-       void __iomem *sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC4_OFFSET;
+       void __iomem *sys_osc;
        u32 val;
  
+       if (machine_is_realview_pb1176())
+               sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC0_OFFSET;
+       else
+               sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC4_OFFSET;
        val = readl(sys_osc) & ~0x7ffff;
        val |= vco.v | (vco.r << 9) | (vco.s << 16);
  
        writel(0, sys_lock);
  }
  
- struct clk realview_clcd_clk = {
-       .name   = "CLCDCLK",
+ static struct clk oscvco_clk = {
        .params = &realview_oscvco_params,
        .setvco = realview_oscvco_set,
  };
  
+ /*
+  * These are fixed clocks.
+  */
+ static struct clk ref24_clk = {
+       .rate   = 24000000,
+ };
+ static struct clk_lookup lookups[] = {
+       {       /* UART0 */
+               .dev_id         = "dev:f1",
+               .clk            = &ref24_clk,
+       }, {    /* UART1 */
+               .dev_id         = "dev:f2",
+               .clk            = &ref24_clk,
+       }, {    /* UART2 */
+               .dev_id         = "dev:f3",
+               .clk            = &ref24_clk,
+       }, {    /* UART3 */
+               .dev_id         = "fpga:09",
+               .clk            = &ref24_clk,
+       }, {    /* KMI0 */
+               .dev_id         = "fpga:06",
+               .clk            = &ref24_clk,
+       }, {    /* KMI1 */
+               .dev_id         = "fpga:07",
+               .clk            = &ref24_clk,
+       }, {    /* MMC0 */
+               .dev_id         = "fpga:05",
+               .clk            = &ref24_clk,
+       }, {    /* EB:CLCD */
+               .dev_id         = "dev:20",
+               .clk            = &oscvco_clk,
+       }, {    /* PB:CLCD */
+               .dev_id         = "issp:20",
+               .clk            = &oscvco_clk,
+       }
+ };
+ static int __init clk_init(void)
+ {
+       int i;
+       for (i = 0; i < ARRAY_SIZE(lookups); i++)
+               clkdev_add(&lookups[i]);
+       return 0;
+ }
+ arch_initcall(clk_init);
  /*
   * CLCD support.
   */
@@@ -226,7 -304,30 +304,30 @@@ static struct clcd_panel vga = 
        .width          = -1,
        .height         = -1,
        .tim2           = TIM2_BCD | TIM2_IPC,
-       .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+       .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
+       .bpp            = 16,
+ };
+ static struct clcd_panel xvga = {
+       .mode           = {
+               .name           = "XVGA",
+               .refresh        = 60,
+               .xres           = 1024,
+               .yres           = 768,
+               .pixclock       = 15748,
+               .left_margin    = 152,
+               .right_margin   = 48,
+               .upper_margin   = 23,
+               .lower_margin   = 3,
+               .hsync_len      = 104,
+               .vsync_len      = 4,
+               .sync           = 0,
+               .vmode          = FB_VMODE_NONINTERLACED,
+       },
+       .width          = -1,
+       .height         = -1,
+       .tim2           = TIM2_BCD | TIM2_IPC,
+       .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
        .bpp            = 16,
  };
  
@@@ -249,7 -350,7 +350,7 @@@ static struct clcd_panel sanyo_3_8_in 
        .width          = -1,
        .height         = -1,
        .tim2           = TIM2_BCD,
-       .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+       .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
        .bpp            = 16,
  };
  
@@@ -272,7 -373,7 +373,7 @@@ static struct clcd_panel sanyo_2_5_in 
        .width          = -1,
        .height         = -1,
        .tim2           = TIM2_IVS | TIM2_IHS | TIM2_IPC,
-       .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+       .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
        .bpp            = 16,
  };
  
@@@ -295,7 -396,7 +396,7 @@@ static struct clcd_panel epson_2_2_in 
        .width          = -1,
        .height         = -1,
        .tim2           = TIM2_BCD | TIM2_IPC,
-       .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+       .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
        .bpp            = 16,
  };
  
  static struct clcd_panel *realview_clcd_panel(void)
  {
        void __iomem *sys_clcd = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_CLCD_OFFSET;
-       struct clcd_panel *panel = &vga;
+       struct clcd_panel *vga_panel;
+       struct clcd_panel *panel;
        u32 val;
  
+       if (machine_is_realview_eb())
+               vga_panel = &vga;
+       else
+               vga_panel = &xvga;
        val = readl(sys_clcd) & SYS_CLCD_ID_MASK;
        if (val == SYS_CLCD_ID_SANYO_3_8)
                panel = &sanyo_3_8_in;
        else if (val == SYS_CLCD_ID_EPSON_2_2)
                panel = &epson_2_2_in;
        else if (val == SYS_CLCD_ID_VGA)
-               panel = &vga;
+               panel = vga_panel;
        else {
                printk(KERN_ERR "CLCD: unknown LCD panel ID 0x%08x, using VGA\n",
                        val);
-               panel = &vga;
+               panel = vga_panel;
        }
  
        return panel;
@@@ -358,12 -465,18 +465,18 @@@ static void realview_clcd_enable(struc
        writel(val, sys_clcd);
  }
  
- static unsigned long framesize = SZ_1M;
  static int realview_clcd_setup(struct clcd_fb *fb)
  {
+       unsigned long framesize;
        dma_addr_t dma;
  
+       if (machine_is_realview_eb())
+               /* VGA, 16bpp */
+               framesize = 640 * 480 * 2;
+       else
+               /* XVGA, 16bpp */
+               framesize = 1024 * 768 * 2;
        fb->panel               = realview_clcd_panel();
  
        fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize,
@@@ -511,7 -624,7 +624,7 @@@ static struct clock_event_device timer0
        .set_mode       = timer_set_mode,
        .set_next_event = timer_set_next_event,
        .rating         = 300,
 -      .cpumask        = CPU_MASK_ALL,
 +      .cpumask        = cpu_all_mask,
  };
  
  static void __init realview_clockevents_init(unsigned int timer_irq)
@@@ -588,7 -701,7 +701,7 @@@ void __init realview_timer_init(unsigne
         * The dummy clock device has to be registered before the main device
         * so that the latter will broadcast the clock events
         */
-       local_timer_setup(smp_processor_id());
+       local_timer_setup();
  #endif
  
        /* 
diff --combined arch/arm/mach-realview/localtimer.c
index 504961ef343c2ef0f0e6be6bc052b997d90405fd,9019ef2e56115ac72f5b359bd2da1d68999a5d7b..67d6d9cc68b2a693b5edc2a89aa8d78125e294d3
@@@ -38,18 -38,14 +38,14 @@@ void local_timer_interrupt(void
  
  #ifdef CONFIG_LOCAL_TIMERS
  
- #define TWD_BASE(cpu) (twd_base_addr + (cpu) * twd_size)
  /* set up by the platform code */
- void __iomem *twd_base_addr;
- unsigned int twd_size;
+ void __iomem *twd_base;
  
  static unsigned long mpcore_timer_rate;
  
  static void local_timer_set_mode(enum clock_event_mode mode,
                                 struct clock_event_device *clk)
  {
-       void __iomem *base = TWD_BASE(smp_processor_id());
        unsigned long ctrl;
  
        switch(mode) {
                ctrl = 0;
        }
  
-       __raw_writel(ctrl, base + TWD_TIMER_CONTROL);
+       __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL);
  }
  
  static int local_timer_set_next_event(unsigned long evt,
                                      struct clock_event_device *unused)
  {
-       void __iomem *base = TWD_BASE(smp_processor_id());
-       unsigned long ctrl = __raw_readl(base + TWD_TIMER_CONTROL);
+       unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL);
  
-       __raw_writel(evt, base + TWD_TIMER_COUNTER);
-       __raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, base + TWD_TIMER_CONTROL);
+       __raw_writel(evt, twd_base + TWD_TIMER_COUNTER);
+       __raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, twd_base + TWD_TIMER_CONTROL);
  
        return 0;
  }
   */
  int local_timer_ack(void)
  {
-       void __iomem *base = TWD_BASE(smp_processor_id());
-       if (__raw_readl(base + TWD_TIMER_INTSTAT)) {
-               __raw_writel(1, base + TWD_TIMER_INTSTAT);
+       if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) {
+               __raw_writel(1, twd_base + TWD_TIMER_INTSTAT);
                return 1;
        }
  
        return 0;
  }
  
- static void __cpuinit twd_calibrate_rate(unsigned int cpu)
+ static void __cpuinit twd_calibrate_rate(void)
  {
-       void __iomem *base = TWD_BASE(cpu);
        unsigned long load, count;
        u64 waitjiffies;
  
                waitjiffies += 5;
  
                                 /* enable, no interrupt or reload */
-               __raw_writel(0x1, base + TWD_TIMER_CONTROL);
+               __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL);
  
                                 /* maximum value */
-               __raw_writel(0xFFFFFFFFU, base + TWD_TIMER_COUNTER);
+               __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER);
  
                while (get_jiffies_64() < waitjiffies)
                        udelay(10);
  
-               count = __raw_readl(base + TWD_TIMER_COUNTER);
+               count = __raw_readl(twd_base + TWD_TIMER_COUNTER);
  
                mpcore_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5);
  
  
        load = mpcore_timer_rate / HZ;
  
-       __raw_writel(load, base + TWD_TIMER_LOAD);
+       __raw_writel(load, twd_base + TWD_TIMER_LOAD);
  }
  
  /*
   * Setup the local clock events for a CPU.
   */
- void __cpuinit local_timer_setup(unsigned int cpu)
+ void __cpuinit local_timer_setup(void)
  {
+       unsigned int cpu = smp_processor_id();
        struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
        unsigned long flags;
  
-       twd_calibrate_rate(cpu);
+       twd_calibrate_rate();
  
        clk->name               = "local_timer";
        clk->features           = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
        clk->set_mode           = local_timer_set_mode;
        clk->set_next_event     = local_timer_set_next_event;
        clk->irq                = IRQ_LOCALTIMER;
 -      clk->cpumask            = cpumask_of_cpu(cpu);
 +      clk->cpumask            = cpumask_of(cpu);
        clk->shift              = 20;
        clk->mult               = div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
        clk->max_delta_ns       = clockevent_delta2ns(0xffffffff, clk);
  /*
   * take a local timer down
   */
- void __cpuexit local_timer_stop(unsigned int cpu)
+ void __cpuexit local_timer_stop(void)
  {
-       __raw_writel(0, TWD_BASE(cpu) + TWD_TIMER_CONTROL);
+       __raw_writel(0, twd_base + TWD_TIMER_CONTROL);
  }
  
  #else /* CONFIG_LOCAL_TIMERS */
@@@ -190,8 -183,9 +183,9 @@@ static void dummy_timer_set_mode(enum c
  {
  }
  
- void __cpuinit local_timer_setup(unsigned int cpu)
+ void __cpuinit local_timer_setup(void)
  {
+       unsigned int cpu = smp_processor_id();
        struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
  
        clk->name               = "dummy_timer";
        clk->rating             = 200;
        clk->set_mode           = dummy_timer_set_mode;
        clk->broadcast          = smp_timer_broadcast;
 -      clk->cpumask            = cpumask_of_cpu(cpu);
 +      clk->cpumask            = cpumask_of(cpu);
  
        clockevents_register_device(clk);
  }
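
localtimer.c above drops the cpu parameter from local_timer_setup() and local_timer_stop(): both only ever run on the CPU they configure, so the callee can derive the id itself with smp_processor_id(), and the single twd_base pointer replaces the per-cpu TWD_BASE(cpu) arithmetic. A userspace model of the calling-convention change follows; smp_processor_id() is a stand-in here, where the kernel reads per-cpu data.

#include <stdio.h>

static int current_cpu = 2;             /* stand-in for "the CPU we run on" */

static int smp_processor_id(void)
{
        return current_cpu;
}

/* old: the caller passed cpu, which could in principle disagree with
 * the CPU actually executing the function */
static void local_timer_setup_old(unsigned int cpu)
{
        printf("setup local timer for cpu %u (caller's claim)\n", cpu);
}

/* new: no argument to get wrong */
static void local_timer_setup(void)
{
        unsigned int cpu = smp_processor_id();

        printf("setup local timer for cpu %u (self-derived)\n", cpu);
}

int main(void)
{
        local_timer_setup_old(3);       /* compiles even though we run on cpu 2 */
        local_timer_setup();
        return 0;
}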
diff --combined arch/arm/mach-sa1100/time.c
index 1cac4ac0b4b89e7af0dc56116817d163744a542c,8c5e727f3b751ffb0e87b176402ae75adb276cfc..711c0295c66f1710de34e20f3d9a88c2eeb99609
@@@ -2,8 -2,8 +2,8 @@@
   * linux/arch/arm/mach-sa1100/time.c
   *
   * Copyright (C) 1998 Deborah Wallach.
-  * Twiddles  (C) 1999         Hugo Fiennes <hugo@empeg.com>
-  * 
+  * Twiddles  (C) 1999 Hugo Fiennes <hugo@empeg.com>
+  *
   * 2000/03/29 (C) Nicolas Pitre <nico@cam.org>
   *    Rewritten: big cleanup, much simpler, better HZ accuracy.
   *
@@@ -73,6 -73,7 +73,6 @@@ static struct clock_event_device ckevt_
        .features       = CLOCK_EVT_FEAT_ONESHOT,
        .shift          = 32,
        .rating         = 200,
 -      .cpumask        = CPU_MASK_CPU0,
        .set_next_event = sa1100_osmr0_set_next_event,
        .set_mode       = sa1100_osmr0_set_mode,
  };
@@@ -109,7 -110,6 +109,7 @@@ static void __init sa1100_timer_init(vo
                clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0);
        ckevt_sa1100_osmr0.min_delta_ns =
                clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
 +      ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
  
        cksrc_sa1100_oscr.mult =
                clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
diff --combined arch/arm/mach-versatile/core.c
index a3f1933434e261d604c20f665a8b2b6dde450b0b,df25aa138509c95aec6dfcb391f8409de5b182a8..1c43494f5c422092d713a860e1623e83078b9d05
@@@ -31,6 -31,7 +31,7 @@@
  #include <linux/cnt32_to_63.h>
  #include <linux/io.h>
  
+ #include <asm/clkdev.h>
  #include <asm/system.h>
  #include <mach/hardware.h>
  #include <asm/irq.h>
@@@ -373,22 -374,60 +374,60 @@@ static const struct icst307_params vers
  
  static void versatile_oscvco_set(struct clk *clk, struct icst307_vco vco)
  {
-       void __iomem *sys_lock = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_LOCK_OFFSET;
-       void __iomem *sys_osc = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_OSCCLCD_OFFSET;
+       void __iomem *sys = __io_address(VERSATILE_SYS_BASE);
+       void __iomem *sys_lock = sys + VERSATILE_SYS_LOCK_OFFSET;
        u32 val;
  
-       val = readl(sys_osc) & ~0x7ffff;
+       val = readl(sys + clk->oscoff) & ~0x7ffff;
        val |= vco.v | (vco.r << 9) | (vco.s << 16);
  
        writel(0xa05f, sys_lock);
-       writel(val, sys_osc);
+       writel(val, sys + clk->oscoff);
        writel(0, sys_lock);
  }
  
- static struct clk versatile_clcd_clk = {
-       .name   = "CLCDCLK",
+ static struct clk osc4_clk = {
        .params = &versatile_oscvco_params,
-       .setvco = versatile_oscvco_set,
+       .oscoff = VERSATILE_SYS_OSCCLCD_OFFSET,
+       .setvco = versatile_oscvco_set,
+ };
+ /*
+  * These are fixed clocks.
+  */
+ static struct clk ref24_clk = {
+       .rate   = 24000000,
+ };
+ static struct clk_lookup lookups[] __initdata = {
+       {       /* UART0 */
+               .dev_id         = "dev:f1",
+               .clk            = &ref24_clk,
+       }, {    /* UART1 */
+               .dev_id         = "dev:f2",
+               .clk            = &ref24_clk,
+       }, {    /* UART2 */
+               .dev_id         = "dev:f3",
+               .clk            = &ref24_clk,
+       }, {    /* UART3 */
+               .dev_id         = "fpga:09",
+               .clk            = &ref24_clk,
+       }, {    /* KMI0 */
+               .dev_id         = "fpga:06",
+               .clk            = &ref24_clk,
+       }, {    /* KMI1 */
+               .dev_id         = "fpga:07",
+               .clk            = &ref24_clk,
+       }, {    /* MMC0 */
+               .dev_id         = "fpga:05",
+               .clk            = &ref24_clk,
+       }, {    /* MMC1 */
+               .dev_id         = "fpga:0b",
+               .clk            = &ref24_clk,
+       }, {    /* CLCD */
+               .dev_id         = "dev:20",
+               .clk            = &osc4_clk,
+       }
  };
  
  /*
@@@ -786,7 -825,8 +825,8 @@@ void __init versatile_init(void
  {
        int i;
  
-       clk_register(&versatile_clcd_clk);
+       for (i = 0; i < ARRAY_SIZE(lookups); i++)
+               clkdev_add(&lookups[i]);
  
        platform_device_register(&versatile_flash_device);
        platform_device_register(&versatile_i2c_device);
@@@ -965,7 -1005,7 +1005,7 @@@ static void __init versatile_timer_init
        timer0_clockevent.min_delta_ns =
                clockevent_delta2ns(0xf, &timer0_clockevent);
  
 -      timer0_clockevent.cpumask = cpumask_of_cpu(0);
 +      timer0_clockevent.cpumask = cpumask_of(0);
        clockevents_register_device(&timer0_clockevent);
  }
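
Both the RealView and Versatile files above replace the single named CLCDCLK clk_register() with a table of struct clk_lookup entries passed to clkdev_add(), so consumers are matched by device id rather than by clock name. Below is a userspace model of that lookup; the types and the clk_get() analogue are local stand-ins for the clkdev machinery, with a few dev_id strings borrowed from the tables above.

#include <stdio.h>
#include <string.h>

struct clk { unsigned long rate; };

static struct clk ref24_clk = { 24000000 };     /* fixed 24 MHz reference */
static struct clk osc4_clk;                     /* programmable VCO */

struct clk_lookup {
        const char *dev_id;
        struct clk *clk;
};

static const struct clk_lookup lookups[] = {
        { "dev:f1",  &ref24_clk },      /* UART0 */
        { "fpga:05", &ref24_clk },      /* MMC0 */
        { "dev:20",  &osc4_clk },       /* CLCD */
};

/* clk_get() analogue: match a consumer's device id against the table */
static struct clk *clk_get(const char *dev_id)
{
        size_t i;

        for (i = 0; i < sizeof(lookups) / sizeof(lookups[0]); i++)
                if (strcmp(lookups[i].dev_id, dev_id) == 0)
                        return lookups[i].clk;
        return NULL;
}

int main(void)
{
        struct clk *clk = clk_get("dev:f1");

        printf("UART0 clock: %lu Hz\n", clk ? clk->rate : 0UL);
        return 0;
}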
  
diff --combined arch/sparc/kernel/irq_64.c
index 4aaf18e83c8c24766c4a18ae5a3716f7a6249402,a3ea2bcb95de6a39ebb7bf297ee2355f507f178c..cab8e02868716d691a38b9bad239ec754dd39134
@@@ -312,8 -312,7 +312,8 @@@ static void sun4u_irq_enable(unsigned i
        }
  }
  
 -static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
 +static void sun4u_set_affinity(unsigned int virt_irq,
 +                             const struct cpumask *mask)
  {
        sun4u_irq_enable(virt_irq);
  }
@@@ -363,8 -362,7 +363,8 @@@ static void sun4v_irq_enable(unsigned i
                       ino, err);
  }
  
 -static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
 +static void sun4v_set_affinity(unsigned int virt_irq,
 +                             const struct cpumask *mask)
  {
        unsigned int ino = virt_irq_table[virt_irq].dev_ino;
        unsigned long cpuid = irq_choose_cpu(virt_irq);
@@@ -431,8 -429,7 +431,8 @@@ static void sun4v_virq_enable(unsigned 
                       dev_handle, dev_ino, err);
  }
  
 -static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
 +static void sun4v_virt_set_affinity(unsigned int virt_irq,
 +                                  const struct cpumask *mask)
  {
        unsigned long cpuid, dev_handle, dev_ino;
        int err;
@@@ -778,6 -775,69 +778,69 @@@ void do_softirq(void
        local_irq_restore(flags);
  }
  
+ static void unhandled_perf_irq(struct pt_regs *regs)
+ {
+       unsigned long pcr, pic;
+       read_pcr(pcr);
+       read_pic(pic);
+       write_pcr(0);
+       printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
+              smp_processor_id());
+       printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
+              smp_processor_id(), pcr, pic);
+ }
+ /* Almost a direct copy of the powerpc PMC code.  */
+ static DEFINE_SPINLOCK(perf_irq_lock);
+ static void *perf_irq_owner_caller; /* mostly for debugging */
+ static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
+ /* Invoked from level 15 PIL handler in trap table.  */
+ void perfctr_irq(int irq, struct pt_regs *regs)
+ {
+       clear_softint(1 << irq);
+       perf_irq(regs);
+ }
+ int register_perfctr_intr(void (*handler)(struct pt_regs *))
+ {
+       int ret;
+       if (!handler)
+               return -EINVAL;
+       spin_lock(&perf_irq_lock);
+       if (perf_irq != unhandled_perf_irq) {
+               printk(KERN_WARNING "register_perfctr_intr: "
+                      "perf IRQ busy (reserved by caller %p)\n",
+                      perf_irq_owner_caller);
+               ret = -EBUSY;
+               goto out;
+       }
+       perf_irq_owner_caller = __builtin_return_address(0);
+       perf_irq = handler;
+       ret = 0;
+ out:
+       spin_unlock(&perf_irq_lock);
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(register_perfctr_intr);
+ void release_perfctr_intr(void (*handler)(struct pt_regs *))
+ {
+       spin_lock(&perf_irq_lock);
+       perf_irq_owner_caller = NULL;
+       perf_irq = unhandled_perf_irq;
+       spin_unlock(&perf_irq_lock);
+ }
+ EXPORT_SYMBOL_GPL(release_perfctr_intr);
  #ifdef CONFIG_HOTPLUG_CPU
  void fixup_irqs(void)
  {
                    !(irq_desc[irq].status & IRQ_PER_CPU)) {
                        if (irq_desc[irq].chip->set_affinity)
                                irq_desc[irq].chip->set_affinity(irq,
 -                                      irq_desc[irq].affinity);
 +                                      &irq_desc[irq].affinity);
                }
                spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
        }
diff --combined arch/sparc/kernel/of_device_64.c
index 4f6098d318ec21993f6173769eeca4898989fdbb,46e231f7c5ce2c37cf51b6111a0175f9dcc4280e..4873f28905b082b07859d15389984b8f7dff55a8
@@@ -778,9 -778,9 +778,9 @@@ static unsigned int __init build_one_de
  out:
        nid = of_node_to_nid(dp);
        if (nid != -1) {
 -              cpumask_t numa_mask = node_to_cpumask(nid);
 +              cpumask_t numa_mask = *cpumask_of_node(nid);
  
 -              irq_set_affinity(irq, numa_mask);
 +              irq_set_affinity(irq, &numa_mask);
        }
  
        return irq;
@@@ -811,20 -811,20 +811,20 @@@ static struct of_device * __init scan_o
  
        irq = of_get_property(dp, "interrupts", &len);
        if (irq) {
-               memcpy(op->irqs, irq, len);
                op->num_irqs = len / 4;
+               /* Prevent overrunning the op->irqs[] array.  */
+               if (op->num_irqs > PROMINTR_MAX) {
+                       printk(KERN_WARNING "%s: Too many irqs (%d), "
+                              "limiting to %d.\n",
+                              dp->full_name, op->num_irqs, PROMINTR_MAX);
+                       op->num_irqs = PROMINTR_MAX;
+               }
+               memcpy(op->irqs, irq, op->num_irqs * 4);
        } else {
                op->num_irqs = 0;
        }
  
-       /* Prevent overrunning the op->irqs[] array.  */
-       if (op->num_irqs > PROMINTR_MAX) {
-               printk(KERN_WARNING "%s: Too many irqs (%d), "
-                      "limiting to %d.\n",
-                      dp->full_name, op->num_irqs, PROMINTR_MAX);
-               op->num_irqs = PROMINTR_MAX;
-       }
        build_device_resources(op, parent);
        for (i = 0; i < op->num_irqs; i++)
                op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]);
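
The of_device_64.c hunk above reorders a bounds check: the old code memcpy()'d the full "interrupts" property (len bytes) into op->irqs[] and clamped op->num_irqs afterwards, by which point an oversized property had already overrun the array. The fix clamps first and copies only the clamped amount. A small model of the corrected order follows, with a stand-in capacity.

#include <stdio.h>
#include <string.h>

#define PROMINTR_MAX 15         /* stand-in for the op->irqs[] capacity */

static unsigned int irqs[PROMINTR_MAX];

static int copy_irqs(const unsigned int *prop, int len)
{
        int num = len / 4;

        /* clamp before the copy, so the destination can never overrun */
        if (num > PROMINTR_MAX) {
                fprintf(stderr, "too many irqs (%d), limiting to %d\n",
                        num, PROMINTR_MAX);
                num = PROMINTR_MAX;
        }
        memcpy(irqs, prop, num * 4);
        return num;
}

int main(void)
{
        unsigned int prop[20] = { 0 };  /* oversized firmware property */

        printf("copied %d entries\n", copy_irqs(prop, sizeof(prop)));
        return 0;
}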
diff --combined arch/sparc/kernel/pci_msi.c
index 4ef282e8191208b4bc954614d8f1b9d675385177,2e680f34f727fa61f5defef92e63b8144e365d70..4ef282e8191208b4bc954614d8f1b9d675385177
@@@ -286,9 -286,9 +286,9 @@@ static int bringup_one_msi_queue(struc
  
        nid = pbm->numa_node;
        if (nid != -1) {
 -              cpumask_t numa_mask = node_to_cpumask(nid);
 +              cpumask_t numa_mask = *cpumask_of_node(nid);
  
 -              irq_set_affinity(irq, numa_mask);
 +              irq_set_affinity(irq, &numa_mask);
        }
        err = request_irq(irq, sparc64_msiq_interrupt, 0,
                          "MSIQ",
diff --combined arch/sparc/kernel/smp_32.c
index 1e5ac4e282e1285030aaa43380b91c01b7bcff48,e396c1f17a922deaef7b03a0751d06f1345770fd..1e5ac4e282e1285030aaa43380b91c01b7bcff48
@@@ -39,6 -39,8 +39,6 @@@ volatile unsigned long cpu_callin_map[N
  unsigned char boot_cpu_id = 0;
  unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
  
 -cpumask_t cpu_online_map = CPU_MASK_NONE;
 -cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
  cpumask_t smp_commenced_mask = CPU_MASK_NONE;
  
  /* The only guaranteed locking primitive available on all Sparc
@@@ -332,7 -334,7 +332,7 @@@ void __init smp_setup_cpu_possible_map(
        instance = 0;
        while (!cpu_find_by_instance(instance, NULL, &mid)) {
                if (mid < NR_CPUS) {
 -                      cpu_set(mid, phys_cpu_present_map);
 +                      cpu_set(mid, cpu_possible_map);
                        cpu_set(mid, cpu_present_map);
                }
                instance++;
@@@ -352,7 -354,7 +352,7 @@@ void __init smp_prepare_boot_cpu(void
  
        current_thread_info()->cpu = cpuid;
        cpu_set(cpuid, cpu_online_map);
 -      cpu_set(cpuid, phys_cpu_present_map);
 +      cpu_set(cpuid, cpu_possible_map);
  }
  
  int __cpuinit __cpu_up(unsigned int cpu)
diff --combined arch/sparc/kernel/smp_64.c
index a97b8822c22ca029acc8ea1fe0fc6cc90c88efd0,bfe99d82d458702d32bf52a863cf00345e04a8e2..46329799f3462bb4002024558be74f204e679f07
  
  int sparc64_multi_core __read_mostly;
  
 -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
 -cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
  DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
  cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
        { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
  
 -EXPORT_SYMBOL(cpu_possible_map);
 -EXPORT_SYMBOL(cpu_online_map);
  EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
  EXPORT_SYMBOL(cpu_core_map);
  
@@@ -159,7 -163,7 +159,7 @@@ static inline long get_delta (long *rt
        for (i = 0; i < NUM_ITERS; i++) {
                t0 = tick_ops->get_tick();
                go[MASTER] = 1;
-               membar_storeload();
+               membar_safe("#StoreLoad");
                while (!(tm = go[SLAVE]))
                        rmb();
                go[SLAVE] = 0;
@@@ -253,7 -257,7 +253,7 @@@ static void smp_synchronize_one_tick(in
  
        /* now let the client proceed into his loop */
        go[MASTER] = 0;
-       membar_storeload();
+       membar_safe("#StoreLoad");
  
        spin_lock_irqsave(&itc_sync_lock, flags);
        {
                        go[MASTER] = 0;
                        wmb();
                        go[SLAVE] = tick_ops->get_tick();
-                       membar_storeload();
+                       membar_safe("#StoreLoad");
                }
        }
        spin_unlock_irqrestore(&itc_sync_lock, flags);
@@@ -769,7 -773,7 +769,7 @@@ static void xcall_deliver(u64 data0, u6
  
        /* Setup the initial cpu list.  */
        cnt = 0;
-       for_each_cpu_mask_nr(i, *mask) {
+       for_each_cpu(i, mask) {
                if (i == this_cpu || !cpu_online(i))
                        continue;
                cpu_list[cnt++] = i;
@@@ -1118,7 -1122,6 +1118,6 @@@ void smp_capture(void
                       smp_processor_id());
  #endif
                penguins_are_doing_time = 1;
-               membar_storestore_loadstore();
                atomic_inc(&smp_capture_registry);
                smp_cross_call(&xcall_capture, 0, 0, 0);
                while (atomic_read(&smp_capture_registry) != ncpus)
@@@ -1138,13 -1141,13 +1137,13 @@@ void smp_release(void
                       smp_processor_id());
  #endif
                penguins_are_doing_time = 0;
-               membar_storeload_storestore();
+               membar_safe("#StoreLoad");
                atomic_dec(&smp_capture_registry);
        }
  }
  
- /* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they
-  * can service tlb flush xcalls...
+ /* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE
+  * set, so they can service tlb flush xcalls...
   */
  extern void prom_world(int);
  
@@@ -1157,7 -1160,7 +1156,7 @@@ void smp_penguin_jailcell(int irq, stru
        __asm__ __volatile__("flushw");
        prom_world(1);
        atomic_inc(&smp_capture_registry);
-       membar_storeload_storestore();
+       membar_safe("#StoreLoad");
        while (penguins_are_doing_time)
                rmb();
        atomic_dec(&smp_capture_registry);
diff --combined arch/sparc/kernel/sparc_ksyms_32.c
index 32d11a5fe3a86f9e5f60acfefff09a2dd0d98c41,a4d45fc29b21e4ac9c9c069bdab7b8c277c2f6f5..e1e97639231b208e8b01ee99373771020b49ec2f
@@@ -61,7 -61,6 +61,6 @@@ extern void (*bzero_1page)(void *)
  extern void *__bzero(void *, size_t);
  extern void *__memscan_zero(void *, size_t);
  extern void *__memscan_generic(void *, int, size_t);
- extern int __memcmp(const void *, const void *, __kernel_size_t);
  extern int __strncmp(const char *, const char *, __kernel_size_t);
  
  extern int __ashrdi3(int, int);
@@@ -113,15 -112,17 +112,13 @@@ EXPORT_PER_CPU_SYMBOL(__cpu_data)
  #ifdef CONFIG_SMP
  /* IRQ implementation. */
  EXPORT_SYMBOL(synchronize_irq);
 -
 -/* CPU online map and active count. */
 -EXPORT_SYMBOL(cpu_online_map);
 -EXPORT_SYMBOL(phys_cpu_present_map);
  #endif
  
  EXPORT_SYMBOL(__udelay);
  EXPORT_SYMBOL(__ndelay);
  EXPORT_SYMBOL(rtc_lock);
- #ifdef CONFIG_SUN_AUXIO
  EXPORT_SYMBOL(set_auxio);
  EXPORT_SYMBOL(get_auxio);
- #endif
  EXPORT_SYMBOL(io_remap_pfn_range);
  
  #ifndef CONFIG_SMP
@@@ -209,7 -210,6 +206,6 @@@ EXPORT_SYMBOL(bzero_1page)
  EXPORT_SYMBOL(__bzero);
  EXPORT_SYMBOL(__memscan_zero);
  EXPORT_SYMBOL(__memscan_generic);
- EXPORT_SYMBOL(__memcmp);
  EXPORT_SYMBOL(__strncmp);
  EXPORT_SYMBOL(__memmove);
  
diff --combined arch/sparc/kernel/time_64.c
index 9df8f095a8b11a59e448bec001ac6aec852c480c,141da375909129dea0ab0fffc9d5359d85e9a039..9df8f095a8b11a59e448bec001ac6aec852c480c
@@@ -763,7 -763,7 +763,7 @@@ void __devinit setup_sparc64_timer(void
        sevt = &__get_cpu_var(sparc64_events);
  
        memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
 -      sevt->cpumask = cpumask_of_cpu(smp_processor_id());
 +      sevt->cpumask = cpumask_of(smp_processor_id());
  
        clockevents_register_device(sevt);
  }
diff --combined arch/x86/include/asm/pci.h
index f8959c7a985f4f6f8ce581f1967ad95100fd1b64,66834c41c0493eccf1b117b1565443c10ec706b6..a977de23cb4d83320e5255de21ffc073a0859403
@@@ -84,6 -84,8 +84,8 @@@ static inline void pci_dma_burst_advice
  static inline void early_quirks(void) { }
  #endif
  
+ extern void pci_iommu_alloc(void);
  #endif  /* __KERNEL__ */
  
  #ifdef CONFIG_X86_32
  
  #ifdef CONFIG_NUMA
  /* Returns the node based on pci bus */
 -static inline int __pcibus_to_node(struct pci_bus *bus)
 +static inline int __pcibus_to_node(const struct pci_bus *bus)
  {
 -      struct pci_sysdata *sd = bus->sysdata;
 +      const struct pci_sysdata *sd = bus->sysdata;
  
        return sd->node;
  }
@@@ -111,12 -113,6 +113,12 @@@ static inline cpumask_t __pcibus_to_cpu
  {
        return node_to_cpumask(__pcibus_to_node(bus));
  }
 +
 +static inline const struct cpumask *
 +cpumask_of_pcibus(const struct pci_bus *bus)
 +{
 +      return cpumask_of_node(__pcibus_to_node(bus));
 +}
  #endif
  
  #endif /* _ASM_X86_PCI_H */
diff --combined arch/x86/kernel/hpet.c
index b5310ff1259e891d1f07891d43e9ee80efb01205,845ea097383ee4051a24b54bca8da4c29bd9f6d1..cd759ad90690e72d109aed4309adeb87755977e5
@@@ -248,7 -248,7 +248,7 @@@ static void hpet_legacy_clockevent_regi
         * Start hpet with the boot cpu mask and make it
         * global after the IO_APIC has been initialized.
         */
 -      hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
 +      hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
        clockevents_register_device(&hpet_clockevent);
        global_clock_event = &hpet_clockevent;
        printk(KERN_DEBUG "hpet clockevent registered\n");
@@@ -303,7 -303,7 +303,7 @@@ static void hpet_set_mode(enum clock_ev
                        struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
                        hpet_setup_msi_irq(hdev->irq);
                        disable_irq(hdev->irq);
 -                      irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
 +                      irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
                        enable_irq(hdev->irq);
                }
                break;
@@@ -451,7 -451,7 +451,7 @@@ static int hpet_setup_irq(struct hpet_d
                return -1;
  
        disable_irq(dev->irq);
 -      irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
 +      irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
        enable_irq(dev->irq);
  
        printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@@ -502,7 -502,7 +502,7 @@@ static void init_one_hpet_msi_clockeven
        /* 5 usec minimum reprogramming delta. */
        evt->min_delta_ns = 5000;
  
 -      evt->cpumask = cpumask_of_cpu(hdev->cpu);
 +      evt->cpumask = cpumask_of(hdev->cpu);
        clockevents_register_device(evt);
  }
  
@@@ -813,7 -813,7 +813,7 @@@ int __init hpet_enable(void
  
  out_nohpet:
        hpet_clear_mapping();
-       boot_hpet_disable = 1;
+       hpet_address = 0;
        return 0;
  }
  
@@@ -836,10 -836,11 +836,11 @@@ static __init int hpet_late_init(void
  
                hpet_address = force_hpet_address;
                hpet_enable();
-               if (!hpet_virt_address)
-                       return -ENODEV;
        }
  
+       if (!hpet_virt_address)
+               return -ENODEV;
        hpet_reserve_platform_timers(hpet_readl(HPET_ID));
  
        for_each_online_cpu(cpu) {
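
In hpet.c above, the !hpet_virt_address bail-out moves from inside the force-enable branch of hpet_late_init() to after it, so a normal boot whose hpet_enable() failed (which now clears hpet_address rather than setting boot_hpet_disable) also returns -ENODEV before touching the platform timers. A minimal control-flow model follows; all names are stand-ins, and only the hoisted guard is the point.

#include <stdio.h>

#define ENODEV 19

static unsigned long hpet_virt_address; /* 0 = mapping failed / absent */
static unsigned long force_hpet_address;

static void hpet_enable(void)
{
        /* pretend the mapping failed */
        hpet_virt_address = 0;
}

static int hpet_late_init(void)
{
        if (force_hpet_address) {
                hpet_enable();
                /* old code returned -ENODEV only from inside this branch */
        }

        if (!hpet_virt_address)         /* new: guards the normal path too */
                return -ENODEV;

        printf("reserving HPET platform timers\n");
        return 0;
}

int main(void)
{
        printf("hpet_late_init() = %d\n", hpet_late_init());
        return 0;
}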
diff --combined arch/x86/kernel/io_apic.c
index 6dbf427175ffa349ee377d86b09c6f339eb8db74,f6ea94b74da146072cca138aa824ed5a3d5eeec7..e7745961ed314c7a03e32ce2db46edf7d44e63c9
@@@ -108,94 -108,253 +108,253 @@@ static int __init parse_noapic(char *st
  early_param("noapic", parse_noapic);
  
  struct irq_pin_list;
+ /*
+  * This is performance-critical, we want to do it O(1)
+  *
+  * the indexing order of this array favors 1:1 mappings
+  * between pins and IRQs.
+  */
+ struct irq_pin_list {
+       int apic, pin;
+       struct irq_pin_list *next;
+ };
+ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+ {
+       struct irq_pin_list *pin;
+       int node;
+       node = cpu_to_node(cpu);
+       pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+       printk(KERN_DEBUG "  alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+       return pin;
+ }
  struct irq_cfg {
-       unsigned int irq;
        struct irq_pin_list *irq_2_pin;
        cpumask_t domain;
        cpumask_t old_domain;
        unsigned move_cleanup_count;
        u8 vector;
        u8 move_in_progress : 1;
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+       u8 move_desc_pending : 1;
+ #endif
  };
  
  /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+ #ifdef CONFIG_SPARSE_IRQ
+ static struct irq_cfg irq_cfgx[] = {
+ #else
  static struct irq_cfg irq_cfgx[NR_IRQS] = {
-       [0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+ #endif
+       [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+       [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+       [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+       [3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+       [4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+       [5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+       [6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+       [7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+       [8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+       [9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+       [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+       [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+       [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+       [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+       [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
  };
  
- #define for_each_irq_cfg(irq, cfg)            \
-       for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+ void __init arch_early_irq_init(void)
+ {
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+       int count;
+       int i;
+       cfg = irq_cfgx;
+       count = ARRAY_SIZE(irq_cfgx);
  
+       for (i = 0; i < count; i++) {
+               desc = irq_to_desc(i);
+               desc->chip_data = &cfg[i];
+       }
+ }
+ #ifdef CONFIG_SPARSE_IRQ
  static struct irq_cfg *irq_cfg(unsigned int irq)
  {
-       return irq < nr_irqs ? irq_cfgx + irq : NULL;
+       struct irq_cfg *cfg = NULL;
+       struct irq_desc *desc;
+       desc = irq_to_desc(irq);
+       if (desc)
+               cfg = desc->chip_data;
+       return cfg;
  }
  
- static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
  {
-       return irq_cfg(irq);
+       struct irq_cfg *cfg;
+       int node;
+       node = cpu_to_node(cpu);
+       cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+       printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
+       return cfg;
  }
  
- /*
-  * Rough estimation of how many shared IRQs there are, can be changed
-  * anytime.
-  */
- #define MAX_PLUS_SHARED_IRQS NR_IRQS
- #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+ void arch_init_chip_data(struct irq_desc *desc, int cpu)
+ {
+       struct irq_cfg *cfg;
  
- /*
-  * This is performance-critical, we want to do it O(1)
-  *
-  * the indexing order of this array favors 1:1 mappings
-  * between pins and IRQs.
-  */
+       cfg = desc->chip_data;
+       if (!cfg) {
+               desc->chip_data = get_one_free_irq_cfg(cpu);
+               if (!desc->chip_data) {
+                       printk(KERN_ERR "can not alloc irq_cfg\n");
+                       BUG_ON(1);
+               }
+       }
+ }
  
- struct irq_pin_list {
-       int apic, pin;
-       struct irq_pin_list *next;
- };
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ static void
+ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+ {
+       struct irq_pin_list *old_entry, *head, *tail, *entry;
+       cfg->irq_2_pin = NULL;
+       old_entry = old_cfg->irq_2_pin;
+       if (!old_entry)
+               return;
+       entry = get_one_free_irq_2_pin(cpu);
+       if (!entry)
+               return;
+       entry->apic     = old_entry->apic;
+       entry->pin      = old_entry->pin;
+       head            = entry;
+       tail            = entry;
+       old_entry       = old_entry->next;
+       while (old_entry) {
+               entry = get_one_free_irq_2_pin(cpu);
+               if (!entry) {
+                       entry = head;
+                       while (entry) {
+                               head = entry->next;
+                               kfree(entry);
+                               entry = head;
+                       }
+                       /* still use the old one */
+                       return;
+               }
+               entry->apic     = old_entry->apic;
+               entry->pin      = old_entry->pin;
+               tail->next      = entry;
+               tail            = entry;
+               old_entry       = old_entry->next;
+       }
  
- static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
- static struct irq_pin_list *irq_2_pin_ptr;
+       tail->next = NULL;
+       cfg->irq_2_pin = head;
+ }
  
- static void __init irq_2_pin_init(void)
+ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
  {
-       struct irq_pin_list *pin = irq_2_pin_head;
-       int i;
+       struct irq_pin_list *entry, *next;
+       if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+               return;
  
-       for (i = 1; i < PIN_MAP_SIZE; i++)
-               pin[i-1].next = &pin[i];
+       entry = old_cfg->irq_2_pin;
  
-       irq_2_pin_ptr = &pin[0];
+       while (entry) {
+               next = entry->next;
+               kfree(entry);
+               entry = next;
+       }
+       old_cfg->irq_2_pin = NULL;
  }
  
- static struct irq_pin_list *get_one_free_irq_2_pin(void)
+ void arch_init_copy_chip_data(struct irq_desc *old_desc,
+                                struct irq_desc *desc, int cpu)
  {
-       struct irq_pin_list *pin = irq_2_pin_ptr;
+       struct irq_cfg *cfg;
+       struct irq_cfg *old_cfg;
  
-       if (!pin)
-               panic("can not get more irq_2_pin\n");
+       cfg = get_one_free_irq_cfg(cpu);
  
-       irq_2_pin_ptr = pin->next;
-       pin->next = NULL;
-       return pin;
+       if (!cfg)
+               return;
+       desc->chip_data = cfg;
+       old_cfg = old_desc->chip_data;
+       memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+       init_copy_irq_2_pin(old_cfg, cfg, cpu);
+ }
+ static void free_irq_cfg(struct irq_cfg *old_cfg)
+ {
+       kfree(old_cfg);
+ }
+ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+ {
+       struct irq_cfg *old_cfg, *cfg;
+       old_cfg = old_desc->chip_data;
+       cfg = desc->chip_data;
+       if (old_cfg == cfg)
+               return;
+       if (old_cfg) {
+               free_irq_2_pin(old_cfg, cfg);
+               free_irq_cfg(old_cfg);
+               old_desc->chip_data = NULL;
+       }
  }
  
+ static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+ {
+       struct irq_cfg *cfg = desc->chip_data;
+       if (!cfg->move_in_progress) {
+               /* it means that domain is not changed */
+               if (!cpus_intersects(desc->affinity, mask))
+                       cfg->move_desc_pending = 1;
+       }
+ }
+ #endif
+ #else
+ static struct irq_cfg *irq_cfg(unsigned int irq)
+ {
+       return irq < nr_irqs ? irq_cfgx + irq : NULL;
+ }
+ #endif
+ #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+ {
+ }
+ #endif
  struct io_apic {
        unsigned int index;
        unsigned int unused[3];
@@@ -237,11 -396,10 +396,10 @@@ static inline void io_apic_modify(unsig
        writel(value, &io_apic->data);
  }
  
- static bool io_apic_level_ack_pending(unsigned int irq)
+ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
  {
        struct irq_pin_list *entry;
        unsigned long flags;
-       struct irq_cfg *cfg = irq_cfg(irq);
  
        spin_lock_irqsave(&ioapic_lock, flags);
        entry = cfg->irq_2_pin;
@@@ -323,13 -481,12 +481,12 @@@ static void ioapic_mask_entry(int apic
  }
  
  #ifdef CONFIG_SMP
- static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
  {
        int apic, pin;
-       struct irq_cfg *cfg;
        struct irq_pin_list *entry;
+       u8 vector = cfg->vector;
  
-       cfg = irq_cfg(irq);
        entry = cfg->irq_2_pin;
        for (;;) {
                unsigned int reg;
        }
  }
  
- static int assign_irq_vector(int irq, cpumask_t mask);
+ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
  
- static void set_ioapic_affinity_irq(unsigned int irq,
-                                   const struct cpumask *mask)
+ static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
  {
        struct irq_cfg *cfg;
        unsigned long flags;
        unsigned int dest;
        cpumask_t tmp;
-       struct irq_desc *desc;
+       unsigned int irq;
  
-       if (!cpumask_intersects(mask, cpu_online_mask))
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
                return;
  
-       cfg = irq_cfg(irq);
-       if (assign_irq_vector(irq, *mask))
+       irq = desc->irq;
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, mask))
                return;
  
-       cpumask_and(&tmp, &cfg->domain, mask);
+       set_extra_move_desc(desc, mask);
+       cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
        /*
         * Only the high 8 bits are valid.
         */
        dest = SET_APIC_LOGICAL_ID(dest);
  
-       desc = irq_to_desc(irq);
        spin_lock_irqsave(&ioapic_lock, flags);
-       __target_IO_APIC_irq(irq, dest, cfg->vector);
-       cpumask_copy(&desc->affinity, mask);
+       __target_IO_APIC_irq(irq, dest, cfg);
+       desc->affinity = mask;
        spin_unlock_irqrestore(&ioapic_lock, flags);
  }
 -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 -      set_ioapic_affinity_irq_desc(desc, mask);
++static void set_ioapic_affinity_irq(unsigned int irq,
++                                  const struct cpumask *mask)
+ {
+       struct irq_desc *desc;
+       desc = irq_to_desc(irq);
++      set_ioapic_affinity_irq_desc(desc, *mask);
+ }
  #endif /* CONFIG_SMP */
  
  /*
   * shared ISA-space IRQs, so we have to support them. We are super
   * fast in the common case, and fast for shared ISA-space IRQs.
   */
- static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
  {
-       struct irq_cfg *cfg;
        struct irq_pin_list *entry;
  
-       /* first time to refer irq_cfg, so with new */
-       cfg = irq_cfg_alloc(irq);
        entry = cfg->irq_2_pin;
        if (!entry) {
-               entry = get_one_free_irq_2_pin();
+               entry = get_one_free_irq_2_pin(cpu);
+               if (!entry) {
+                       printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+                                       apic, pin);
+                       return;
+               }
                cfg->irq_2_pin = entry;
                entry->apic = apic;
                entry->pin = pin;
                entry = entry->next;
        }
  
-       entry->next = get_one_free_irq_2_pin();
+       entry->next = get_one_free_irq_2_pin(cpu);
        entry = entry->next;
        entry->apic = apic;
        entry->pin = pin;
  /*
   * Reroute an IRQ to a different pin.
   */
- static void __init replace_pin_at_irq(unsigned int irq,
+ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
                                      int oldapic, int oldpin,
                                      int newapic, int newpin)
  {
-       struct irq_cfg *cfg = irq_cfg(irq);
        struct irq_pin_list *entry = cfg->irq_2_pin;
        int replaced = 0;
  
  
        /* why? call replace before add? */
        if (!replaced)
-               add_pin_to_irq(irq, newapic, newpin);
+               add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
  }
  
- static inline void io_apic_modify_irq(unsigned int irq,
+ static inline void io_apic_modify_irq(struct irq_cfg *cfg,
                                int mask_and, int mask_or,
                                void (*final)(struct irq_pin_list *entry))
  {
        int pin;
-       struct irq_cfg *cfg;
        struct irq_pin_list *entry;
  
-       cfg = irq_cfg(irq);
        for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
                unsigned int reg;
                pin = entry->pin;
        }
  }
  
- static void __unmask_IO_APIC_irq(unsigned int irq)
+ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
  }
  
  #ifdef CONFIG_X86_64
@@@ -492,47 -659,64 +660,64 @@@ void io_apic_sync(struct irq_pin_list *
        readl(&io_apic->data);
  }
  
- static void __mask_IO_APIC_irq(unsigned int irq)
+ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
  }
  #else /* CONFIG_X86_32 */
- static void __mask_IO_APIC_irq(unsigned int irq)
+ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
  }
  
- static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+ static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
                        IO_APIC_REDIR_MASKED, NULL);
  }
  
- static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
                        IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
  }
  #endif /* CONFIG_X86_32 */
  
- static void mask_IO_APIC_irq (unsigned int irq)
+ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
  {
+       struct irq_cfg *cfg = desc->chip_data;
        unsigned long flags;
  
+       BUG_ON(!cfg);
        spin_lock_irqsave(&ioapic_lock, flags);
-       __mask_IO_APIC_irq(irq);
+       __mask_IO_APIC_irq(cfg);
        spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  
- static void unmask_IO_APIC_irq (unsigned int irq)
+ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
  {
+       struct irq_cfg *cfg = desc->chip_data;
        unsigned long flags;
  
        spin_lock_irqsave(&ioapic_lock, flags);
-       __unmask_IO_APIC_irq(irq);
+       __unmask_IO_APIC_irq(cfg);
        spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  
+ static void mask_IO_APIC_irq(unsigned int irq)
+ {
+       struct irq_desc *desc = irq_to_desc(irq);
+       mask_IO_APIC_irq_desc(desc);
+ }
+ static void unmask_IO_APIC_irq(unsigned int irq)
+ {
+       struct irq_desc *desc = irq_to_desc(irq);
+       unmask_IO_APIC_irq_desc(desc);
+ }
  static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
  {
        struct IO_APIC_route_entry entry;
@@@ -809,7 -993,7 +994,7 @@@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vecto
   */
  static int EISA_ELCR(unsigned int irq)
  {
-       if (irq < 16) {
+       if (irq < NR_IRQS_LEGACY) {
                unsigned int port = 0x4d0 + (irq >> 3);
                return (inb(port) >> (irq & 7)) & 1;
        }
@@@ -1034,7 -1218,7 +1219,7 @@@ void unlock_vector_lock(void
        spin_unlock(&vector_lock);
  }
  
- static int __assign_irq_vector(int irq, cpumask_t mask)
+ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
  {
        /*
         * NOTE! The local APIC isn't very good at handling
        static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
        unsigned int old_vector;
        int cpu;
-       struct irq_cfg *cfg;
  
-       cfg = irq_cfg(irq);
+       if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+               return -EBUSY;
  
        /* Only try and allocate irqs on cpus that are present */
        cpus_and(mask, mask, cpu_online_map);
  
-       if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-               return -EBUSY;
        old_vector = cfg->vector;
        if (old_vector) {
                cpumask_t tmp;
@@@ -1113,24 -1294,22 +1295,22 @@@ next
        return -ENOSPC;
  }
  
- static int assign_irq_vector(int irq, cpumask_t mask)
+ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
  {
        int err;
        unsigned long flags;
  
        spin_lock_irqsave(&vector_lock, flags);
-       err = __assign_irq_vector(irq, mask);
+       err = __assign_irq_vector(irq, cfg, mask);
        spin_unlock_irqrestore(&vector_lock, flags);
        return err;
  }
  
- static void __clear_irq_vector(int irq)
+ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
  {
-       struct irq_cfg *cfg;
        cpumask_t mask;
        int cpu, vector;
  
-       cfg = irq_cfg(irq);
        BUG_ON(!cfg->vector);
  
        vector = cfg->vector;
@@@ -1162,9 -1341,13 +1342,13 @@@ void __setup_vector_irq(int cpu
        /* This function must be called with vector_lock held */
        int irq, vector;
        struct irq_cfg *cfg;
+       struct irq_desc *desc;
  
        /* Mark the inuse vectors */
-       for_each_irq_cfg(irq, cfg) {
+       for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
+               cfg = desc->chip_data;
                if (!cpu_isset(cpu, cfg->domain))
                        continue;
                vector = cfg->vector;
@@@ -1215,11 -1398,8 +1399,8 @@@ static inline int IO_APIC_irq_trigger(i
  }
  #endif
  
- static void ioapic_register_intr(int irq, unsigned long trigger)
+ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
  {
-       struct irq_desc *desc;
-       desc = irq_to_desc(irq);
  
        if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
            trigger == IOAPIC_LEVEL)
@@@ -1311,7 -1491,7 +1492,7 @@@ static int setup_ioapic_entry(int apic
        return 0;
  }
  
- static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
                              int trigger, int polarity)
  {
        struct irq_cfg *cfg;
        if (!IO_APIC_IRQ(irq))
                return;
  
-       cfg = irq_cfg(irq);
+       cfg = desc->chip_data;
  
        mask = TARGET_CPUS;
-       if (assign_irq_vector(irq, mask))
+       if (assign_irq_vector(irq, cfg, mask))
                return;
  
        cpus_and(mask, cfg->domain, mask);
                               cfg->vector)) {
                printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
                       mp_ioapics[apic].mp_apicid, pin);
-               __clear_irq_vector(irq);
+               __clear_irq_vector(irq, cfg);
                return;
        }
  
-       ioapic_register_intr(irq, trigger);
-       if (irq < 16)
+       ioapic_register_intr(irq, desc, trigger);
+       if (irq < NR_IRQS_LEGACY)
                disable_8259A_irq(irq);
  
        ioapic_write_entry(apic, pin, entry);
@@@ -1356,6 -1536,9 +1537,9 @@@ static void __init setup_IO_APIC_irqs(v
  {
        int apic, pin, idx, irq;
        int notcon = 0;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int cpu = boot_cpu_id;
  
        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
  
                        if (multi_timer_check(apic, irq))
                                continue;
  #endif
-                       add_pin_to_irq(irq, apic, pin);
+                       desc = irq_to_desc_alloc_cpu(irq, cpu);
+                       if (!desc) {
+                               printk(KERN_INFO "cannot get irq_desc for %d\n", irq);
+                               continue;
+                       }
+                       cfg = desc->chip_data;
+                       add_pin_to_irq_cpu(cfg, cpu, apic, pin);
  
-                       setup_IO_APIC_irq(apic, pin, irq,
+                       setup_IO_APIC_irq(apic, pin, irq, desc,
                                        irq_trigger(idx), irq_polarity(idx));
                }
        }
@@@ -1448,6 -1637,7 +1638,7 @@@ __apicdebuginit(void) print_IO_APIC(voi
        union IO_APIC_reg_03 reg_03;
        unsigned long flags;
        struct irq_cfg *cfg;
+       struct irq_desc *desc;
        unsigned int irq;
  
        if (apic_verbosity == APIC_QUIET)
        }
        }
        printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for_each_irq_cfg(irq, cfg) {
-               struct irq_pin_list *entry = cfg->irq_2_pin;
+       for_each_irq_desc(irq, desc) {
+               struct irq_pin_list *entry;
+               if (!desc)
+                       continue;
+               cfg = desc->chip_data;
+               entry = cfg->irq_2_pin;
                if (!entry)
                        continue;
                printk(KERN_DEBUG "IRQ%d ", irq);
@@@ -2022,14 -2217,16 +2218,16 @@@ static unsigned int startup_ioapic_irq(
  {
        int was_pending = 0;
        unsigned long flags;
+       struct irq_cfg *cfg;
  
        spin_lock_irqsave(&ioapic_lock, flags);
-       if (irq < 16) {
+       if (irq < NR_IRQS_LEGACY) {
                disable_8259A_irq(irq);
                if (i8259A_irq_pending(irq))
                        was_pending = 1;
        }
-       __unmask_IO_APIC_irq(irq);
+       cfg = irq_cfg(irq);
+       __unmask_IO_APIC_irq(cfg);
        spin_unlock_irqrestore(&ioapic_lock, flags);
  
        return was_pending;
@@@ -2092,35 -2289,37 +2290,37 @@@ static DECLARE_DELAYED_WORK(ir_migratio
   * as simple as edge triggered migration and we can do the irq migration
   * with a simple atomic update to IO-APIC RTE.
   */
- static void migrate_ioapic_irq(int irq, cpumask_t mask)
+ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
  {
        struct irq_cfg *cfg;
-       struct irq_desc *desc;
        cpumask_t tmp, cleanup_mask;
        struct irte irte;
        int modify_ioapic_rte;
        unsigned int dest;
        unsigned long flags;
+       unsigned int irq;
  
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
                return;
  
+       irq = desc->irq;
        if (get_irte(irq, &irte))
                return;
  
-       if (assign_irq_vector(irq, mask))
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, mask))
                return;
  
-       cfg = irq_cfg(irq);
+       set_extra_move_desc(desc, mask);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
-       desc = irq_to_desc(irq);
        modify_ioapic_rte = desc->status & IRQ_LEVEL;
        if (modify_ioapic_rte) {
                spin_lock_irqsave(&ioapic_lock, flags);
-               __target_IO_APIC_irq(irq, dest, cfg->vector);
+               __target_IO_APIC_irq(irq, dest, cfg);
                spin_unlock_irqrestore(&ioapic_lock, flags);
        }
  
        desc->affinity = mask;
  }
  
- static int migrate_irq_remapped_level(int irq)
+ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
  {
        int ret = -1;
-       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg = desc->chip_data;
  
-       mask_IO_APIC_irq(irq);
+       mask_IO_APIC_irq_desc(desc);
  
-       if (io_apic_level_ack_pending(irq)) {
+       if (io_apic_level_ack_pending(cfg)) {
                /*
                 * Interrupt in progress. Migrating irq now will change the
                 * vector information in the IO-APIC RTE and that will confuse
        }
  
        /* everything is clear. we have right of way */
-       migrate_ioapic_irq(irq, desc->pending_mask);
+       migrate_ioapic_irq_desc(desc, desc->pending_mask);
  
        ret = 0;
        desc->status &= ~IRQ_MOVE_PENDING;
        cpus_clear(desc->pending_mask);
  
  unmask:
-       unmask_IO_APIC_irq(irq);
+       unmask_IO_APIC_irq_desc(desc);
        return ret;
  }
  
@@@ -2178,6 -2378,9 +2379,9 @@@ static void ir_irq_migration(struct wor
        struct irq_desc *desc;
  
        for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
                if (desc->status & IRQ_MOVE_PENDING) {
                        unsigned long flags;
  
                                continue;
                        }
  
 -                      desc->chip->set_affinity(irq, desc->pending_mask);
 +                      desc->chip->set_affinity(irq, &desc->pending_mask);
                        spin_unlock_irqrestore(&desc->lock, flags);
                }
        }
  /*
   * Migrates the IRQ destination in the process context.
   */
- static void set_ir_ioapic_affinity_irq(unsigned int irq,
-                                      const struct cpumask *mask)
+ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
  {
-       struct irq_desc *desc = irq_to_desc(irq);
        if (desc->status & IRQ_LEVEL) {
                desc->status |= IRQ_MOVE_PENDING;
-               cpumask_copy(&desc->pending_mask, mask);
-               migrate_irq_remapped_level(irq);
+               desc->pending_mask = mask;
+               migrate_irq_remapped_level_desc(desc);
                return;
        }
  
-       migrate_ioapic_irq(irq, *mask);
+       migrate_ioapic_irq_desc(desc, mask);
+ }
 -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
++static void set_ir_ioapic_affinity_irq(unsigned int irq,
++                                     const struct cpumask *mask)
+ {
+       struct irq_desc *desc = irq_to_desc(irq);
 -      set_ir_ioapic_affinity_irq_desc(desc, mask);
++      set_ir_ioapic_affinity_irq_desc(desc, *mask);
  }
  #endif
  
@@@ -2229,6 -2435,9 +2437,9 @@@ asmlinkage void smp_irq_move_cleanup_in
                struct irq_cfg *cfg;
                irq = __get_cpu_var(vector_irq)[vector];
  
+               if (irq == -1)
+                       continue;
                desc = irq_to_desc(irq);
                if (!desc)
                        continue;
@@@ -2250,19 -2459,40 +2461,40 @@@ unlock
        irq_exit();
  }
  
- static void irq_complete_move(unsigned int irq)
+ static void irq_complete_move(struct irq_desc **descp)
  {
-       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_desc *desc = *descp;
+       struct irq_cfg *cfg = desc->chip_data;
        unsigned vector, me;
  
-       if (likely(!cfg->move_in_progress))
+       if (likely(!cfg->move_in_progress)) {
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+               if (likely(!cfg->move_desc_pending))
+                       return;
+               /* domain has not changed, but affinity did */
+               me = smp_processor_id();
+               if (cpu_isset(me, desc->affinity)) {
+                       *descp = desc = move_irq_desc(desc, me);
+                       /* get the new one */
+                       cfg = desc->chip_data;
+                       cfg->move_desc_pending = 0;
+               }
+ #endif
                return;
+       }
  
        vector = ~get_irq_regs()->orig_ax;
        me = smp_processor_id();
        if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
                cpumask_t cleanup_mask;
  
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+               *descp = desc = move_irq_desc(desc, me);
+               /* get the new one */
+               cfg = desc->chip_data;
+ #endif
                cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
                cfg->move_cleanup_count = cpus_weight(cleanup_mask);
                send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
        }
  }
  #else
- static inline void irq_complete_move(unsigned int irq) {}
+ static inline void irq_complete_move(struct irq_desc **descp) {}
  #endif
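
irq_complete_move() now takes a struct irq_desc ** because, under
CONFIG_NUMA_MIGRATE_IRQ_DESC, move_irq_desc() may hand back a different
descriptor; callers must keep using the updated pointer. A sketch of the
contract (mirroring ack_apic_edge below):

	struct irq_desc *desc = irq_to_desc(irq);

	irq_complete_move(&desc);	/* may rewrite 'desc' on NUMA migration */
	/* from here on, use only 'desc'; a saved copy could be stale */
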
  #ifdef CONFIG_INTR_REMAP
  static void ack_x2apic_level(unsigned int irq)
  {
@@@ -2282,11 -2513,14 +2515,14 @@@ static void ack_x2apic_edge(unsigned in
  {
        ack_x2APIC_irq();
  }
  #endif
  
  static void ack_apic_edge(unsigned int irq)
  {
-       irq_complete_move(irq);
+       struct irq_desc *desc = irq_to_desc(irq);
+       irq_complete_move(&desc);
        move_native_irq(irq);
        ack_APIC_irq();
  }
@@@ -2295,18 -2529,21 +2531,21 @@@ atomic_t irq_mis_count
  
  static void ack_apic_level(unsigned int irq)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
  #ifdef CONFIG_X86_32
        unsigned long v;
        int i;
  #endif
+       struct irq_cfg *cfg;
        int do_unmask_irq = 0;
  
-       irq_complete_move(irq);
+       irq_complete_move(&desc);
  #ifdef CONFIG_GENERIC_PENDING_IRQ
        /* If we are moving the irq we need to mask it */
-       if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+       if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
                do_unmask_irq = 1;
-               mask_IO_APIC_irq(irq);
+               mask_IO_APIC_irq_desc(desc);
        }
  #endif
  
        * operation to prevent an edge-triggered interrupt escaping meanwhile.
        * The idea is from Manfred Spraul.  --macro
        */
-       i = irq_cfg(irq)->vector;
+       cfg = desc->chip_data;
+       i = cfg->vector;
  
        v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
  #endif
                 * accurate and is causing problems then it is a hardware bug
                 * and you can go talk to the chipset vendor about it.
                 */
-               if (!io_apic_level_ack_pending(irq))
+               cfg = desc->chip_data;
+               if (!io_apic_level_ack_pending(cfg))
                        move_masked_irq(irq);
-               unmask_IO_APIC_irq(irq);
+               unmask_IO_APIC_irq_desc(desc);
        }
  
  #ifdef CONFIG_X86_32
        if (!(v & (1 << (i & 0x1f)))) {
                atomic_inc(&irq_mis_count);
                spin_lock(&ioapic_lock);
-               __mask_and_edge_IO_APIC_irq(irq);
-               __unmask_and_level_IO_APIC_irq(irq);
+               __mask_and_edge_IO_APIC_irq(cfg);
+               __unmask_and_level_IO_APIC_irq(cfg);
                spin_unlock(&ioapic_lock);
        }
  #endif
@@@ -2430,20 -2669,22 +2671,22 @@@ static inline void init_IO_APIC_traps(v
         * Also, we've got to be careful not to trash gate
         * 0x80, because int 0x80 is hm, kind of importantish. ;)
         */
-       for_each_irq_cfg(irq, cfg) {
-               if (IO_APIC_IRQ(irq) && !cfg->vector) {
+       for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
+               cfg = desc->chip_data;
+               if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
                        /*
                         * Hmm.. We don't have an entry for this,
                         * so default to an old-fashioned 8259
                         * interrupt if we can..
                         */
-                       if (irq < 16)
+                       if (irq < NR_IRQS_LEGACY)
                                make_8259A_irq(irq);
-                       else {
-                               desc = irq_to_desc(irq);
+                       else
                                /* Strange. Oh, well.. */
                                desc->chip = &no_irq_chip;
-                       }
                }
        }
  }
@@@ -2468,7 -2709,7 +2711,7 @@@ static void unmask_lapic_irq(unsigned i
        apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
  }
  
- static void ack_lapic_irq (unsigned int irq)
+ static void ack_lapic_irq(unsigned int irq)
  {
        ack_APIC_irq();
  }
@@@ -2480,11 -2721,8 +2723,8 @@@ static struct irq_chip lapic_chip __rea
        .ack            = ack_lapic_irq,
  };
  
- static void lapic_register_intr(int irq)
+ static void lapic_register_intr(int irq, struct irq_desc *desc)
  {
-       struct irq_desc *desc;
-       desc = irq_to_desc(irq);
        desc->status &= ~IRQ_LEVEL;
        set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
                                      "edge");
@@@ -2588,7 -2826,9 +2828,9 @@@ int timer_through_8259 __initdata
   */
  static inline void __init check_timer(void)
  {
-       struct irq_cfg *cfg = irq_cfg(0);
+       struct irq_desc *desc = irq_to_desc(0);
+       struct irq_cfg *cfg = desc->chip_data;
+       int cpu = boot_cpu_id;
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
        unsigned int ver;
         * get/set the timer IRQ vector:
         */
        disable_8259A_irq(0);
-       assign_irq_vector(0, TARGET_CPUS);
+       assign_irq_vector(0, cfg, TARGET_CPUS);
  
        /*
         * As IRQ0 is to be enabled in the 8259A, the virtual
                 * Ok, does IRQ0 through the IOAPIC work?
                 */
                if (no_pin1) {
-                       add_pin_to_irq(0, apic1, pin1);
+                       add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                }
-               unmask_IO_APIC_irq(0);
+               unmask_IO_APIC_irq_desc(desc);
                if (timer_irq_works()) {
                        if (nmi_watchdog == NMI_IO_APIC) {
                                setup_nmi();
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+               replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
                setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-               unmask_IO_APIC_irq(0);
+               unmask_IO_APIC_irq_desc(desc);
                enable_8259A_irq(0);
                if (timer_irq_works()) {
                        apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
        apic_printk(APIC_QUIET, KERN_INFO
                    "...trying to set up timer as Virtual Wire IRQ...\n");
  
-       lapic_register_intr(0);
+       lapic_register_intr(0, desc);
        apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
        enable_8259A_irq(0);
  
@@@ -2902,22 -3142,26 +3144,26 @@@ unsigned int create_irq_nr(unsigned in
        unsigned int irq;
        unsigned int new;
        unsigned long flags;
-       struct irq_cfg *cfg_new;
-       irq_want = nr_irqs - 1;
+       struct irq_cfg *cfg_new = NULL;
+       int cpu = boot_cpu_id;
+       struct irq_desc *desc_new = NULL;
  
        irq = 0;
        spin_lock_irqsave(&vector_lock, flags);
-       for (new = irq_want; new > 0; new--) {
+       for (new = irq_want; new < NR_IRQS; new++) {
                if (platform_legacy_irq(new))
                        continue;
-               cfg_new = irq_cfg(new);
-               if (cfg_new && cfg_new->vector != 0)
+               desc_new = irq_to_desc_alloc_cpu(new, cpu);
+               if (!desc_new) {
+                       printk(KERN_INFO "cannot get irq_desc for %d\n", new);
                        continue;
-               /* check if need to create one */
-               if (!cfg_new)
-                       cfg_new = irq_cfg_alloc(new);
-               if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+               }
+               cfg_new = desc_new->chip_data;
+               if (cfg_new->vector != 0)
+                       continue;
+               if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
                        irq = new;
                break;
        }
  
        if (irq > 0) {
                dynamic_irq_init(irq);
+               /* restore it, in case dynamic_irq_init cleared it */
+               if (desc_new)
+                       desc_new->chip_data = cfg_new;
        }
        return irq;
  }
  
+ static int nr_irqs_gsi = NR_IRQS_LEGACY;
  int create_irq(void)
  {
+       unsigned int irq_want;
        int irq;
  
-       irq = create_irq_nr(nr_irqs - 1);
+       irq_want = nr_irqs_gsi;
+       irq = create_irq_nr(irq_want);
  
        if (irq == 0)
                irq = -1;
  void destroy_irq(unsigned int irq)
  {
        unsigned long flags;
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
  
+       /* store it, in case dynamic_irq_cleanup clears it */
+       desc = irq_to_desc(irq);
+       cfg = desc->chip_data;
        dynamic_irq_cleanup(irq);
+       /* reconnect the irq_cfg */
+       if (desc)
+               desc->chip_data = cfg;
  
  #ifdef CONFIG_INTR_REMAP
        free_irte(irq);
  #endif
        spin_lock_irqsave(&vector_lock, flags);
-       __clear_irq_vector(irq);
+       __clear_irq_vector(irq, cfg);
        spin_unlock_irqrestore(&vector_lock, flags);
  }
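
A hedged usage sketch of the create/destroy pair above (the error policy is
the hypothetical caller's, not this file's):

	int irq = create_irq();		/* scans upward from nr_irqs_gsi */

	if (irq < 0)
		return -ENOSPC;
	/* ... request_irq(), use the vector ... */
	destroy_irq(irq);		/* frees the vector, keeps chip_data linked */
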
  
@@@ -2966,12 -3224,12 +3226,12 @@@ static int msi_compose_msg(struct pci_d
        unsigned dest;
        cpumask_t tmp;
  
+       cfg = irq_cfg(irq);
        tmp = TARGET_CPUS;
-       err = assign_irq_vector(irq, tmp);
+       err = assign_irq_vector(irq, cfg, tmp);
        if (err)
                return err;
  
-       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, tmp);
        dest = cpu_mask_to_apicid(tmp);
  
  }
  
  #ifdef CONFIG_SMP
 -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 +static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
-       struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
-       if (assign_irq_vector(irq, *mask))
+       cfg = desc->chip_data;
 -      if (assign_irq_vector(irq, cfg, mask))
++      if (assign_irq_vector(irq, cfg, *mask))
                return;
  
-       cfg = irq_cfg(irq);
 -      set_extra_move_desc(desc, mask);
++      set_extra_move_desc(desc, *mask);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
-       read_msi_msg(irq, &msg);
+       read_msi_msg_desc(desc, &msg);
  
        msg.data &= ~MSI_DATA_VECTOR_MASK;
        msg.data |= MSI_DATA_VECTOR(cfg->vector);
        msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
-       write_msi_msg(irq, &msg);
-       desc = irq_to_desc(irq);
+       write_msi_msg_desc(desc, &msg);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #ifdef CONFIG_INTR_REMAP
  /*
   * Migrate the MSI irq to another cpumask. This migration is
   * done in the process context using interrupt-remapping hardware.
   */
 -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 +static void ir_set_msi_irq_affinity(unsigned int irq,
 +                                  const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp, cleanup_mask;
        struct irte irte;
-       struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
        if (get_irte(irq, &irte))
                return;
  
-       if (assign_irq_vector(irq, *mask))
+       cfg = desc->chip_data;
 -      if (assign_irq_vector(irq, cfg, mask))
++      if (assign_irq_vector(irq, cfg, *mask))
                return;
  
-       cfg = irq_cfg(irq);
 -      set_extra_move_desc(desc, mask);
++      set_extra_move_desc(desc, *mask);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        irte.vector = cfg->vector;
                cfg->move_in_progress = 0;
        }
  
-       desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif
  #endif /* CONFIG_SMP */
  
@@@ -3165,7 -3426,7 +3427,7 @@@ static int msi_alloc_irte(struct pci_de
  }
  #endif
  
- static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
  {
        int ret;
        struct msi_msg msg;
        if (ret < 0)
                return ret;
  
-       set_irq_msi(irq, desc);
+       set_irq_msi(irq, msidesc);
        write_msi_msg(irq, &msg);
  
  #ifdef CONFIG_INTR_REMAP
        return 0;
  }
  
- static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
- {
-       unsigned int irq;
-       irq = dev->bus->number;
-       irq <<= 8;
-       irq |= dev->devfn;
-       irq <<= 12;
-       return irq;
- }
- int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
  {
        unsigned int irq;
        int ret;
        unsigned int irq_want;
  
-       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+       irq_want = nr_irqs_gsi;
        irq = create_irq_nr(irq_want);
        if (irq == 0)
                return -1;
                goto error;
  no_ir:
  #endif
-       ret = setup_msi_irq(dev, desc, irq);
+       ret = setup_msi_irq(dev, msidesc, irq);
        if (ret < 0) {
                destroy_irq(irq);
                return ret;
@@@ -3245,7 -3493,7 +3494,7 @@@ int arch_setup_msi_irqs(struct pci_dev 
  {
        unsigned int irq;
        int ret, sub_handle;
-       struct msi_desc *desc;
+       struct msi_desc *msidesc;
        unsigned int irq_want;
  
  #ifdef CONFIG_INTR_REMAP
        int index = 0;
  #endif
  
-       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+       irq_want = nr_irqs_gsi;
        sub_handle = 0;
-       list_for_each_entry(desc, &dev->msi_list, list) {
-               irq = create_irq_nr(irq_want--);
+       list_for_each_entry(msidesc, &dev->msi_list, list) {
+               irq = create_irq_nr(irq_want);
+               irq_want++;
                if (irq == 0)
                        return -1;
  #ifdef CONFIG_INTR_REMAP
                }
  no_ir:
  #endif
-               ret = setup_msi_irq(dev, desc, irq);
+               ret = setup_msi_irq(dev, msidesc, irq);
                if (ret < 0)
                        goto error;
                sub_handle++;
@@@ -3307,22 -3556,25 +3557,24 @@@ void arch_teardown_msi_irq(unsigned in
  
  #ifdef CONFIG_DMAR
  #ifdef CONFIG_SMP
 -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 +static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
-       struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
-       if (assign_irq_vector(irq, *mask))
+       cfg = desc->chip_data;
 -      if (assign_irq_vector(irq, cfg, mask))
++      if (assign_irq_vector(irq, cfg, *mask))
                return;
  
-       cfg = irq_cfg(irq);
 -      set_extra_move_desc(desc, mask);
++      set_extra_move_desc(desc, *mask);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        dmar_msi_read(irq, &msg);
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
        dmar_msi_write(irq, &msg);
-       desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif /* CONFIG_SMP */
  
  struct irq_chip dmar_msi_type = {
@@@ -3367,22 -3619,25 +3619,24 @@@ int arch_setup_dmar_msi(unsigned int ir
  #ifdef CONFIG_HPET_TIMER
  
  #ifdef CONFIG_SMP
 -static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 +static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
-       struct irq_desc *desc;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
-       if (assign_irq_vector(irq, *mask))
+       cfg = desc->chip_data;
 -      if (assign_irq_vector(irq, cfg, mask))
++      if (assign_irq_vector(irq, cfg, *mask))
                return;
  
-       cfg = irq_cfg(irq);
 -      set_extra_move_desc(desc, mask);
++      set_extra_move_desc(desc, *mask);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        hpet_msi_read(irq, &msg);
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
        hpet_msi_write(irq, &msg);
-       desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif /* CONFIG_SMP */
  
  struct irq_chip hpet_msi_type = {
@@@ -3448,27 -3703,30 +3702,29 @@@ static void target_ht_irq(unsigned int 
        write_ht_irq_msg(irq, &msg);
  }
  
 -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 +static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp;
-       struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
-       if (assign_irq_vector(irq, *mask))
+       cfg = desc->chip_data;
 -      if (assign_irq_vector(irq, cfg, mask))
++      if (assign_irq_vector(irq, cfg, *mask))
                return;
  
-       cfg = irq_cfg(irq);
 -      set_extra_move_desc(desc, mask);
++      set_extra_move_desc(desc, *mask);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        target_ht_irq(irq, dest, cfg->vector);
-       desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif
  
  static struct irq_chip ht_irq_chip = {
@@@ -3488,13 -3746,13 +3744,13 @@@ int arch_setup_ht_irq(unsigned int irq
        int err;
        cpumask_t tmp;
  
+       cfg = irq_cfg(irq);
        tmp = TARGET_CPUS;
-       err = assign_irq_vector(irq, tmp);
+       err = assign_irq_vector(irq, cfg, tmp);
        if (!err) {
                struct ht_irq_msg msg;
                unsigned dest;
  
-               cfg = irq_cfg(irq);
                cpus_and(tmp, cfg->domain, tmp);
                dest = cpu_mask_to_apicid(tmp);
  
@@@ -3540,7 -3798,9 +3796,9 @@@ int arch_enable_uv_irq(char *irq_name, 
        unsigned long flags;
        int err;
  
-       err = assign_irq_vector(irq, *eligible_cpu);
+       cfg = irq_cfg(irq);
+       err = assign_irq_vector(irq, cfg, *eligible_cpu);
        if (err != 0)
                return err;
  
                                      irq_name);
        spin_unlock_irqrestore(&vector_lock, flags);
  
-       cfg = irq_cfg(irq);
        mmr_value = 0;
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
        BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@@ -3602,9 -3860,16 +3858,16 @@@ int __init io_apic_get_redir_entries (i
        return reg_01.bits.entries;
  }
  
- int __init probe_nr_irqs(void)
+ void __init probe_nr_irqs_gsi(void)
  {
-       return NR_IRQS;
+       int idx;
+       int nr = 0;
+       for (idx = 0; idx < nr_ioapics; idx++)
+               nr += io_apic_get_redir_entries(idx) + 1;
+       if (nr > nr_irqs_gsi)
+               nr_irqs_gsi = nr;
  }
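
As a worked check of the loop above: two IO-APICs whose version registers
report 23 as the highest redirection-entry index contribute (23 + 1) * 2 = 48
GSIs, so nr_irqs_gsi grows from its NR_IRQS_LEGACY default of 16 to 48, and
create_irq() then allocates MSI interrupts above the GSI range.
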
  
  /* --------------------------------------------------------------------------
@@@ -3703,19 -3968,31 +3966,31 @@@ int __init io_apic_get_version(int ioap
  
  int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
  {
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int cpu = boot_cpu_id;
        if (!IO_APIC_IRQ(irq)) {
                apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
                        ioapic);
                return -EINVAL;
        }
  
+       desc = irq_to_desc_alloc_cpu(irq, cpu);
+       if (!desc) {
+               printk(KERN_INFO "cannot get irq_desc for %d\n", irq);
+               return 0;
+       }
        /*
         * IRQs < 16 are already in the irq_2_pin[] map
         */
-       if (irq >= 16)
-               add_pin_to_irq(irq, ioapic, pin);
+       if (irq >= NR_IRQS_LEGACY) {
+               cfg = desc->chip_data;
+               add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+       }
  
-       setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+       setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
  
        return 0;
  }
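
A minimal, hypothetical call site for the routine above (numbers are
illustrative; triggering 1 = level, polarity 1 = active low, as in the ACPI
path):

	/* Route GSI 20 through IOAPIC 0, pin 20, level-triggered, active-low. */
	io_apic_set_pci_routing(0, 20, 20, 1, 1);
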
@@@ -3769,9 -4046,10 +4044,10 @@@ void __init setup_ioapic_dest(void
                         * when you have too many devices, because at that time only boot
                         * cpu is online.
                         */
-                       cfg = irq_cfg(irq);
+                       desc = irq_to_desc(irq);
+                       cfg = desc->chip_data;
                        if (!cfg->vector) {
-                               setup_IO_APIC_irq(ioapic, pin, irq,
+                               setup_IO_APIC_irq(ioapic, pin, irq, desc,
                                                  irq_trigger(irq_entry),
                                                  irq_polarity(irq_entry));
                                continue;
                        /*
                         * Honour affinities which have been set in early boot
                         */
-                       desc = irq_to_desc(irq);
                        if (desc->status &
                            (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
                                mask = desc->affinity;
  
  #ifdef CONFIG_INTR_REMAP
                        if (intr_remapping_enabled)
-                               set_ir_ioapic_affinity_irq(irq, &mask);
+                               set_ir_ioapic_affinity_irq_desc(desc, mask);
                        else
  #endif
-                               set_ioapic_affinity_irq(irq, &mask);
+                               set_ioapic_affinity_irq_desc(desc, mask);
                }
  
        }
@@@ -3842,7 -4119,6 +4117,6 @@@ void __init ioapic_init_mappings(void
        struct resource *ioapic_res;
        int i;
  
-       irq_2_pin_init();
        ioapic_res = ioapic_setup_resources();
        for (i = 0; i < nr_ioapics; i++) {
                if (smp_found_config) {
diff --combined arch/x86/kernel/irq_32.c
index 87870a49be4e19a26e2201a56533ab7fb7fdfaae,119fc9c8ff7f29135b0e353cc31722cd6abe452f..9cf9cbbf7a028b74bf4bec2c0f16a8e9290dc926
@@@ -242,6 -242,8 +242,8 @@@ void fixup_irqs(cpumask_t map
        for_each_irq_desc(irq, desc) {
                cpumask_t mask;
  
+               if (!desc)
+                       continue;
                if (irq == 2)
                        continue;
  
                        mask = map;
                }
                if (desc->chip->set_affinity)
 -                      desc->chip->set_affinity(irq, mask);
 +                      desc->chip->set_affinity(irq, &mask);
                else if (desc->action && !(warned++))
                        printk("Cannot set affinity for irq %i\n", irq);
        }
diff --combined arch/x86/kernel/irq_64.c
index 8cbd069e5b41f6412aad93b0dea2409bd5934448,a174a217eb1aefc25c971c2d6155e483d2d50192..54c69d47a771d0cf969ab6d135579a72121acb0c
@@@ -91,6 -91,8 +91,8 @@@ void fixup_irqs(cpumask_t map
                int break_affinity = 0;
                int set_affinity = 1;
  
+               if (!desc)
+                       continue;
                if (irq == 2)
                        continue;
  
                        desc->chip->mask(irq);
  
                if (desc->chip->set_affinity)
 -                      desc->chip->set_affinity(irq, mask);
 +                      desc->chip->set_affinity(irq, &mask);
                else if (!(warned++))
                        set_affinity = 0;
  
diff --combined drivers/xen/events.c
index eba5ec5b020e1801b5f8a97c3b144cf9583f9eba,46625cd38743c2506e1fc024c8e1c4d434a4fa26..add640ff5c6c873c6b43383b096ac947c6eab02f
@@@ -141,8 -141,12 +141,12 @@@ static void init_evtchn_cpu_bindings(vo
        int i;
  
        /* By default all event channels notify CPU#0. */
-       for_each_irq_desc(i, desc)
+       for_each_irq_desc(i, desc) {
+               if (!desc)
+                       continue;
                desc->affinity = cpumask_of_cpu(0);
+       }
  #endif
  
        memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@@ -229,15 -233,20 +233,20 @@@ static void unmask_evtchn(int port
  static int find_unbound_irq(void)
  {
        int irq;
+       struct irq_desc *desc;
  
        /* Only allocate from dynirq range */
-       for_each_irq_nr(irq)
+       for (irq = 0; irq < nr_irqs; irq++)
                if (irq_bindcount[irq] == 0)
                        break;
  
        if (irq == nr_irqs)
                panic("No available IRQ to bind to: increase nr_irqs!\n");
  
+       desc = irq_to_desc_alloc_cpu(irq, 0);
+       if (WARN_ON(desc == NULL))
+               return -1;
        return irq;
  }
  
@@@ -579,7 -588,7 +588,7 @@@ void rebind_evtchn_irq(int evtchn, int 
        spin_unlock(&irq_mapping_update_lock);
  
        /* new event channels are always bound to cpu 0 */
 -      irq_set_affinity(irq, cpumask_of_cpu(0));
 +      irq_set_affinity(irq, cpumask_of(0));
  
        /* Unmask the event channel. */
        enable_irq(irq);
@@@ -608,9 -617,9 +617,9 @@@ static void rebind_irq_to_cpu(unsigned 
  }
  
  
 -static void set_affinity_irq(unsigned irq, cpumask_t dest)
 +static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
  {
 -      unsigned tcpu = first_cpu(dest);
 +      unsigned tcpu = cpumask_first(dest);
        rebind_irq_to_cpu(irq, tcpu);
  }
  
@@@ -792,7 -801,7 +801,7 @@@ void xen_irq_resume(void
                mask_evtchn(evtchn);
  
        /* No IRQ <-> event-channel mappings. */
-       for_each_irq_nr(irq)
+       for (irq = 0; irq < nr_irqs; irq++)
                irq_info[irq].evtchn = 0; /* zap event-channel binding */
  
        for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@@ -824,7 -833,7 +833,7 @@@ void __init xen_init_IRQ(void
                mask_evtchn(i);
  
        /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-       for_each_irq_nr(i)
+       for (i = 0; i < nr_irqs; i++)
                irq_bindcount[i] = 0;
  
        irq_ctx_init(smp_processor_id());
index 48e63934fabeb01e4480c92cb829dc1a9e2ce9df,be3c484b5242555082763718f4818eacdb37f020..dfaee6bd265bc4ac375179636cd6d83b60a1d5eb
@@@ -14,6 -14,8 +14,8 @@@
  #include <linux/irqflags.h>
  #include <linux/smp.h>
  #include <linux/percpu.h>
+ #include <linux/irqnr.h>
  #include <asm/atomic.h>
  #include <asm/ptrace.h>
  #include <asm/system.h>
@@@ -109,13 -111,13 +111,13 @@@ extern void enable_irq(unsigned int irq
  
  extern cpumask_t irq_default_affinity;
  
 -extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
 +extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
  extern int irq_can_set_affinity(unsigned int irq);
  extern int irq_select_affinity(unsigned int irq);
  
  #else /* CONFIG_SMP */
  
 -static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 +static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
  {
        return -EINVAL;
  }
@@@ -251,9 -253,6 +253,6 @@@ enu
        BLOCK_SOFTIRQ,
        TASKLET_SOFTIRQ,
        SCHED_SOFTIRQ,
- #ifdef CONFIG_HIGH_RES_TIMERS
-       HRTIMER_SOFTIRQ,
- #endif
        RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */
  
        NR_SOFTIRQS
diff --combined include/linux/irq.h
index ab70fd604d3a71e93110f2577b2dcdd851e5568b,98564dc6447627f0d6c25033e3ba601db819daf1..5845bdc1ac0940fe43d1b66d435589d9967558ad
@@@ -113,8 -113,7 +113,8 @@@ struct irq_chip 
        void            (*eoi)(unsigned int irq);
  
        void            (*end)(unsigned int irq);
 -      void            (*set_affinity)(unsigned int irq, cpumask_t dest);
 +      void            (*set_affinity)(unsigned int irq,
 +                                      const struct cpumask *dest);
        int             (*retrigger)(unsigned int irq);
        int             (*set_type)(unsigned int irq, unsigned int flow_type);
        int             (*set_wake)(unsigned int irq, unsigned int on);
        const char      *typename;
  };
  
+ struct timer_rand_state;
+ struct irq_2_iommu;
  /**
   * struct irq_desc - interrupt descriptor
   * @irq:              interrupt number for this descriptor
+  * @timer_rand_state: pointer to timer rand state struct
+  * @kstat_irqs:               irq stats per cpu
+  * @irq_2_iommu:      iommu with this irq
   * @handle_irq:               highlevel irq-events handler [if NULL, __do_IRQ()]
   * @chip:             low level interrupt hardware access
   * @msi_desc:         MSI descriptor
   * @depth:            disable-depth, for nested irq_disable() calls
   * @wake_depth:               enable depth, for multiple set_irq_wake() callers
   * @irq_count:                stats field to detect stalled irqs
-  * @irqs_unhandled:   stats field for spurious unhandled interrupts
   * @last_unhandled:   aging timer for unhandled count
+  * @irqs_unhandled:   stats field for spurious unhandled interrupts
   * @lock:             locking for SMP
   * @affinity:         IRQ affinity on SMP
   * @cpu:              cpu index useful for balancing
   */
  struct irq_desc {
        unsigned int            irq;
+ #ifdef CONFIG_SPARSE_IRQ
+       struct timer_rand_state *timer_rand_state;
+       unsigned int            *kstat_irqs;
+ # ifdef CONFIG_INTR_REMAP
+       struct irq_2_iommu      *irq_2_iommu;
+ # endif
+ #endif
        irq_flow_handler_t      handle_irq;
        struct irq_chip         *chip;
        struct msi_desc         *msi_desc;
        unsigned int            depth;          /* nested irq disables */
        unsigned int            wake_depth;     /* nested wake enables */
        unsigned int            irq_count;      /* For detecting broken IRQs */
-       unsigned int            irqs_unhandled;
        unsigned long           last_unhandled; /* Aging timer for unhandled count */
+       unsigned int            irqs_unhandled;
        spinlock_t              lock;
  #ifdef CONFIG_SMP
        cpumask_t               affinity;
        const char              *name;
  } ____cacheline_internodealigned_in_smp;
  
+ extern void early_irq_init(void);
+ extern void arch_early_irq_init(void);
+ extern void arch_init_chip_data(struct irq_desc *desc, int cpu);
+ extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
+                                       struct irq_desc *desc, int cpu);
+ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
  
+ #ifndef CONFIG_SPARSE_IRQ
  extern struct irq_desc irq_desc[NR_IRQS];
  
  static inline struct irq_desc *irq_to_desc(unsigned int irq)
  {
-       return (irq < nr_irqs) ? irq_desc + irq : NULL;
+       return (irq < NR_IRQS) ? irq_desc + irq : NULL;
+ }
+ static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+ {
+       return irq_to_desc(irq);
+ }
+ #else
+ extern struct irq_desc *irq_to_desc(unsigned int irq);
+ extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
+ # define for_each_irq_desc(irq, desc)         \
+       for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq))
+ # define for_each_irq_desc_reverse(irq, desc)                          \
+       for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq))
+ #define kstat_irqs_this_cpu(DESC) \
+       ((DESC)->kstat_irqs[smp_processor_id()])
+ #define kstat_incr_irqs_this_cpu(irqno, DESC) \
+       ((DESC)->kstat_irqs[smp_processor_id()]++)
+ #endif
+ static inline struct irq_desc *
+ irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
+ {
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+       return irq_to_desc(irq);
+ #else
+       return desc;
+ #endif
  }
  
  /*
@@@ -381,6 -431,11 +432,11 @@@ extern int set_irq_msi(unsigned int irq
  #define get_irq_data(irq)     (irq_to_desc(irq)->handler_data)
  #define get_irq_msi(irq)      (irq_to_desc(irq)->msi_desc)
  
+ #define get_irq_desc_chip(desc)               ((desc)->chip)
+ #define get_irq_desc_chip_data(desc)  ((desc)->chip_data)
+ #define get_irq_desc_data(desc)               ((desc)->handler_data)
+ #define get_irq_desc_msi(desc)                ((desc)->msi_desc)
  #endif /* CONFIG_GENERIC_HARDIRQS */
  
  #endif /* !CONFIG_S390 */
diff --combined init/Kconfig
index 8e9904fc3024c623f23db90703e402cb031db523,13627191a60d194de08aaa4b410aa752cfd7cb21..f6281711166d5dbeba4f55121547396d00b6ff2e
@@@ -924,15 -924,6 +924,15 @@@ config KMO
  
  endif # MODULES
  
 +config INIT_ALL_POSSIBLE
 +      bool
 +      help
 +        Back when each arch used to define their own cpu_online_map and
 +        cpu_possible_map, some of them chose to initialize cpu_possible_map
 +        with all 1s, and others with all 0s.  When they were centralised,
 +        it was better to provide this option than to break all the archs
 +        and have several arch maintainers pursuing me down dark alleys.
 +
  config STOP_MACHINE
        bool
        default y
@@@ -945,10 -936,90 +945,90 @@@ source "block/Kconfig
  config PREEMPT_NOTIFIERS
        bool
  
+ choice
+       prompt "RCU Implementation"
+       default CLASSIC_RCU
+
  config CLASSIC_RCU
-       def_bool !PREEMPT_RCU
+       bool "Classic RCU"
        help
          This option selects the classic RCU implementation that is
          designed for best read-side performance on non-realtime
-         systems.  Classic RCU is the default.  Note that the
-         PREEMPT_RCU symbol is used to select/deselect this option.
+         systems.
+
+         Select this option if you are unsure.
+
+ config TREE_RCU
+       bool "Tree-based hierarchical RCU"
+       help
+         This option selects the RCU implementation that is
+         designed for very large SMP systems with hundreds or
+         thousands of CPUs.
+
+ config PREEMPT_RCU
+       bool "Preemptible RCU"
+       depends on PREEMPT
+       help
+         This option reduces the latency of the kernel by making certain
+         RCU sections preemptible. Normally RCU code is non-preemptible; if
+         this option is selected, read-only RCU sections become
+         preemptible. This helps latency, but may expose bugs due to
+         now-naive assumptions about each RCU read-side critical section
+         remaining on a given CPU through its execution.
+
+ endchoice
+
+ config RCU_TRACE
+       bool "Enable tracing for RCU"
+       depends on TREE_RCU || PREEMPT_RCU
+       help
+         This option provides tracing in RCU which presents stats
+         in debugfs for debugging the RCU implementation.
+
+         Say Y here if you want to enable RCU tracing.
+         Say N if you are unsure.
+
+ config RCU_FANOUT
+       int "Tree-based hierarchical RCU fanout value"
+       range 2 64 if 64BIT
+       range 2 32 if !64BIT
+       depends on TREE_RCU
+       default 64 if 64BIT
+       default 32 if !64BIT
+       help
+         This option controls the fanout of hierarchical implementations
+         of RCU, allowing RCU to work efficiently on machines with
+         large numbers of CPUs.  This value must be at least the cube
+         root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
+         systems and up to 262,144 for 64-bit systems.
+
+         Select a specific number if testing RCU itself.
+         Take the default if unsure.
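
(Worked example for the limits quoted in the help text above: with the 64-bit
default RCU_FANOUT=64, a three-level tree reaches 64^3 = 262,144 CPUs; with
the 32-bit default of 32, it reaches 32^3 = 32,768, which is where the
cube-root-of-NR_CPUS requirement comes from.)
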
+ config RCU_FANOUT_EXACT
+       bool "Disable tree-based hierarchical RCU auto-balancing"
+       depends on TREE_RCU
+       default n
+       help
+         This option forces use of the exact RCU_FANOUT value specified,
+         regardless of imbalances in the hierarchy.  This is useful for
+         testing RCU itself, and might one day be useful on systems with
+         strong NUMA behavior.
+         Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+
+         Say N if unsure.
+
+ config TREE_RCU_TRACE
+       def_bool RCU_TRACE && TREE_RCU
+       select DEBUG_FS
+       help
+         This option provides tracing for the TREE_RCU implementation,
+         permitting Makefile to trivially select kernel/rcutree_trace.c.
+
+ config PREEMPT_RCU_TRACE
+       def_bool RCU_TRACE && PREEMPT_RCU
+       select DEBUG_FS
+       help
+         This option provides tracing for the PREEMPT_RCU implementation,
+         permitting Makefile to trivially select kernel/rcupreempt_trace.c.
diff --combined kernel/irq/chip.c
index 58d8e31daa49d295e5063439aad46a5786d21518,6eb3c7952b6496fc9c5f8da49b982d79866e1504..f63c706d25e15f481f61548dd248d1eaf69702bb
   */
  void dynamic_irq_init(unsigned int irq)
  {
-       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_desc *desc;
        unsigned long flags;
  
+       desc = irq_to_desc(irq);
        if (!desc) {
                WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
                return;
@@@ -45,7 -46,7 +46,7 @@@
        desc->irq_count = 0;
        desc->irqs_unhandled = 0;
  #ifdef CONFIG_SMP
 -      cpus_setall(desc->affinity);
 +      cpumask_setall(&desc->affinity);
  #endif
        spin_unlock_irqrestore(&desc->lock, flags);
  }
@@@ -124,6 -125,7 +125,7 @@@ int set_irq_type(unsigned int irq, unsi
                return -ENODEV;
        }
  
+       type &= IRQ_TYPE_SENSE_MASK;
        if (type == IRQ_TYPE_NONE)
                return 0;
  
@@@ -352,6 -354,7 +354,7 @@@ handle_level_irq(unsigned int irq, stru
  
        spin_lock(&desc->lock);
        mask_ack_irq(desc, irq);
+       desc = irq_remap_to_desc(irq, desc);
  
        if (unlikely(desc->status & IRQ_INPROGRESS))
                goto out_unlock;
@@@ -429,6 -432,7 +432,7 @@@ handle_fasteoi_irq(unsigned int irq, st
        desc->status &= ~IRQ_INPROGRESS;
  out:
        desc->chip->eoi(irq);
+       desc = irq_remap_to_desc(irq, desc);
  
        spin_unlock(&desc->lock);
  }
@@@ -465,12 -469,14 +469,14 @@@ handle_edge_irq(unsigned int irq, struc
                    !desc->action)) {
                desc->status |= (IRQ_PENDING | IRQ_MASKED);
                mask_ack_irq(desc, irq);
+               desc = irq_remap_to_desc(irq, desc);
                goto out_unlock;
        }
        kstat_incr_irqs_this_cpu(irq, desc);
  
        /* Start handling the irq */
        desc->chip->ack(irq);
+       desc = irq_remap_to_desc(irq, desc);
  
        /* Mark the IRQ currently in progress.*/
        desc->status |= IRQ_INPROGRESS;
@@@ -531,8 -537,10 +537,10 @@@ handle_percpu_irq(unsigned int irq, str
        if (!noirqdebug)
                note_interrupt(irq, desc, action_ret);
  
-       if (desc->chip->eoi)
+       if (desc->chip->eoi) {
                desc->chip->eoi(irq);
+               desc = irq_remap_to_desc(irq, desc);
+       }
  }
  
  void
@@@ -567,8 -575,10 +575,10 @@@ __set_irq_handler(unsigned int irq, irq
  
        /* Uninstall? */
        if (handle == handle_bad_irq) {
-               if (desc->chip != &no_irq_chip)
+               if (desc->chip != &no_irq_chip) {
                        mask_ack_irq(desc, irq);
+                       desc = irq_remap_to_desc(irq, desc);
+               }
                desc->status |= IRQ_DISABLED;
                desc->depth = 1;
        }
diff --combined kernel/irq/manage.c
index 10ad2f87ed9a0cfe6d20844f05231b7f0fd0506a,540f6c49f3fa156b2bd0d61ad2c5090ea0e46013..61c4a9b6216546aac546b4a6ab1427ed4ac2a00a
@@@ -79,7 -79,7 +79,7 @@@ int irq_can_set_affinity(unsigned int i
   *    @cpumask:       cpumask
   *
   */
 -int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 +int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
  {
        struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
  
  #ifdef CONFIG_GENERIC_PENDING_IRQ
        if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
 -              desc->affinity = cpumask;
 +              cpumask_copy(&desc->affinity, cpumask);
                desc->chip->set_affinity(irq, cpumask);
        } else {
                desc->status |= IRQ_MOVE_PENDING;
 -              desc->pending_mask = cpumask;
 +              cpumask_copy(&desc->pending_mask, cpumask);
        }
  #else
 -      desc->affinity = cpumask;
 +      cpumask_copy(&desc->affinity, cpumask);
        desc->chip->set_affinity(irq, cpumask);
  #endif
        desc->status |= IRQ_AFFINITY_SET;
   */
  int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
  {
 -      cpumask_t mask;
 -
        if (!irq_can_set_affinity(irq))
                return 0;
  
 -      cpus_and(mask, cpu_online_map, irq_default_affinity);
 -
        /*
         * Preserve an userspace affinity setup, but make sure that
         * one of the targets is online.
         */
        if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
 -              if (cpus_intersects(desc->affinity, cpu_online_map))
 -                      mask = desc->affinity;
 +              if (cpumask_any_and(&desc->affinity, cpu_online_mask)
 +                  < nr_cpu_ids)
 +                      goto set_affinity;
                else
                        desc->status &= ~IRQ_AFFINITY_SET;
        }
  
 -      desc->affinity = mask;
 -      desc->chip->set_affinity(irq, mask);
 +      cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity);
 +set_affinity:
 +      desc->chip->set_affinity(irq, &desc->affinity);
  
        return 0;
  }
@@@ -368,16 -370,18 +368,18 @@@ int __irq_set_trigger(struct irq_desc *
                return 0;
        }
  
-       ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK);
+       /* caller masked out all except trigger mode flags */
+       ret = chip->set_type(irq, flags);
  
        if (ret)
                pr_err("setting trigger mode %d for irq %u failed (%pF)\n",
-                               (int)(flags & IRQF_TRIGGER_MASK),
-                               irq, chip->set_type);
+                               (int)flags, irq, chip->set_type);
        else {
+               if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
+                       flags |= IRQ_LEVEL;
                /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
-               desc->status &= ~IRQ_TYPE_SENSE_MASK;
-               desc->status |= flags & IRQ_TYPE_SENSE_MASK;
+               desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK);
+               desc->status |= flags;
        }
  
        return ret;
@@@ -457,7 -461,8 +459,8 @@@ __setup_irq(unsigned int irq, struct ir
  
                /* Setup the type (level, edge polarity) if configured: */
                if (new->flags & IRQF_TRIGGER_MASK) {
-                       ret = __irq_set_trigger(desc, irq, new->flags);
+                       ret = __irq_set_trigger(desc, irq,
+                                       new->flags & IRQF_TRIGGER_MASK);
  
                        if (ret) {
                                spin_unlock_irqrestore(&desc->lock, flags);
@@@ -671,6 -676,18 +674,18 @@@ int request_irq(unsigned int irq, irq_h
        struct irq_desc *desc;
        int retval;
  
+       /*
+        * handle_IRQ_event() always ignores IRQF_DISABLED except for
+        * the _first_ irqaction (sigh).  That can cause oopsing, but
+        * the behavior is classified as "will not fix" so we need to
+        * start nudging drivers away from using that idiom.
+        */
+       if ((irqflags & (IRQF_SHARED|IRQF_DISABLED))
+                       == (IRQF_SHARED|IRQF_DISABLED))
+               pr_warning("IRQ %d/%s: IRQF_DISABLED is not "
+                               "guaranteed on shared IRQs\n",
+                               irq, devname);
  #ifdef CONFIG_LOCKDEP
        /*
         * Lockdep wants atomic interrupt handlers:
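
A hypothetical driver showing the combination the new check warns about; on a shared line only the first handler's IRQF_DISABLED is honoured, so depending on both flags together is unreliable:

#include <linux/interrupt.h>

static irqreturn_t demo_isr(int irq, void *dev_id)
{
        return IRQ_HANDLED;
}

static int demo_attach(unsigned int irq, void *dev)
{
        /* logs: "IRQ <n>/demo: IRQF_DISABLED is not guaranteed ..." */
        return request_irq(irq, demo_isr, IRQF_SHARED | IRQF_DISABLED,
                           "demo", dev);
}
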
diff --combined kernel/irq/proc.c
index 8e91c9762520de79b38d7971dd9c87752f456e39,f6b3440f05bc50dbe68cdb7026e4e287e227b934..d2c0e5ee53c573019f45942e4d63bb362db42551
@@@ -40,42 -40,33 +40,42 @@@ static ssize_t irq_affinity_proc_write(
                const char __user *buffer, size_t count, loff_t *pos)
  {
        unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
 -      cpumask_t new_value;
 +      cpumask_var_t new_value;
        int err;
  
        if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
            irq_balancing_disabled(irq))
                return -EIO;
  
 +      if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
 +              return -ENOMEM;
 +
        err = cpumask_parse_user(buffer, count, new_value);
        if (err)
 -              return err;
 +              goto free_cpumask;
  
 -      if (!is_affinity_mask_valid(new_value))
 -              return -EINVAL;
 +      if (!is_affinity_mask_valid(*new_value)) {
 +              err = -EINVAL;
 +              goto free_cpumask;
 +      }
  
        /*
         * Do not allow disabling IRQs completely - it's too easy a
         * way to make the system unusable accidentally :-) At least
         * one online CPU still has to be targeted.
         */
 -      if (!cpus_intersects(new_value, cpu_online_map))
 +      if (!cpumask_intersects(new_value, cpu_online_mask)) {
                /* Special case for empty set - allow the architecture
                   code to set default SMP affinity. */
 -              return irq_select_affinity_usr(irq) ? -EINVAL : count;
 -
 -      irq_set_affinity(irq, new_value);
 -
 -      return count;
 +              err = irq_select_affinity_usr(irq) ? -EINVAL : count;
 +      } else {
 +              irq_set_affinity(irq, new_value);
 +              err = count;
 +      }
 +
 +free_cpumask:
 +      free_cpumask_var(new_value);
 +      return err;
  }
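
The cpumask_var_t conversion above follows a fixed shape, sketched below. When CONFIG_CPUMASK_OFFSTACK is not set, cpumask_var_t is a one-element on-stack array and the alloc/free calls compile down to no-ops:

#include <linux/cpumask.h>
#include <linux/gfp.h>

static int cpumask_var_sketch(void)
{
        cpumask_var_t mask;

        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                return -ENOMEM;

        cpumask_and(mask, cpu_online_mask, cpu_possible_mask);
        /* ... use mask ... */

        free_cpumask_var(mask);
        return 0;
}
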
  
  static int irq_affinity_proc_open(struct inode *inode, struct file *file)
@@@ -104,7 -95,7 +104,7 @@@ static ssize_t default_affinity_write(s
        cpumask_t new_value;
        int err;
  
 -      err = cpumask_parse_user(buffer, count, new_value);
 +      err = cpumask_parse_user(buffer, count, &new_value);
        if (err)
                return err;
  
@@@ -252,7 -243,11 +252,11 @@@ void init_irq_proc(void
        /*
         * Create entries for all existing IRQs.
         */
-       for_each_irq_desc(irq, desc)
+       for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
                register_irq_proc(irq, desc);
+       }
  }
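
From userspace, the proc files registered above are plain text taking a hex CPU bitmask. A minimal example (the IRQ number is hypothetical) pinning an interrupt to CPUs 0 and 1:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/irq/19/smp_affinity", "w");

        if (!f)
                return 1;
        fprintf(f, "3\n");      /* hex mask 0x3: CPUs 0 and 1 */
        return fclose(f) != 0;
}
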
  
diff --combined kernel/sched.c
index bdd180a0c6be0da437e048e9d0154eb903729863,fff1c4a20b6538966a0cf2b97a012c045d52b84d..f2095660efec72966823de6b66c6147367b8327b
@@@ -209,7 -209,6 +209,6 @@@ void init_rt_bandwidth(struct rt_bandwi
        hrtimer_init(&rt_b->rt_period_timer,
                        CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        rt_b->rt_period_timer.function = sched_rt_period_timer;
-       rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
  }
  
  static inline int rt_bandwidth_enabled(void)
@@@ -1139,7 -1138,6 +1138,6 @@@ static void init_rq_hrtick(struct rq *r
  
        hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        rq->hrtick_timer.function = hrtick;
-       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
  }
  #else /* CONFIG_SCHED_HRTICK */
  static inline void hrtick_clear(struct rq *rq)
@@@ -4192,7 -4190,6 +4190,6 @@@ void account_steal_time(struct task_str
  
        if (p == rq->idle) {
                p->stime = cputime_add(p->stime, steal);
-               account_group_system_time(p, steal);
                if (atomic_read(&rq->nr_iowait) > 0)
                        cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                else
@@@ -4328,7 -4325,7 +4325,7 @@@ void __kprobes sub_preempt_count(int va
        /*
         * Underflow?
         */
-       if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+       if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
                return;
        /*
         * Is the spinlock portion underflowing?
@@@ -6647,7 -6644,7 +6644,7 @@@ static int sched_domain_debug_one(struc
        struct sched_group *group = sd->groups;
        char str[256];
  
 -      cpulist_scnprintf(str, sizeof(str), sd->span);
 +      cpulist_scnprintf(str, sizeof(str), &sd->span);
        cpus_clear(*groupmask);
  
        printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
  
                cpus_or(*groupmask, *groupmask, group->cpumask);
  
 -              cpulist_scnprintf(str, sizeof(str), group->cpumask);
 +              cpulist_scnprintf(str, sizeof(str), &group->cpumask);
                printk(KERN_CONT " %s", str);
  
                group = group->next;
@@@ -7101,7 -7098,7 +7098,7 @@@ cpu_to_phys_group(int cpu, const cpumas
  {
        int group;
  #ifdef CONFIG_SCHED_MC
 -      *mask = cpu_coregroup_map(cpu);
 +      *mask = *cpu_coregroup_mask(cpu);
        cpus_and(*mask, *mask, *cpu_map);
        group = first_cpu(*mask);
  #elif defined(CONFIG_SCHED_SMT)
@@@ -7474,7 -7471,7 +7471,7 @@@ static int __build_sched_domains(const 
                sd = &per_cpu(core_domains, i);
                SD_INIT(sd, MC);
                set_domain_attribute(sd, attr);
 -              sd->span = cpu_coregroup_map(i);
 +              sd->span = *cpu_coregroup_mask(i);
                cpus_and(sd->span, sd->span, *cpu_map);
                sd->parent = p;
                p->child = sd;
                SCHED_CPUMASK_VAR(this_core_map, allmasks);
                SCHED_CPUMASK_VAR(send_covered, allmasks);
  
 -              *this_core_map = cpu_coregroup_map(i);
 +              *this_core_map = *cpu_coregroup_mask(i);
                cpus_and(*this_core_map, *this_core_map, *cpu_map);
                if (i != first_cpu(*this_core_map))
                        continue;
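
The sched.c hunks are the same cpumask migration seen earlier: cpulist_scnprintf() now takes a pointer argument, and cpu_coregroup_map() (returning cpumask_t by value) gives way to cpu_coregroup_mask() (returning a pointer). The by-value copies left above are transitional; a sketch of the end state:

#include <linux/cpumask.h>
#include <linux/topology.h>

static void coregroup_sketch(int cpu, struct cpumask *dst)
{
        /* old: cpumask_t m = cpu_coregroup_map(cpu); -- full stack copy */
        cpumask_copy(dst, cpu_coregroup_mask(cpu));
}
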
diff --combined kernel/trace/trace.c
index c8760ec0e4631c044305643226003fb6d90d1314,4185d5221633f19755efb818a1fb969a69d0d29c..0e91f43b6baf53cac177af4ca02f0939997c1a0c
@@@ -30,7 -30,6 +30,6 @@@
  #include <linux/gfp.h>
  #include <linux/fs.h>
  #include <linux/kprobes.h>
- #include <linux/seq_file.h>
  #include <linux/writeback.h>
  
  #include <linux/stacktrace.h>
@@@ -1310,7 -1309,7 +1309,7 @@@ enum trace_file_type 
        TRACE_FILE_ANNOTATE     = 2,
  };
  
- static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+ static void trace_iterator_increment(struct trace_iterator *iter)
  {
        /* Don't allow ftrace to trace into the ring buffers */
        ftrace_disable_cpu();
@@@ -1389,7 -1388,7 +1388,7 @@@ static void *find_next_entry_inc(struc
        iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
  
        if (iter->ent)
-               trace_iterator_increment(iter, iter->cpu);
+               trace_iterator_increment(iter);
  
        return iter->ent ? iter : NULL;
  }
@@@ -2675,7 -2674,7 +2674,7 @@@ tracing_cpumask_read(struct file *filp
  
        mutex_lock(&tracing_cpumask_update_lock);
  
 -      len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
 +      len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
        if (count - len < 2) {
                count = -EINVAL;
                goto out_err;
@@@ -2696,7 -2695,7 +2695,7 @@@ tracing_cpumask_write(struct file *filp
        int err, cpu;
  
        mutex_lock(&tracing_cpumask_update_lock);
 -      err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
 +      err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
        if (err)
                goto err_unlock;
  
diff --combined mm/slub.c
index 8e516e29f98920f65e8da441e5d2cc51e4cf4f97,6cb7ad10785227f2b889bcff96d6610ddeabe37a..0d861c3154b6eeed56981c33b3505ecca87d5712
+++ b/mm/slub.c
@@@ -24,6 -24,7 +24,7 @@@
  #include <linux/kallsyms.h>
  #include <linux/memory.h>
  #include <linux/math64.h>
+ #include <linux/fault-inject.h>
  
  /*
   * Lock order:
  #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
  #endif
  
+ #define OO_SHIFT      16
+ #define OO_MASK               ((1 << OO_SHIFT) - 1)
+ #define MAX_OBJS_PER_PAGE     65535 /* since page.objects is u16 */
  /* Internal SLUB flags */
  #define __OBJECT_POISON               0x80000000 /* Poison object */
  #define __SYSFS_ADD_DEFERRED  0x40000000 /* Not yet visible via sysfs */
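
The new OO_SHIFT/OO_MASK constants merely name the packing that was already in place: allocation order in the high bits, object count in the low 16. A standalone worked example, assuming 4 KiB pages:

#include <stdio.h>

#define OO_SHIFT 16
#define OO_MASK  ((1 << OO_SHIFT) - 1)

int main(void)
{
        unsigned long order = 3, size = 256, page_size = 4096;
        unsigned long x = (order << OO_SHIFT) + (page_size << order) / size;

        /* (4096 << 3) / 256 = 128 objects; x = 0x30080 */
        printf("order=%lu objects=%lu\n", x >> OO_SHIFT, x & OO_MASK);
        return 0;
}
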
@@@ -178,7 -183,7 +183,7 @@@ static LIST_HEAD(slab_caches)
   * Tracking user of a slab.
   */
  struct track {
-       void *addr;             /* Called from address */
+       unsigned long addr;     /* Called from address */
        int cpu;                /* Was running on cpu */
        int pid;                /* Pid context */
        unsigned long when;     /* When did the operation occur */
@@@ -290,7 -295,7 +295,7 @@@ static inline struct kmem_cache_order_o
                                                unsigned long size)
  {
        struct kmem_cache_order_objects x = {
-               (order << 16) + (PAGE_SIZE << order) / size
+               (order << OO_SHIFT) + (PAGE_SIZE << order) / size
        };
  
        return x;
  
  static inline int oo_order(struct kmem_cache_order_objects x)
  {
-       return x.x >> 16;
+       return x.x >> OO_SHIFT;
  }
  
  static inline int oo_objects(struct kmem_cache_order_objects x)
  {
-       return x.x & ((1 << 16) - 1);
+       return x.x & OO_MASK;
  }
  
  #ifdef CONFIG_SLUB_DEBUG
@@@ -367,7 -372,7 +372,7 @@@ static struct track *get_track(struct k
  }
  
  static void set_track(struct kmem_cache *s, void *object,
-                               enum track_item alloc, void *addr)
+                       enum track_item alloc, unsigned long addr)
  {
        struct track *p;
  
@@@ -391,8 -396,8 +396,8 @@@ static void init_tracking(struct kmem_c
        if (!(s->flags & SLAB_STORE_USER))
                return;
  
-       set_track(s, object, TRACK_FREE, NULL);
-       set_track(s, object, TRACK_ALLOC, NULL);
+       set_track(s, object, TRACK_FREE, 0UL);
+       set_track(s, object, TRACK_ALLOC, 0UL);
  }
  
  static void print_track(const char *s, struct track *t)
                return;
  
        printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
-               s, t->addr, jiffies - t->when, t->cpu, t->pid);
+               s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
  }
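
With addr now an unsigned long, print_track() casts it back to a pointer for %pS, which vsnprintf() resolves to symbol+offset. The same idiom in isolation:

#include <linux/kernel.h>

static void report_caller(unsigned long addr)
{
        /* %pS prints e.g. "kmem_cache_alloc+0x1a/0x90" */
        printk(KERN_INFO "called from %pS\n", (void *)addr);
}
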
  
  static void print_tracking(struct kmem_cache *s, void *object)
@@@ -692,7 -697,7 +697,7 @@@ static int check_object(struct kmem_cac
        if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
                object_err(s, page, p, "Freepointer corrupt");
                /*
-                * No choice but to zap it and thus loose the remainder
+                * No choice but to zap it and thus lose the remainder
                 * of the free objects in this slab. May cause
                 * another error because the object count is now wrong.
                 */
@@@ -764,8 -769,8 +769,8 @@@ static int on_freelist(struct kmem_cach
        }
  
        max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
-       if (max_objects > 65535)
-               max_objects = 65535;
+       if (max_objects > MAX_OBJS_PER_PAGE)
+               max_objects = MAX_OBJS_PER_PAGE;
  
        if (page->objects != max_objects) {
                slab_err(s, page, "Wrong number of objects. Found %d but "
@@@ -866,7 -871,7 +871,7 @@@ static void setup_object_debug(struct k
  }
  
  static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
-                                               void *object, void *addr)
+                                       void *object, unsigned long addr)
  {
        if (!check_slab(s, page))
                goto bad;
@@@ -906,7 -911,7 +911,7 @@@ bad
  }
  
  static int free_debug_processing(struct kmem_cache *s, struct page *page,
-                                               void *object, void *addr)
+                                       void *object, unsigned long addr)
  {
        if (!check_slab(s, page))
                goto fail;
@@@ -1029,10 -1034,10 +1034,10 @@@ static inline void setup_object_debug(s
                        struct page *page, void *object) {}
  
  static inline int alloc_debug_processing(struct kmem_cache *s,
-       struct page *page, void *object, void *addr) { return 0; }
+       struct page *page, void *object, unsigned long addr) { return 0; }
  
  static inline int free_debug_processing(struct kmem_cache *s,
-       struct page *page, void *object, void *addr) { return 0; }
+       struct page *page, void *object, unsigned long addr) { return 0; }
  
  static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
                        { return 1; }
@@@ -1499,8 -1504,8 +1504,8 @@@ static inline int node_match(struct kme
   * we need to allocate a new slab. This is the slowest path since it involves
   * a call to the page allocator and the setup of a new slab.
   */
- static void *__slab_alloc(struct kmem_cache *s,
-               gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
+ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+                         unsigned long addr, struct kmem_cache_cpu *c)
  {
        void **object;
        struct page *new;
@@@ -1584,13 -1589,18 +1589,18 @@@ debug
   * Otherwise we can simply pick the next object from the lockless free list.
   */
  static __always_inline void *slab_alloc(struct kmem_cache *s,
-               gfp_t gfpflags, int node, void *addr)
+               gfp_t gfpflags, int node, unsigned long addr)
  {
        void **object;
        struct kmem_cache_cpu *c;
        unsigned long flags;
        unsigned int objsize;
  
+       might_sleep_if(gfpflags & __GFP_WAIT);
+       if (should_failslab(s->objsize, gfpflags))
+               return NULL;
        local_irq_save(flags);
        c = get_cpu_slab(s, smp_processor_id());
        objsize = c->objsize;
  
  void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
  {
-       return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
+       return slab_alloc(s, gfpflags, -1, _RET_IP_);
  }
  EXPORT_SYMBOL(kmem_cache_alloc);
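
slab_alloc() above gained two early checks: might_sleep_if() for __GFP_WAIT debugging and should_failslab() for fault injection. A sketch of how such a hook stays free when the option is off; the real prototype lives in <linux/fault-inject.h> and may differ in detail:

#ifdef CONFIG_FAILSLAB
extern int should_failslab(size_t size, gfp_t gfpflags);
#else
static inline int should_failslab(size_t size, gfp_t gfpflags)
{
        return 0;       /* compiles away: the fast path pays nothing */
}
#endif
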
  
  #ifdef CONFIG_NUMA
  void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
  {
-       return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
+       return slab_alloc(s, gfpflags, node, _RET_IP_);
  }
  EXPORT_SYMBOL(kmem_cache_alloc_node);
  #endif
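
_RET_IP_ is the old expression with the cast folded in, per <linux/kernel.h>, so the allocators now thread an unsigned long rather than a void * down to set_track():

#define _RET_IP_        (unsigned long)__builtin_return_address(0)
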
   * handling required then we can return immediately.
   */
  static void __slab_free(struct kmem_cache *s, struct page *page,
-                               void *x, void *addr, unsigned int offset)
+                       void *x, unsigned long addr, unsigned int offset)
  {
        void *prior;
        void **object = (void *)x;
@@@ -1704,7 -1714,7 +1714,7 @@@ debug
   * with all sorts of special processing.
   */
  static __always_inline void slab_free(struct kmem_cache *s,
-                       struct page *page, void *x, void *addr)
+                       struct page *page, void *x, unsigned long addr)
  {
        void **object = (void *)x;
        struct kmem_cache_cpu *c;
@@@ -1731,11 -1741,11 +1741,11 @@@ void kmem_cache_free(struct kmem_cache 
  
        page = virt_to_head_page(x);
  
-       slab_free(s, page, x, __builtin_return_address(0));
+       slab_free(s, page, x, _RET_IP_);
  }
  EXPORT_SYMBOL(kmem_cache_free);
  
- /* Figure out on which slab object the object resides */
+ /* Figure out on which slab page the object resides */
  static struct page *get_object_page(const void *x)
  {
        struct page *page = virt_to_head_page(x);
@@@ -1807,8 -1817,8 +1817,8 @@@ static inline int slab_order(int size, 
        int rem;
        int min_order = slub_min_order;
  
-       if ((PAGE_SIZE << min_order) / size > 65535)
-               return get_order(size * 65535) - 1;
+       if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
+               return get_order(size * MAX_OBJS_PER_PAGE) - 1;
  
        for (order = max(min_order,
                                fls(min_objects * size - 1) - PAGE_SHIFT);
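
Worked numbers for the clamp at the top of slab_order(), assuming 4 KiB pages: 8-byte objects at order 7 would mean 65536 objects, one more than the u16 page->objects can hold, so the order is reduced until the count fits:

#include <stdio.h>

#define PAGE_SZ                 4096UL
#define MAX_OBJS_PER_PAGE       65535UL

int main(void)
{
        unsigned long size = 8, order = 7;

        /* (4096 << 7) / 8 = 65536 > 65535: too many at this order */
        printf("order %lu: %lu objects\n", order, (PAGE_SZ << order) / size);
        /* one order lower fits: 32768 objects */
        printf("order %lu: %lu objects\n", order - 1,
               (PAGE_SZ << (order - 1)) / size);
        return 0;
}
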
@@@ -2073,8 -2083,7 +2083,7 @@@ static inline int alloc_kmem_cache_cpus
   * when allocating for the kmalloc_node_cache. This is used for bootstrapping
   * memory on a fresh node that has no slab structures yet.
   */
- static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
-                                                          int node)
+ static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
  {
        struct page *page;
        struct kmem_cache_node *n;
        local_irq_save(flags);
        add_partial(n, page, 0);
        local_irq_restore(flags);
-       return n;
  }
  
  static void free_kmem_cache_nodes(struct kmem_cache *s)
@@@ -2144,8 -2152,7 +2152,7 @@@ static int init_kmem_cache_nodes(struc
                        n = &s->local_node;
                else {
                        if (slab_state == DOWN) {
-                               n = early_kmem_cache_node_alloc(gfpflags,
-                                                               node);
+                               early_kmem_cache_node_alloc(gfpflags, node);
                                continue;
                        }
                        n = kmem_cache_alloc_node(kmalloc_caches,
@@@ -2659,7 -2666,7 +2666,7 @@@ void *__kmalloc(size_t size, gfp_t flag
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
  
-       return slab_alloc(s, flags, -1, __builtin_return_address(0));
+       return slab_alloc(s, flags, -1, _RET_IP_);
  }
  EXPORT_SYMBOL(__kmalloc);
  
@@@ -2687,7 -2694,7 +2694,7 @@@ void *__kmalloc_node(size_t size, gfp_
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
  
-       return slab_alloc(s, flags, node, __builtin_return_address(0));
+       return slab_alloc(s, flags, node, _RET_IP_);
  }
  EXPORT_SYMBOL(__kmalloc_node);
  #endif
@@@ -2744,7 -2751,7 +2751,7 @@@ void kfree(const void *x
                put_page(page);
                return;
        }
-       slab_free(page->slab, page, object, __builtin_return_address(0));
+       slab_free(page->slab, page, object, _RET_IP_);
  }
  EXPORT_SYMBOL(kfree);
  
@@@ -3123,8 -3130,12 +3130,12 @@@ struct kmem_cache *kmem_cache_create(co
                s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
                up_write(&slub_lock);
  
-               if (sysfs_slab_alias(s, name))
+               if (sysfs_slab_alias(s, name)) {
+                       down_write(&slub_lock);
+                       s->refcount--;
+                       up_write(&slub_lock);
                        goto err;
+               }
                return s;
        }
  
                                size, align, flags, ctor)) {
                        list_add(&s->list, &slab_caches);
                        up_write(&slub_lock);
-                       if (sysfs_slab_add(s))
+                       if (sysfs_slab_add(s)) {
+                               down_write(&slub_lock);
+                               list_del(&s->list);
+                               up_write(&slub_lock);
+                               kfree(s);
                                goto err;
+                       }
                        return s;
                }
                kfree(s);
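
Both failure paths in kmem_cache_create() now roll their bookkeeping back before jumping to err: slub_lock is dropped around the sysfs call (which may sleep) and retaken only to undo the earlier registration. The shape of the pattern, with hypothetical names throughout:

#include <linux/list.h>
#include <linux/rwsem.h>

struct demo_cache { struct list_head list; };

static DECLARE_RWSEM(demo_lock);
static LIST_HEAD(demo_caches);
static int demo_publish(struct demo_cache *c);  /* may sleep, may fail */

static struct demo_cache *demo_create(struct demo_cache *c)
{
        down_write(&demo_lock);
        list_add(&c->list, &demo_caches);
        up_write(&demo_lock);           /* publish must run unlocked */

        if (demo_publish(c)) {
                down_write(&demo_lock); /* retake only to roll back */
                list_del(&c->list);
                up_write(&demo_lock);
                return NULL;
        }
        return c;
}
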
@@@ -3202,7 -3218,7 +3218,7 @@@ static struct notifier_block __cpuinitd
  
  #endif
  
- void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
+ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
  {
        struct kmem_cache *s;
  
  }
  
  void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
-                                       int node, void *caller)
+                                       int node, unsigned long caller)
  {
        struct kmem_cache *s;
  
@@@ -3429,7 -3445,7 +3445,7 @@@ static void resiliency_test(void) {}
  
  struct location {
        unsigned long count;
-       void *addr;
+       unsigned long addr;
        long long sum_time;
        long min_time;
        long max_time;
@@@ -3477,7 -3493,7 +3493,7 @@@ static int add_location(struct loc_trac
  {
        long start, end, pos;
        struct location *l;
-       void *caddr;
+       unsigned long caddr;
        unsigned long age = jiffies - track->when;
  
        start = -1;
@@@ -3626,7 -3642,7 +3642,7 @@@ static int list_locations(struct kmem_c
                                len < PAGE_SIZE - 60) {
                        len += sprintf(buf + len, " cpus=");
                        len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
 -                                      l->cpus);
 +                                      &l->cpus);
                }
  
                if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
@@@ -4345,7 -4361,7 +4361,7 @@@ static void sysfs_slab_remove(struct km
  
  /*
   * Need to buffer aliases during bootup until sysfs becomes
-  * available lest we loose that information.
+  * available lest we lose that information.
   */
  struct saved_alias {
        struct kmem_cache *s;