www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
author Rusty Russell <rusty@rustcorp.com.au>
	Wed, 31 Dec 2008 12:35:57 +0000 (23:05 +1030)
committer Rusty Russell <rusty@rustcorp.com.au>
	Wed, 31 Dec 2008 12:35:57 +0000 (23:05 +1030)
Conflicts:

arch/x86/kernel/io_apic.c

29 files changed:
arch/arm/kernel/smp.c
arch/arm/mach-at91/at91rm9200_time.c
arch/arm/mach-pxa/time.c
arch/arm/mach-realview/core.c
arch/arm/mach-realview/localtimer.c
arch/arm/mach-sa1100/time.c
arch/arm/mach-versatile/core.c
arch/sparc/kernel/irq_64.c
arch/sparc/kernel/of_device_64.c
arch/sparc/kernel/pci_msi.c
arch/sparc/kernel/smp_32.c
arch/sparc/kernel/smp_64.c
arch/sparc/kernel/sparc_ksyms_32.c
arch/sparc/kernel/time_64.c
arch/x86/include/asm/pci.h
arch/x86/kernel/hpet.c
arch/x86/kernel/io_apic.c
arch/x86/kernel/irq_32.c
arch/x86/kernel/irq_64.c
drivers/xen/events.c
include/linux/interrupt.h
include/linux/irq.h
init/Kconfig
kernel/irq/chip.c
kernel/irq/manage.c
kernel/irq/proc.c
kernel/sched.c
kernel/trace/trace.c
mm/slub.c
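
A recurring change across the arm, sparc, and x86 clockevent hunks below is the cpumask conversion: .cpumask = cpumask_of_cpu(cpu) (or the CPU_MASK_CPU0 initializer), which hands each caller its own cpumask_t copy, becomes .cpumask = cpumask_of(cpu), a const struct cpumask pointer into shared read-only storage. The following is a minimal userspace model of that ownership change; the type and both helpers are local stand-ins, not the kernel definitions from <linux/cpumask.h>.

/*
 * Minimal userspace model of the cpumask_of_cpu() -> cpumask_of()
 * conversion.  Only the ownership change is modelled: a full cpumask_t
 * copied per caller versus a pointer to shared read-only storage.
 */
#include <stdio.h>

#define NR_CPUS 64

struct cpumask { unsigned long bits; };         /* stand-in: one word suffices */

/* old style: every caller receives, and typically stores, a full copy */
static struct cpumask cpumask_of_cpu(int cpu)
{
        struct cpumask m = { 1UL << cpu };
        return m;
}

/* new style: callers share one read-only mask per CPU and keep a pointer */
static const struct cpumask *cpumask_of(int cpu)
{
        static struct cpumask masks[NR_CPUS];

        masks[cpu].bits = 1UL << cpu;
        return &masks[cpu];
}

int main(void)
{
        struct cpumask old = cpumask_of_cpu(3);         /* whole struct copied */
        const struct cpumask *new = cpumask_of(3);      /* one pointer copied */

        printf("old: %#lx  new: %#lx\n", old.bits, new->bits);
        return 0;
}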

diff --combined arch/arm/kernel/smp.c
index bd905c0a73651f85d54fffb25146b50d7d267368,019237d21622ba2cde3669d4915dca02a590ea26..55fa7ff96a3e7aaf654d30c64677b1a4d666ee4b
  #include <asm/tlbflush.h>
  #include <asm/ptrace.h>
  
 -/*
 - * bitmask of present and online CPUs.
 - * The present bitmask indicates that the CPU is physically present.
 - * The online bitmask indicates that the CPU is up and running.
 - */
 -cpumask_t cpu_possible_map;
 -EXPORT_SYMBOL(cpu_possible_map);
 -cpumask_t cpu_online_map;
 -EXPORT_SYMBOL(cpu_online_map);
 -
  /*
   * as from 2.5, kernels no longer have an init_tasks structure
   * so we need some other way of telling a new secondary core
@@@ -171,7 -181,7 +171,7 @@@ int __cpuexit __cpu_disable(void
        /*
         * Stop the local timer for this CPU.
         */
-       local_timer_stop(cpu);
+       local_timer_stop();
  
        /*
         * Flush user cache and TLB mappings, and then remove this CPU
@@@ -274,7 -284,7 +274,7 @@@ asmlinkage void __cpuinit secondary_sta
        /*
         * Setup local timer for this CPU.
         */
-       local_timer_setup(cpu);
+       local_timer_setup();
  
        calibrate_delay();
  
diff --combined arch/arm/mach-at91/at91rm9200_time.c
index 72f51d39202c7a22d49a393657255d283b339889,d140eae53ded281bcc9f33fc6b991b1086c5fd55..1ff1bda0a894a4ce313ecb0896eb1011bd4a2413
@@@ -141,6 -141,15 +141,15 @@@ clkevt32k_next_event(unsigned long delt
        /* Use "raw" primitives so we behave correctly on RT kernels. */
        raw_local_irq_save(flags);
  
+       /*
+        * According to Thomas Gleixner irqs are already disabled here.  Simply
+        * removing raw_local_irq_save above (and the matching
+        * raw_local_irq_restore) was not accepted.  See
+        * http://thread.gmane.org/gmane.linux.ports.arm.kernel/41174
+        * So for now (2008-11-20) just warn once if irqs were not disabled ...
+        */
+       WARN_ON_ONCE(!raw_irqs_disabled_flags(flags));
        /* The alarm IRQ uses absolute time (now+delta), not the relative
         * time (delta) in our calling convention.  Like all clockevents
         * using such "match" hardware, we have a race to defend against.
@@@ -169,6 -178,7 +178,6 @@@ static struct clock_event_device clkev
        .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
        .shift          = 32,
        .rating         = 150,
 -      .cpumask        = CPU_MASK_CPU0,
        .set_next_event = clkevt32k_next_event,
        .set_mode       = clkevt32k_mode,
  };
@@@ -196,7 -206,7 +205,7 @@@ void __init at91rm9200_timer_init(void
        clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift);
        clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt);
        clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1;
 -      clkevt.cpumask = cpumask_of_cpu(0);
 +      clkevt.cpumask = cpumask_of(0);
        clockevents_register_device(&clkevt);
  
        /* register clocksource */
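
The WARN_ON_ONCE() added to clkevt32k_next_event() above reports the violated assumption the first time only, rather than logging on every timer reprogram. Below is a userspace sketch of the warn-once idiom, assuming GCC statement expressions (which the kernel macro also relies on); the per-callsite static flag mirrors the kernel's __warned variable.

#include <stdio.h>

#define WARN_ON_ONCE(cond)                                      \
({                                                              \
        static int __warned;                                    \
        int __ret = !!(cond);                                   \
        if (__ret && !__warned) {                               \
                __warned = 1;                                   \
                fprintf(stderr, "WARNING: %s:%d: %s\n",         \
                        __FILE__, __LINE__, #cond);             \
        }                                                       \
        __ret;                                                  \
})

int main(void)
{
        int i;

        for (i = 0; i < 3; i++)
                WARN_ON_ONCE(i >= 0);   /* condition holds thrice, warns once */
        return 0;
}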
diff --combined arch/arm/mach-pxa/time.c
index bf3c9a4aad509fc8fde9a6858f38e65e14bf60b9,0016241585190e3770de17ac6e02b0db57547a2f..95656a72268dd9f1a1ee7177c4ca5286d9d0a570
@@@ -22,8 -22,8 +22,8 @@@
  #include <asm/div64.h>
  #include <asm/mach/irq.h>
  #include <asm/mach/time.h>
+ #include <mach/hardware.h>
  #include <mach/pxa-regs.h>
- #include <asm/mach-types.h>
  
  /*
   * This is PXA's sched_clock implementation. This has a resolution
@@@ -122,6 -122,7 +122,6 @@@ static struct clock_event_device ckevt_
        .features       = CLOCK_EVT_FEAT_ONESHOT,
        .shift          = 32,
        .rating         = 200,
 -      .cpumask        = CPU_MASK_CPU0,
        .set_next_event = pxa_osmr0_set_next_event,
        .set_mode       = pxa_osmr0_set_mode,
  };
@@@ -149,18 -150,11 +149,11 @@@ static struct irqaction pxa_ost0_irq = 
  
  static void __init pxa_timer_init(void)
  {
-       unsigned long clock_tick_rate;
+       unsigned long clock_tick_rate = get_clock_tick_rate();
  
        OIER = 0;
        OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3;
  
-       if (cpu_is_pxa25x())
-               clock_tick_rate = 3686400;
-       else if (machine_is_mainstone())
-               clock_tick_rate = 3249600;
-       else
-               clock_tick_rate = 3250000;
        set_oscr2ns_scale(clock_tick_rate);
  
        ckevt_pxa_osmr0.mult =
                clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0);
        ckevt_pxa_osmr0.min_delta_ns =
                clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
 +      ckevt_pxa_osmr0.cpumask = cpumask_of(0);
  
        cksrc_pxa_oscr0.mult =
                clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
diff --combined arch/arm/mach-realview/core.c
index b07cb9b7adb15d5f4a6580ecc9315a401184ae25,5f1d55963cedb8e7b4d3cbd0b6e71a24c84b10c9..bd2aa4f16141d72895e655f63efe75cf6ffcd298
  #include <linux/clocksource.h>
  #include <linux/clockchips.h>
  #include <linux/io.h>
+ #include <linux/smc911x.h>
  
+ #include <asm/clkdev.h>
  #include <asm/system.h>
  #include <mach/hardware.h>
  #include <asm/irq.h>
  #include <asm/leds.h>
+ #include <asm/mach-types.h>
  #include <asm/hardware/arm_timer.h>
  #include <asm/hardware/icst307.h>
  
@@@ -49,7 -52,7 +52,7 @@@
  
  #define REALVIEW_REFCOUNTER   (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET)
  
- /* used by entry-macro.S */
+ /* used by entry-macro.S and platsmp.c */
  void __iomem *gic_cpu_base_addr;
  
  /*
@@@ -124,6 -127,29 +127,29 @@@ int realview_flash_register(struct reso
        return platform_device_register(&realview_flash_device);
  }
  
+ static struct smc911x_platdata realview_smc911x_platdata = {
+       .flags          = SMC911X_USE_32BIT,
+       .irq_flags      = IRQF_SHARED,
+       .irq_polarity   = 1,
+ };
+ static struct platform_device realview_eth_device = {
+       .name           = "smc911x",
+       .id             = 0,
+       .num_resources  = 2,
+ };
+ int realview_eth_register(const char *name, struct resource *res)
+ {
+       if (name)
+               realview_eth_device.name = name;
+       realview_eth_device.resource = res;
+       if (strcmp(realview_eth_device.name, "smc911x") == 0)
+               realview_eth_device.dev.platform_data = &realview_smc911x_platdata;
+       return platform_device_register(&realview_eth_device);
+ }
  static struct resource realview_i2c_resource = {
        .start          = REALVIEW_I2C_BASE,
        .end            = REALVIEW_I2C_BASE + SZ_4K - 1,
@@@ -177,9 -203,14 +203,14 @@@ static const struct icst307_params real
  static void realview_oscvco_set(struct clk *clk, struct icst307_vco vco)
  {
        void __iomem *sys_lock = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_LOCK_OFFSET;
-       void __iomem *sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC4_OFFSET;
+       void __iomem *sys_osc;
        u32 val;
  
+       if (machine_is_realview_pb1176())
+               sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC0_OFFSET;
+       else
+               sys_osc = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_OSC4_OFFSET;
        val = readl(sys_osc) & ~0x7ffff;
        val |= vco.v | (vco.r << 9) | (vco.s << 16);
  
        writel(0, sys_lock);
  }
  
- struct clk realview_clcd_clk = {
-       .name   = "CLCDCLK",
+ static struct clk oscvco_clk = {
        .params = &realview_oscvco_params,
        .setvco = realview_oscvco_set,
  };
  
+ /*
+  * These are fixed clocks.
+  */
+ static struct clk ref24_clk = {
+       .rate   = 24000000,
+ };
+ static struct clk_lookup lookups[] = {
+       {       /* UART0 */
+               .dev_id         = "dev:f1",
+               .clk            = &ref24_clk,
+       }, {    /* UART1 */
+               .dev_id         = "dev:f2",
+               .clk            = &ref24_clk,
+       }, {    /* UART2 */
+               .dev_id         = "dev:f3",
+               .clk            = &ref24_clk,
+       }, {    /* UART3 */
+               .dev_id         = "fpga:09",
+               .clk            = &ref24_clk,
+       }, {    /* KMI0 */
+               .dev_id         = "fpga:06",
+               .clk            = &ref24_clk,
+       }, {    /* KMI1 */
+               .dev_id         = "fpga:07",
+               .clk            = &ref24_clk,
+       }, {    /* MMC0 */
+               .dev_id         = "fpga:05",
+               .clk            = &ref24_clk,
+       }, {    /* EB:CLCD */
+               .dev_id         = "dev:20",
+               .clk            = &oscvco_clk,
+       }, {    /* PB:CLCD */
+               .dev_id         = "issp:20",
+               .clk            = &oscvco_clk,
+       }
+ };
+ static int __init clk_init(void)
+ {
+       int i;
+       for (i = 0; i < ARRAY_SIZE(lookups); i++)
+               clkdev_add(&lookups[i]);
+       return 0;
+ }
+ arch_initcall(clk_init);
  /*
   * CLCD support.
   */
@@@ -226,7 -304,30 +304,30 @@@ static struct clcd_panel vga = 
        .width          = -1,
        .height         = -1,
        .tim2           = TIM2_BCD | TIM2_IPC,
-       .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+       .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
+       .bpp            = 16,
+ };
+ static struct clcd_panel xvga = {
+       .mode           = {
+               .name           = "XVGA",
+               .refresh        = 60,
+               .xres           = 1024,
+               .yres           = 768,
+               .pixclock       = 15748,
+               .left_margin    = 152,
+               .right_margin   = 48,
+               .upper_margin   = 23,
+               .lower_margin   = 3,
+               .hsync_len      = 104,
+               .vsync_len      = 4,
+               .sync           = 0,
+               .vmode          = FB_VMODE_NONINTERLACED,
+       },
+       .width          = -1,
+       .height         = -1,
+       .tim2           = TIM2_BCD | TIM2_IPC,
+       .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
        .bpp            = 16,
  };
  
@@@ -249,7 -350,7 +350,7 @@@ static struct clcd_panel sanyo_3_8_in 
        .width          = -1,
        .height         = -1,
        .tim2           = TIM2_BCD,
-       .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+       .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
        .bpp            = 16,
  };
  
@@@ -272,7 -373,7 +373,7 @@@ static struct clcd_panel sanyo_2_5_in 
        .width          = -1,
        .height         = -1,
        .tim2           = TIM2_IVS | TIM2_IHS | TIM2_IPC,
-       .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+       .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
        .bpp            = 16,
  };
  
@@@ -295,7 -396,7 +396,7 @@@ static struct clcd_panel epson_2_2_in 
        .width          = -1,
        .height         = -1,
        .tim2           = TIM2_BCD | TIM2_IPC,
-       .cntl           = CNTL_LCDTFT | CNTL_LCDVCOMP(1),
+       .cntl           = CNTL_LCDTFT | CNTL_BGR | CNTL_LCDVCOMP(1),
        .bpp            = 16,
  };
  
  static struct clcd_panel *realview_clcd_panel(void)
  {
        void __iomem *sys_clcd = __io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_CLCD_OFFSET;
-       struct clcd_panel *panel = &vga;
+       struct clcd_panel *vga_panel;
+       struct clcd_panel *panel;
        u32 val;
  
+       if (machine_is_realview_eb())
+               vga_panel = &vga;
+       else
+               vga_panel = &xvga;
        val = readl(sys_clcd) & SYS_CLCD_ID_MASK;
        if (val == SYS_CLCD_ID_SANYO_3_8)
                panel = &sanyo_3_8_in;
        else if (val == SYS_CLCD_ID_EPSON_2_2)
                panel = &epson_2_2_in;
        else if (val == SYS_CLCD_ID_VGA)
-               panel = &vga;
+               panel = vga_panel;
        else {
                printk(KERN_ERR "CLCD: unknown LCD panel ID 0x%08x, using VGA\n",
                        val);
-               panel = &vga;
+               panel = vga_panel;
        }
  
        return panel;
@@@ -358,12 -465,18 +465,18 @@@ static void realview_clcd_enable(struc
        writel(val, sys_clcd);
  }
  
- static unsigned long framesize = SZ_1M;
  static int realview_clcd_setup(struct clcd_fb *fb)
  {
+       unsigned long framesize;
        dma_addr_t dma;
  
+       if (machine_is_realview_eb())
+               /* VGA, 16bpp */
+               framesize = 640 * 480 * 2;
+       else
+               /* XVGA, 16bpp */
+               framesize = 1024 * 768 * 2;
        fb->panel               = realview_clcd_panel();
  
        fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize,
@@@ -511,7 -624,7 +624,7 @@@ static struct clock_event_device timer0
        .set_mode       = timer_set_mode,
        .set_next_event = timer_set_next_event,
        .rating         = 300,
 -      .cpumask        = CPU_MASK_ALL,
 +      .cpumask        = cpu_all_mask,
  };
  
  static void __init realview_clockevents_init(unsigned int timer_irq)
@@@ -588,7 -701,7 +701,7 @@@ void __init realview_timer_init(unsigne
         * The dummy clock device has to be registered before the main device
         * so that the latter will broadcast the clock events
         */
-       local_timer_setup(smp_processor_id());
+       local_timer_setup();
  #endif
  
        /* 
diff --combined arch/arm/mach-realview/localtimer.c
index 504961ef343c2ef0f0e6be6bc052b997d90405fd,9019ef2e56115ac72f5b359bd2da1d68999a5d7b..67d6d9cc68b2a693b5edc2a89aa8d78125e294d3
@@@ -38,18 -38,14 +38,14 @@@ void local_timer_interrupt(void
  
  #ifdef CONFIG_LOCAL_TIMERS
  
- #define TWD_BASE(cpu) (twd_base_addr + (cpu) * twd_size)
  /* set up by the platform code */
- void __iomem *twd_base_addr;
- unsigned int twd_size;
+ void __iomem *twd_base;
  
  static unsigned long mpcore_timer_rate;
  
  static void local_timer_set_mode(enum clock_event_mode mode,
                                 struct clock_event_device *clk)
  {
-       void __iomem *base = TWD_BASE(smp_processor_id());
        unsigned long ctrl;
  
        switch(mode) {
                ctrl = 0;
        }
  
-       __raw_writel(ctrl, base + TWD_TIMER_CONTROL);
+       __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL);
  }
  
  static int local_timer_set_next_event(unsigned long evt,
                                      struct clock_event_device *unused)
  {
-       void __iomem *base = TWD_BASE(smp_processor_id());
-       unsigned long ctrl = __raw_readl(base + TWD_TIMER_CONTROL);
+       unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL);
  
-       __raw_writel(evt, base + TWD_TIMER_COUNTER);
-       __raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, base + TWD_TIMER_CONTROL);
+       __raw_writel(evt, twd_base + TWD_TIMER_COUNTER);
+       __raw_writel(ctrl | TWD_TIMER_CONTROL_ENABLE, twd_base + TWD_TIMER_CONTROL);
  
        return 0;
  }
   */
  int local_timer_ack(void)
  {
-       void __iomem *base = TWD_BASE(smp_processor_id());
-       if (__raw_readl(base + TWD_TIMER_INTSTAT)) {
-               __raw_writel(1, base + TWD_TIMER_INTSTAT);
+       if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) {
+               __raw_writel(1, twd_base + TWD_TIMER_INTSTAT);
                return 1;
        }
  
        return 0;
  }
  
- static void __cpuinit twd_calibrate_rate(unsigned int cpu)
+ static void __cpuinit twd_calibrate_rate(void)
  {
-       void __iomem *base = TWD_BASE(cpu);
        unsigned long load, count;
        u64 waitjiffies;
  
                waitjiffies += 5;
  
                                 /* enable, no interrupt or reload */
-               __raw_writel(0x1, base + TWD_TIMER_CONTROL);
+               __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL);
  
                                 /* maximum value */
-               __raw_writel(0xFFFFFFFFU, base + TWD_TIMER_COUNTER);
+               __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER);
  
                while (get_jiffies_64() < waitjiffies)
                        udelay(10);
  
-               count = __raw_readl(base + TWD_TIMER_COUNTER);
+               count = __raw_readl(twd_base + TWD_TIMER_COUNTER);
  
                mpcore_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5);
  
  
        load = mpcore_timer_rate / HZ;
  
-       __raw_writel(load, base + TWD_TIMER_LOAD);
+       __raw_writel(load, twd_base + TWD_TIMER_LOAD);
  }
  
  /*
   * Setup the local clock events for a CPU.
   */
- void __cpuinit local_timer_setup(unsigned int cpu)
+ void __cpuinit local_timer_setup(void)
  {
+       unsigned int cpu = smp_processor_id();
        struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
        unsigned long flags;
  
-       twd_calibrate_rate(cpu);
+       twd_calibrate_rate();
  
        clk->name               = "local_timer";
        clk->features           = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
        clk->set_mode           = local_timer_set_mode;
        clk->set_next_event     = local_timer_set_next_event;
        clk->irq                = IRQ_LOCALTIMER;
 -      clk->cpumask            = cpumask_of_cpu(cpu);
 +      clk->cpumask            = cpumask_of(cpu);
        clk->shift              = 20;
        clk->mult               = div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
        clk->max_delta_ns       = clockevent_delta2ns(0xffffffff, clk);
  /*
   * take a local timer down
   */
- void __cpuexit local_timer_stop(unsigned int cpu)
+ void __cpuexit local_timer_stop(void)
  {
-       __raw_writel(0, TWD_BASE(cpu) + TWD_TIMER_CONTROL);
+       __raw_writel(0, twd_base + TWD_TIMER_CONTROL);
  }
  
  #else /* CONFIG_LOCAL_TIMERS */
@@@ -190,8 -183,9 +183,9 @@@ static void dummy_timer_set_mode(enum c
  {
  }
  
- void __cpuinit local_timer_setup(unsigned int cpu)
+ void __cpuinit local_timer_setup(void)
  {
+       unsigned int cpu = smp_processor_id();
        struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
  
        clk->name               = "dummy_timer";
        clk->rating             = 200;
        clk->set_mode           = dummy_timer_set_mode;
        clk->broadcast          = smp_timer_broadcast;
 -      clk->cpumask            = cpumask_of_cpu(cpu);
 +      clk->cpumask            = cpumask_of(cpu);
  
        clockevents_register_device(clk);
  }
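
localtimer.c above drops the cpu parameter from local_timer_setup() and local_timer_stop(): both only ever run on the CPU they configure, so the callee can derive the id itself with smp_processor_id(), and the single twd_base pointer replaces the per-cpu TWD_BASE(cpu) arithmetic. A userspace model of the calling-convention change follows; smp_processor_id() is a stand-in here, where the kernel reads per-cpu data.

#include <stdio.h>

static int current_cpu = 2;             /* stand-in for "the CPU we run on" */

static int smp_processor_id(void)
{
        return current_cpu;
}

/* old: the caller passed cpu, which could in principle disagree with
 * the CPU actually executing the function */
static void local_timer_setup_old(unsigned int cpu)
{
        printf("setup local timer for cpu %u (caller's claim)\n", cpu);
}

/* new: no argument to get wrong */
static void local_timer_setup(void)
{
        unsigned int cpu = smp_processor_id();

        printf("setup local timer for cpu %u (self-derived)\n", cpu);
}

int main(void)
{
        local_timer_setup_old(3);       /* compiles even though we run on cpu 2 */
        local_timer_setup();
        return 0;
}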
diff --combined arch/arm/mach-sa1100/time.c
index 1cac4ac0b4b89e7af0dc56116817d163744a542c,8c5e727f3b751ffb0e87b176402ae75adb276cfc..711c0295c66f1710de34e20f3d9a88c2eeb99609
@@@ -2,8 -2,8 +2,8 @@@
   * linux/arch/arm/mach-sa1100/time.c
   *
   * Copyright (C) 1998 Deborah Wallach.
-  * Twiddles  (C) 1999         Hugo Fiennes <hugo@empeg.com>
-  * 
+  * Twiddles  (C) 1999 Hugo Fiennes <hugo@empeg.com>
+  *
   * 2000/03/29 (C) Nicolas Pitre <nico@cam.org>
   *    Rewritten: big cleanup, much simpler, better HZ accuracy.
   *
@@@ -73,6 -73,7 +73,6 @@@ static struct clock_event_device ckevt_
        .features       = CLOCK_EVT_FEAT_ONESHOT,
        .shift          = 32,
        .rating         = 200,
 -      .cpumask        = CPU_MASK_CPU0,
        .set_next_event = sa1100_osmr0_set_next_event,
        .set_mode       = sa1100_osmr0_set_mode,
  };
@@@ -109,7 -110,6 +109,7 @@@ static void __init sa1100_timer_init(vo
                clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0);
        ckevt_sa1100_osmr0.min_delta_ns =
                clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
 +      ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
  
        cksrc_sa1100_oscr.mult =
                clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
diff --combined arch/arm/mach-versatile/core.c
index a3f1933434e261d604c20f665a8b2b6dde450b0b,df25aa138509c95aec6dfcb391f8409de5b182a8..1c43494f5c422092d713a860e1623e83078b9d05
@@@ -31,6 -31,7 +31,7 @@@
  #include <linux/cnt32_to_63.h>
  #include <linux/io.h>
  
+ #include <asm/clkdev.h>
  #include <asm/system.h>
  #include <mach/hardware.h>
  #include <asm/irq.h>
@@@ -373,22 -374,60 +374,60 @@@ static const struct icst307_params vers
  
  static void versatile_oscvco_set(struct clk *clk, struct icst307_vco vco)
  {
-       void __iomem *sys_lock = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_LOCK_OFFSET;
-       void __iomem *sys_osc = __io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_OSCCLCD_OFFSET;
+       void __iomem *sys = __io_address(VERSATILE_SYS_BASE);
+       void __iomem *sys_lock = sys + VERSATILE_SYS_LOCK_OFFSET;
        u32 val;
  
-       val = readl(sys_osc) & ~0x7ffff;
+       val = readl(sys + clk->oscoff) & ~0x7ffff;
        val |= vco.v | (vco.r << 9) | (vco.s << 16);
  
        writel(0xa05f, sys_lock);
-       writel(val, sys_osc);
+       writel(val, sys + clk->oscoff);
        writel(0, sys_lock);
  }
  
- static struct clk versatile_clcd_clk = {
-       .name   = "CLCDCLK",
+ static struct clk osc4_clk = {
        .params = &versatile_oscvco_params,
-       .setvco = versatile_oscvco_set,
+       .oscoff = VERSATILE_SYS_OSCCLCD_OFFSET,
+       .setvco = versatile_oscvco_set,
+ };
+ /*
+  * These are fixed clocks.
+  */
+ static struct clk ref24_clk = {
+       .rate   = 24000000,
+ };
+ static struct clk_lookup lookups[] __initdata = {
+       {       /* UART0 */
+               .dev_id         = "dev:f1",
+               .clk            = &ref24_clk,
+       }, {    /* UART1 */
+               .dev_id         = "dev:f2",
+               .clk            = &ref24_clk,
+       }, {    /* UART2 */
+               .dev_id         = "dev:f3",
+               .clk            = &ref24_clk,
+       }, {    /* UART3 */
+               .dev_id         = "fpga:09",
+               .clk            = &ref24_clk,
+       }, {    /* KMI0 */
+               .dev_id         = "fpga:06",
+               .clk            = &ref24_clk,
+       }, {    /* KMI1 */
+               .dev_id         = "fpga:07",
+               .clk            = &ref24_clk,
+       }, {    /* MMC0 */
+               .dev_id         = "fpga:05",
+               .clk            = &ref24_clk,
+       }, {    /* MMC1 */
+               .dev_id         = "fpga:0b",
+               .clk            = &ref24_clk,
+       }, {    /* CLCD */
+               .dev_id         = "dev:20",
+               .clk            = &osc4_clk,
+       }
  };
  
  /*
@@@ -786,7 -825,8 +825,8 @@@ void __init versatile_init(void
  {
        int i;
  
-       clk_register(&versatile_clcd_clk);
+       for (i = 0; i < ARRAY_SIZE(lookups); i++)
+               clkdev_add(&lookups[i]);
  
        platform_device_register(&versatile_flash_device);
        platform_device_register(&versatile_i2c_device);
@@@ -965,7 -1005,7 +1005,7 @@@ static void __init versatile_timer_init
        timer0_clockevent.min_delta_ns =
                clockevent_delta2ns(0xf, &timer0_clockevent);
  
 -      timer0_clockevent.cpumask = cpumask_of_cpu(0);
 +      timer0_clockevent.cpumask = cpumask_of(0);
        clockevents_register_device(&timer0_clockevent);
  }
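
Both the RealView and Versatile files above replace the single named CLCDCLK clk_register() with a table of struct clk_lookup entries passed to clkdev_add(), so consumers are matched by device id rather than by clock name. Below is a userspace model of that lookup; the types and the clk_get() analogue are local stand-ins for the clkdev machinery, with a few dev_id strings borrowed from the tables above.

#include <stdio.h>
#include <string.h>

struct clk { unsigned long rate; };

static struct clk ref24_clk = { 24000000 };     /* fixed 24 MHz reference */
static struct clk osc4_clk;                     /* programmable VCO */

struct clk_lookup {
        const char *dev_id;
        struct clk *clk;
};

static const struct clk_lookup lookups[] = {
        { "dev:f1",  &ref24_clk },      /* UART0 */
        { "fpga:05", &ref24_clk },      /* MMC0 */
        { "dev:20",  &osc4_clk },       /* CLCD */
};

/* clk_get() analogue: match a consumer's device id against the table */
static struct clk *clk_get(const char *dev_id)
{
        size_t i;

        for (i = 0; i < sizeof(lookups) / sizeof(lookups[0]); i++)
                if (strcmp(lookups[i].dev_id, dev_id) == 0)
                        return lookups[i].clk;
        return NULL;
}

int main(void)
{
        struct clk *clk = clk_get("dev:f1");

        printf("UART0 clock: %lu Hz\n", clk ? clk->rate : 0UL);
        return 0;
}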
  
diff --combined arch/sparc/kernel/irq_64.c
index 4aaf18e83c8c24766c4a18ae5a3716f7a6249402,a3ea2bcb95de6a39ebb7bf297ee2355f507f178c..cab8e02868716d691a38b9bad239ec754dd39134
@@@ -312,8 -312,7 +312,8 @@@ static void sun4u_irq_enable(unsigned i
        }
  }
  
 -static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
 +static void sun4u_set_affinity(unsigned int virt_irq,
 +                             const struct cpumask *mask)
  {
        sun4u_irq_enable(virt_irq);
  }
@@@ -363,8 -362,7 +363,8 @@@ static void sun4v_irq_enable(unsigned i
                       ino, err);
  }
  
 -static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
 +static void sun4v_set_affinity(unsigned int virt_irq,
 +                             const struct cpumask *mask)
  {
        unsigned int ino = virt_irq_table[virt_irq].dev_ino;
        unsigned long cpuid = irq_choose_cpu(virt_irq);
@@@ -431,8 -429,7 +431,8 @@@ static void sun4v_virq_enable(unsigned 
                       dev_handle, dev_ino, err);
  }
  
 -static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
 +static void sun4v_virt_set_affinity(unsigned int virt_irq,
 +                                  const struct cpumask *mask)
  {
        unsigned long cpuid, dev_handle, dev_ino;
        int err;
@@@ -778,6 -775,69 +778,69 @@@ void do_softirq(void
        local_irq_restore(flags);
  }
  
+ static void unhandled_perf_irq(struct pt_regs *regs)
+ {
+       unsigned long pcr, pic;
+       read_pcr(pcr);
+       read_pic(pic);
+       write_pcr(0);
+       printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
+              smp_processor_id());
+       printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
+              smp_processor_id(), pcr, pic);
+ }
+ /* Almost a direct copy of the powerpc PMC code.  */
+ static DEFINE_SPINLOCK(perf_irq_lock);
+ static void *perf_irq_owner_caller; /* mostly for debugging */
+ static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
+ /* Invoked from level 15 PIL handler in trap table.  */
+ void perfctr_irq(int irq, struct pt_regs *regs)
+ {
+       clear_softint(1 << irq);
+       perf_irq(regs);
+ }
+ int register_perfctr_intr(void (*handler)(struct pt_regs *))
+ {
+       int ret;
+       if (!handler)
+               return -EINVAL;
+       spin_lock(&perf_irq_lock);
+       if (perf_irq != unhandled_perf_irq) {
+               printk(KERN_WARNING "register_perfctr_intr: "
+                      "perf IRQ busy (reserved by caller %p)\n",
+                      perf_irq_owner_caller);
+               ret = -EBUSY;
+               goto out;
+       }
+       perf_irq_owner_caller = __builtin_return_address(0);
+       perf_irq = handler;
+       ret = 0;
+ out:
+       spin_unlock(&perf_irq_lock);
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(register_perfctr_intr);
+ void release_perfctr_intr(void (*handler)(struct pt_regs *))
+ {
+       spin_lock(&perf_irq_lock);
+       perf_irq_owner_caller = NULL;
+       perf_irq = unhandled_perf_irq;
+       spin_unlock(&perf_irq_lock);
+ }
+ EXPORT_SYMBOL_GPL(release_perfctr_intr);
  #ifdef CONFIG_HOTPLUG_CPU
  void fixup_irqs(void)
  {
                    !(irq_desc[irq].status & IRQ_PER_CPU)) {
                        if (irq_desc[irq].chip->set_affinity)
                                irq_desc[irq].chip->set_affinity(irq,
 -                                      irq_desc[irq].affinity);
 +                                      &irq_desc[irq].affinity);
                }
                spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
        }
diff --combined arch/sparc/kernel/of_device_64.c
index 4f6098d318ec21993f6173769eeca4898989fdbb,46e231f7c5ce2c37cf51b6111a0175f9dcc4280e..4873f28905b082b07859d15389984b8f7dff55a8
@@@ -778,9 -778,9 +778,9 @@@ static unsigned int __init build_one_de
  out:
        nid = of_node_to_nid(dp);
        if (nid != -1) {
 -              cpumask_t numa_mask = node_to_cpumask(nid);
 +              cpumask_t numa_mask = *cpumask_of_node(nid);
  
 -              irq_set_affinity(irq, numa_mask);
 +              irq_set_affinity(irq, &numa_mask);
        }
  
        return irq;
@@@ -811,20 -811,20 +811,20 @@@ static struct of_device * __init scan_o
  
        irq = of_get_property(dp, "interrupts", &len);
        if (irq) {
-               memcpy(op->irqs, irq, len);
                op->num_irqs = len / 4;
+               /* Prevent overrunning the op->irqs[] array.  */
+               if (op->num_irqs > PROMINTR_MAX) {
+                       printk(KERN_WARNING "%s: Too many irqs (%d), "
+                              "limiting to %d.\n",
+                              dp->full_name, op->num_irqs, PROMINTR_MAX);
+                       op->num_irqs = PROMINTR_MAX;
+               }
+               memcpy(op->irqs, irq, op->num_irqs * 4);
        } else {
                op->num_irqs = 0;
        }
  
-       /* Prevent overrunning the op->irqs[] array.  */
-       if (op->num_irqs > PROMINTR_MAX) {
-               printk(KERN_WARNING "%s: Too many irqs (%d), "
-                      "limiting to %d.\n",
-                      dp->full_name, op->num_irqs, PROMINTR_MAX);
-               op->num_irqs = PROMINTR_MAX;
-       }
        build_device_resources(op, parent);
        for (i = 0; i < op->num_irqs; i++)
                op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]);
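
The of_device_64.c hunk above reorders a bounds check: the old code memcpy()'d the full "interrupts" property (len bytes) into op->irqs[] and clamped op->num_irqs afterwards, by which point an oversized property had already overrun the array. The fix clamps first and copies only the clamped amount. A small model of the corrected order follows, with a stand-in capacity.

#include <stdio.h>
#include <string.h>

#define PROMINTR_MAX 15         /* stand-in for the op->irqs[] capacity */

static unsigned int irqs[PROMINTR_MAX];

static int copy_irqs(const unsigned int *prop, int len)
{
        int num = len / 4;

        /* clamp before the copy, so the destination can never overrun */
        if (num > PROMINTR_MAX) {
                fprintf(stderr, "too many irqs (%d), limiting to %d\n",
                        num, PROMINTR_MAX);
                num = PROMINTR_MAX;
        }
        memcpy(irqs, prop, num * 4);
        return num;
}

int main(void)
{
        unsigned int prop[20] = { 0 };  /* oversized firmware property */

        printf("copied %d entries\n", copy_irqs(prop, sizeof(prop)));
        return 0;
}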
diff --combined arch/sparc/kernel/pci_msi.c
index 4ef282e8191208b4bc954614d8f1b9d675385177,2e680f34f727fa61f5defef92e63b8144e365d70..4ef282e8191208b4bc954614d8f1b9d675385177
@@@ -286,9 -286,9 +286,9 @@@ static int bringup_one_msi_queue(struc
  
        nid = pbm->numa_node;
        if (nid != -1) {
 -              cpumask_t numa_mask = node_to_cpumask(nid);
 +              cpumask_t numa_mask = *cpumask_of_node(nid);
  
 -              irq_set_affinity(irq, numa_mask);
 +              irq_set_affinity(irq, &numa_mask);
        }
        err = request_irq(irq, sparc64_msiq_interrupt, 0,
                          "MSIQ",
diff --combined arch/sparc/kernel/smp_32.c
index 1e5ac4e282e1285030aaa43380b91c01b7bcff48,e396c1f17a922deaef7b03a0751d06f1345770fd..1e5ac4e282e1285030aaa43380b91c01b7bcff48
@@@ -39,6 -39,8 +39,6 @@@ volatile unsigned long cpu_callin_map[N
  unsigned char boot_cpu_id = 0;
  unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
  
 -cpumask_t cpu_online_map = CPU_MASK_NONE;
 -cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
  cpumask_t smp_commenced_mask = CPU_MASK_NONE;
  
  /* The only guaranteed locking primitive available on all Sparc
@@@ -332,7 -334,7 +332,7 @@@ void __init smp_setup_cpu_possible_map(
        instance = 0;
        while (!cpu_find_by_instance(instance, NULL, &mid)) {
                if (mid < NR_CPUS) {
 -                      cpu_set(mid, phys_cpu_present_map);
 +                      cpu_set(mid, cpu_possible_map);
                        cpu_set(mid, cpu_present_map);
                }
                instance++;
@@@ -352,7 -354,7 +352,7 @@@ void __init smp_prepare_boot_cpu(void
  
        current_thread_info()->cpu = cpuid;
        cpu_set(cpuid, cpu_online_map);
 -      cpu_set(cpuid, phys_cpu_present_map);
 +      cpu_set(cpuid, cpu_possible_map);
  }
  
  int __cpuinit __cpu_up(unsigned int cpu)
diff --combined arch/sparc/kernel/smp_64.c
index a97b8822c22ca029acc8ea1fe0fc6cc90c88efd0,bfe99d82d458702d32bf52a863cf00345e04a8e2..46329799f3462bb4002024558be74f204e679f07
  
  int sparc64_multi_core __read_mostly;
  
 -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
 -cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
  DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
  cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
        { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
  
 -EXPORT_SYMBOL(cpu_possible_map);
 -EXPORT_SYMBOL(cpu_online_map);
  EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
  EXPORT_SYMBOL(cpu_core_map);
  
@@@ -159,7 -163,7 +159,7 @@@ static inline long get_delta (long *rt
        for (i = 0; i < NUM_ITERS; i++) {
                t0 = tick_ops->get_tick();
                go[MASTER] = 1;
-               membar_storeload();
+               membar_safe("#StoreLoad");
                while (!(tm = go[SLAVE]))
                        rmb();
                go[SLAVE] = 0;
@@@ -253,7 -257,7 +253,7 @@@ static void smp_synchronize_one_tick(in
  
        /* now let the client proceed into his loop */
        go[MASTER] = 0;
-       membar_storeload();
+       membar_safe("#StoreLoad");
  
        spin_lock_irqsave(&itc_sync_lock, flags);
        {
                        go[MASTER] = 0;
                        wmb();
                        go[SLAVE] = tick_ops->get_tick();
-                       membar_storeload();
+                       membar_safe("#StoreLoad");
                }
        }
        spin_unlock_irqrestore(&itc_sync_lock, flags);
@@@ -769,7 -773,7 +769,7 @@@ static void xcall_deliver(u64 data0, u6
  
        /* Setup the initial cpu list.  */
        cnt = 0;
-       for_each_cpu_mask_nr(i, *mask) {
+       for_each_cpu(i, mask) {
                if (i == this_cpu || !cpu_online(i))
                        continue;
                cpu_list[cnt++] = i;
@@@ -1118,7 -1122,6 +1118,6 @@@ void smp_capture(void
                       smp_processor_id());
  #endif
                penguins_are_doing_time = 1;
-               membar_storestore_loadstore();
                atomic_inc(&smp_capture_registry);
                smp_cross_call(&xcall_capture, 0, 0, 0);
                while (atomic_read(&smp_capture_registry) != ncpus)
@@@ -1138,13 -1141,13 +1137,13 @@@ void smp_release(void
                       smp_processor_id());
  #endif
                penguins_are_doing_time = 0;
-               membar_storeload_storestore();
+               membar_safe("#StoreLoad");
                atomic_dec(&smp_capture_registry);
        }
  }
  
- /* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they
-  * can service tlb flush xcalls...
+ /* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE
+  * set, so they can service tlb flush xcalls...
   */
  extern void prom_world(int);
  
@@@ -1157,7 -1160,7 +1156,7 @@@ void smp_penguin_jailcell(int irq, stru
        __asm__ __volatile__("flushw");
        prom_world(1);
        atomic_inc(&smp_capture_registry);
-       membar_storeload_storestore();
+       membar_safe("#StoreLoad");
        while (penguins_are_doing_time)
                rmb();
        atomic_dec(&smp_capture_registry);
diff --combined arch/sparc/kernel/sparc_ksyms_32.c
index 32d11a5fe3a86f9e5f60acfefff09a2dd0d98c41,a4d45fc29b21e4ac9c9c069bdab7b8c277c2f6f5..e1e97639231b208e8b01ee99373771020b49ec2f
@@@ -61,7 -61,6 +61,6 @@@ extern void (*bzero_1page)(void *)
  extern void *__bzero(void *, size_t);
  extern void *__memscan_zero(void *, size_t);
  extern void *__memscan_generic(void *, int, size_t);
- extern int __memcmp(const void *, const void *, __kernel_size_t);
  extern int __strncmp(const char *, const char *, __kernel_size_t);
  
  extern int __ashrdi3(int, int);
@@@ -113,15 -112,17 +112,13 @@@ EXPORT_PER_CPU_SYMBOL(__cpu_data)
  #ifdef CONFIG_SMP
  /* IRQ implementation. */
  EXPORT_SYMBOL(synchronize_irq);
 -
 -/* CPU online map and active count. */
 -EXPORT_SYMBOL(cpu_online_map);
 -EXPORT_SYMBOL(phys_cpu_present_map);
  #endif
  
  EXPORT_SYMBOL(__udelay);
  EXPORT_SYMBOL(__ndelay);
  EXPORT_SYMBOL(rtc_lock);
- #ifdef CONFIG_SUN_AUXIO
  EXPORT_SYMBOL(set_auxio);
  EXPORT_SYMBOL(get_auxio);
- #endif
  EXPORT_SYMBOL(io_remap_pfn_range);
  
  #ifndef CONFIG_SMP
@@@ -209,7 -210,6 +206,6 @@@ EXPORT_SYMBOL(bzero_1page)
  EXPORT_SYMBOL(__bzero);
  EXPORT_SYMBOL(__memscan_zero);
  EXPORT_SYMBOL(__memscan_generic);
- EXPORT_SYMBOL(__memcmp);
  EXPORT_SYMBOL(__strncmp);
  EXPORT_SYMBOL(__memmove);
  
diff --combined arch/sparc/kernel/time_64.c
index 9df8f095a8b11a59e448bec001ac6aec852c480c,141da375909129dea0ab0fffc9d5359d85e9a039..9df8f095a8b11a59e448bec001ac6aec852c480c
@@@ -763,7 -763,7 +763,7 @@@ void __devinit setup_sparc64_timer(void
        sevt = &__get_cpu_var(sparc64_events);
  
        memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
 -      sevt->cpumask = cpumask_of_cpu(smp_processor_id());
 +      sevt->cpumask = cpumask_of(smp_processor_id());
  
        clockevents_register_device(sevt);
  }
diff --combined arch/x86/include/asm/pci.h
index f8959c7a985f4f6f8ce581f1967ad95100fd1b64,66834c41c0493eccf1b117b1565443c10ec706b6..a977de23cb4d83320e5255de21ffc073a0859403
@@@ -84,6 -84,8 +84,8 @@@ static inline void pci_dma_burst_advice
  static inline void early_quirks(void) { }
  #endif
  
+ extern void pci_iommu_alloc(void);
  #endif  /* __KERNEL__ */
  
  #ifdef CONFIG_X86_32
  
  #ifdef CONFIG_NUMA
  /* Returns the node based on pci bus */
 -static inline int __pcibus_to_node(struct pci_bus *bus)
 +static inline int __pcibus_to_node(const struct pci_bus *bus)
  {
 -      struct pci_sysdata *sd = bus->sysdata;
 +      const struct pci_sysdata *sd = bus->sysdata;
  
        return sd->node;
  }
@@@ -111,12 -113,6 +113,12 @@@ static inline cpumask_t __pcibus_to_cpu
  {
        return node_to_cpumask(__pcibus_to_node(bus));
  }
 +
 +static inline const struct cpumask *
 +cpumask_of_pcibus(const struct pci_bus *bus)
 +{
 +      return cpumask_of_node(__pcibus_to_node(bus));
 +}
  #endif
  
  #endif /* _ASM_X86_PCI_H */
diff --combined arch/x86/kernel/hpet.c
index b5310ff1259e891d1f07891d43e9ee80efb01205,845ea097383ee4051a24b54bca8da4c29bd9f6d1..cd759ad90690e72d109aed4309adeb87755977e5
@@@ -248,7 -248,7 +248,7 @@@ static void hpet_legacy_clockevent_regi
         * Start hpet with the boot cpu mask and make it
         * global after the IO_APIC has been initialized.
         */
 -      hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
 +      hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
        clockevents_register_device(&hpet_clockevent);
        global_clock_event = &hpet_clockevent;
        printk(KERN_DEBUG "hpet clockevent registered\n");
@@@ -303,7 -303,7 +303,7 @@@ static void hpet_set_mode(enum clock_ev
                        struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
                        hpet_setup_msi_irq(hdev->irq);
                        disable_irq(hdev->irq);
 -                      irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
 +                      irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
                        enable_irq(hdev->irq);
                }
                break;
@@@ -451,7 -451,7 +451,7 @@@ static int hpet_setup_irq(struct hpet_d
                return -1;
  
        disable_irq(dev->irq);
 -      irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
 +      irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
        enable_irq(dev->irq);
  
        printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@@ -502,7 -502,7 +502,7 @@@ static void init_one_hpet_msi_clockeven
        /* 5 usec minimum reprogramming delta. */
        evt->min_delta_ns = 5000;
  
 -      evt->cpumask = cpumask_of_cpu(hdev->cpu);
 +      evt->cpumask = cpumask_of(hdev->cpu);
        clockevents_register_device(evt);
  }
  
@@@ -813,7 -813,7 +813,7 @@@ int __init hpet_enable(void
  
  out_nohpet:
        hpet_clear_mapping();
-       boot_hpet_disable = 1;
+       hpet_address = 0;
        return 0;
  }
  
@@@ -836,10 -836,11 +836,11 @@@ static __init int hpet_late_init(void
  
                hpet_address = force_hpet_address;
                hpet_enable();
-               if (!hpet_virt_address)
-                       return -ENODEV;
        }
  
+       if (!hpet_virt_address)
+               return -ENODEV;
        hpet_reserve_platform_timers(hpet_readl(HPET_ID));
  
        for_each_online_cpu(cpu) {
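
In hpet.c above, the !hpet_virt_address bail-out moves from inside the force-enable branch of hpet_late_init() to after it, so a normal boot whose hpet_enable() failed (which now clears hpet_address rather than setting boot_hpet_disable) also returns -ENODEV before touching the platform timers. A minimal control-flow model follows; all names are stand-ins, and only the hoisted guard is the point.

#include <stdio.h>

#define ENODEV 19

static unsigned long hpet_virt_address; /* 0 = mapping failed / absent */
static unsigned long force_hpet_address;

static void hpet_enable(void)
{
        /* pretend the mapping failed */
        hpet_virt_address = 0;
}

static int hpet_late_init(void)
{
        if (force_hpet_address) {
                hpet_enable();
                /* old code returned -ENODEV only from inside this branch */
        }

        if (!hpet_virt_address)         /* new: guards the normal path too */
                return -ENODEV;

        printf("reserving HPET platform timers\n");
        return 0;
}

int main(void)
{
        printf("hpet_late_init() = %d\n", hpet_late_init());
        return 0;
}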
diff --combined arch/x86/kernel/io_apic.c
index 6dbf427175ffa349ee377d86b09c6f339eb8db74,f6ea94b74da146072cca138aa824ed5a3d5eeec7..e7745961ed314c7a03e32ce2db46edf7d44e63c9
@@@ -108,94 -108,253 +108,253 @@@ static int __init parse_noapic(char *st
  early_param("noapic", parse_noapic);
  
  struct irq_pin_list;
+ /*
+  * This is performance-critical, we want to do it O(1)
+  *
+  * the indexing order of this array favors 1:1 mappings
+  * between pins and IRQs.
+  */
+ struct irq_pin_list {
+       int apic, pin;
+       struct irq_pin_list *next;
+ };
+ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+ {
+       struct irq_pin_list *pin;
+       int node;
+       node = cpu_to_node(cpu);
+       pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+       printk(KERN_DEBUG "  alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+       return pin;
+ }
  struct irq_cfg {
-       unsigned int irq;
        struct irq_pin_list *irq_2_pin;
        cpumask_t domain;
        cpumask_t old_domain;
        unsigned move_cleanup_count;
        u8 vector;
        u8 move_in_progress : 1;
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+       u8 move_desc_pending : 1;
+ #endif
  };
  
  /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+ #ifdef CONFIG_SPARSE_IRQ
+ static struct irq_cfg irq_cfgx[] = {
+ #else
  static struct irq_cfg irq_cfgx[NR_IRQS] = {
-       [0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+ #endif
+       [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+       [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+       [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+       [3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+       [4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+       [5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+       [6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+       [7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+       [8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+       [9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+       [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+       [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+       [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+       [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+       [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
  };
  
- #define for_each_irq_cfg(irq, cfg)            \
-       for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+ void __init arch_early_irq_init(void)
+ {
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+       int count;
+       int i;
+       cfg = irq_cfgx;
+       count = ARRAY_SIZE(irq_cfgx);
  
+       for (i = 0; i < count; i++) {
+               desc = irq_to_desc(i);
+               desc->chip_data = &cfg[i];
+       }
+ }
+ #ifdef CONFIG_SPARSE_IRQ
  static struct irq_cfg *irq_cfg(unsigned int irq)
  {
-       return irq < nr_irqs ? irq_cfgx + irq : NULL;
+       struct irq_cfg *cfg = NULL;
+       struct irq_desc *desc;
+       desc = irq_to_desc(irq);
+       if (desc)
+               cfg = desc->chip_data;
+       return cfg;
  }
  
- static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
  {
-       return irq_cfg(irq);
+       struct irq_cfg *cfg;
+       int node;
+       node = cpu_to_node(cpu);
+       cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+       printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
+       return cfg;
  }
  
- /*
-  * Rough estimation of how many shared IRQs there are, can be changed
-  * anytime.
-  */
- #define MAX_PLUS_SHARED_IRQS NR_IRQS
- #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+ void arch_init_chip_data(struct irq_desc *desc, int cpu)
+ {
+       struct irq_cfg *cfg;
  
- /*
-  * This is performance-critical, we want to do it O(1)
-  *
-  * the indexing order of this array favors 1:1 mappings
-  * between pins and IRQs.
-  */
+       cfg = desc->chip_data;
+       if (!cfg) {
+               desc->chip_data = get_one_free_irq_cfg(cpu);
+               if (!desc->chip_data) {
+                       printk(KERN_ERR "can not alloc irq_cfg\n");
+                       BUG_ON(1);
+               }
+       }
+ }
  
- struct irq_pin_list {
-       int apic, pin;
-       struct irq_pin_list *next;
- };
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ static void
+ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+ {
+       struct irq_pin_list *old_entry, *head, *tail, *entry;
+       cfg->irq_2_pin = NULL;
+       old_entry = old_cfg->irq_2_pin;
+       if (!old_entry)
+               return;
+       entry = get_one_free_irq_2_pin(cpu);
+       if (!entry)
+               return;
+       entry->apic     = old_entry->apic;
+       entry->pin      = old_entry->pin;
+       head            = entry;
+       tail            = entry;
+       old_entry       = old_entry->next;
+       while (old_entry) {
+               entry = get_one_free_irq_2_pin(cpu);
+               if (!entry) {
+                       entry = head;
+                       while (entry) {
+                               head = entry->next;
+                               kfree(entry);
+                               entry = head;
+                       }
+                       /* still use the old one */
+                       return;
+               }
+               entry->apic     = old_entry->apic;
+               entry->pin      = old_entry->pin;
+               tail->next      = entry;
+               tail            = entry;
+               old_entry       = old_entry->next;
+       }
  
- static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
- static struct irq_pin_list *irq_2_pin_ptr;
+       tail->next = NULL;
+       cfg->irq_2_pin = head;
+ }
  
- static void __init irq_2_pin_init(void)
+ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
  {
-       struct irq_pin_list *pin = irq_2_pin_head;
-       int i;
+       struct irq_pin_list *entry, *next;
+       if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+               return;
  
-       for (i = 1; i < PIN_MAP_SIZE; i++)
-               pin[i-1].next = &pin[i];
+       entry = old_cfg->irq_2_pin;
  
-       irq_2_pin_ptr = &pin[0];
+       while (entry) {
+               next = entry->next;
+               kfree(entry);
+               entry = next;
+       }
+       old_cfg->irq_2_pin = NULL;
  }
  
- static struct irq_pin_list *get_one_free_irq_2_pin(void)
+ void arch_init_copy_chip_data(struct irq_desc *old_desc,
+                                struct irq_desc *desc, int cpu)
  {
-       struct irq_pin_list *pin = irq_2_pin_ptr;
+       struct irq_cfg *cfg;
+       struct irq_cfg *old_cfg;
  
-       if (!pin)
-               panic("can not get more irq_2_pin\n");
+       cfg = get_one_free_irq_cfg(cpu);
  
-       irq_2_pin_ptr = pin->next;
-       pin->next = NULL;
-       return pin;
+       if (!cfg)
+               return;
+       desc->chip_data = cfg;
+       old_cfg = old_desc->chip_data;
+       memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+       init_copy_irq_2_pin(old_cfg, cfg, cpu);
+ }
+ static void free_irq_cfg(struct irq_cfg *old_cfg)
+ {
+       kfree(old_cfg);
+ }
+ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+ {
+       struct irq_cfg *old_cfg, *cfg;
+       old_cfg = old_desc->chip_data;
+       cfg = desc->chip_data;
+       if (old_cfg == cfg)
+               return;
+       if (old_cfg) {
+               free_irq_2_pin(old_cfg, cfg);
+               free_irq_cfg(old_cfg);
+               old_desc->chip_data = NULL;
+       }
  }
  
+ static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+ {
+       struct irq_cfg *cfg = desc->chip_data;
+       if (!cfg->move_in_progress) {
+               /* it means that domain is not changed */
+               if (!cpus_intersects(desc->affinity, mask))
+                       cfg->move_desc_pending = 1;
+       }
+ }
+ #endif
+ #else
+ static struct irq_cfg *irq_cfg(unsigned int irq)
+ {
+       return irq < nr_irqs ? irq_cfgx + irq : NULL;
+ }
+ #endif
+ #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+ {
+ }
+ #endif
  struct io_apic {
        unsigned int index;
        unsigned int unused[3];
@@@ -237,11 -396,10 +396,10 @@@ static inline void io_apic_modify(unsig
        writel(value, &io_apic->data);
  }
  
- static bool io_apic_level_ack_pending(unsigned int irq)
+ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
  {
        struct irq_pin_list *entry;
        unsigned long flags;
-       struct irq_cfg *cfg = irq_cfg(irq);
  
        spin_lock_irqsave(&ioapic_lock, flags);
        entry = cfg->irq_2_pin;
@@@ -323,13 -481,12 +481,12 @@@ static void ioapic_mask_entry(int apic
  }
  
  #ifdef CONFIG_SMP
- static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
  {
        int apic, pin;
-       struct irq_cfg *cfg;
        struct irq_pin_list *entry;
+       u8 vector = cfg->vector;
  
-       cfg = irq_cfg(irq);
        entry = cfg->irq_2_pin;
        for (;;) {
                unsigned int reg;
        }
  }
  
- static int assign_irq_vector(int irq, cpumask_t mask);
+ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
  
- static void set_ioapic_affinity_irq(unsigned int irq,
-                                   const struct cpumask *mask)
+ static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
  {
        struct irq_cfg *cfg;
        unsigned long flags;
        unsigned int dest;
        cpumask_t tmp;
-       struct irq_desc *desc;
+       unsigned int irq;
  
-       if (!cpumask_intersects(mask, cpu_online_mask))
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
                return;
  
-       cfg = irq_cfg(irq);
-       if (assign_irq_vector(irq, *mask))
+       irq = desc->irq;
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, mask))
                return;
  
-       cpumask_and(&tmp, &cfg->domain, mask);
+       set_extra_move_desc(desc, mask);
+       cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
        /*
         * Only the high 8 bits are valid.
         */
        dest = SET_APIC_LOGICAL_ID(dest);
  
-       desc = irq_to_desc(irq);
        spin_lock_irqsave(&ioapic_lock, flags);
-       __target_IO_APIC_irq(irq, dest, cfg->vector);
-       cpumask_copy(&desc->affinity, mask);
+       __target_IO_APIC_irq(irq, dest, cfg);
+       desc->affinity = mask;
        spin_unlock_irqrestore(&ioapic_lock, flags);
  }
 -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 -      set_ioapic_affinity_irq_desc(desc, mask);
++static void set_ioapic_affinity_irq(unsigned int irq,
++                                  const struct cpumask *mask)
+ {
+       struct irq_desc *desc;
+       desc = irq_to_desc(irq);
++      set_ioapic_affinity_irq_desc(desc, *mask);
+ }
  #endif /* CONFIG_SMP */
  
  /*
   * shared ISA-space IRQs, so we have to support them. We are super
   * fast in the common case, and fast for shared ISA-space IRQs.
   */
- static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
  {
-       struct irq_cfg *cfg;
        struct irq_pin_list *entry;
  
-       /* first time to refer irq_cfg, so with new */
-       cfg = irq_cfg_alloc(irq);
        entry = cfg->irq_2_pin;
        if (!entry) {
-               entry = get_one_free_irq_2_pin();
+               entry = get_one_free_irq_2_pin(cpu);
+               if (!entry) {
+                       printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+                                       apic, pin);
+                       return;
+               }
                cfg->irq_2_pin = entry;
                entry->apic = apic;
                entry->pin = pin;
                entry = entry->next;
        }
  
-       entry->next = get_one_free_irq_2_pin();
+       entry->next = get_one_free_irq_2_pin(cpu);
        entry = entry->next;
        entry->apic = apic;
        entry->pin = pin;
  /*
   * Reroute an IRQ to a different pin.
   */
- static void __init replace_pin_at_irq(unsigned int irq,
+ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
                                      int oldapic, int oldpin,
                                      int newapic, int newpin)
  {
-       struct irq_cfg *cfg = irq_cfg(irq);
        struct irq_pin_list *entry = cfg->irq_2_pin;
        int replaced = 0;
  
  
        /* why? call replace before add? */
        if (!replaced)
-               add_pin_to_irq(irq, newapic, newpin);
+               add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
  }
  
- static inline void io_apic_modify_irq(unsigned int irq,
+ static inline void io_apic_modify_irq(struct irq_cfg *cfg,
                                int mask_and, int mask_or,
                                void (*final)(struct irq_pin_list *entry))
  {
        int pin;
-       struct irq_cfg *cfg;
        struct irq_pin_list *entry;
  
-       cfg = irq_cfg(irq);
        for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
                unsigned int reg;
                pin = entry->pin;
        }
  }
  
- static void __unmask_IO_APIC_irq(unsigned int irq)
+ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
  }
  
  #ifdef CONFIG_X86_64
@@@ -492,47 -659,64 +660,64 @@@ void io_apic_sync(struct irq_pin_list *
        readl(&io_apic->data);
  }
  
- static void __mask_IO_APIC_irq(unsigned int irq)
+ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
  }
  #else /* CONFIG_X86_32 */
- static void __mask_IO_APIC_irq(unsigned int irq)
+ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
  }
  
- static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+ static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
                        IO_APIC_REDIR_MASKED, NULL);
  }
  
- static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
                        IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
  }
  #endif /* CONFIG_X86_32 */
  
- static void mask_IO_APIC_irq (unsigned int irq)
+ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
  {
+       struct irq_cfg *cfg = desc->chip_data;
        unsigned long flags;
  
+       BUG_ON(!cfg);
        spin_lock_irqsave(&ioapic_lock, flags);
-       __mask_IO_APIC_irq(irq);
+       __mask_IO_APIC_irq(cfg);
        spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  
- static void unmask_IO_APIC_irq (unsigned int irq)
+ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
  {
+       struct irq_cfg *cfg = desc->chip_data;
        unsigned long flags;
  
        spin_lock_irqsave(&ioapic_lock, flags);
-       __unmask_IO_APIC_irq(irq);
+       __unmask_IO_APIC_irq(cfg);
        spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  
+ static void mask_IO_APIC_irq(unsigned int irq)
+ {
+       struct irq_desc *desc = irq_to_desc(irq);
+       mask_IO_APIC_irq_desc(desc);
+ }
+ static void unmask_IO_APIC_irq(unsigned int irq)
+ {
+       struct irq_desc *desc = irq_to_desc(irq);
+       unmask_IO_APIC_irq_desc(desc);
+ }
  static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
  {
        struct IO_APIC_route_entry entry;
@@@ -809,7 -993,7 +994,7 @@@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vecto
   */
  static int EISA_ELCR(unsigned int irq)
  {
-       if (irq < 16) {
+       if (irq < NR_IRQS_LEGACY) {
                unsigned int port = 0x4d0 + (irq >> 3);
                return (inb(port) >> (irq & 7)) & 1;
        }
@@@ -1034,7 -1218,7 +1219,7 @@@ void unlock_vector_lock(void
        spin_unlock(&vector_lock);
  }
  
- static int __assign_irq_vector(int irq, cpumask_t mask)
+ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
  {
        /*
         * NOTE! The local APIC isn't very good at handling
        static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
        unsigned int old_vector;
        int cpu;
-       struct irq_cfg *cfg;
  
-       cfg = irq_cfg(irq);
+       if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+               return -EBUSY;
  
        /* Only try and allocate irqs on cpus that are present */
        cpus_and(mask, mask, cpu_online_map);
  
-       if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-               return -EBUSY;
        old_vector = cfg->vector;
        if (old_vector) {
                cpumask_t tmp;
@@@ -1113,24 -1294,22 +1295,22 @@@ next
        return -ENOSPC;
  }
  
- static int assign_irq_vector(int irq, cpumask_t mask)
+ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
  {
        int err;
        unsigned long flags;
  
        spin_lock_irqsave(&vector_lock, flags);
-       err = __assign_irq_vector(irq, mask);
+       err = __assign_irq_vector(irq, cfg, mask);
        spin_unlock_irqrestore(&vector_lock, flags);
        return err;
  }
  
- static void __clear_irq_vector(int irq)
+ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
  {
-       struct irq_cfg *cfg;
        cpumask_t mask;
        int cpu, vector;
  
-       cfg = irq_cfg(irq);
        BUG_ON(!cfg->vector);
  
        vector = cfg->vector;
@@@ -1162,9 -1341,13 +1342,13 @@@ void __setup_vector_irq(int cpu
        /* This function must be called with vector_lock held */
        int irq, vector;
        struct irq_cfg *cfg;
+       struct irq_desc *desc;
  
        /* Mark the inuse vectors */
-       for_each_irq_cfg(irq, cfg) {
+       for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
+               cfg = desc->chip_data;
                if (!cpu_isset(cpu, cfg->domain))
                        continue;
                vector = cfg->vector;
@@@ -1215,11 -1398,8 +1399,8 @@@ static inline int IO_APIC_irq_trigger(i
  }
  #endif
  
- static void ioapic_register_intr(int irq, unsigned long trigger)
+ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
  {
-       struct irq_desc *desc;
-       desc = irq_to_desc(irq);
  
        if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
            trigger == IOAPIC_LEVEL)
@@@ -1311,7 -1491,7 +1492,7 @@@ static int setup_ioapic_entry(int apic
        return 0;
  }
  
- static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
                              int trigger, int polarity)
  {
        struct irq_cfg *cfg;
        if (!IO_APIC_IRQ(irq))
                return;
  
-       cfg = irq_cfg(irq);
+       cfg = desc->chip_data;
  
        mask = TARGET_CPUS;
-       if (assign_irq_vector(irq, mask))
+       if (assign_irq_vector(irq, cfg, mask))
                return;
  
        cpus_and(mask, cfg->domain, mask);
                               cfg->vector)) {
                printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
                       mp_ioapics[apic].mp_apicid, pin);
-               __clear_irq_vector(irq);
+               __clear_irq_vector(irq, cfg);
                return;
        }
  
-       ioapic_register_intr(irq, trigger);
-       if (irq < 16)
+       ioapic_register_intr(irq, desc, trigger);
+       if (irq < NR_IRQS_LEGACY)
                disable_8259A_irq(irq);
  
        ioapic_write_entry(apic, pin, entry);
@@@ -1356,6 -1536,9 +1537,9 @@@ static void __init setup_IO_APIC_irqs(v
  {
        int apic, pin, idx, irq;
        int notcon = 0;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int cpu = boot_cpu_id;
  
        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
  
                        if (multi_timer_check(apic, irq))
                                continue;
  #endif
-                       add_pin_to_irq(irq, apic, pin);
+                       desc = irq_to_desc_alloc_cpu(irq, cpu);
+                       if (!desc) {
+                               printk(KERN_INFO "cannot get irq_desc for %d\n", irq);
+                               continue;
+                       }
+                       cfg = desc->chip_data;
+                       add_pin_to_irq_cpu(cfg, cpu, apic, pin);
  
-                       setup_IO_APIC_irq(apic, pin, irq,
+                       setup_IO_APIC_irq(apic, pin, irq, desc,
                                        irq_trigger(idx), irq_polarity(idx));
                }
        }
@@@ -1448,6 -1637,7 +1638,7 @@@ __apicdebuginit(void) print_IO_APIC(voi
        union IO_APIC_reg_03 reg_03;
        unsigned long flags;
        struct irq_cfg *cfg;
+       struct irq_desc *desc;
        unsigned int irq;
  
        if (apic_verbosity == APIC_QUIET)
        }
        }
        printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for_each_irq_cfg(irq, cfg) {
-               struct irq_pin_list *entry = cfg->irq_2_pin;
+       for_each_irq_desc(irq, desc) {
+               struct irq_pin_list *entry;
+               if (!desc)
+                       continue;
+               cfg = desc->chip_data;
+               entry = cfg->irq_2_pin;
                if (!entry)
                        continue;
                printk(KERN_DEBUG "IRQ%d ", irq);
@@@ -2022,14 -2217,16 +2218,16 @@@ static unsigned int startup_ioapic_irq(
  {
        int was_pending = 0;
        unsigned long flags;
+       struct irq_cfg *cfg;
  
        spin_lock_irqsave(&ioapic_lock, flags);
-       if (irq < 16) {
+       if (irq < NR_IRQS_LEGACY) {
                disable_8259A_irq(irq);
                if (i8259A_irq_pending(irq))
                        was_pending = 1;
        }
-       __unmask_IO_APIC_irq(irq);
+       cfg = irq_cfg(irq);
+       __unmask_IO_APIC_irq(cfg);
        spin_unlock_irqrestore(&ioapic_lock, flags);
  
        return was_pending;
@@@ -2092,35 -2289,37 +2290,37 @@@ static DECLARE_DELAYED_WORK(ir_migratio
   * as simple as edge triggered migration and we can do the irq migration
   * with a simple atomic update to IO-APIC RTE.
   */
- static void migrate_ioapic_irq(int irq, cpumask_t mask)
+ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
  {
        struct irq_cfg *cfg;
-       struct irq_desc *desc;
        cpumask_t tmp, cleanup_mask;
        struct irte irte;
        int modify_ioapic_rte;
        unsigned int dest;
        unsigned long flags;
+       unsigned int irq;
  
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
                return;
  
+       irq = desc->irq;
        if (get_irte(irq, &irte))
                return;
  
-       if (assign_irq_vector(irq, mask))
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, mask))
                return;
  
-       cfg = irq_cfg(irq);
+       set_extra_move_desc(desc, mask);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
-       desc = irq_to_desc(irq);
        modify_ioapic_rte = desc->status & IRQ_LEVEL;
        if (modify_ioapic_rte) {
                spin_lock_irqsave(&ioapic_lock, flags);
-               __target_IO_APIC_irq(irq, dest, cfg->vector);
+               __target_IO_APIC_irq(irq, dest, cfg);
                spin_unlock_irqrestore(&ioapic_lock, flags);
        }
  
        desc->affinity = mask;
  }
  
- static int migrate_irq_remapped_level(int irq)
+ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
  {
        int ret = -1;
-       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg = desc->chip_data;
  
-       mask_IO_APIC_irq(irq);
+       mask_IO_APIC_irq_desc(desc);
  
-       if (io_apic_level_ack_pending(irq)) {
+       if (io_apic_level_ack_pending(cfg)) {
                /*
                 * Interrupt in progress. Migrating irq now will change the
                 * vector information in the IO-APIC RTE and that will confuse
        }
  
        /* everything is clear. we have right of way */
-       migrate_ioapic_irq(irq, desc->pending_mask);
+       migrate_ioapic_irq_desc(desc, desc->pending_mask);
  
        ret = 0;
        desc->status &= ~IRQ_MOVE_PENDING;
        cpus_clear(desc->pending_mask);
  
  unmask:
-       unmask_IO_APIC_irq(irq);
+       unmask_IO_APIC_irq_desc(desc);
        return ret;
  }
  
@@@ -2178,6 -2378,9 +2379,9 @@@ static void ir_irq_migration(struct wor
        struct irq_desc *desc;
  
        for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
                if (desc->status & IRQ_MOVE_PENDING) {
                        unsigned long flags;
  
                                continue;
                        }
  
 -                      desc->chip->set_affinity(irq, desc->pending_mask);
 +                      desc->chip->set_affinity(irq, &desc->pending_mask);
                        spin_unlock_irqrestore(&desc->lock, flags);
                }
        }
  /*
   * Migrates the IRQ destination in the process context.
   */
- static void set_ir_ioapic_affinity_irq(unsigned int irq,
-                                      const struct cpumask *mask)
+ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
  {
-       struct irq_desc *desc = irq_to_desc(irq);
        if (desc->status & IRQ_LEVEL) {
                desc->status |= IRQ_MOVE_PENDING;
-               cpumask_copy(&desc->pending_mask, mask);
-               migrate_irq_remapped_level(irq);
+               desc->pending_mask = mask;
+               migrate_irq_remapped_level_desc(desc);
                return;
        }
  
-       migrate_ioapic_irq(irq, *mask);
+       migrate_ioapic_irq_desc(desc, mask);
+ }
 -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
++static void set_ir_ioapic_affinity_irq(unsigned int irq,
++                                     const struct cpumask *mask)
+ {
+       struct irq_desc *desc = irq_to_desc(irq);
 -      set_ir_ioapic_affinity_irq_desc(desc, mask);
++      set_ir_ioapic_affinity_irq_desc(desc, *mask);
  }
  #endif
  
@@@ -2229,6 -2435,9 +2437,9 @@@ asmlinkage void smp_irq_move_cleanup_in
                struct irq_cfg *cfg;
                irq = __get_cpu_var(vector_irq)[vector];
  
+               if (irq == -1)
+                       continue;
                desc = irq_to_desc(irq);
                if (!desc)
                        continue;
@@@ -2250,19 -2459,40 +2461,40 @@@ unlock
        irq_exit();
  }
  
- static void irq_complete_move(unsigned int irq)
+ static void irq_complete_move(struct irq_desc **descp)
  {
-       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_desc *desc = *descp;
+       struct irq_cfg *cfg = desc->chip_data;
        unsigned vector, me;
  
-       if (likely(!cfg->move_in_progress))
+       if (likely(!cfg->move_in_progress)) {
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+               if (likely(!cfg->move_desc_pending))
+                       return;
+               /* domain has not changed, but affinity did */
+               me = smp_processor_id();
+               if (cpu_isset(me, desc->affinity)) {
+                       *descp = desc = move_irq_desc(desc, me);
+                       /* get the new one */
+                       cfg = desc->chip_data;
+                       cfg->move_desc_pending = 0;
+               }
+ #endif
                return;
+       }
  
        vector = ~get_irq_regs()->orig_ax;
        me = smp_processor_id();
        if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
                cpumask_t cleanup_mask;
  
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+               *descp = desc = move_irq_desc(desc, me);
+               /* get the new one */
+               cfg = desc->chip_data;
+ #endif
                cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
                cfg->move_cleanup_count = cpus_weight(cleanup_mask);
                send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
        }
  }
  #else
- static inline void irq_complete_move(unsigned int irq) {}
+ static inline void irq_complete_move(struct irq_desc **descp) {}
  #endif
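
irq_complete_move() now takes a struct irq_desc ** because, under
CONFIG_NUMA_MIGRATE_IRQ_DESC, move_irq_desc() may hand back a different
descriptor; callers must keep using the updated pointer. A sketch of the
contract (mirroring ack_apic_edge below):

	struct irq_desc *desc = irq_to_desc(irq);

	irq_complete_move(&desc);	/* may rewrite 'desc' on NUMA migration */
	/* from here on, use only 'desc'; a saved copy could be stale */
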
  #ifdef CONFIG_INTR_REMAP
  static void ack_x2apic_level(unsigned int irq)
  {
@@@ -2282,11 -2513,14 +2515,14 @@@ static void ack_x2apic_edge(unsigned in
  {
        ack_x2APIC_irq();
  }
  #endif
  
  static void ack_apic_edge(unsigned int irq)
  {
-       irq_complete_move(irq);
+       struct irq_desc *desc = irq_to_desc(irq);
+       irq_complete_move(&desc);
        move_native_irq(irq);
        ack_APIC_irq();
  }
@@@ -2295,18 -2529,21 +2531,21 @@@ atomic_t irq_mis_count
  
  static void ack_apic_level(unsigned int irq)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
  #ifdef CONFIG_X86_32
        unsigned long v;
        int i;
  #endif
+       struct irq_cfg *cfg;
        int do_unmask_irq = 0;
  
-       irq_complete_move(irq);
+       irq_complete_move(&desc);
  #ifdef CONFIG_GENERIC_PENDING_IRQ
        /* If we are moving the irq we need to mask it */
-       if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+       if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
                do_unmask_irq = 1;
-               mask_IO_APIC_irq(irq);
+               mask_IO_APIC_irq_desc(desc);
        }
  #endif
  
        * operation to prevent an edge-triggered interrupt escaping meanwhile.
        * The idea is from Manfred Spraul.  --macro
        */
-       i = irq_cfg(irq)->vector;
+       cfg = desc->chip_data;
+       i = cfg->vector;
  
        v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
  #endif
                 * accurate and is causing problems then it is a hardware bug
                 * and you can go talk to the chipset vendor about it.
                 */
-               if (!io_apic_level_ack_pending(irq))
+               cfg = desc->chip_data;
+               if (!io_apic_level_ack_pending(cfg))
                        move_masked_irq(irq);
-               unmask_IO_APIC_irq(irq);
+               unmask_IO_APIC_irq_desc(desc);
        }
  
  #ifdef CONFIG_X86_32
        if (!(v & (1 << (i & 0x1f)))) {
                atomic_inc(&irq_mis_count);
                spin_lock(&ioapic_lock);
-               __mask_and_edge_IO_APIC_irq(irq);
-               __unmask_and_level_IO_APIC_irq(irq);
+               __mask_and_edge_IO_APIC_irq(cfg);
+               __unmask_and_level_IO_APIC_irq(cfg);
                spin_unlock(&ioapic_lock);
        }
  #endif
@@@ -2430,20 -2669,22 +2671,22 @@@ static inline void init_IO_APIC_traps(v
         * Also, we've got to be careful not to trash gate
         * 0x80, because int 0x80 is hm, kind of importantish. ;)
         */
-       for_each_irq_cfg(irq, cfg) {
-               if (IO_APIC_IRQ(irq) && !cfg->vector) {
+       for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
+               cfg = desc->chip_data;
+               if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
                        /*
                         * Hmm.. We don't have an entry for this,
                         * so default to an old-fashioned 8259
                         * interrupt if we can..
                         */
-                       if (irq < 16)
+                       if (irq < NR_IRQS_LEGACY)
                                make_8259A_irq(irq);
-                       else {
-                               desc = irq_to_desc(irq);
+                       else
                                /* Strange. Oh, well.. */
                                desc->chip = &no_irq_chip;
-                       }
                }
        }
  }
@@@ -2468,7 -2709,7 +2711,7 @@@ static void unmask_lapic_irq(unsigned i
        apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
  }
  
- static void ack_lapic_irq (unsigned int irq)
+ static void ack_lapic_irq(unsigned int irq)
  {
        ack_APIC_irq();
  }
@@@ -2480,11 -2721,8 +2723,8 @@@ static struct irq_chip lapic_chip __rea
        .ack            = ack_lapic_irq,
  };
  
- static void lapic_register_intr(int irq)
+ static void lapic_register_intr(int irq, struct irq_desc *desc)
  {
-       struct irq_desc *desc;
-       desc = irq_to_desc(irq);
        desc->status &= ~IRQ_LEVEL;
        set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
                                      "edge");
@@@ -2588,7 -2826,9 +2828,9 @@@ int timer_through_8259 __initdata
   */
  static inline void __init check_timer(void)
  {
-       struct irq_cfg *cfg = irq_cfg(0);
+       struct irq_desc *desc = irq_to_desc(0);
+       struct irq_cfg *cfg = desc->chip_data;
+       int cpu = boot_cpu_id;
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
        unsigned int ver;
         * get/set the timer IRQ vector:
         */
        disable_8259A_irq(0);
-       assign_irq_vector(0, TARGET_CPUS);
+       assign_irq_vector(0, cfg, TARGET_CPUS);
  
        /*
         * As IRQ0 is to be enabled in the 8259A, the virtual
                 * Ok, does IRQ0 through the IOAPIC work?
                 */
                if (no_pin1) {
-                       add_pin_to_irq(0, apic1, pin1);
+                       add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                }
-               unmask_IO_APIC_irq(0);
+               unmask_IO_APIC_irq_desc(desc);
                if (timer_irq_works()) {
                        if (nmi_watchdog == NMI_IO_APIC) {
                                setup_nmi();
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+               replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
                setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-               unmask_IO_APIC_irq(0);
+               unmask_IO_APIC_irq_desc(desc);
                enable_8259A_irq(0);
                if (timer_irq_works()) {
                        apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
        apic_printk(APIC_QUIET, KERN_INFO
                    "...trying to set up timer as Virtual Wire IRQ...\n");
  
-       lapic_register_intr(0);
+       lapic_register_intr(0, desc);
        apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
        enable_8259A_irq(0);
  
@@@ -2902,22 -3142,26 +3144,26 @@@ unsigned int create_irq_nr(unsigned in
        unsigned int irq;
        unsigned int new;
        unsigned long flags;
-       struct irq_cfg *cfg_new;
-       irq_want = nr_irqs - 1;
+       struct irq_cfg *cfg_new = NULL;
+       int cpu = boot_cpu_id;
+       struct irq_desc *desc_new = NULL;
  
        irq = 0;
        spin_lock_irqsave(&vector_lock, flags);
-       for (new = irq_want; new > 0; new--) {
+       for (new = irq_want; new < NR_IRQS; new++) {
                if (platform_legacy_irq(new))
                        continue;
-               cfg_new = irq_cfg(new);
-               if (cfg_new && cfg_new->vector != 0)
+               desc_new = irq_to_desc_alloc_cpu(new, cpu);
+               if (!desc_new) {
+                       printk(KERN_INFO "cannot get irq_desc for %d\n", new);
                        continue;
-               /* check if need to create one */
-               if (!cfg_new)
-                       cfg_new = irq_cfg_alloc(new);
-               if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+               }
+               cfg_new = desc_new->chip_data;
+               if (cfg_new->vector != 0)
+                       continue;
+               if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
                        irq = new;
                break;
        }
  
        if (irq > 0) {
                dynamic_irq_init(irq);
+               /* restore it, in case dynamic_irq_init cleared it */
+               if (desc_new)
+                       desc_new->chip_data = cfg_new;
        }
        return irq;
  }
  
+ static int nr_irqs_gsi = NR_IRQS_LEGACY;
  int create_irq(void)
  {
+       unsigned int irq_want;
        int irq;
  
-       irq = create_irq_nr(nr_irqs - 1);
+       irq_want = nr_irqs_gsi;
+       irq = create_irq_nr(irq_want);
  
        if (irq == 0)
                irq = -1;
  void destroy_irq(unsigned int irq)
  {
        unsigned long flags;
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
  
+       /* store it, in case dynamic_irq_cleanup clears it */
+       desc = irq_to_desc(irq);
+       cfg = desc->chip_data;
        dynamic_irq_cleanup(irq);
+       /* reconnect the irq_cfg */
+       if (desc)
+               desc->chip_data = cfg;
  
  #ifdef CONFIG_INTR_REMAP
        free_irte(irq);
  #endif
        spin_lock_irqsave(&vector_lock, flags);
-       __clear_irq_vector(irq);
+       __clear_irq_vector(irq, cfg);
        spin_unlock_irqrestore(&vector_lock, flags);
  }
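
A hedged usage sketch of the create/destroy pair above (the error policy is
the hypothetical caller's, not this file's):

	int irq = create_irq();		/* scans upward from nr_irqs_gsi */

	if (irq < 0)
		return -ENOSPC;
	/* ... request_irq(), use the vector ... */
	destroy_irq(irq);		/* frees the vector, keeps chip_data linked */
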
  
@@@ -2966,12 -3224,12 +3226,12 @@@ static int msi_compose_msg(struct pci_d
        unsigned dest;
        cpumask_t tmp;
  
+       cfg = irq_cfg(irq);
        tmp = TARGET_CPUS;
-       err = assign_irq_vector(irq, tmp);
+       err = assign_irq_vector(irq, cfg, tmp);
        if (err)
                return err;
  
-       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, tmp);
        dest = cpu_mask_to_apicid(tmp);
  
  }
  
  #ifdef CONFIG_SMP
 -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 +static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
-       struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
-       if (assign_irq_vector(irq, *mask))
+       cfg = desc->chip_data;
 -      if (assign_irq_vector(irq, cfg, mask))
++      if (assign_irq_vector(irq, cfg, *mask))
                return;
  
-       cfg = irq_cfg(irq);
 -      set_extra_move_desc(desc, mask);
++      set_extra_move_desc(desc, *mask);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
-       read_msi_msg(irq, &msg);
+       read_msi_msg_desc(desc, &msg);
  
        msg.data &= ~MSI_DATA_VECTOR_MASK;
        msg.data |= MSI_DATA_VECTOR(cfg->vector);
        msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
-       write_msi_msg(irq, &msg);
-       desc = irq_to_desc(irq);
+       write_msi_msg_desc(desc, &msg);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #ifdef CONFIG_INTR_REMAP
  /*
   * Migrate the MSI irq to another cpumask. This migration is
   * done in the process context using interrupt-remapping hardware.
   */
 -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 +static void ir_set_msi_irq_affinity(unsigned int irq,
 +                                  const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp, cleanup_mask;
        struct irte irte;
-       struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
        if (get_irte(irq, &irte))
                return;
  
-       if (assign_irq_vector(irq, *mask))
+       cfg = desc->chip_data;
 -      if (assign_irq_vector(irq, cfg, mask))
++      if (assign_irq_vector(irq, cfg, *mask))
                return;
  
-       cfg = irq_cfg(irq);
 -      set_extra_move_desc(desc, mask);
++      set_extra_move_desc(desc, *mask);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        irte.vector = cfg->vector;
                cfg->move_in_progress = 0;
        }
  
-       desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif
  #endif /* CONFIG_SMP */
  
@@@ -3165,7 -3426,7 +3427,7 @@@ static int msi_alloc_irte(struct pci_de
  }
  #endif
  
- static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
  {
        int ret;
        struct msi_msg msg;
        if (ret < 0)
                return ret;
  
-       set_irq_msi(irq, desc);
+       set_irq_msi(irq, msidesc);
        write_msi_msg(irq, &msg);
  
  #ifdef CONFIG_INTR_REMAP
        return 0;
  }
  
- static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
- {
-       unsigned int irq;
-       irq = dev->bus->number;
-       irq <<= 8;
-       irq |= dev->devfn;
-       irq <<= 12;
-       return irq;
- }
- int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
  {
        unsigned int irq;
        int ret;
        unsigned int irq_want;
  
-       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+       irq_want = nr_irqs_gsi;
        irq = create_irq_nr(irq_want);
        if (irq == 0)
                return -1;
                goto error;
  no_ir:
  #endif
-       ret = setup_msi_irq(dev, desc, irq);
+       ret = setup_msi_irq(dev, msidesc, irq);
        if (ret < 0) {
                destroy_irq(irq);
                return ret;
@@@ -3245,7 -3493,7 +3494,7 @@@ int arch_setup_msi_irqs(struct pci_dev 
  {
        unsigned int irq;
        int ret, sub_handle;
-       struct msi_desc *desc;
+       struct msi_desc *msidesc;
        unsigned int irq_want;
  
  #ifdef CONFIG_INTR_REMAP
        int index = 0;
  #endif
  
-       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+       irq_want = nr_irqs_gsi;
        sub_handle = 0;
-       list_for_each_entry(desc, &dev->msi_list, list) {
-               irq = create_irq_nr(irq_want--);
+       list_for_each_entry(msidesc, &dev->msi_list, list) {
+               irq = create_irq_nr(irq_want);
+               irq_want++;
                if (irq == 0)
                        return -1;
  #ifdef CONFIG_INTR_REMAP
                }
  no_ir:
  #endif
-               ret = setup_msi_irq(dev, desc, irq);
+               ret = setup_msi_irq(dev, msidesc, irq);
                if (ret < 0)
                        goto error;
                sub_handle++;
@@@ -3307,22 -3556,25 +3557,24 @@@ void arch_teardown_msi_irq(unsigned in
  
  #ifdef CONFIG_DMAR
  #ifdef CONFIG_SMP
 -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 +static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
-       struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
-       if (assign_irq_vector(irq, *mask))
+       cfg = desc->chip_data;
 -      if (assign_irq_vector(irq, cfg, mask))
++      if (assign_irq_vector(irq, cfg, *mask))
                return;
  
-       cfg = irq_cfg(irq);
 -      set_extra_move_desc(desc, mask);
++      set_extra_move_desc(desc, *mask);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        dmar_msi_read(irq, &msg);
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
        dmar_msi_write(irq, &msg);
-       desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif /* CONFIG_SMP */
  
  struct irq_chip dmar_msi_type = {
@@@ -3367,22 -3619,25 +3619,24 @@@ int arch_setup_dmar_msi(unsigned int ir
  #ifdef CONFIG_HPET_TIMER
  
  #ifdef CONFIG_SMP
 -static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 +static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
-       struct irq_desc *desc;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
-       if (assign_irq_vector(irq, *mask))
+       cfg = desc->chip_data;
 -      if (assign_irq_vector(irq, cfg, mask))
++      if (assign_irq_vector(irq, cfg, *mask))
                return;
  
-       cfg = irq_cfg(irq);
 -      set_extra_move_desc(desc, mask);
++      set_extra_move_desc(desc, *mask);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        hpet_msi_read(irq, &msg);
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
        hpet_msi_write(irq, &msg);
-       desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif /* CONFIG_SMP */
  
  struct irq_chip hpet_msi_type = {
@@@ -3448,27 -3703,30 +3702,29 @@@ static void target_ht_irq(unsigned int 
        write_ht_irq_msg(irq, &msg);
  }
  
 -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 +static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp;
-       struct irq_desc *desc;
  
 -      cpus_and(tmp, mask, cpu_online_map);
 -      if (cpus_empty(tmp))
 +      if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
-       if (assign_irq_vector(irq, *mask))
+       cfg = desc->chip_data;
 -      if (assign_irq_vector(irq, cfg, mask))
++      if (assign_irq_vector(irq, cfg, *mask))
                return;
  
-       cfg = irq_cfg(irq);
 -      set_extra_move_desc(desc, mask);
++      set_extra_move_desc(desc, *mask);
 -      cpus_and(tmp, cfg->domain, mask);
 +      cpumask_and(&tmp, &cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
  
        target_ht_irq(irq, dest, cfg->vector);
-       desc = irq_to_desc(irq);
 -      desc->affinity = mask;
 +      cpumask_copy(&desc->affinity, mask);
  }
  #endif
  
  static struct irq_chip ht_irq_chip = {
@@@ -3488,13 -3746,13 +3744,13 @@@ int arch_setup_ht_irq(unsigned int irq
        int err;
        cpumask_t tmp;
  
+       cfg = irq_cfg(irq);
        tmp = TARGET_CPUS;
-       err = assign_irq_vector(irq, tmp);
+       err = assign_irq_vector(irq, cfg, tmp);
        if (!err) {
                struct ht_irq_msg msg;
                unsigned dest;
  
-               cfg = irq_cfg(irq);
                cpus_and(tmp, cfg->domain, tmp);
                dest = cpu_mask_to_apicid(tmp);
  
@@@ -3540,7 -3798,9 +3796,9 @@@ int arch_enable_uv_irq(char *irq_name, 
        unsigned long flags;
        int err;
  
-       err = assign_irq_vector(irq, *eligible_cpu);
+       cfg = irq_cfg(irq);
+       err = assign_irq_vector(irq, cfg, *eligible_cpu);
        if (err != 0)
                return err;
  
                                      irq_name);
        spin_unlock_irqrestore(&vector_lock, flags);
  
-       cfg = irq_cfg(irq);
        mmr_value = 0;
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
        BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@@ -3602,9 -3860,16 +3858,16 @@@ int __init io_apic_get_redir_entries (i
        return reg_01.bits.entries;
  }
  
- int __init probe_nr_irqs(void)
+ void __init probe_nr_irqs_gsi(void)
  {
-       return NR_IRQS;
+       int idx;
+       int nr = 0;
+       for (idx = 0; idx < nr_ioapics; idx++)
+               nr += io_apic_get_redir_entries(idx) + 1;
+       if (nr > nr_irqs_gsi)
+               nr_irqs_gsi = nr;
  }
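
As a worked check of the loop above: two IO-APICs whose version registers
report 23 as the highest redirection-entry index contribute (23 + 1) * 2 = 48
GSIs, so nr_irqs_gsi grows from its NR_IRQS_LEGACY default of 16 to 48, and
create_irq() then allocates MSI interrupts above the GSI range.
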
  
  /* --------------------------------------------------------------------------
@@@ -3703,19 -3968,31 +3966,31 @@@ int __init io_apic_get_version(int ioap
  
  int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
  {
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int cpu = boot_cpu_id;
        if (!IO_APIC_IRQ(irq)) {
                apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
                        ioapic);
                return -EINVAL;
        }
  
+       desc = irq_to_desc_alloc_cpu(irq, cpu);
+       if (!desc) {
+               printk(KERN_INFO "cannot get irq_desc for %d\n", irq);
+               return 0;
+       }
        /*
         * IRQs < 16 are already in the irq_2_pin[] map
         */
-       if (irq >= 16)
-               add_pin_to_irq(irq, ioapic, pin);
+       if (irq >= NR_IRQS_LEGACY) {
+               cfg = desc->chip_data;
+               add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+       }
  
-       setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+       setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
  
        return 0;
  }
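
A minimal, hypothetical call site for the routine above (numbers are
illustrative; triggering 1 = level, polarity 1 = active low, as in the ACPI
path):

	/* Route GSI 20 through IOAPIC 0, pin 20, level-triggered, active-low. */
	io_apic_set_pci_routing(0, 20, 20, 1, 1);
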
@@@ -3769,9 -4046,10 +4044,10 @@@ void __init setup_ioapic_dest(void
                         * when you have too many devices, because at that time only boot
                         * cpu is online.
                         */
-                       cfg = irq_cfg(irq);
+                       desc = irq_to_desc(irq);
+                       cfg = desc->chip_data;
                        if (!cfg->vector) {
-                               setup_IO_APIC_irq(ioapic, pin, irq,
+                               setup_IO_APIC_irq(ioapic, pin, irq, desc,
                                                  irq_trigger(irq_entry),
                                                  irq_polarity(irq_entry));
                                continue;
                        /*
                         * Honour affinities which have been set in early boot
                         */
-                       desc = irq_to_desc(irq);
                        if (desc->status &
                            (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
                                mask = desc->affinity;
  
  #ifdef CONFIG_INTR_REMAP
                        if (intr_remapping_enabled)
-                               set_ir_ioapic_affinity_irq(irq, &mask);
+                               set_ir_ioapic_affinity_irq_desc(desc, mask);
                        else
  #endif
-                               set_ioapic_affinity_irq(irq, &mask);
+                               set_ioapic_affinity_irq_desc(desc, mask);
                }
  
        }
@@@ -3842,7 -4119,6 +4117,6 @@@ void __init ioapic_init_mappings(void
        struct resource *ioapic_res;
        int i;
  
-       irq_2_pin_init();
        ioapic_res = ioapic_setup_resources();
        for (i = 0; i < nr_ioapics; i++) {
                if (smp_found_config) {
diff --combined arch/x86/kernel/irq_32.c
index 87870a49be4e19a26e2201a56533ab7fb7fdfaae,119fc9c8ff7f29135b0e353cc31722cd6abe452f..9cf9cbbf7a028b74bf4bec2c0f16a8e9290dc926
@@@ -242,6 -242,8 +242,8 @@@ void fixup_irqs(cpumask_t map
        for_each_irq_desc(irq, desc) {
                cpumask_t mask;
  
+               if (!desc)
+                       continue;
                if (irq == 2)
                        continue;
  
                        mask = map;
                }
                if (desc->chip->set_affinity)
 -                      desc->chip->set_affinity(irq, mask);
 +                      desc->chip->set_affinity(irq, &mask);
                else if (desc->action && !(warned++))
                        printk("Cannot set affinity for irq %i\n", irq);
        }
diff --combined arch/x86/kernel/irq_64.c
index 8cbd069e5b41f6412aad93b0dea2409bd5934448,a174a217eb1aefc25c971c2d6155e483d2d50192..54c69d47a771d0cf969ab6d135579a72121acb0c
@@@ -91,6 -91,8 +91,8 @@@ void fixup_irqs(cpumask_t map
                int break_affinity = 0;
                int set_affinity = 1;
  
+               if (!desc)
+                       continue;
                if (irq == 2)
                        continue;
  
                        desc->chip->mask(irq);
  
                if (desc->chip->set_affinity)
 -                      desc->chip->set_affinity(irq, mask);
 +                      desc->chip->set_affinity(irq, &mask);
                else if (!(warned++))
                        set_affinity = 0;
  
diff --combined drivers/xen/events.c
index eba5ec5b020e1801b5f8a97c3b144cf9583f9eba,46625cd38743c2506e1fc024c8e1c4d434a4fa26..add640ff5c6c873c6b43383b096ac947c6eab02f
@@@ -141,8 -141,12 +141,12 @@@ static void init_evtchn_cpu_bindings(vo
        int i;
  
        /* By default all event channels notify CPU#0. */
-       for_each_irq_desc(i, desc)
+       for_each_irq_desc(i, desc) {
+               if (!desc)
+                       continue;
                desc->affinity = cpumask_of_cpu(0);
+       }
  #endif
  
        memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@@ -229,15 -233,20 +233,20 @@@ static void unmask_evtchn(int port
  static int find_unbound_irq(void)
  {
        int irq;
+       struct irq_desc *desc;
  
        /* Only allocate from dynirq range */
-       for_each_irq_nr(irq)
+       for (irq = 0; irq < nr_irqs; irq++)
                if (irq_bindcount[irq] == 0)
                        break;
  
        if (irq == nr_irqs)
                panic("No available IRQ to bind to: increase nr_irqs!\n");
  
+       desc = irq_to_desc_alloc_cpu(irq, 0);
+       if (WARN_ON(desc == NULL))
+               return -1;
        return irq;
  }
  
@@@ -579,7 -588,7 +588,7 @@@ void rebind_evtchn_irq(int evtchn, int 
        spin_unlock(&irq_mapping_update_lock);
  
        /* new event channels are always bound to cpu 0 */
 -      irq_set_affinity(irq, cpumask_of_cpu(0));
 +      irq_set_affinity(irq, cpumask_of(0));
  
        /* Unmask the event channel. */
        enable_irq(irq);
@@@ -608,9 -617,9 +617,9 @@@ static void rebind_irq_to_cpu(unsigned 
  }
  
  
 -static void set_affinity_irq(unsigned irq, cpumask_t dest)
 +static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
  {
 -      unsigned tcpu = first_cpu(dest);
 +      unsigned tcpu = cpumask_first(dest);
        rebind_irq_to_cpu(irq, tcpu);
  }
  
@@@ -792,7 -801,7 +801,7 @@@ void xen_irq_resume(void
                mask_evtchn(evtchn);
  
        /* No IRQ <-> event-channel mappings. */
-       for_each_irq_nr(irq)
+       for (irq = 0; irq < nr_irqs; irq++)
                irq_info[irq].evtchn = 0; /* zap event-channel binding */
  
        for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@@ -824,7 -833,7 +833,7 @@@ void __init xen_init_IRQ(void
                mask_evtchn(i);
  
        /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-       for_each_irq_nr(i)
+       for (i = 0; i < nr_irqs; i++)
                irq_bindcount[i] = 0;
  
        irq_ctx_init(smp_processor_id());
index 48e63934fabeb01e4480c92cb829dc1a9e2ce9df,be3c484b5242555082763718f4818eacdb37f020..dfaee6bd265bc4ac375179636cd6d83b60a1d5eb
@@@ -14,6 -14,8 +14,8 @@@
  #include <linux/irqflags.h>
  #include <linux/smp.h>
  #include <linux/percpu.h>
+ #include <linux/irqnr.h>
  #include <asm/atomic.h>
  #include <asm/ptrace.h>
  #include <asm/system.h>
@@@ -109,13 -111,13 +111,13 @@@ extern void enable_irq(unsigned int irq
  
  extern cpumask_t irq_default_affinity;
  
 -extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
 +extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
  extern int irq_can_set_affinity(unsigned int irq);
  extern int irq_select_affinity(unsigned int irq);
  
  #else /* CONFIG_SMP */
  
 -static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 +static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
  {
        return -EINVAL;
  }
@@@ -251,9 -253,6 +253,6 @@@ enu
        BLOCK_SOFTIRQ,
        TASKLET_SOFTIRQ,
        SCHED_SOFTIRQ,
- #ifdef CONFIG_HIGH_RES_TIMERS
-       HRTIMER_SOFTIRQ,
- #endif
        RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */
  
        NR_SOFTIRQS
diff --combined include/linux/irq.h
index ab70fd604d3a71e93110f2577b2dcdd851e5568b,98564dc6447627f0d6c25033e3ba601db819daf1..5845bdc1ac0940fe43d1b66d435589d9967558ad
@@@ -113,8 -113,7 +113,8 @@@ struct irq_chip 
        void            (*eoi)(unsigned int irq);
  
        void            (*end)(unsigned int irq);
 -      void            (*set_affinity)(unsigned int irq, cpumask_t dest);
 +      void            (*set_affinity)(unsigned int irq,
 +                                      const struct cpumask *dest);
        int             (*retrigger)(unsigned int irq);
        int             (*set_type)(unsigned int irq, unsigned int flow_type);
        int             (*set_wake)(unsigned int irq, unsigned int on);
        const char      *typename;
  };
  
+ struct timer_rand_state;
+ struct irq_2_iommu;
  /**
   * struct irq_desc - interrupt descriptor
   * @irq:              interrupt number for this descriptor
+  * @timer_rand_state: pointer to timer rand state struct
+  * @kstat_irqs:               irq stats per cpu
+  * @irq_2_iommu:      iommu with this irq
   * @handle_irq:               highlevel irq-events handler [if NULL, __do_IRQ()]
   * @chip:             low level interrupt hardware access
   * @msi_desc:         MSI descriptor
   * @depth:            disable-depth, for nested irq_disable() calls
   * @wake_depth:               enable depth, for multiple set_irq_wake() callers
   * @irq_count:                stats field to detect stalled irqs
-  * @irqs_unhandled:   stats field for spurious unhandled interrupts
   * @last_unhandled:   aging timer for unhandled count
+  * @irqs_unhandled:   stats field for spurious unhandled interrupts
   * @lock:             locking for SMP
   * @affinity:         IRQ affinity on SMP
   * @cpu:              cpu index useful for balancing
   */
  struct irq_desc {
        unsigned int            irq;
+ #ifdef CONFIG_SPARSE_IRQ
+       struct timer_rand_state *timer_rand_state;
+       unsigned int            *kstat_irqs;
+ # ifdef CONFIG_INTR_REMAP
+       struct irq_2_iommu      *irq_2_iommu;
+ # endif
+ #endif
        irq_flow_handler_t      handle_irq;
        struct irq_chip         *chip;
        struct msi_desc         *msi_desc;
        unsigned int            depth;          /* nested irq disables */
        unsigned int            wake_depth;     /* nested wake enables */
        unsigned int            irq_count;      /* For detecting broken IRQs */
-       unsigned int            irqs_unhandled;
        unsigned long           last_unhandled; /* Aging timer for unhandled count */
+       unsigned int            irqs_unhandled;
        spinlock_t              lock;
  #ifdef CONFIG_SMP
        cpumask_t               affinity;
        const char              *name;
  } ____cacheline_internodealigned_in_smp;
  
+ extern void early_irq_init(void);
+ extern void arch_early_irq_init(void);
+ extern void arch_init_chip_data(struct irq_desc *desc, int cpu);
+ extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
+                                       struct irq_desc *desc, int cpu);
+ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
  
+ #ifndef CONFIG_SPARSE_IRQ
  extern struct irq_desc irq_desc[NR_IRQS];
  
  static inline struct irq_desc *irq_to_desc(unsigned int irq)
  {
-       return (irq < nr_irqs) ? irq_desc + irq : NULL;
+       return (irq < NR_IRQS) ? irq_desc + irq : NULL;
+ }
+ static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+ {
+       return irq_to_desc(irq);
+ }
+ #else
+ extern struct irq_desc *irq_to_desc(unsigned int irq);
+ extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
+ # define for_each_irq_desc(irq, desc)         \
+       for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq))
+ # define for_each_irq_desc_reverse(irq, desc)                          \
+       for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq))
+ #define kstat_irqs_this_cpu(DESC) \
+       ((DESC)->kstat_irqs[smp_processor_id()])
+ #define kstat_incr_irqs_this_cpu(irqno, DESC) \
+       ((DESC)->kstat_irqs[smp_processor_id()]++)
+ #endif
+ static inline struct irq_desc *
+ irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
+ {
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+       return irq_to_desc(irq);
+ #else
+       return desc;
+ #endif
  }
  
  /*
@@@ -381,6 -431,11 +432,11 @@@ extern int set_irq_msi(unsigned int irq
  #define get_irq_data(irq)     (irq_to_desc(irq)->handler_data)
  #define get_irq_msi(irq)      (irq_to_desc(irq)->msi_desc)
  
+ #define get_irq_desc_chip(desc)               ((desc)->chip)
+ #define get_irq_desc_chip_data(desc)  ((desc)->chip_data)
+ #define get_irq_desc_data(desc)               ((desc)->handler_data)
+ #define get_irq_desc_msi(desc)                ((desc)->msi_desc)
  #endif /* CONFIG_GENERIC_HARDIRQS */
  
  #endif /* !CONFIG_S390 */
diff --combined init/Kconfig
index 8e9904fc3024c623f23db90703e402cb031db523,13627191a60d194de08aaa4b410aa752cfd7cb21..f6281711166d5dbeba4f55121547396d00b6ff2e
@@@ -924,15 -924,6 +924,15 @@@ config KMO
  
  endif # MODULES
  
 +config INIT_ALL_POSSIBLE
 +      bool
 +      help
 +        Back when each arch used to define their own cpu_online_map and
 +        cpu_possible_map, some of them chose to initialize cpu_possible_map
 +        with all 1s, and others with all 0s.  When they were centralised,
 +        it was better to provide this option than to break all the archs
 +        and have several arch maintainers pursuing me down dark alleys.
 +
  config STOP_MACHINE
        bool
        default y
@@@ -945,10 -936,90 +945,90 @@@ source "block/Kconfig
  config PREEMPT_NOTIFIERS
        bool
  
+ choice
+       prompt "RCU Implementation"
+       default CLASSIC_RCU
+
  config CLASSIC_RCU
-       def_bool !PREEMPT_RCU
+       bool "Classic RCU"
        help
          This option selects the classic RCU implementation that is
          designed for best read-side performance on non-realtime
-         systems.  Classic RCU is the default.  Note that the
-         PREEMPT_RCU symbol is used to select/deselect this option.
+         systems.
+
+         Select this option if you are unsure.
+
+ config TREE_RCU
+       bool "Tree-based hierarchical RCU"
+       help
+         This option selects the RCU implementation that is
+         designed for very large SMP systems with hundreds or
+         thousands of CPUs.
+
+ config PREEMPT_RCU
+       bool "Preemptible RCU"
+       depends on PREEMPT
+       help
+         This option reduces the latency of the kernel by making certain
+         RCU sections preemptible. Normally RCU code is non-preemptible; if
+         this option is selected, read-only RCU sections become
+         preemptible. This helps latency, but may expose bugs due to
+         now-naive assumptions about each RCU read-side critical section
+         remaining on a given CPU through its execution.
+
+ endchoice
+
+ config RCU_TRACE
+       bool "Enable tracing for RCU"
+       depends on TREE_RCU || PREEMPT_RCU
+       help
+         This option provides tracing in RCU which presents stats
+         in debugfs for debugging the RCU implementation.
+
+         Say Y here if you want to enable RCU tracing.
+         Say N if you are unsure.
+
+ config RCU_FANOUT
+       int "Tree-based hierarchical RCU fanout value"
+       range 2 64 if 64BIT
+       range 2 32 if !64BIT
+       depends on TREE_RCU
+       default 64 if 64BIT
+       default 32 if !64BIT
+       help
+         This option controls the fanout of hierarchical implementations
+         of RCU, allowing RCU to work efficiently on machines with
+         large numbers of CPUs.  This value must be at least the cube
+         root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
+         systems and up to 262,144 for 64-bit systems.
+
+         Select a specific number if testing RCU itself.
+         Take the default if unsure.
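
(Worked example for the limits quoted in the help text above: with the 64-bit
default RCU_FANOUT=64, a three-level tree reaches 64^3 = 262,144 CPUs; with
the 32-bit default of 32, it reaches 32^3 = 32,768, which is where the
cube-root-of-NR_CPUS requirement comes from.)
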
+ config RCU_FANOUT_EXACT
+       bool "Disable tree-based hierarchical RCU auto-balancing"
+       depends on TREE_RCU
+       default n
+       help
+         This option forces use of the exact RCU_FANOUT value specified,
+         regardless of imbalances in the hierarchy.  This is useful for
+         testing RCU itself, and might one day be useful on systems with
+         strong NUMA behavior.
+         Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+
+         Say N if unsure.
+
+ config TREE_RCU_TRACE
+       def_bool RCU_TRACE && TREE_RCU
+       select DEBUG_FS
+       help
+         This option provides tracing for the TREE_RCU implementation,
+         permitting Makefile to trivially select kernel/rcutree_trace.c.
+
+ config PREEMPT_RCU_TRACE
+       def_bool RCU_TRACE && PREEMPT_RCU
+       select DEBUG_FS
+       help
+         This option provides tracing for the PREEMPT_RCU implementation,
+         permitting Makefile to trivially select kernel/rcupreempt_trace.c.
diff --combined kernel/irq/chip.c
index 58d8e31daa49d295e5063439aad46a5786d21518,6eb3c7952b6496fc9c5f8da49b982d79866e1504..f63c706d25e15f481f61548dd248d1eaf69702bb
   */
  void dynamic_irq_init(unsigned int irq)
  {
-       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_desc *desc;
        unsigned long flags;
  
+       desc = irq_to_desc(irq);
        if (!desc) {
                WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
                return;
@@@ -45,7 -46,7 +46,7 @@@
        desc->irq_count = 0;
        desc->irqs_unhandled = 0;
  #ifdef CONFIG_SMP
 -      cpus_setall(desc->affinity);
 +      cpumask_setall(&desc->affinity);
  #endif
        spin_unlock_irqrestore(&desc->lock, flags);
  }
@@@ -124,6 -125,7 +125,7 @@@ int set_irq_type(unsigned int irq, unsi
                return -ENODEV;
        }
  
+       type &= IRQ_TYPE_SENSE_MASK;
        if (type == IRQ_TYPE_NONE)
                return 0;
  
@@@ -352,6 -354,7 +354,7 @@@ handle_level_irq(unsigned int irq, stru
  
        spin_lock(&desc->lock);
        mask_ack_irq(desc, irq);
+       desc = irq_remap_to_desc(irq, desc);
  
        if (unlikely(desc->status & IRQ_INPROGRESS))
                goto out_unlock;
@@@ -429,6 -432,7 +432,7 @@@ handle_fasteoi_irq(unsigned int irq, st
        desc->status &= ~IRQ_INPROGRESS;
  out:
        desc->chip->eoi(irq);
+       desc = irq_remap_to_desc(irq, desc);
  
        spin_unlock(&desc->lock);
  }
@@@ -465,12 -469,14 +469,14 @@@ handle_edge_irq(unsigned int irq, struc
                    !desc->action)) {
                desc->status |= (IRQ_PENDING | IRQ_MASKED);
                mask_ack_irq(desc, irq);
+               desc = irq_remap_to_desc(irq, desc);
                goto out_unlock;
        }
        kstat_incr_irqs_this_cpu(irq, desc);
  
        /* Start handling the irq */
        desc->chip->ack(irq);
+       desc = irq_remap_to_desc(irq, desc);
  
        /* Mark the IRQ currently in progress.*/
        desc->status |= IRQ_INPROGRESS;
@@@ -531,8 -537,10 +537,10 @@@ handle_percpu_irq(unsigned int irq, str
        if (!noirqdebug)
                note_interrupt(irq, desc, action_ret);
  
-       if (desc->chip->eoi)
+       if (desc->chip->eoi) {
                desc->chip->eoi(irq);
+               desc = irq_remap_to_desc(irq, desc);
+       }
  }
  
  void
@@@ -567,8 -575,10 +575,10 @@@ __set_irq_handler(unsigned int irq, irq
  
        /* Uninstall? */
        if (handle == handle_bad_irq) {
-               if (desc->chip != &no_irq_chip)
+               if (desc->chip != &no_irq_chip) {
                        mask_ack_irq(desc, irq);
+                       desc = irq_remap_to_desc(irq, desc);
+               }
                desc->status |= IRQ_DISABLED;
                desc->depth = 1;
        }
diff --combined kernel/irq/manage.c
index 10ad2f87ed9a0cfe6d20844f05231b7f0fd0506a,540f6c49f3fa156b2bd0d61ad2c5090ea0e46013..61c4a9b6216546aac546b4a6ab1427ed4ac2a00a
@@@ -79,7 -79,7 +79,7 @@@ int irq_can_set_affinity(unsigned int i
   *    @cpumask:       cpumask
   *
   */
 -int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 +int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
  {
        struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
  
  #ifdef CONFIG_GENERIC_PENDING_IRQ
        if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
 -              desc->affinity = cpumask;
 +              cpumask_copy(&desc->affinity, cpumask);
                desc->chip->set_affinity(irq, cpumask);
        } else {
                desc->status |= IRQ_MOVE_PENDING;
 -              desc->pending_mask = cpumask;
 +              cpumask_copy(&desc->pending_mask, cpumask);
        }
  #else
 -      desc->affinity = cpumask;
 +      cpumask_copy(&desc->affinity, cpumask);
        desc->chip->set_affinity(irq, cpumask);
  #endif
        desc->status |= IRQ_AFFINITY_SET;
   */
  int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
  {
 -      cpumask_t mask;
 -
        if (!irq_can_set_affinity(irq))
                return 0;
  
 -      cpus_and(mask, cpu_online_map, irq_default_affinity);
 -
        /*
         * Preserve an userspace affinity setup, but make sure that
         * one of the targets is online.
         */
        if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
 -              if (cpus_intersects(desc->affinity, cpu_online_map))
 -                      mask = desc->affinity;
 +              if (cpumask_any_and(&desc->affinity, cpu_online_mask)
 +                  < nr_cpu_ids)
 +                      goto set_affinity;
                else
                        desc->status &= ~IRQ_AFFINITY_SET;
        }
  
 -      desc->affinity = mask;
 -      desc->chip->set_affinity(irq, mask);
 +      cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity);
 +set_affinity:
 +      desc->chip->set_affinity(irq, &desc->affinity);
  
        return 0;
  }
@@@ -368,16 -370,18 +368,18 @@@ int __irq_set_trigger(struct irq_desc *
                return 0;
        }
  
-       ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK);
+       /* caller masked out all except trigger mode flags */
+       ret = chip->set_type(irq, flags);
  
        if (ret)
                pr_err("setting trigger mode %d for irq %u failed (%pF)\n",
-                               (int)(flags & IRQF_TRIGGER_MASK),
-                               irq, chip->set_type);
+                               (int)flags, irq, chip->set_type);
        else {
+               if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
+                       flags |= IRQ_LEVEL;
                /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
-               desc->status &= ~IRQ_TYPE_SENSE_MASK;
-               desc->status |= flags & IRQ_TYPE_SENSE_MASK;
+               desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK);
+               desc->status |= flags;
        }
  
        return ret;
@@@ -457,7 -461,8 +459,8 @@@ __setup_irq(unsigned int irq, struct ir
  
                /* Setup the type (level, edge polarity) if configured: */
                if (new->flags & IRQF_TRIGGER_MASK) {
-                       ret = __irq_set_trigger(desc, irq, new->flags);
+                       ret = __irq_set_trigger(desc, irq,
+                                       new->flags & IRQF_TRIGGER_MASK);
  
                        if (ret) {
                                spin_unlock_irqrestore(&desc->lock, flags);
@@@ -671,6 -676,18 +674,18 @@@ int request_irq(unsigned int irq, irq_h
        struct irq_desc *desc;
        int retval;
  
+       /*
+        * handle_IRQ_event() always ignores IRQF_DISABLED except for
+        * the _first_ irqaction (sigh).  That can cause oopsing, but
+        * the behavior is classified as "will not fix" so we need to
+        * start nudging drivers away from using that idiom.
+        */
+       if ((irqflags & (IRQF_SHARED|IRQF_DISABLED))
+                       == (IRQF_SHARED|IRQF_DISABLED))
+               pr_warning("IRQ %d/%s: IRQF_DISABLED is not "
+                               "guaranteed on shared IRQs\n",
+                               irq, devname);
  #ifdef CONFIG_LOCKDEP
        /*
         * Lockdep wants atomic interrupt handlers:
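
A hypothetical driver showing the combination the new check warns about; on a shared line only the first handler's IRQF_DISABLED is honoured, so depending on both flags together is unreliable:

#include <linux/interrupt.h>

static irqreturn_t demo_isr(int irq, void *dev_id)
{
        return IRQ_HANDLED;
}

static int demo_attach(unsigned int irq, void *dev)
{
        /* logs: "IRQ <n>/demo: IRQF_DISABLED is not guaranteed ..." */
        return request_irq(irq, demo_isr, IRQF_SHARED | IRQF_DISABLED,
                           "demo", dev);
}
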
diff --combined kernel/irq/proc.c
index 8e91c9762520de79b38d7971dd9c87752f456e39,f6b3440f05bc50dbe68cdb7026e4e287e227b934..d2c0e5ee53c573019f45942e4d63bb362db42551
@@@ -40,42 -40,33 +40,42 @@@ static ssize_t irq_affinity_proc_write(
                const char __user *buffer, size_t count, loff_t *pos)
  {
        unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
 -      cpumask_t new_value;
 +      cpumask_var_t new_value;
        int err;
  
        if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
            irq_balancing_disabled(irq))
                return -EIO;
  
 +      if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
 +              return -ENOMEM;
 +
        err = cpumask_parse_user(buffer, count, new_value);
        if (err)
 -              return err;
 +              goto free_cpumask;
  
 -      if (!is_affinity_mask_valid(new_value))
 -              return -EINVAL;
 +      if (!is_affinity_mask_valid(*new_value)) {
 +              err = -EINVAL;
 +              goto free_cpumask;
 +      }
  
        /*
         * Do not allow disabling IRQs completely - it's too easy a
         * way to make the system unusable accidentally :-) At least
         * one online CPU still has to be targeted.
         */
 -      if (!cpus_intersects(new_value, cpu_online_map))
 +      if (!cpumask_intersects(new_value, cpu_online_mask)) {
                /* Special case for empty set - allow the architecture
                   code to set default SMP affinity. */
 -              return irq_select_affinity_usr(irq) ? -EINVAL : count;
 -
 -      irq_set_affinity(irq, new_value);
 -
 -      return count;
 +              err = irq_select_affinity_usr(irq) ? -EINVAL : count;
 +      } else {
 +              irq_set_affinity(irq, new_value);
 +              err = count;
 +      }
 +
 +free_cpumask:
 +      free_cpumask_var(new_value);
 +      return err;
  }
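
The cpumask_var_t conversion above follows a fixed shape, sketched below. When CONFIG_CPUMASK_OFFSTACK is not set, cpumask_var_t is a one-element on-stack array and the alloc/free calls compile down to no-ops:

#include <linux/cpumask.h>
#include <linux/gfp.h>

static int cpumask_var_sketch(void)
{
        cpumask_var_t mask;

        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                return -ENOMEM;

        cpumask_and(mask, cpu_online_mask, cpu_possible_mask);
        /* ... use mask ... */

        free_cpumask_var(mask);
        return 0;
}
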
  
  static int irq_affinity_proc_open(struct inode *inode, struct file *file)
@@@ -104,7 -95,7 +104,7 @@@ static ssize_t default_affinity_write(s
        cpumask_t new_value;
        int err;
  
 -      err = cpumask_parse_user(buffer, count, new_value);
 +      err = cpumask_parse_user(buffer, count, &new_value);
        if (err)
                return err;
  
@@@ -252,7 -243,11 +252,11 @@@ void init_irq_proc(void
        /*
         * Create entries for all existing IRQs.
         */
-       for_each_irq_desc(irq, desc)
+       for_each_irq_desc(irq, desc) {
+               if (!desc)
+                       continue;
                register_irq_proc(irq, desc);
+       }
  }
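
From userspace, the proc files registered above are plain text taking a hex CPU bitmask. A minimal example (the IRQ number is hypothetical) pinning an interrupt to CPUs 0 and 1:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/irq/19/smp_affinity", "w");

        if (!f)
                return 1;
        fprintf(f, "3\n");      /* hex mask 0x3: CPUs 0 and 1 */
        return fclose(f) != 0;
}
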
  
diff --combined kernel/sched.c
index bdd180a0c6be0da437e048e9d0154eb903729863,fff1c4a20b6538966a0cf2b97a012c045d52b84d..f2095660efec72966823de6b66c6147367b8327b
@@@ -209,7 -209,6 +209,6 @@@ void init_rt_bandwidth(struct rt_bandwi
        hrtimer_init(&rt_b->rt_period_timer,
                        CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        rt_b->rt_period_timer.function = sched_rt_period_timer;
-       rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
  }
  
  static inline int rt_bandwidth_enabled(void)
@@@ -1139,7 -1138,6 +1138,6 @@@ static void init_rq_hrtick(struct rq *r
  
        hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        rq->hrtick_timer.function = hrtick;
-       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
  }
  #else /* CONFIG_SCHED_HRTICK */
  static inline void hrtick_clear(struct rq *rq)
@@@ -4192,7 -4190,6 +4190,6 @@@ void account_steal_time(struct task_str
  
        if (p == rq->idle) {
                p->stime = cputime_add(p->stime, steal);
-               account_group_system_time(p, steal);
                if (atomic_read(&rq->nr_iowait) > 0)
                        cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                else
@@@ -4328,7 -4325,7 +4325,7 @@@ void __kprobes sub_preempt_count(int va
        /*
         * Underflow?
         */
-       if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+       if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
                return;
        /*
         * Is the spinlock portion underflowing?
@@@ -6647,7 -6644,7 +6644,7 @@@ static int sched_domain_debug_one(struc
        struct sched_group *group = sd->groups;
        char str[256];
  
 -      cpulist_scnprintf(str, sizeof(str), sd->span);
 +      cpulist_scnprintf(str, sizeof(str), &sd->span);
        cpus_clear(*groupmask);
  
        printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
  
                cpus_or(*groupmask, *groupmask, group->cpumask);
  
 -              cpulist_scnprintf(str, sizeof(str), group->cpumask);
 +              cpulist_scnprintf(str, sizeof(str), &group->cpumask);
                printk(KERN_CONT " %s", str);
  
                group = group->next;
@@@ -7101,7 -7098,7 +7098,7 @@@ cpu_to_phys_group(int cpu, const cpumas
  {
        int group;
  #ifdef CONFIG_SCHED_MC
 -      *mask = cpu_coregroup_map(cpu);
 +      *mask = *cpu_coregroup_mask(cpu);
        cpus_and(*mask, *mask, *cpu_map);
        group = first_cpu(*mask);
  #elif defined(CONFIG_SCHED_SMT)
@@@ -7474,7 -7471,7 +7471,7 @@@ static int __build_sched_domains(const 
                sd = &per_cpu(core_domains, i);
                SD_INIT(sd, MC);
                set_domain_attribute(sd, attr);
 -              sd->span = cpu_coregroup_map(i);
 +              sd->span = *cpu_coregroup_mask(i);
                cpus_and(sd->span, sd->span, *cpu_map);
                sd->parent = p;
                p->child = sd;
                SCHED_CPUMASK_VAR(this_core_map, allmasks);
                SCHED_CPUMASK_VAR(send_covered, allmasks);
  
 -              *this_core_map = cpu_coregroup_map(i);
 +              *this_core_map = *cpu_coregroup_mask(i);
                cpus_and(*this_core_map, *this_core_map, *cpu_map);
                if (i != first_cpu(*this_core_map))
                        continue;
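
The sched.c hunks are the same cpumask migration seen earlier: cpulist_scnprintf() now takes a pointer argument, and cpu_coregroup_map() (returning cpumask_t by value) gives way to cpu_coregroup_mask() (returning a pointer). The by-value copies left above are transitional; a sketch of the end state:

#include <linux/cpumask.h>
#include <linux/topology.h>

static void coregroup_sketch(int cpu, struct cpumask *dst)
{
        /* old: cpumask_t m = cpu_coregroup_map(cpu); -- full stack copy */
        cpumask_copy(dst, cpu_coregroup_mask(cpu));
}
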
diff --combined kernel/trace/trace.c
index c8760ec0e4631c044305643226003fb6d90d1314,4185d5221633f19755efb818a1fb969a69d0d29c..0e91f43b6baf53cac177af4ca02f0939997c1a0c
@@@ -30,7 -30,6 +30,6 @@@
  #include <linux/gfp.h>
  #include <linux/fs.h>
  #include <linux/kprobes.h>
- #include <linux/seq_file.h>
  #include <linux/writeback.h>
  
  #include <linux/stacktrace.h>
@@@ -1310,7 -1309,7 +1309,7 @@@ enum trace_file_type 
        TRACE_FILE_ANNOTATE     = 2,
  };
  
- static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
+ static void trace_iterator_increment(struct trace_iterator *iter)
  {
        /* Don't allow ftrace to trace into the ring buffers */
        ftrace_disable_cpu();
@@@ -1389,7 -1388,7 +1388,7 @@@ static void *find_next_entry_inc(struc
        iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
  
        if (iter->ent)
-               trace_iterator_increment(iter, iter->cpu);
+               trace_iterator_increment(iter);
  
        return iter->ent ? iter : NULL;
  }
@@@ -2675,7 -2674,7 +2674,7 @@@ tracing_cpumask_read(struct file *filp
  
        mutex_lock(&tracing_cpumask_update_lock);
  
 -      len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
 +      len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
        if (count - len < 2) {
                count = -EINVAL;
                goto out_err;
@@@ -2696,7 -2695,7 +2695,7 @@@ tracing_cpumask_write(struct file *filp
        int err, cpu;
  
        mutex_lock(&tracing_cpumask_update_lock);
 -      err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
 +      err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
        if (err)
                goto err_unlock;
  
diff --combined mm/slub.c
index 8e516e29f98920f65e8da441e5d2cc51e4cf4f97,6cb7ad10785227f2b889bcff96d6610ddeabe37a..0d861c3154b6eeed56981c33b3505ecca87d5712
+++ b/mm/slub.c
@@@ -24,6 -24,7 +24,7 @@@
  #include <linux/kallsyms.h>
  #include <linux/memory.h>
  #include <linux/math64.h>
+ #include <linux/fault-inject.h>
  
  /*
   * Lock order:
  #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
  #endif
  
+ #define OO_SHIFT      16
+ #define OO_MASK               ((1 << OO_SHIFT) - 1)
+ #define MAX_OBJS_PER_PAGE     65535 /* since page.objects is u16 */
  /* Internal SLUB flags */
  #define __OBJECT_POISON               0x80000000 /* Poison object */
  #define __SYSFS_ADD_DEFERRED  0x40000000 /* Not yet visible via sysfs */
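
The new OO_SHIFT/OO_MASK constants merely name the packing that was already in place: allocation order in the high bits, object count in the low 16. A standalone worked example, assuming 4 KiB pages:

#include <stdio.h>

#define OO_SHIFT 16
#define OO_MASK  ((1 << OO_SHIFT) - 1)

int main(void)
{
        unsigned long order = 3, size = 256, page_size = 4096;
        unsigned long x = (order << OO_SHIFT) + (page_size << order) / size;

        /* (4096 << 3) / 256 = 128 objects; x = 0x30080 */
        printf("order=%lu objects=%lu\n", x >> OO_SHIFT, x & OO_MASK);
        return 0;
}
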
@@@ -178,7 -183,7 +183,7 @@@ static LIST_HEAD(slab_caches)
   * Tracking user of a slab.
   */
  struct track {
-       void *addr;             /* Called from address */
+       unsigned long addr;     /* Called from address */
        int cpu;                /* Was running on cpu */
        int pid;                /* Pid context */
        unsigned long when;     /* When did the operation occur */
@@@ -290,7 -295,7 +295,7 @@@ static inline struct kmem_cache_order_o
                                                unsigned long size)
  {
        struct kmem_cache_order_objects x = {
-               (order << 16) + (PAGE_SIZE << order) / size
+               (order << OO_SHIFT) + (PAGE_SIZE << order) / size
        };
  
        return x;
  
  static inline int oo_order(struct kmem_cache_order_objects x)
  {
-       return x.x >> 16;
+       return x.x >> OO_SHIFT;
  }
  
  static inline int oo_objects(struct kmem_cache_order_objects x)
  {
-       return x.x & ((1 << 16) - 1);
+       return x.x & OO_MASK;
  }
  
  #ifdef CONFIG_SLUB_DEBUG
@@@ -367,7 -372,7 +372,7 @@@ static struct track *get_track(struct k
  }
  
  static void set_track(struct kmem_cache *s, void *object,
-                               enum track_item alloc, void *addr)
+                       enum track_item alloc, unsigned long addr)
  {
        struct track *p;
  
@@@ -391,8 -396,8 +396,8 @@@ static void init_tracking(struct kmem_c
        if (!(s->flags & SLAB_STORE_USER))
                return;
  
-       set_track(s, object, TRACK_FREE, NULL);
-       set_track(s, object, TRACK_ALLOC, NULL);
+       set_track(s, object, TRACK_FREE, 0UL);
+       set_track(s, object, TRACK_ALLOC, 0UL);
  }
  
  static void print_track(const char *s, struct track *t)
                return;
  
        printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
-               s, t->addr, jiffies - t->when, t->cpu, t->pid);
+               s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
  }
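
With addr now an unsigned long, print_track() casts it back to a pointer for %pS, which vsnprintf() resolves to symbol+offset. The same idiom in isolation:

#include <linux/kernel.h>

static void report_caller(unsigned long addr)
{
        /* %pS prints e.g. "kmem_cache_alloc+0x1a/0x90" */
        printk(KERN_INFO "called from %pS\n", (void *)addr);
}
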
  
  static void print_tracking(struct kmem_cache *s, void *object)
@@@ -692,7 -697,7 +697,7 @@@ static int check_object(struct kmem_cac
        if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
                object_err(s, page, p, "Freepointer corrupt");
                /*
-                * No choice but to zap it and thus loose the remainder
+                * No choice but to zap it and thus lose the remainder
                 * of the free objects in this slab. May cause
                 * another error because the object count is now wrong.
                 */
@@@ -764,8 -769,8 +769,8 @@@ static int on_freelist(struct kmem_cach
        }
  
        max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
-       if (max_objects > 65535)
-               max_objects = 65535;
+       if (max_objects > MAX_OBJS_PER_PAGE)
+               max_objects = MAX_OBJS_PER_PAGE;
  
        if (page->objects != max_objects) {
                slab_err(s, page, "Wrong number of objects. Found %d but "
@@@ -866,7 -871,7 +871,7 @@@ static void setup_object_debug(struct k
  }
  
  static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
-                                               void *object, void *addr)
+                                       void *object, unsigned long addr)
  {
        if (!check_slab(s, page))
                goto bad;
@@@ -906,7 -911,7 +911,7 @@@ bad
  }
  
  static int free_debug_processing(struct kmem_cache *s, struct page *page,
-                                               void *object, void *addr)
+                                       void *object, unsigned long addr)
  {
        if (!check_slab(s, page))
                goto fail;
@@@ -1029,10 -1034,10 +1034,10 @@@ static inline void setup_object_debug(s
                        struct page *page, void *object) {}
  
  static inline int alloc_debug_processing(struct kmem_cache *s,
-       struct page *page, void *object, void *addr) { return 0; }
+       struct page *page, void *object, unsigned long addr) { return 0; }
  
  static inline int free_debug_processing(struct kmem_cache *s,
-       struct page *page, void *object, void *addr) { return 0; }
+       struct page *page, void *object, unsigned long addr) { return 0; }
  
  static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
                        { return 1; }
@@@ -1499,8 -1504,8 +1504,8 @@@ static inline int node_match(struct kme
   * we need to allocate a new slab. This is the slowest path since it involves
   * a call to the page allocator and the setup of a new slab.
   */
- static void *__slab_alloc(struct kmem_cache *s,
-               gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
+ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+                         unsigned long addr, struct kmem_cache_cpu *c)
  {
        void **object;
        struct page *new;
@@@ -1584,13 -1589,18 +1589,18 @@@ debug
   * Otherwise we can simply pick the next object from the lockless free list.
   */
  static __always_inline void *slab_alloc(struct kmem_cache *s,
-               gfp_t gfpflags, int node, void *addr)
+               gfp_t gfpflags, int node, unsigned long addr)
  {
        void **object;
        struct kmem_cache_cpu *c;
        unsigned long flags;
        unsigned int objsize;
  
+       might_sleep_if(gfpflags & __GFP_WAIT);
+       if (should_failslab(s->objsize, gfpflags))
+               return NULL;
        local_irq_save(flags);
        c = get_cpu_slab(s, smp_processor_id());
        objsize = c->objsize;
  
  void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
  {
-       return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
+       return slab_alloc(s, gfpflags, -1, _RET_IP_);
  }
  EXPORT_SYMBOL(kmem_cache_alloc);
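
slab_alloc() above gained two early checks: might_sleep_if() for __GFP_WAIT debugging and should_failslab() for fault injection. A sketch of how such a hook stays free when the option is off; the real prototype lives in <linux/fault-inject.h> and may differ in detail:

#ifdef CONFIG_FAILSLAB
extern int should_failslab(size_t size, gfp_t gfpflags);
#else
static inline int should_failslab(size_t size, gfp_t gfpflags)
{
        return 0;       /* compiles away: the fast path pays nothing */
}
#endif
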
  
  #ifdef CONFIG_NUMA
  void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
  {
-       return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
+       return slab_alloc(s, gfpflags, node, _RET_IP_);
  }
  EXPORT_SYMBOL(kmem_cache_alloc_node);
  #endif
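
_RET_IP_ is the old expression with the cast folded in, per <linux/kernel.h>, so the allocators now thread an unsigned long rather than a void * down to set_track():

#define _RET_IP_        (unsigned long)__builtin_return_address(0)
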
   * handling required then we can return immediately.
   */
  static void __slab_free(struct kmem_cache *s, struct page *page,
-                               void *x, void *addr, unsigned int offset)
+                       void *x, unsigned long addr, unsigned int offset)
  {
        void *prior;
        void **object = (void *)x;
@@@ -1704,7 -1714,7 +1714,7 @@@ debug
   * with all sorts of special processing.
   */
  static __always_inline void slab_free(struct kmem_cache *s,
-                       struct page *page, void *x, void *addr)
+                       struct page *page, void *x, unsigned long addr)
  {
        void **object = (void *)x;
        struct kmem_cache_cpu *c;
@@@ -1731,11 -1741,11 +1741,11 @@@ void kmem_cache_free(struct kmem_cache 
  
        page = virt_to_head_page(x);
  
-       slab_free(s, page, x, __builtin_return_address(0));
+       slab_free(s, page, x, _RET_IP_);
  }
  EXPORT_SYMBOL(kmem_cache_free);
  
- /* Figure out on which slab object the object resides */
+ /* Figure out on which slab page the object resides */
  static struct page *get_object_page(const void *x)
  {
        struct page *page = virt_to_head_page(x);
@@@ -1807,8 -1817,8 +1817,8 @@@ static inline int slab_order(int size, 
        int rem;
        int min_order = slub_min_order;
  
-       if ((PAGE_SIZE << min_order) / size > 65535)
-               return get_order(size * 65535) - 1;
+       if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
+               return get_order(size * MAX_OBJS_PER_PAGE) - 1;
  
        for (order = max(min_order,
                                fls(min_objects * size - 1) - PAGE_SHIFT);
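
Worked numbers for the clamp at the top of slab_order(), assuming 4 KiB pages: 8-byte objects at order 7 would mean 65536 objects, one more than the u16 page->objects can hold, so the order is reduced until the count fits:

#include <stdio.h>

#define PAGE_SZ                 4096UL
#define MAX_OBJS_PER_PAGE       65535UL

int main(void)
{
        unsigned long size = 8, order = 7;

        /* (4096 << 7) / 8 = 65536 > 65535: too many at this order */
        printf("order %lu: %lu objects\n", order, (PAGE_SZ << order) / size);
        /* one order lower fits: 32768 objects */
        printf("order %lu: %lu objects\n", order - 1,
               (PAGE_SZ << (order - 1)) / size);
        return 0;
}
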
@@@ -2073,8 -2083,7 +2083,7 @@@ static inline int alloc_kmem_cache_cpus
   * when allocating for the kmalloc_node_cache. This is used for bootstrapping
   * memory on a fresh node that has no slab structures yet.
   */
- static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
-                                                          int node)
+ static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
  {
        struct page *page;
        struct kmem_cache_node *n;
        local_irq_save(flags);
        add_partial(n, page, 0);
        local_irq_restore(flags);
-       return n;
  }
  
  static void free_kmem_cache_nodes(struct kmem_cache *s)
@@@ -2144,8 -2152,7 +2152,7 @@@ static int init_kmem_cache_nodes(struc
                        n = &s->local_node;
                else {
                        if (slab_state == DOWN) {
-                               n = early_kmem_cache_node_alloc(gfpflags,
-                                                               node);
+                               early_kmem_cache_node_alloc(gfpflags, node);
                                continue;
                        }
                        n = kmem_cache_alloc_node(kmalloc_caches,
@@@ -2659,7 -2666,7 +2666,7 @@@ void *__kmalloc(size_t size, gfp_t flag
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
  
-       return slab_alloc(s, flags, -1, __builtin_return_address(0));
+       return slab_alloc(s, flags, -1, _RET_IP_);
  }
  EXPORT_SYMBOL(__kmalloc);
  
@@@ -2687,7 -2694,7 +2694,7 @@@ void *__kmalloc_node(size_t size, gfp_
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
  
-       return slab_alloc(s, flags, node, __builtin_return_address(0));
+       return slab_alloc(s, flags, node, _RET_IP_);
  }
  EXPORT_SYMBOL(__kmalloc_node);
  #endif
@@@ -2744,7 -2751,7 +2751,7 @@@ void kfree(const void *x
                put_page(page);
                return;
        }
-       slab_free(page->slab, page, object, __builtin_return_address(0));
+       slab_free(page->slab, page, object, _RET_IP_);
  }
  EXPORT_SYMBOL(kfree);
  
@@@ -3123,8 -3130,12 +3130,12 @@@ struct kmem_cache *kmem_cache_create(co
                s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
                up_write(&slub_lock);
  
-               if (sysfs_slab_alias(s, name))
+               if (sysfs_slab_alias(s, name)) {
+                       down_write(&slub_lock);
+                       s->refcount--;
+                       up_write(&slub_lock);
                        goto err;
+               }
                return s;
        }
  
                                size, align, flags, ctor)) {
                        list_add(&s->list, &slab_caches);
                        up_write(&slub_lock);
-                       if (sysfs_slab_add(s))
+                       if (sysfs_slab_add(s)) {
+                               down_write(&slub_lock);
+                               list_del(&s->list);
+                               up_write(&slub_lock);
+                               kfree(s);
                                goto err;
+                       }
                        return s;
                }
                kfree(s);
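
Both failure paths in kmem_cache_create() now roll their bookkeeping back before jumping to err: slub_lock is dropped around the sysfs call (which may sleep) and retaken only to undo the earlier registration. The shape of the pattern, with hypothetical names throughout:

#include <linux/list.h>
#include <linux/rwsem.h>

struct demo_cache { struct list_head list; };

static DECLARE_RWSEM(demo_lock);
static LIST_HEAD(demo_caches);
static int demo_publish(struct demo_cache *c);  /* may sleep, may fail */

static struct demo_cache *demo_create(struct demo_cache *c)
{
        down_write(&demo_lock);
        list_add(&c->list, &demo_caches);
        up_write(&demo_lock);           /* publish must run unlocked */

        if (demo_publish(c)) {
                down_write(&demo_lock); /* retake only to roll back */
                list_del(&c->list);
                up_write(&demo_lock);
                return NULL;
        }
        return c;
}
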
@@@ -3202,7 -3218,7 +3218,7 @@@ static struct notifier_block __cpuinitd
  
  #endif
  
- void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
+ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
  {
        struct kmem_cache *s;
  
  }
  
  void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
-                                       int node, void *caller)
+                                       int node, unsigned long caller)
  {
        struct kmem_cache *s;
  
@@@ -3429,7 -3445,7 +3445,7 @@@ static void resiliency_test(void) {}
  
  struct location {
        unsigned long count;
-       void *addr;
+       unsigned long addr;
        long long sum_time;
        long min_time;
        long max_time;
@@@ -3477,7 -3493,7 +3493,7 @@@ static int add_location(struct loc_trac
  {
        long start, end, pos;
        struct location *l;
-       void *caddr;
+       unsigned long caddr;
        unsigned long age = jiffies - track->when;
  
        start = -1;
@@@ -3626,7 -3642,7 +3642,7 @@@ static int list_locations(struct kmem_c
                                len < PAGE_SIZE - 60) {
                        len += sprintf(buf + len, " cpus=");
                        len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
 -                                      l->cpus);
 +                                      &l->cpus);
                }
  
                if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
@@@ -4345,7 -4361,7 +4361,7 @@@ static void sysfs_slab_remove(struct km
  
  /*
   * Need to buffer aliases during bootup until sysfs becomes
-  * available lest we loose that information.
+  * available lest we lose that information.
   */
  struct saved_alias {
        struct kmem_cache *s;