Merge branch 'linus' into x86/cleanups
author Ingo Molnar <mingo@elte.hu>
Fri, 2 Jan 2009 21:41:36 +0000 (22:41 +0100)
committer Ingo Molnar <mingo@elte.hu>
Fri, 2 Jan 2009 21:41:36 +0000 (22:41 +0100)
Conflicts:
arch/x86/kernel/reboot.c

arch/x86/kernel/apic.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/genx2apic_phys.c
arch/x86/kernel/io_apic.c
arch/x86/kernel/reboot.c
arch/x86/kernel/traps.c
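
The common thread in the conflicted files is the cpumask API rework: functions that took a cpumask_t by value now take a const struct cpumask * (or fill a caller-supplied mask), on-stack masks become cpumask_var_t, and NR_CPUS loops become nr_cpu_ids loops. Passing a 4096-bit mask by value costs 512 bytes of stack per call; passing a pointer is constant-size. Below is a minimal userspace sketch of the calling-convention change, with a stand-in bitmap type (all names are illustrative, not the kernel's):

#include <stdio.h>
#include <string.h>

#define NR_CPUS 64
#define BITS_PER_LONG (8 * (int)sizeof(unsigned long))

struct cpumask {
        unsigned long bits[(NR_CPUS + BITS_PER_LONG - 1) / BITS_PER_LONG];
};

static int test_cpu(const struct cpumask *m, int cpu)
{
        return (m->bits[cpu / BITS_PER_LONG] >> (cpu % BITS_PER_LONG)) & 1;
}

/* Old style: the whole mask is copied onto the stack at every call;
 * with NR_CPUS=4096 that is 512 bytes per argument. */
static int first_cpu_byval(struct cpumask mask)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                if (test_cpu(&mask, cpu))
                        return cpu;
        return NR_CPUS;
}

/* New style: only a pointer crosses the call, constant cost. */
static int first_cpu_byref(const struct cpumask *mask)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                if (test_cpu(mask, cpu))
                        return cpu;
        return NR_CPUS;
}

int main(void)
{
        struct cpumask m;

        memset(&m, 0, sizeof(m));
        m.bits[0] = 1UL << 3;                   /* CPU 3 set */
        printf("%d %d\n", first_cpu_byval(m), first_cpu_byref(&m));
        return 0;
}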

diff --combined arch/x86/kernel/apic.c
index 66198cbe464daee6f6a3b37a5b9b842b35bbe026,6b7f824db16003a89dd2fca8786c130459e26dda..d652515e2855fd61cb5252ecd9d92d9f1653bf81
@@@ -98,8 -98,8 +98,8 @@@ __setup("apicpmtimer", setup_apicpmtime
  #ifdef HAVE_X2APIC
  int x2apic;
  /* x2apic enabled before OS handover */
 -int x2apic_preenabled;
 -int disable_x2apic;
 +static int x2apic_preenabled;
 +static int disable_x2apic;
  static __init int setup_nox2apic(char *str)
  {
        disable_x2apic = 1;
@@@ -119,8 -119,6 +119,6 @@@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_o
  
  int first_system_vector = 0xfe;
  
- char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
  /*
   * Debug level, exported for io_apic.c
   */
@@@ -142,7 -140,7 +140,7 @@@ static int lapic_next_event(unsigned lo
                            struct clock_event_device *evt);
  static void lapic_timer_setup(enum clock_event_mode mode,
                              struct clock_event_device *evt);
- static void lapic_timer_broadcast(cpumask_t mask);
+ static void lapic_timer_broadcast(const cpumask_t *mask);
  static void apic_pm_activate(void);
  
  /*
@@@ -228,7 -226,7 +226,7 @@@ void xapic_icr_write(u32 low, u32 id
        apic_write(APIC_ICR, low);
  }
  
 -u64 xapic_icr_read(void)
 +static u64 xapic_icr_read(void)
  {
        u32 icr1, icr2;
  
@@@ -268,7 -266,7 +266,7 @@@ void x2apic_icr_write(u32 low, u32 id
        wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
  }
  
 -u64 x2apic_icr_read(void)
 +static u64 x2apic_icr_read(void)
  {
        unsigned long val;
  
@@@ -455,7 -453,7 +453,7 @@@ static void lapic_timer_setup(enum cloc
  /*
   * Local APIC timer broadcast function
   */
- static void lapic_timer_broadcast(cpumask_t mask)
+ static void lapic_timer_broadcast(const cpumask_t *mask)
  {
  #ifdef CONFIG_SMP
        send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
@@@ -471,7 -469,7 +469,7 @@@ static void __cpuinit setup_APIC_timer(
        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
  
        memcpy(levt, &lapic_clockevent, sizeof(*levt));
-       levt->cpumask = cpumask_of_cpu(smp_processor_id());
+       levt->cpumask = cpumask_of(smp_processor_id());
  
        clockevents_register_device(levt);
  }
@@@ -1807,28 -1805,32 +1805,32 @@@ void disconnect_bsp_APIC(int virt_wire_
  void __cpuinit generic_processor_info(int apicid, int version)
  {
        int cpu;
-       cpumask_t tmp_map;
  
        /*
         * Validate version
         */
        if (version == 0x0) {
                pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
-                       "fixing up to 0x10. (tell your hw vendor)\n",
-                       version);
+                          "fixing up to 0x10. (tell your hw vendor)\n",
+                               version);
                version = 0x10;
        }
        apic_version[apicid] = version;
  
-       if (num_processors >= NR_CPUS) {
-               pr_warning("WARNING: NR_CPUS limit of %i reached."
-                       "  Processor ignored.\n", NR_CPUS);
+       if (num_processors >= nr_cpu_ids) {
+               int max = nr_cpu_ids;
+               int thiscpu = max + disabled_cpus;
+               pr_warning(
+                       "ACPI: NR_CPUS/possible_cpus limit of %i reached."
+                       "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+               disabled_cpus++;
                return;
        }
  
        num_processors++;
-       cpus_complement(tmp_map, cpu_present_map);
-       cpu = first_cpu(tmp_map);
+       cpu = cpumask_next_zero(-1, cpu_present_mask);
  
        physid_set(apicid, phys_cpu_present_map);
        if (apicid == boot_cpu_physical_apicid) {
        }
  #endif
  
-       cpu_set(cpu, cpu_possible_map);
-       cpu_set(cpu, cpu_present_map);
+       set_cpu_possible(cpu, true);
+       set_cpu_present(cpu, true);
  }
  
  #ifdef CONFIG_X86_64
@@@ -2081,7 -2083,7 +2083,7 @@@ __cpuinit int apic_is_clustered_box(voi
        bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
        bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
  
-       for (i = 0; i < NR_CPUS; i++) {
+       for (i = 0; i < nr_cpu_ids; i++) {
                /* are we being called early in kernel startup? */
                if (bios_cpu_apicid) {
                        id = bios_cpu_apicid[i];
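
In generic_processor_info() above, the on-stack tmp_map built via cpus_complement() + first_cpu() is replaced by one cpumask_next_zero() scan of cpu_present_mask, and the direct cpu_set() writes on the global maps become the set_cpu_possible()/set_cpu_present() accessors. A sketch of the next-zero scan, with a plain unsigned long standing in for the mask (hypothetical helper name):

#include <stdio.h>

/* Sketch of cpumask_next_zero(): first clear bit strictly after n. */
static int next_zero(int n, unsigned long mask, int nbits)
{
        for (int cpu = n + 1; cpu < nbits; cpu++)
                if (!(mask & (1UL << cpu)))
                        return cpu;
        return nbits;
}

int main(void)
{
        unsigned long present = 0x7;    /* CPUs 0-2 already present */

        /* n = -1 scans from bit 0, as in cpumask_next_zero(-1, ...) */
        printf("first free cpu: %d\n", next_zero(-1, present, 32));
        return 0;
}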
index 44fcb237bd52f6315556a6f8996ec29c7f11d4dd,d6ec7ec30274fc35d95440d9dcb819f0a4504d84..d259e5d2e0546adbec36d55f63b7f2a81f88e002
@@@ -49,7 -49,7 +49,7 @@@
  
  u32 num_var_ranges = 0;
  
- unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
  static DEFINE_MUTEX(mtrr_mutex);
  
  u64 size_or_mask, size_and_mask;
@@@ -574,7 -574,7 +574,7 @@@ struct mtrr_value 
        unsigned long   lsize;
  };
  
- static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
+ static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
  
  static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
  {
@@@ -824,14 -824,16 +824,14 @@@ static int enable_mtrr_cleanup __initda
  
  static int __init disable_mtrr_cleanup_setup(char *str)
  {
 -      if (enable_mtrr_cleanup != -1)
 -              enable_mtrr_cleanup = 0;
 +      enable_mtrr_cleanup = 0;
        return 0;
  }
  early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
  
  static int __init enable_mtrr_cleanup_setup(char *str)
  {
 -      if (enable_mtrr_cleanup != -1)
 -              enable_mtrr_cleanup = 1;
 +      enable_mtrr_cleanup = 1;
        return 0;
  }
  early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
index a177c7880ab580ceac14190cb7215d86d99696dd,62895cf315ffb1041e57364cd35d385a62826b25..21bcc0e098ba48c3cd78b068f88dd858c97d98dd
@@@ -29,16 -29,15 +29,15 @@@ static int x2apic_acpi_madt_oem_check(c
  
  /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
  
- static cpumask_t x2apic_target_cpus(void)
+ static const struct cpumask *x2apic_target_cpus(void)
  {
-       return cpumask_of_cpu(0);
+       return cpumask_of(0);
  }
  
- static cpumask_t x2apic_vector_allocation_domain(int cpu)
+ static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
  {
-       cpumask_t domain = CPU_MASK_NONE;
-       cpu_set(cpu, domain);
-       return domain;
+       cpumask_clear(retmask);
+       cpumask_set_cpu(cpu, retmask);
  }
  
  static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
        x2apic_icr_write(cfg, apicid);
  }
  
- static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
  {
        unsigned long flags;
        unsigned long query_cpu;
  
        local_irq_save(flags);
-       for_each_cpu_mask(query_cpu, mask) {
+       for_each_cpu(query_cpu, mask) {
                __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
                                       vector, APIC_DEST_PHYSICAL);
        }
        local_irq_restore(flags);
  }
  
- static void x2apic_send_IPI_allbutself(int vector)
+ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+                                           int vector)
  {
-       cpumask_t mask = cpu_online_map;
+       unsigned long flags;
+       unsigned long query_cpu;
+       unsigned long this_cpu = smp_processor_id();
+       local_irq_save(flags);
+       for_each_cpu(query_cpu, mask) {
+               if (query_cpu != this_cpu)
+                       __x2apic_send_IPI_dest(
+                               per_cpu(x86_cpu_to_apicid, query_cpu),
+                               vector, APIC_DEST_PHYSICAL);
+       }
+       local_irq_restore(flags);
+ }
  
-       cpu_clear(smp_processor_id(), mask);
+ static void x2apic_send_IPI_allbutself(int vector)
+ {
+       unsigned long flags;
+       unsigned long query_cpu;
+       unsigned long this_cpu = smp_processor_id();
  
-       if (!cpus_empty(mask))
-               x2apic_send_IPI_mask(mask, vector);
+       local_irq_save(flags);
+       for_each_online_cpu(query_cpu)
+               if (query_cpu != this_cpu)
+                       __x2apic_send_IPI_dest(
+                               per_cpu(x86_cpu_to_apicid, query_cpu),
+                               vector, APIC_DEST_PHYSICAL);
+       local_irq_restore(flags);
  }
  
  static void x2apic_send_IPI_all(int vector)
  {
-       x2apic_send_IPI_mask(cpu_online_map, vector);
+       x2apic_send_IPI_mask(cpu_online_mask, vector);
  }
  
  static int x2apic_apic_id_registered(void)
        return 1;
  }
  
- static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+ static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
  {
        int cpu;
  
         * We're using fixed IRQ delivery, can only return one phys APIC ID.
         * May as well be the first.
         */
-       cpu = first_cpu(cpumask);
-       if ((unsigned)cpu < NR_CPUS)
+       cpu = cpumask_first(cpumask);
+       if ((unsigned)cpu < nr_cpu_ids)
                return per_cpu(x86_cpu_to_apicid, cpu);
        else
                return BAD_APICID;
  }
  
+ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+                                                 const struct cpumask *andmask)
+ {
+       int cpu;
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       for_each_cpu_and(cpu, cpumask, andmask)
+               if (cpumask_test_cpu(cpu, cpu_online_mask))
+                       break;
+       if (cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+       return BAD_APICID;
+ }
  static unsigned int get_apic_id(unsigned long x)
  {
        unsigned int id;
@@@ -123,12 -161,12 +161,12 @@@ static unsigned int phys_pkg_id(int ind
        return current_cpu_data.initial_apicid >> index_msb;
  }
  
 -void x2apic_send_IPI_self(int vector)
 +static void x2apic_send_IPI_self(int vector)
  {
        apic_write(APIC_SELF_IPI, vector);
  }
  
 -void init_x2apic_ldr(void)
 +static void init_x2apic_ldr(void)
  {
        return;
  }
@@@ -145,8 -183,10 +183,10 @@@ struct genapic apic_x2apic_phys = 
        .send_IPI_all = x2apic_send_IPI_all,
        .send_IPI_allbutself = x2apic_send_IPI_allbutself,
        .send_IPI_mask = x2apic_send_IPI_mask,
+       .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
        .send_IPI_self = x2apic_send_IPI_self,
        .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+       .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
        .phys_pkg_id = phys_pkg_id,
        .get_apic_id = get_apic_id,
        .set_apic_id = set_apic_id,
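
The new x2apic_send_IPI_mask_allbutself() and the rewritten x2apic_send_IPI_allbutself() above no longer copy cpu_online_map to the stack just to clear one bit; they iterate the online CPUs and skip the sender inline. A userspace sketch of that skip-self pattern (send_ipi() is a placeholder, not a kernel function):

#include <stdio.h>

#define NR_CPUS 8

static void send_ipi(int cpu, int vector)
{
        printf("IPI vector %#x -> cpu %d\n", vector, cpu);
}

/* Skip-self iteration: no temporary mask copy, no bit clearing. */
static void send_allbutself(unsigned long online, int this_cpu, int vector)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                if (!(online & (1UL << cpu)))
                        continue;
                if (cpu != this_cpu)
                        send_ipi(cpu, vector);
        }
}

int main(void)
{
        send_allbutself(0xb /* CPUs 0, 1, 3 online */, 1, 0xfd);
        return 0;
}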
index b8c8a8e99341d4cd31aff2cfc9cffa15f479e41d,62ecfc991e1e61b747567cb2ada8f8d68d0774b2..69911722b9d38acefc7f0f50e5d1c2df4b5b8b49
@@@ -108,94 -108,277 +108,277 @@@ static int __init parse_noapic(char *st
  early_param("noapic", parse_noapic);
  
  struct irq_pin_list;
+ /*
+  * This is performance-critical, we want to do it O(1)
+  *
+  * the indexing order of this array favors 1:1 mappings
+  * between pins and IRQs.
+  */
+ struct irq_pin_list {
+       int apic, pin;
+       struct irq_pin_list *next;
+ };
+ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+ {
+       struct irq_pin_list *pin;
+       int node;
+       node = cpu_to_node(cpu);
+       pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+       printk(KERN_DEBUG "  alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+       return pin;
+ }
  struct irq_cfg {
-       unsigned int irq;
        struct irq_pin_list *irq_2_pin;
-       cpumask_t domain;
-       cpumask_t old_domain;
+       cpumask_var_t domain;
+       cpumask_var_t old_domain;
        unsigned move_cleanup_count;
        u8 vector;
        u8 move_in_progress : 1;
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+       u8 move_desc_pending : 1;
+ #endif
  };
  
  /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+ #ifdef CONFIG_SPARSE_IRQ
+ static struct irq_cfg irq_cfgx[] = {
+ #else
  static struct irq_cfg irq_cfgx[NR_IRQS] = {
-       [0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+ #endif
+       [0]  = { .vector = IRQ0_VECTOR,  },
+       [1]  = { .vector = IRQ1_VECTOR,  },
+       [2]  = { .vector = IRQ2_VECTOR,  },
+       [3]  = { .vector = IRQ3_VECTOR,  },
+       [4]  = { .vector = IRQ4_VECTOR,  },
+       [5]  = { .vector = IRQ5_VECTOR,  },
+       [6]  = { .vector = IRQ6_VECTOR,  },
+       [7]  = { .vector = IRQ7_VECTOR,  },
+       [8]  = { .vector = IRQ8_VECTOR,  },
+       [9]  = { .vector = IRQ9_VECTOR,  },
+       [10] = { .vector = IRQ10_VECTOR, },
+       [11] = { .vector = IRQ11_VECTOR, },
+       [12] = { .vector = IRQ12_VECTOR, },
+       [13] = { .vector = IRQ13_VECTOR, },
+       [14] = { .vector = IRQ14_VECTOR, },
+       [15] = { .vector = IRQ15_VECTOR, },
  };
  
- #define for_each_irq_cfg(irq, cfg)            \
-       for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+ int __init arch_early_irq_init(void)
+ {
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+       int count;
+       int i;
+       cfg = irq_cfgx;
+       count = ARRAY_SIZE(irq_cfgx);
+       for (i = 0; i < count; i++) {
+               desc = irq_to_desc(i);
+               desc->chip_data = &cfg[i];
+               alloc_bootmem_cpumask_var(&cfg[i].domain);
+               alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+               if (i < NR_IRQS_LEGACY)
+                       cpumask_setall(cfg[i].domain);
+       }
+       return 0;
+ }
  
+ #ifdef CONFIG_SPARSE_IRQ
  static struct irq_cfg *irq_cfg(unsigned int irq)
  {
-       return irq < nr_irqs ? irq_cfgx + irq : NULL;
+       struct irq_cfg *cfg = NULL;
+       struct irq_desc *desc;
+       desc = irq_to_desc(irq);
+       if (desc)
+               cfg = desc->chip_data;
+       return cfg;
  }
  
- static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
  {
-       return irq_cfg(irq);
+       struct irq_cfg *cfg;
+       int node;
+       node = cpu_to_node(cpu);
+       cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+       if (cfg) {
+               /* FIXME: needs alloc_cpumask_var_node() */
+               if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
+                       kfree(cfg);
+                       cfg = NULL;
+               } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
+                       free_cpumask_var(cfg->domain);
+                       kfree(cfg);
+                       cfg = NULL;
+               } else {
+                       cpumask_clear(cfg->domain);
+                       cpumask_clear(cfg->old_domain);
+               }
+       }
+       printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
+       return cfg;
  }
  
- /*
-  * Rough estimation of how many shared IRQs there are, can be changed
-  * anytime.
-  */
- #define MAX_PLUS_SHARED_IRQS NR_IRQS
- #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+ int arch_init_chip_data(struct irq_desc *desc, int cpu)
+ {
+       struct irq_cfg *cfg;
  
- /*
-  * This is performance-critical, we want to do it O(1)
-  *
-  * the indexing order of this array favors 1:1 mappings
-  * between pins and IRQs.
-  */
+       cfg = desc->chip_data;
+       if (!cfg) {
+               desc->chip_data = get_one_free_irq_cfg(cpu);
+               if (!desc->chip_data) {
+                       printk(KERN_ERR "can not alloc irq_cfg\n");
+                       BUG_ON(1);
+               }
+       }
  
- struct irq_pin_list {
-       int apic, pin;
-       struct irq_pin_list *next;
- };
+       return 0;
+ }
  
- static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
- static struct irq_pin_list *irq_2_pin_ptr;
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
  
- static void __init irq_2_pin_init(void)
+ static void
+ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
  {
-       struct irq_pin_list *pin = irq_2_pin_head;
-       int i;
+       struct irq_pin_list *old_entry, *head, *tail, *entry;
+       cfg->irq_2_pin = NULL;
+       old_entry = old_cfg->irq_2_pin;
+       if (!old_entry)
+               return;
+       entry = get_one_free_irq_2_pin(cpu);
+       if (!entry)
+               return;
  
-       for (i = 1; i < PIN_MAP_SIZE; i++)
-               pin[i-1].next = &pin[i];
+       entry->apic     = old_entry->apic;
+       entry->pin      = old_entry->pin;
+       head            = entry;
+       tail            = entry;
+       old_entry       = old_entry->next;
+       while (old_entry) {
+               entry = get_one_free_irq_2_pin(cpu);
+               if (!entry) {
+                       entry = head;
+                       while (entry) {
+                               head = entry->next;
+                               kfree(entry);
+                               entry = head;
+                       }
+                       /* still use the old one */
+                       return;
+               }
+               entry->apic     = old_entry->apic;
+               entry->pin      = old_entry->pin;
+               tail->next      = entry;
+               tail            = entry;
+               old_entry       = old_entry->next;
+       }
  
-       irq_2_pin_ptr = &pin[0];
+       tail->next = NULL;
+       cfg->irq_2_pin = head;
  }
  
- static struct irq_pin_list *get_one_free_irq_2_pin(void)
+ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
  {
-       struct irq_pin_list *pin = irq_2_pin_ptr;
+       struct irq_pin_list *entry, *next;
  
-       if (!pin)
-               panic("can not get more irq_2_pin\n");
+       if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+               return;
  
-       irq_2_pin_ptr = pin->next;
-       pin->next = NULL;
-       return pin;
+       entry = old_cfg->irq_2_pin;
+       while (entry) {
+               next = entry->next;
+               kfree(entry);
+               entry = next;
+       }
+       old_cfg->irq_2_pin = NULL;
+ }
+ void arch_init_copy_chip_data(struct irq_desc *old_desc,
+                                struct irq_desc *desc, int cpu)
+ {
+       struct irq_cfg *cfg;
+       struct irq_cfg *old_cfg;
+       cfg = get_one_free_irq_cfg(cpu);
+       if (!cfg)
+               return;
+       desc->chip_data = cfg;
+       old_cfg = old_desc->chip_data;
+       memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+       init_copy_irq_2_pin(old_cfg, cfg, cpu);
  }
  
+ static void free_irq_cfg(struct irq_cfg *old_cfg)
+ {
+       kfree(old_cfg);
+ }
+ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+ {
+       struct irq_cfg *old_cfg, *cfg;
+       old_cfg = old_desc->chip_data;
+       cfg = desc->chip_data;
+       if (old_cfg == cfg)
+               return;
+       if (old_cfg) {
+               free_irq_2_pin(old_cfg, cfg);
+               free_irq_cfg(old_cfg);
+               old_desc->chip_data = NULL;
+       }
+ }
+ static void
+ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
+ {
+       struct irq_cfg *cfg = desc->chip_data;
+       if (!cfg->move_in_progress) {
+               /* it means that domain is not changed */
+               if (!cpumask_intersects(&desc->affinity, mask))
+                       cfg->move_desc_pending = 1;
+       }
+ }
+ #endif
+ #else
+ static struct irq_cfg *irq_cfg(unsigned int irq)
+ {
+       return irq < nr_irqs ? irq_cfgx + irq : NULL;
+ }
+ #endif
+ #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ static inline void
+ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
+ {
+ }
+ #endif
  struct io_apic {
        unsigned int index;
        unsigned int unused[3];
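
With CONFIG_SPARSE_IRQ, struct irq_cfg above embeds two cpumask_var_t fields, so get_one_free_irq_cfg() becomes a three-step allocation in which a failure partway through must release everything acquired so far. A userspace sketch of that unwind pattern (toy types; sizes are illustrative):

#include <stdlib.h>

typedef unsigned long *mask_var_t;

struct cfg {
        mask_var_t domain;
        mask_var_t old_domain;
        int vector;
};

static int alloc_mask(mask_var_t *m)
{
        *m = calloc(1, sizeof(unsigned long));
        return *m != NULL;
}

/* Multi-step allocation with unwind on partial failure, mirroring
 * get_one_free_irq_cfg(): each failure path frees exactly what the
 * earlier steps acquired, so the caller sees either a fully formed
 * object or NULL. */
static struct cfg *cfg_alloc(void)
{
        struct cfg *cfg = calloc(1, sizeof(*cfg));

        if (!cfg)
                return NULL;
        if (!alloc_mask(&cfg->domain)) {
                free(cfg);
                return NULL;
        }
        if (!alloc_mask(&cfg->old_domain)) {
                free(cfg->domain);
                free(cfg);
                return NULL;
        }
        return cfg;
}

int main(void)
{
        struct cfg *cfg = cfg_alloc();

        if (cfg) {
                free(cfg->old_domain);
                free(cfg->domain);
                free(cfg);
        }
        return 0;
}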
@@@ -237,11 -420,10 +420,10 @@@ static inline void io_apic_modify(unsig
        writel(value, &io_apic->data);
  }
  
- static bool io_apic_level_ack_pending(unsigned int irq)
+ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
  {
        struct irq_pin_list *entry;
        unsigned long flags;
-       struct irq_cfg *cfg = irq_cfg(irq);
  
        spin_lock_irqsave(&ioapic_lock, flags);
        entry = cfg->irq_2_pin;
@@@ -323,13 -505,32 +505,32 @@@ static void ioapic_mask_entry(int apic
  }
  
  #ifdef CONFIG_SMP
- static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+ static void send_cleanup_vector(struct irq_cfg *cfg)
+ {
+       cpumask_var_t cleanup_mask;
+       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+               unsigned int i;
+               cfg->move_cleanup_count = 0;
+               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+                       cfg->move_cleanup_count++;
+               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+                       send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+       } else {
+               cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+               cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               free_cpumask_var(cleanup_mask);
+       }
+       cfg->move_in_progress = 0;
+ }
+ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
  {
        int apic, pin;
-       struct irq_cfg *cfg;
        struct irq_pin_list *entry;
+       u8 vector = cfg->vector;
  
-       cfg = irq_cfg(irq);
        entry = cfg->irq_2_pin;
        for (;;) {
                unsigned int reg;
        }
  }
  
- static int assign_irq_vector(int irq, cpumask_t mask);
+ static int
+ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
  
- static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+ /*
+  * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
+  * of that, or returns BAD_APICID and leaves desc->affinity untouched.
+  */
+ static unsigned int
+ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+ {
+       struct irq_cfg *cfg;
+       unsigned int irq;
+       if (!cpumask_intersects(mask, cpu_online_mask))
+               return BAD_APICID;
+       irq = desc->irq;
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, mask))
+               return BAD_APICID;
+       cpumask_and(&desc->affinity, cfg->domain, mask);
+       set_extra_move_desc(desc, mask);
+       return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+ }
+ static void
+ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
  {
        struct irq_cfg *cfg;
        unsigned long flags;
        unsigned int dest;
-       cpumask_t tmp;
-       struct irq_desc *desc;
+       unsigned int irq;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
-               return;
+       irq = desc->irq;
+       cfg = desc->chip_data;
  
-       cfg = irq_cfg(irq);
-       if (assign_irq_vector(irq, mask))
-               return;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       dest = set_desc_affinity(desc, mask);
+       if (dest != BAD_APICID) {
+               /* Only the high 8 bits are valid. */
+               dest = SET_APIC_LOGICAL_ID(dest);
+               __target_IO_APIC_irq(irq, dest, cfg);
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
  
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
-       /*
-        * Only the high 8 bits are valid.
-        */
-       dest = SET_APIC_LOGICAL_ID(dest);
+ static void
+ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+ {
+       struct irq_desc *desc;
  
        desc = irq_to_desc(irq);
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __target_IO_APIC_irq(irq, dest, cfg->vector);
-       desc->affinity = mask;
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       set_ioapic_affinity_irq_desc(desc, mask);
  }
  #endif /* CONFIG_SMP */
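
send_cleanup_vector() above shows the flip side of cpumask_var_t: a GFP_ATOMIC allocation can fail, so the function keeps an allocation-free fallback that evaluates the old_domain-and-online intersection on the fly, once to count and once to send, instead of materializing it in a temporary. A sketch of that degrade-gracefully shape (stand-in types; send_to() is a placeholder):

#include <stdio.h>
#include <stdlib.h>

/* Placeholder for sending the cleanup IPI to one CPU. */
static void send_to(int cpu)
{
        printf("cleanup IPI -> cpu %d\n", cpu);
}

/* Fast path: materialize the intersection once, then reuse it.
 * Fallback: if the temporary cannot be allocated, evaluate the
 * intersection bit by bit, twice, as send_cleanup_vector() does. */
static void cleanup(unsigned long old_domain, unsigned long online, int nbits)
{
        unsigned long *tmp = malloc(sizeof(*tmp));
        int count = 0;

        if (!tmp) {
                for (int i = 0; i < nbits; i++)         /* pass 1: count */
                        if (old_domain & online & (1UL << i))
                                count++;
                printf("cleanup_count = %d\n", count);
                for (int i = 0; i < nbits; i++)         /* pass 2: send */
                        if (old_domain & online & (1UL << i))
                                send_to(i);
                return;
        }

        *tmp = old_domain & online;                     /* one AND, then reuse */
        for (int i = 0; i < nbits; i++)
                if (*tmp & (1UL << i))
                        count++;
        printf("cleanup_count = %d\n", count);
        for (int i = 0; i < nbits; i++)
                if (*tmp & (1UL << i))
                        send_to(i);
        free(tmp);
}

int main(void)
{
        cleanup(0x6, 0x7, 8);
        return 0;
}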
  
   * shared ISA-space IRQs, so we have to support them. We are super
   * fast in the common case, and fast for shared ISA-space IRQs.
   */
- static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
  {
-       struct irq_cfg *cfg;
        struct irq_pin_list *entry;
  
-       /* first time to refer irq_cfg, so with new */
-       cfg = irq_cfg_alloc(irq);
        entry = cfg->irq_2_pin;
        if (!entry) {
-               entry = get_one_free_irq_2_pin();
+               entry = get_one_free_irq_2_pin(cpu);
+               if (!entry) {
+                       printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+                                       apic, pin);
+                       return;
+               }
                cfg->irq_2_pin = entry;
                entry->apic = apic;
                entry->pin = pin;
                entry = entry->next;
        }
  
-       entry->next = get_one_free_irq_2_pin();
+       entry->next = get_one_free_irq_2_pin(cpu);
        entry = entry->next;
        entry->apic = apic;
        entry->pin = pin;
  /*
   * Reroute an IRQ to a different pin.
   */
- static void __init replace_pin_at_irq(unsigned int irq,
+ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
                                      int oldapic, int oldpin,
                                      int newapic, int newpin)
  {
-       struct irq_cfg *cfg = irq_cfg(irq);
        struct irq_pin_list *entry = cfg->irq_2_pin;
        int replaced = 0;
  
  
        /* why? call replace before add? */
        if (!replaced)
-               add_pin_to_irq(irq, newapic, newpin);
+               add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
  }
  
- static inline void io_apic_modify_irq(unsigned int irq,
+ static inline void io_apic_modify_irq(struct irq_cfg *cfg,
                                int mask_and, int mask_or,
                                void (*final)(struct irq_pin_list *entry))
  {
        int pin;
-       struct irq_cfg *cfg;
        struct irq_pin_list *entry;
  
-       cfg = irq_cfg(irq);
        for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
                unsigned int reg;
                pin = entry->pin;
        }
  }
  
- static void __unmask_IO_APIC_irq(unsigned int irq)
+ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
  }
  
  #ifdef CONFIG_X86_64
 -void io_apic_sync(struct irq_pin_list *entry)
 +static void io_apic_sync(struct irq_pin_list *entry)
  {
        /*
         * Synchronize the IO-APIC and the CPU by doing
        readl(&io_apic->data);
  }
  
- static void __mask_IO_APIC_irq(unsigned int irq)
+ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
  }
  #else /* CONFIG_X86_32 */
- static void __mask_IO_APIC_irq(unsigned int irq)
+ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+       io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
  }
  
- static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+ static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
                        IO_APIC_REDIR_MASKED, NULL);
  }
  
- static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
  {
-       io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+       io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
                        IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
  }
  #endif /* CONFIG_X86_32 */
  
- static void mask_IO_APIC_irq (unsigned int irq)
+ static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
  {
+       struct irq_cfg *cfg = desc->chip_data;
        unsigned long flags;
  
+       BUG_ON(!cfg);
        spin_lock_irqsave(&ioapic_lock, flags);
-       __mask_IO_APIC_irq(irq);
+       __mask_IO_APIC_irq(cfg);
        spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  
- static void unmask_IO_APIC_irq (unsigned int irq)
+ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
  {
+       struct irq_cfg *cfg = desc->chip_data;
        unsigned long flags;
  
        spin_lock_irqsave(&ioapic_lock, flags);
-       __unmask_IO_APIC_irq(irq);
+       __unmask_IO_APIC_irq(cfg);
        spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  
+ static void mask_IO_APIC_irq(unsigned int irq)
+ {
+       struct irq_desc *desc = irq_to_desc(irq);
+       mask_IO_APIC_irq_desc(desc);
+ }
+ static void unmask_IO_APIC_irq(unsigned int irq)
+ {
+       struct irq_desc *desc = irq_to_desc(irq);
+       unmask_IO_APIC_irq_desc(desc);
+ }
  static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
  {
        struct IO_APIC_route_entry entry;
@@@ -809,7 -1051,7 +1051,7 @@@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vecto
   */
  static int EISA_ELCR(unsigned int irq)
  {
-       if (irq < 16) {
+       if (irq < NR_IRQS_LEGACY) {
                unsigned int port = 0x4d0 + (irq >> 3);
                return (inb(port) >> (irq & 7)) & 1;
        }
@@@ -1034,7 -1276,8 +1276,8 @@@ void unlock_vector_lock(void
        spin_unlock(&vector_lock);
  }
  
- static int __assign_irq_vector(int irq, cpumask_t mask)
+ static int
+ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
  {
        /*
         * NOTE! The local APIC isn't very good at handling
         */
        static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
        unsigned int old_vector;
-       int cpu;
-       struct irq_cfg *cfg;
-       cfg = irq_cfg(irq);
-       /* Only try and allocate irqs on cpus that are present */
-       cpus_and(mask, mask, cpu_online_map);
+       int cpu, err;
+       cpumask_var_t tmp_mask;
  
        if ((cfg->move_in_progress) || cfg->move_cleanup_count)
                return -EBUSY;
  
+       if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+               return -ENOMEM;
        old_vector = cfg->vector;
        if (old_vector) {
-               cpumask_t tmp;
-               cpus_and(tmp, cfg->domain, mask);
-               if (!cpus_empty(tmp))
+               cpumask_and(tmp_mask, mask, cpu_online_mask);
+               cpumask_and(tmp_mask, cfg->domain, tmp_mask);
+               if (!cpumask_empty(tmp_mask)) {
+                       free_cpumask_var(tmp_mask);
                        return 0;
+               }
        }
  
-       for_each_cpu_mask_nr(cpu, mask) {
-               cpumask_t domain, new_mask;
+       /* Only try and allocate irqs on cpus that are present */
+       err = -ENOSPC;
+       for_each_cpu_and(cpu, mask, cpu_online_mask) {
                int new_cpu;
                int vector, offset;
  
-               domain = vector_allocation_domain(cpu);
-               cpus_and(new_mask, domain, cpu_online_map);
+               vector_allocation_domain(cpu, tmp_mask);
  
                vector = current_vector;
                offset = current_offset;
  next:
                vector += 8;
                if (vector >= first_system_vector) {
-                       /* If we run out of vectors on large boxen, must share them. */
+                       /* If out of vectors on large boxen, must share them. */
                        offset = (offset + 1) % 8;
                        vector = FIRST_DEVICE_VECTOR + offset;
                }
                if (unlikely(current_vector == vector))
                        continue;
- #ifdef CONFIG_X86_64
-               if (vector == IA32_SYSCALL_VECTOR)
-                       goto next;
- #else
-               if (vector == SYSCALL_VECTOR)
+               if (test_bit(vector, used_vectors))
                        goto next;
- #endif
-               for_each_cpu_mask_nr(new_cpu, new_mask)
+               for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
                        if (per_cpu(vector_irq, new_cpu)[vector] != -1)
                                goto next;
                /* Found one! */
                current_offset = offset;
                if (old_vector) {
                        cfg->move_in_progress = 1;
-                       cfg->old_domain = cfg->domain;
+                       cpumask_copy(cfg->old_domain, cfg->domain);
                }
-               for_each_cpu_mask_nr(new_cpu, new_mask)
+               for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
                        per_cpu(vector_irq, new_cpu)[vector] = irq;
                cfg->vector = vector;
-               cfg->domain = domain;
-               return 0;
+               cpumask_copy(cfg->domain, tmp_mask);
+               err = 0;
+               break;
        }
-       return -ENOSPC;
+       free_cpumask_var(tmp_mask);
+       return err;
  }
  
- static int assign_irq_vector(int irq, cpumask_t mask)
+ static int
+ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
  {
        int err;
        unsigned long flags;
  
        spin_lock_irqsave(&vector_lock, flags);
-       err = __assign_irq_vector(irq, mask);
+       err = __assign_irq_vector(irq, cfg, mask);
        spin_unlock_irqrestore(&vector_lock, flags);
        return err;
  }
  
- static void __clear_irq_vector(int irq)
+ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
  {
-       struct irq_cfg *cfg;
-       cpumask_t mask;
        int cpu, vector;
  
-       cfg = irq_cfg(irq);
        BUG_ON(!cfg->vector);
  
        vector = cfg->vector;
-       cpus_and(mask, cfg->domain, cpu_online_map);
-       for_each_cpu_mask_nr(cpu, mask)
+       for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
                per_cpu(vector_irq, cpu)[vector] = -1;
  
        cfg->vector = 0;
-       cpus_clear(cfg->domain);
+       cpumask_clear(cfg->domain);
  
        if (likely(!cfg->move_in_progress))
                return;
-       cpus_and(mask, cfg->old_domain, cpu_online_map);
-       for_each_cpu_mask_nr(cpu, mask) {
+       for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
                for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
                                                                vector++) {
                        if (per_cpu(vector_irq, cpu)[vector] != irq)
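
__assign_irq_vector() above hunts for a free vector in strides of 8, rotating an offset whenever the walk passes first_system_vector, so consecutive IRQs land in different APIC priority classes; a full wrap back to the starting vector means nothing is free. A standalone sketch of that search (the vector constants are illustrative, not the kernel's):

#include <stdio.h>
#include <stdbool.h>

#define FIRST_DEVICE_VECTOR 0x41        /* illustrative values */
#define FIRST_SYSTEM_VECTOR 0xef
#define NR_VECTORS 256

static bool used[NR_VECTORS];

/* Step by 8 so successive vectors land in different APIC priority
 * classes; rotate 'offset' when the walk passes the system vectors,
 * and detect a full wrap by returning to the starting vector. */
static int find_vector(void)
{
        static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
        int vector = current_vector, offset = current_offset;

        for (;;) {
                vector += 8;
                if (vector >= FIRST_SYSTEM_VECTOR) {
                        offset = (offset + 1) % 8;
                        vector = FIRST_DEVICE_VECTOR + offset;
                }
                if (vector == current_vector)
                        return -1;              /* wrapped: nothing free */
                if (used[vector])
                        continue;
                current_vector = vector;        /* found one */
                current_offset = offset;
                used[vector] = true;
                return vector;
        }
}

int main(void)
{
        for (int i = 0; i < 4; i++)
                printf("vector 0x%x\n", find_vector());
        return 0;
}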
@@@ -1162,10 -1400,12 +1400,12 @@@ void __setup_vector_irq(int cpu
        /* This function must be called with vector_lock held */
        int irq, vector;
        struct irq_cfg *cfg;
+       struct irq_desc *desc;
  
        /* Mark the inuse vectors */
-       for_each_irq_cfg(irq, cfg) {
-               if (!cpu_isset(cpu, cfg->domain))
+       for_each_irq_desc(irq, desc) {
+               cfg = desc->chip_data;
+               if (!cpumask_test_cpu(cpu, cfg->domain))
                        continue;
                vector = cfg->vector;
                per_cpu(vector_irq, cpu)[vector] = irq;
                        continue;
  
                cfg = irq_cfg(irq);
-               if (!cpu_isset(cpu, cfg->domain))
+               if (!cpumask_test_cpu(cpu, cfg->domain))
                        per_cpu(vector_irq, cpu)[vector] = -1;
        }
  }
@@@ -1215,11 -1455,8 +1455,8 @@@ static inline int IO_APIC_irq_trigger(i
  }
  #endif
  
- static void ioapic_register_intr(int irq, unsigned long trigger)
+ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
  {
-       struct irq_desc *desc;
-       desc = irq_to_desc(irq);
  
        if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
            trigger == IOAPIC_LEVEL)
@@@ -1311,23 -1548,22 +1548,22 @@@ static int setup_ioapic_entry(int apic
        return 0;
  }
  
- static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
                              int trigger, int polarity)
  {
        struct irq_cfg *cfg;
        struct IO_APIC_route_entry entry;
-       cpumask_t mask;
+       unsigned int dest;
  
        if (!IO_APIC_IRQ(irq))
                return;
  
-       cfg = irq_cfg(irq);
+       cfg = desc->chip_data;
  
-       mask = TARGET_CPUS;
-       if (assign_irq_vector(irq, mask))
+       if (assign_irq_vector(irq, cfg, TARGET_CPUS))
                return;
  
-       cpus_and(mask, cfg->domain, mask);
+       dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
  
        apic_printk(APIC_VERBOSE,KERN_DEBUG
                    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
  
  
        if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
-                              cpu_mask_to_apicid(mask), trigger, polarity,
-                              cfg->vector)) {
+                              dest, trigger, polarity, cfg->vector)) {
                printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
                       mp_ioapics[apic].mp_apicid, pin);
-               __clear_irq_vector(irq);
+               __clear_irq_vector(irq, cfg);
                return;
        }
  
-       ioapic_register_intr(irq, trigger);
-       if (irq < 16)
+       ioapic_register_intr(irq, desc, trigger);
+       if (irq < NR_IRQS_LEGACY)
                disable_8259A_irq(irq);
  
        ioapic_write_entry(apic, pin, entry);
@@@ -1356,6 -1591,9 +1591,9 @@@ static void __init setup_IO_APIC_irqs(v
  {
        int apic, pin, idx, irq;
        int notcon = 0;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int cpu = boot_cpu_id;
  
        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
  
                        if (multi_timer_check(apic, irq))
                                continue;
  #endif
-                       add_pin_to_irq(irq, apic, pin);
+                       desc = irq_to_desc_alloc_cpu(irq, cpu);
+                       if (!desc) {
+                               printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+                               continue;
+                       }
+                       cfg = desc->chip_data;
+                       add_pin_to_irq_cpu(cfg, cpu, apic, pin);
  
-                       setup_IO_APIC_irq(apic, pin, irq,
+                       setup_IO_APIC_irq(apic, pin, irq, desc,
                                        irq_trigger(idx), irq_polarity(idx));
                }
        }
@@@ -1448,6 -1692,7 +1692,7 @@@ __apicdebuginit(void) print_IO_APIC(voi
        union IO_APIC_reg_03 reg_03;
        unsigned long flags;
        struct irq_cfg *cfg;
+       struct irq_desc *desc;
        unsigned int irq;
  
        if (apic_verbosity == APIC_QUIET)
        }
        }
        printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for_each_irq_cfg(irq, cfg) {
-               struct irq_pin_list *entry = cfg->irq_2_pin;
+       for_each_irq_desc(irq, desc) {
+               struct irq_pin_list *entry;
+               cfg = desc->chip_data;
+               entry = cfg->irq_2_pin;
                if (!entry)
                        continue;
                printk(KERN_DEBUG "IRQ%d ", irq);
@@@ -2022,14 -2270,16 +2270,16 @@@ static unsigned int startup_ioapic_irq(
  {
        int was_pending = 0;
        unsigned long flags;
+       struct irq_cfg *cfg;
  
        spin_lock_irqsave(&ioapic_lock, flags);
-       if (irq < 16) {
+       if (irq < NR_IRQS_LEGACY) {
                disable_8259A_irq(irq);
                if (i8259A_irq_pending(irq))
                        was_pending = 1;
        }
-       __unmask_IO_APIC_irq(irq);
+       cfg = irq_cfg(irq);
+       __unmask_IO_APIC_irq(cfg);
        spin_unlock_irqrestore(&ioapic_lock, flags);
  
        return was_pending;
@@@ -2043,7 -2293,7 +2293,7 @@@ static int ioapic_retrigger_irq(unsigne
        unsigned long flags;
  
        spin_lock_irqsave(&vector_lock, flags);
-       send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+       send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
        spin_unlock_irqrestore(&vector_lock, flags);
  
        return 1;
@@@ -2092,35 -2342,35 +2342,35 @@@ static DECLARE_DELAYED_WORK(ir_migratio
   * as simple as edge triggered migration and we can do the irq migration
   * with a simple atomic update to IO-APIC RTE.
   */
- static void migrate_ioapic_irq(int irq, cpumask_t mask)
+ static void
+ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
  {
        struct irq_cfg *cfg;
-       struct irq_desc *desc;
-       cpumask_t tmp, cleanup_mask;
        struct irte irte;
        int modify_ioapic_rte;
        unsigned int dest;
        unsigned long flags;
+       unsigned int irq;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       if (!cpumask_intersects(mask, cpu_online_mask))
                return;
  
+       irq = desc->irq;
        if (get_irte(irq, &irte))
                return;
  
-       if (assign_irq_vector(irq, mask))
+       cfg = desc->chip_data;
+       if (assign_irq_vector(irq, cfg, mask))
                return;
  
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
+       set_extra_move_desc(desc, mask);
+       dest = cpu_mask_to_apicid_and(cfg->domain, mask);
  
-       desc = irq_to_desc(irq);
        modify_ioapic_rte = desc->status & IRQ_LEVEL;
        if (modify_ioapic_rte) {
                spin_lock_irqsave(&ioapic_lock, flags);
-               __target_IO_APIC_irq(irq, dest, cfg->vector);
+               __target_IO_APIC_irq(irq, dest, cfg);
                spin_unlock_irqrestore(&ioapic_lock, flags);
        }
  
         */
        modify_irte(irq, &irte);
  
-       if (cfg->move_in_progress) {
-               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               cfg->move_in_progress = 0;
-       }
+       if (cfg->move_in_progress)
+               send_cleanup_vector(cfg);
  
-       desc->affinity = mask;
+       cpumask_copy(&desc->affinity, mask);
  }
  
- static int migrate_irq_remapped_level(int irq)
+ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
  {
        int ret = -1;
-       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg = desc->chip_data;
  
-       mask_IO_APIC_irq(irq);
+       mask_IO_APIC_irq_desc(desc);
  
-       if (io_apic_level_ack_pending(irq)) {
+       if (io_apic_level_ack_pending(cfg)) {
                /*
                 * Interrupt in progress. Migrating irq now will change the
                 * vector information in the IO-APIC RTE and that will confuse
        }
  
        /* everthing is clear. we have right of way */
-       migrate_ioapic_irq(irq, desc->pending_mask);
+       migrate_ioapic_irq_desc(desc, &desc->pending_mask);
  
        ret = 0;
        desc->status &= ~IRQ_MOVE_PENDING;
-       cpus_clear(desc->pending_mask);
+       cpumask_clear(&desc->pending_mask);
  
  unmask:
-       unmask_IO_APIC_irq(irq);
+       unmask_IO_APIC_irq_desc(desc);
        return ret;
  }
  
@@@ -2189,7 -2436,7 +2436,7 @@@ static void ir_irq_migration(struct wor
                                continue;
                        }
  
-                       desc->chip->set_affinity(irq, desc->pending_mask);
+                       desc->chip->set_affinity(irq, &desc->pending_mask);
                        spin_unlock_irqrestore(&desc->lock, flags);
                }
        }
  /*
   * Migrates the IRQ destination in the process context.
   */
- static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+                                           const struct cpumask *mask)
  {
-       struct irq_desc *desc = irq_to_desc(irq);
        if (desc->status & IRQ_LEVEL) {
                desc->status |= IRQ_MOVE_PENDING;
-               desc->pending_mask = mask;
-               migrate_irq_remapped_level(irq);
+               cpumask_copy(&desc->pending_mask, mask);
+               migrate_irq_remapped_level_desc(desc);
                return;
        }
  
-       migrate_ioapic_irq(irq, mask);
+       migrate_ioapic_irq_desc(desc, mask);
+ }
+ static void set_ir_ioapic_affinity_irq(unsigned int irq,
+                                      const struct cpumask *mask)
+ {
+       struct irq_desc *desc = irq_to_desc(irq);
+       set_ir_ioapic_affinity_irq_desc(desc, mask);
  }
  #endif
  
@@@ -2228,6 -2481,9 +2481,9 @@@ asmlinkage void smp_irq_move_cleanup_in
                struct irq_cfg *cfg;
                irq = __get_cpu_var(vector_irq)[vector];
  
+               if (irq == -1)
+                       continue;
                desc = irq_to_desc(irq);
                if (!desc)
                        continue;
                if (!cfg->move_cleanup_count)
                        goto unlock;
  
-               if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+               if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
                        goto unlock;
  
                __get_cpu_var(vector_irq)[vector] = -1;
@@@ -2249,28 -2505,44 +2505,44 @@@ unlock
        irq_exit();
  }
  
- static void irq_complete_move(unsigned int irq)
+ static void irq_complete_move(struct irq_desc **descp)
  {
-       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_desc *desc = *descp;
+       struct irq_cfg *cfg = desc->chip_data;
        unsigned vector, me;
  
-       if (likely(!cfg->move_in_progress))
+       if (likely(!cfg->move_in_progress)) {
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+               if (likely(!cfg->move_desc_pending))
+                       return;
+               /* domain has not changed, but affinity did */
+               me = smp_processor_id();
+               if (cpu_isset(me, desc->affinity)) {
+                       *descp = desc = move_irq_desc(desc, me);
+                       /* get the new one */
+                       cfg = desc->chip_data;
+                       cfg->move_desc_pending = 0;
+               }
+ #endif
                return;
+       }
  
        vector = ~get_irq_regs()->orig_ax;
        me = smp_processor_id();
-       if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
-               cpumask_t cleanup_mask;
+ #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+               *descp = desc = move_irq_desc(desc, me);
+               /* get the new one */
+               cfg = desc->chip_data;
+ #endif
  
-               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               cfg->move_in_progress = 0;
-       }
+       if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+               send_cleanup_vector(cfg);
  }
  #else
- static inline void irq_complete_move(unsigned int irq) {}
+ static inline void irq_complete_move(struct irq_desc **descp) {}
  #endif
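
irq_complete_move() now takes struct irq_desc **descp because, under CONFIG_NUMA_MIGRATE_IRQ_DESC, move_irq_desc() may hand back a different descriptor reallocated closer to the CPU now handling the interrupt; the double pointer lets ack_apic_edge()/ack_apic_level() continue with the current object. A minimal sketch of the idiom (toy types, not the kernel's):

#include <stdio.h>
#include <stdlib.h>

struct desc {
        int node;
};

/* The callee may replace the object (here: reallocate on another
 * "node"); the double pointer keeps the caller's handle current. */
static void maybe_move(struct desc **descp, int me)
{
        struct desc *d = *descp;

        if (d->node != me) {
                struct desc *n = malloc(sizeof(*n));

                if (!n)
                        return;         /* keep the old one on failure */
                n->node = me;
                free(d);
                *descp = n;             /* caller's pointer tracks the move */
        }
}

int main(void)
{
        struct desc *d = malloc(sizeof(*d));

        d->node = 0;
        maybe_move(&d, 1);
        printf("desc now on node %d\n", d->node);
        free(d);
        return 0;
}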
  #ifdef CONFIG_INTR_REMAP
  static void ack_x2apic_level(unsigned int irq)
  {
@@@ -2281,11 -2553,14 +2553,14 @@@ static void ack_x2apic_edge(unsigned in
  {
        ack_x2APIC_irq();
  }
  #endif
  
  static void ack_apic_edge(unsigned int irq)
  {
-       irq_complete_move(irq);
+       struct irq_desc *desc = irq_to_desc(irq);
+       irq_complete_move(&desc);
        move_native_irq(irq);
        ack_APIC_irq();
  }
@@@ -2294,18 -2569,21 +2569,21 @@@ atomic_t irq_mis_count
  
  static void ack_apic_level(unsigned int irq)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
  #ifdef CONFIG_X86_32
        unsigned long v;
        int i;
  #endif
+       struct irq_cfg *cfg;
        int do_unmask_irq = 0;
  
-       irq_complete_move(irq);
+       irq_complete_move(&desc);
  #ifdef CONFIG_GENERIC_PENDING_IRQ
        /* If we are moving the irq we need to mask it */
-       if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+       if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
                do_unmask_irq = 1;
-               mask_IO_APIC_irq(irq);
+               mask_IO_APIC_irq_desc(desc);
        }
  #endif
  
        * operation to prevent an edge-triggered interrupt escaping meanwhile.
        * The idea is from Manfred Spraul.  --macro
        */
-       i = irq_cfg(irq)->vector;
+       cfg = desc->chip_data;
+       i = cfg->vector;
  
        v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
  #endif
                 * accurate and is causing problems then it is a hardware bug
                 * and you can go talk to the chipset vendor about it.
                 */
-               if (!io_apic_level_ack_pending(irq))
+               cfg = desc->chip_data;
+               if (!io_apic_level_ack_pending(cfg))
                        move_masked_irq(irq);
-               unmask_IO_APIC_irq(irq);
+               unmask_IO_APIC_irq_desc(desc);
        }
  
  #ifdef CONFIG_X86_32
        if (!(v & (1 << (i & 0x1f)))) {
                atomic_inc(&irq_mis_count);
                spin_lock(&ioapic_lock);
-               __mask_and_edge_IO_APIC_irq(irq);
-               __unmask_and_level_IO_APIC_irq(irq);
+               __mask_and_edge_IO_APIC_irq(cfg);
+               __unmask_and_level_IO_APIC_irq(cfg);
                spin_unlock(&ioapic_lock);
        }
  #endif
@@@ -2429,20 -2709,19 +2709,19 @@@ static inline void init_IO_APIC_traps(v
         * Also, we've got to be careful not to trash gate
         * 0x80, because int 0x80 is hm, kind of importantish. ;)
         */
-       for_each_irq_cfg(irq, cfg) {
-               if (IO_APIC_IRQ(irq) && !cfg->vector) {
+       for_each_irq_desc(irq, desc) {
+               cfg = desc->chip_data;
+               if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
                        /*
                         * Hmm.. We don't have an entry for this,
                         * so default to an old-fashioned 8259
                         * interrupt if we can..
                         */
-                       if (irq < 16)
+                       if (irq < NR_IRQS_LEGACY)
                                make_8259A_irq(irq);
-                       else {
-                               desc = irq_to_desc(irq);
+                       else
                                /* Strange. Oh, well.. */
                                desc->chip = &no_irq_chip;
-                       }
                }
        }
  }
@@@ -2467,7 -2746,7 +2746,7 @@@ static void unmask_lapic_irq(unsigned i
        apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
  }
  
- static void ack_lapic_irq (unsigned int irq)
+ static void ack_lapic_irq(unsigned int irq)
  {
        ack_APIC_irq();
  }
@@@ -2479,11 -2758,8 +2758,8 @@@ static struct irq_chip lapic_chip __rea
        .ack            = ack_lapic_irq,
  };
  
- static void lapic_register_intr(int irq)
+ static void lapic_register_intr(int irq, struct irq_desc *desc)
  {
-       struct irq_desc *desc;
-       desc = irq_to_desc(irq);
        desc->status &= ~IRQ_LEVEL;
        set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
                                      "edge");
@@@ -2587,7 -2863,9 +2863,9 @@@ int timer_through_8259 __initdata
   */
  static inline void __init check_timer(void)
  {
-       struct irq_cfg *cfg = irq_cfg(0);
+       struct irq_desc *desc = irq_to_desc(0);
+       struct irq_cfg *cfg = desc->chip_data;
+       int cpu = boot_cpu_id;
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
        unsigned int ver;
         * get/set the timer IRQ vector:
         */
        disable_8259A_irq(0);
-       assign_irq_vector(0, TARGET_CPUS);
+       assign_irq_vector(0, cfg, TARGET_CPUS);
  
        /*
         * As IRQ0 is to be enabled in the 8259A, the virtual
                 * Ok, does IRQ0 through the IOAPIC work?
                 */
                if (no_pin1) {
-                       add_pin_to_irq(0, apic1, pin1);
+                       add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                }
-               unmask_IO_APIC_irq(0);
+               unmask_IO_APIC_irq_desc(desc);
                if (timer_irq_works()) {
                        if (nmi_watchdog == NMI_IO_APIC) {
                                setup_nmi();
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+               replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
                setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-               unmask_IO_APIC_irq(0);
+               unmask_IO_APIC_irq_desc(desc);
                enable_8259A_irq(0);
                if (timer_irq_works()) {
                        apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
        apic_printk(APIC_QUIET, KERN_INFO
                    "...trying to set up timer as Virtual Wire IRQ...\n");
  
-       lapic_register_intr(0);
+       lapic_register_intr(0, desc);
        apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
        enable_8259A_irq(0);
  
@@@ -2901,22 -3179,26 +3179,26 @@@ unsigned int create_irq_nr(unsigned in
        unsigned int irq;
        unsigned int new;
        unsigned long flags;
-       struct irq_cfg *cfg_new;
-       irq_want = nr_irqs - 1;
+       struct irq_cfg *cfg_new = NULL;
+       int cpu = boot_cpu_id;
+       struct irq_desc *desc_new = NULL;
  
        irq = 0;
        spin_lock_irqsave(&vector_lock, flags);
-       for (new = irq_want; new > 0; new--) {
+       for (new = irq_want; new < NR_IRQS; new++) {
                if (platform_legacy_irq(new))
                        continue;
-               cfg_new = irq_cfg(new);
-               if (cfg_new && cfg_new->vector != 0)
+               desc_new = irq_to_desc_alloc_cpu(new, cpu);
+               if (!desc_new) {
+                       printk(KERN_INFO "can not get irq_desc for %d\n", new);
                        continue;
-               /* check if need to create one */
-               if (!cfg_new)
-                       cfg_new = irq_cfg_alloc(new);
-               if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+               }
+               cfg_new = desc_new->chip_data;
+               if (cfg_new->vector != 0)
+                       continue;
+               if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
                        irq = new;
                break;
        }
  
        if (irq > 0) {
                dynamic_irq_init(irq);
+               /* restore it, in case dynamic_irq_init clear it */
+               if (desc_new)
+                       desc_new->chip_data = cfg_new;
        }
        return irq;
  }
  
+ static int nr_irqs_gsi = NR_IRQS_LEGACY;
  int create_irq(void)
  {
+       unsigned int irq_want;
        int irq;
  
-       irq = create_irq_nr(nr_irqs - 1);
+       irq_want = nr_irqs_gsi;
+       irq = create_irq_nr(irq_want);
  
        if (irq == 0)
                irq = -1;
  void destroy_irq(unsigned int irq)
  {
        unsigned long flags;
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
  
+       /* store it, in case dynamic_irq_cleanup clears it */
+       desc = irq_to_desc(irq);
+       cfg = desc->chip_data;
        dynamic_irq_cleanup(irq);
+       /* reconnect the irq_cfg */
+       if (desc)
+               desc->chip_data = cfg;
  
  #ifdef CONFIG_INTR_REMAP
        free_irte(irq);
  #endif
        spin_lock_irqsave(&vector_lock, flags);
-       __clear_irq_vector(irq);
+       __clear_irq_vector(irq, cfg);
        spin_unlock_irqrestore(&vector_lock, flags);
  }
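
With the chip_data save/restore above, create_irq()/destroy_irq() survive dynamic_irq_init()/dynamic_irq_cleanup() wiping the descriptor. A hedged usage sketch; my_handler and "my_dev" are hypothetical and not part of this patch:

    int irq = create_irq();                  /* returns -1 on failure */
    if (irq >= 0) {
            if (request_irq(irq, my_handler, 0, "my_dev", NULL))
                    destroy_irq(irq);        /* roll the allocation back */
            /* ... on teardown: free_irq(irq, NULL); destroy_irq(irq); */
    }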
  
@@@ -2963,16 -3259,13 +3259,13 @@@ static int msi_compose_msg(struct pci_d
        struct irq_cfg *cfg;
        int err;
        unsigned dest;
-       cpumask_t tmp;
  
-       tmp = TARGET_CPUS;
-       err = assign_irq_vector(irq, tmp);
+       cfg = irq_cfg(irq);
+       err = assign_irq_vector(irq, cfg, TARGET_CPUS);
        if (err)
                return err;
  
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, tmp);
-       dest = cpu_mask_to_apicid(tmp);
+       dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
  
  #ifdef CONFIG_INTR_REMAP
        if (irq_remapped(irq)) {
  }
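
The new helper collapses the removed two-step sequence; conceptually (a sketch, ignoring how the masks are stored):

    /* cpu_mask_to_apicid_and(a, b) behaves like the removed code:
     *   cpumask_t tmp;
     *   cpus_and(tmp, a, b);
     *   dest = cpu_mask_to_apicid(tmp);
     * but without materializing a cpumask_t temporary on the stack,
     * which matters once NR_CPUS grows large. */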
  
  #ifdef CONFIG_SMP
- static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
-       cpumask_t tmp;
-       struct irq_desc *desc;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
                return;
  
-       if (assign_irq_vector(irq, mask))
-               return;
+       cfg = desc->chip_data;
  
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
-       read_msi_msg(irq, &msg);
+       read_msi_msg_desc(desc, &msg);
  
        msg.data &= ~MSI_DATA_VECTOR_MASK;
        msg.data |= MSI_DATA_VECTOR(cfg->vector);
        msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
-       write_msi_msg(irq, &msg);
-       desc = irq_to_desc(irq);
-       desc->affinity = mask;
+       write_msi_msg_desc(desc, &msg);
  }
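
The same shape repeats in the DMAR, HPET and HT affinity setters below: set_desc_affinity() now performs the validate/re-vector/record steps in one call. A sketch of the consolidated idiom (the function name is hypothetical):

    static void some_set_affinity(unsigned int irq, const struct cpumask *mask)
    {
            struct irq_desc *desc = irq_to_desc(irq);
            struct irq_cfg *cfg;
            unsigned int dest;

            dest = set_desc_affinity(desc, mask);   /* BAD_APICID on failure */
            if (dest == BAD_APICID)
                    return;

            cfg = desc->chip_data;
            /* reprogram the device with cfg->vector and dest */
    }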
  #ifdef CONFIG_INTR_REMAP
  /*
   * Migrate the MSI irq to another cpumask. This migration is
   * done in the process context using interrupt-remapping hardware.
   */
- static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+ static void
+ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
-       struct irq_cfg *cfg;
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg = desc->chip_data;
        unsigned int dest;
-       cpumask_t tmp, cleanup_mask;
        struct irte irte;
-       struct irq_desc *desc;
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
-               return;
  
        if (get_irte(irq, &irte))
                return;
  
-       if (assign_irq_vector(irq, mask))
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
                return;
  
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
        irte.vector = cfg->vector;
        irte.dest_id = IRTE_DEST(dest);
  
         * at the new destination. So, time to clean up the previous
         * vector allocation.
         */
-       if (cfg->move_in_progress) {
-               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
-               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
-               send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               cfg->move_in_progress = 0;
-       }
-       desc = irq_to_desc(irq);
-       desc->affinity = mask;
+       if (cfg->move_in_progress)
+               send_cleanup_vector(cfg);
  }
  #endif
  #endif /* CONFIG_SMP */
  
@@@ -3165,7 -3436,7 +3436,7 @@@ static int msi_alloc_irte(struct pci_de
  }
  #endif
  
- static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
  {
        int ret;
        struct msi_msg msg;
        if (ret < 0)
                return ret;
  
-       set_irq_msi(irq, desc);
+       set_irq_msi(irq, msidesc);
        write_msi_msg(irq, &msg);
  
  #ifdef CONFIG_INTR_REMAP
        return 0;
  }
  
- static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
- {
-       unsigned int irq;
-       irq = dev->bus->number;
-       irq <<= 8;
-       irq |= dev->devfn;
-       irq <<= 12;
-       return irq;
- }
- int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
  {
        unsigned int irq;
        int ret;
        unsigned int irq_want;
  
-       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+       irq_want = nr_irqs_gsi;
        irq = create_irq_nr(irq_want);
        if (irq == 0)
                return -1;
                goto error;
  no_ir:
  #endif
-       ret = setup_msi_irq(dev, desc, irq);
+       ret = setup_msi_irq(dev, msidesc, irq);
        if (ret < 0) {
                destroy_irq(irq);
                return ret;
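
For reference, the removed build_irq_for_pci_dev() hint produced very sparse, very large numbers; a worked example from the deleted code, for a hypothetical device at bus 0x03, devfn 0x28:

    /* irq  = 0x03;  irq <<= 8;  irq |= 0x28;   ->  0x0328
     * irq <<= 12;                              ->  0x328000
     * irq_want = 0x328000 + 0x100              =   0x328100 */

Starting at nr_irqs_gsi instead keeps MSI vectors packed just above the GSI range.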
@@@ -3245,7 -3503,7 +3503,7 @@@ int arch_setup_msi_irqs(struct pci_dev 
  {
        unsigned int irq;
        int ret, sub_handle;
-       struct msi_desc *desc;
+       struct msi_desc *msidesc;
        unsigned int irq_want;
  
  #ifdef CONFIG_INTR_REMAP
        int index = 0;
  #endif
  
-       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+       irq_want = nr_irqs_gsi;
        sub_handle = 0;
-       list_for_each_entry(desc, &dev->msi_list, list) {
-               irq = create_irq_nr(irq_want--);
+       list_for_each_entry(msidesc, &dev->msi_list, list) {
+               irq = create_irq_nr(irq_want);
+               irq_want++;
                if (irq == 0)
                        return -1;
  #ifdef CONFIG_INTR_REMAP
                }
  no_ir:
  #endif
-               ret = setup_msi_irq(dev, desc, irq);
+               ret = setup_msi_irq(dev, msidesc, irq);
                if (ret < 0)
                        goto error;
                sub_handle++;
@@@ -3307,24 -3566,18 +3566,18 @@@ void arch_teardown_msi_irq(unsigned in
  
  #ifdef CONFIG_DMAR
  #ifdef CONFIG_SMP
- static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
-       cpumask_t tmp;
-       struct irq_desc *desc;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
                return;
  
-       if (assign_irq_vector(irq, mask))
-               return;
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
+       cfg = desc->chip_data;
  
        dmar_msi_read(irq, &msg);
  
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
        dmar_msi_write(irq, &msg);
-       desc = irq_to_desc(irq);
-       desc->affinity = mask;
  }
  #endif /* CONFIG_SMP */
  
  struct irq_chip dmar_msi_type = {
@@@ -3368,24 -3620,18 +3620,18 @@@ int arch_setup_dmar_msi(unsigned int ir
  #ifdef CONFIG_HPET_TIMER
  
  #ifdef CONFIG_SMP
- static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
-       struct irq_desc *desc;
        struct msi_msg msg;
        unsigned int dest;
-       cpumask_t tmp;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
                return;
  
-       if (assign_irq_vector(irq, mask))
-               return;
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
+       cfg = desc->chip_data;
  
        hpet_msi_read(irq, &msg);
  
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
        hpet_msi_write(irq, &msg);
-       desc = irq_to_desc(irq);
-       desc->affinity = mask;
  }
  #endif /* CONFIG_SMP */
  
  struct irq_chip hpet_msi_type = {
@@@ -3450,28 -3695,21 +3695,21 @@@ static void target_ht_irq(unsigned int 
        write_ht_irq_msg(irq, &msg);
  }
  
- static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
  {
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        unsigned int dest;
-       cpumask_t tmp;
-       struct irq_desc *desc;
  
-       cpus_and(tmp, mask, cpu_online_map);
-       if (cpus_empty(tmp))
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
                return;
  
-       if (assign_irq_vector(irq, mask))
-               return;
-       cfg = irq_cfg(irq);
-       cpus_and(tmp, cfg->domain, mask);
-       dest = cpu_mask_to_apicid(tmp);
+       cfg = desc->chip_data;
  
        target_ht_irq(irq, dest, cfg->vector);
-       desc = irq_to_desc(irq);
-       desc->affinity = mask;
  }
  #endif
  
  static struct irq_chip ht_irq_chip = {
@@@ -3489,17 -3727,14 +3727,14 @@@ int arch_setup_ht_irq(unsigned int irq
  {
        struct irq_cfg *cfg;
        int err;
-       cpumask_t tmp;
  
-       tmp = TARGET_CPUS;
-       err = assign_irq_vector(irq, tmp);
+       cfg = irq_cfg(irq);
+       err = assign_irq_vector(irq, cfg, TARGET_CPUS);
        if (!err) {
                struct ht_irq_msg msg;
                unsigned dest;
  
-               cfg = irq_cfg(irq);
-               cpus_and(tmp, cfg->domain, tmp);
-               dest = cpu_mask_to_apicid(tmp);
+               dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
  
                msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
  
  int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
                       unsigned long mmr_offset)
  {
-       const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+       const struct cpumask *eligible_cpu = cpumask_of(cpu);
        struct irq_cfg *cfg;
        int mmr_pnode;
        unsigned long mmr_value;
        unsigned long flags;
        int err;
  
-       err = assign_irq_vector(irq, *eligible_cpu);
+       cfg = irq_cfg(irq);
+       err = assign_irq_vector(irq, cfg, eligible_cpu);
        if (err != 0)
                return err;
  
                                      irq_name);
        spin_unlock_irqrestore(&vector_lock, flags);
  
-       cfg = irq_cfg(irq);
        mmr_value = 0;
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
        BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
        entry->polarity = 0;
        entry->trigger = 0;
        entry->mask = 0;
-       entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+       entry->dest = cpu_mask_to_apicid(eligible_cpu);
  
        mmr_pnode = uv_blade_to_pnode(mmr_blade);
        uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@@ -3605,9 -3840,16 +3840,16 @@@ int __init io_apic_get_redir_entries (i
        return reg_01.bits.entries;
  }
  
- int __init probe_nr_irqs(void)
+ void __init probe_nr_irqs_gsi(void)
  {
-       return NR_IRQS;
+       int idx;
+       int nr = 0;
+       for (idx = 0; idx < nr_ioapics; idx++)
+               nr += io_apic_get_redir_entries(idx) + 1;
+       if (nr > nr_irqs_gsi)
+               nr_irqs_gsi = nr;
  }
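
io_apic_get_redir_entries() returns reg_01.bits.entries, the index of the last redirection entry, hence the "+ 1". A worked example under an assumed configuration:

    /* Two IO-APICs with 24 redirection entries each report 23, so
     *   nr = (23 + 1) + (23 + 1) = 48
     * and nr_irqs_gsi grows from NR_IRQS_LEGACY to 48; create_irq()
     * and the MSI paths above then allocate from 48 upward. */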
  
  /* --------------------------------------------------------------------------
@@@ -3706,19 -3948,31 +3948,31 @@@ int __init io_apic_get_version(int ioap
  
  int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
  {
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
+       int cpu = boot_cpu_id;
        if (!IO_APIC_IRQ(irq)) {
                apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
                        ioapic);
                return -EINVAL;
        }
  
+       desc = irq_to_desc_alloc_cpu(irq, cpu);
+       if (!desc) {
+               printk(KERN_INFO "can not get irq_desc %d\n", irq);
+               return 0;
+       }
        /*
         * IRQs < 16 are already in the irq_2_pin[] map
         */
-       if (irq >= 16)
-               add_pin_to_irq(irq, ioapic, pin);
+       if (irq >= NR_IRQS_LEGACY) {
+               cfg = desc->chip_data;
+               add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+       }
  
-       setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+       setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
  
        return 0;
  }
@@@ -3756,7 -4010,7 +4010,7 @@@ void __init setup_ioapic_dest(void
        int pin, ioapic, irq, irq_entry;
        struct irq_desc *desc;
        struct irq_cfg *cfg;
-       cpumask_t mask;
+       const struct cpumask *mask;
  
        if (skip_ioapic_setup == 1)
                return;
                         * when you have too many devices, because at that time only boot
                         * cpu is online.
                         */
-                       cfg = irq_cfg(irq);
+                       desc = irq_to_desc(irq);
+                       cfg = desc->chip_data;
                        if (!cfg->vector) {
-                               setup_IO_APIC_irq(ioapic, pin, irq,
+                               setup_IO_APIC_irq(ioapic, pin, irq, desc,
                                                  irq_trigger(irq_entry),
                                                  irq_polarity(irq_entry));
                                continue;
                        /*
                         * Honour affinities which have been set in early boot
                         */
-                       desc = irq_to_desc(irq);
                        if (desc->status &
                            (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-                               mask = desc->affinity;
+                               mask = &desc->affinity;
                        else
                                mask = TARGET_CPUS;
  
  #ifdef CONFIG_INTR_REMAP
                        if (intr_remapping_enabled)
-                               set_ir_ioapic_affinity_irq(irq, mask);
+                               set_ir_ioapic_affinity_irq_desc(desc, mask);
                        else
  #endif
-                               set_ioapic_affinity_irq(irq, mask);
+                               set_ioapic_affinity_irq_desc(desc, mask);
                }
  
        }
@@@ -3845,7 -4099,6 +4099,6 @@@ void __init ioapic_init_mappings(void
        struct resource *ioapic_res;
        int i;
  
-       irq_2_pin_init();
        ioapic_res = ioapic_setup_resources();
        for (i = 0; i < nr_ioapics; i++) {
                if (smp_found_config) {
diff --combined arch/x86/kernel/reboot.c
index a90913cccfb7a1f577b78e30e0ade0ec9557c159,39643b1df061d5ae049e8814a0abf126a430f362..bf088c61fa40c07fc8616505a8cee640b8f35e1d
@@@ -12,7 -12,7 +12,8 @@@
  #include <asm/proto.h>
  #include <asm/reboot_fixups.h>
  #include <asm/reboot.h>
 +#include <asm/pci_x86.h>
+ #include <asm/virtext.h>
  
  #ifdef CONFIG_X86_32
  # include <linux/dmi.h>
@@@ -24,6 -24,7 +25,6 @@@
  
  #include <mach_ipi.h>
  
 -
  /*
   * Power off function, if any
   */
@@@ -39,6 -40,12 +40,12 @@@ int reboot_force
  static int reboot_cpu = -1;
  #endif
  
+ /* This is set if we need to go through the 'emergency' path.
+  * When machine_emergency_restart() is called, we may be in an
+  * inconsistent state and unable to do a clean shutdown.
+  */
+ static int reboot_emergency;
  /* This is set by the PCI code if either type 1 or type 2 PCI is detected */
  bool port_cf9_safe = false;
  
@@@ -368,6 -375,48 +375,48 @@@ static inline void kb_wait(void
        }
  }
  
+ static void vmxoff_nmi(int cpu, struct die_args *args)
+ {
+       cpu_emergency_vmxoff();
+ }
+ /* Use NMIs as IPIs to tell all CPUs to disable virtualization. */
+ static void emergency_vmx_disable_all(void)
+ {
+       /* Just make sure we won't change CPUs while doing this */
+       local_irq_disable();
+       /* We need to disable VMX on all CPUs before rebooting, otherwise
+        * we risk hanging the machine, because CPUs ignore INIT
+        * signals while VMX is enabled.
+        *
+        * We can't take any locks and we may be in an inconsistent
+        * state, so we use NMIs as IPIs to tell the other CPUs to
+        * disable VMX and halt.
+        *
+        * To be safe, we avoid running the nmi_shootdown_cpus()
+        * machinery unnecessarily, but we have no way to check
+        * whether other CPUs have VMX enabled, so we only call it
+        * when the CPU we are running on has VMX enabled.
+        *
+        * This misses cases where VMX is not enabled on all CPUs,
+        * which shouldn't do much harm because KVM always enables
+        * VMX on all CPUs anyway; only the small window where KVM
+        * is still enabling VMX can slip through.
+        */
+       if (cpu_has_vmx() && cpu_vmx_enabled()) {
+               /* Disable VMX on this CPU. */
+               cpu_vmxoff();
+               /* Halt and disable VMX on the other CPUs */
+               nmi_shootdown_cpus(vmxoff_nmi);
+       }
+ }
  void __attribute__((weak)) mach_reboot_fixups(void)
  {
  }
@@@ -376,6 -425,9 +425,9 @@@ static void native_machine_emergency_re
  {
        int i;
  
+       if (reboot_emergency)
+               emergency_vmx_disable_all();
        /* Tell the BIOS if we want cold or warm reboot */
        *((unsigned short *)__va(0x472)) = reboot_mode;
  
@@@ -482,13 -534,19 +534,19 @@@ void native_machine_shutdown(void
  #endif
  }
  
+ static void __machine_emergency_restart(int emergency)
+ {
+       reboot_emergency = emergency;
+       machine_ops.emergency_restart();
+ }
  static void native_machine_restart(char *__unused)
  {
        printk("machine restart\n");
  
        if (!reboot_force)
                machine_shutdown();
-       machine_emergency_restart();
+       __machine_emergency_restart(0);
  }
  
  static void native_machine_halt(void)
@@@ -532,7 -590,7 +590,7 @@@ void machine_shutdown(void
  
  void machine_emergency_restart(void)
  {
-       machine_ops.emergency_restart();
+       __machine_emergency_restart(1);
  }
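
The resulting call flow, sketched from the hunks above:

    /* machine_emergency_restart()              crash/panic path
     *   -> __machine_emergency_restart(1)      sets reboot_emergency
     *     -> machine_ops.emergency_restart()
     *       -> native_machine_emergency_restart()
     *         -> emergency_vmx_disable_all()   VMXOFF + NMI shootdown
     * native_machine_restart() takes the same path with 0, skipping
     * the VMX teardown. */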
  
  void machine_restart(char *cmd)
@@@ -592,10 -650,7 +650,7 @@@ static int crash_nmi_callback(struct no
  
  static void smp_send_nmi_allbutself(void)
  {
-       cpumask_t mask = cpu_online_map;
-       cpu_clear(safe_smp_processor_id(), mask);
-       if (!cpus_empty(mask))
-               send_IPI_mask(mask, NMI_VECTOR);
+       send_IPI_allbutself(NMI_VECTOR);
  }
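
The replacement helper hides the mask juggling; a sketch of the equivalence:

    /* Equivalent intent of the removed lines:
     *   mask = online CPUs minus the calling CPU;
     *   if non-empty, send NMI_VECTOR to mask;
     * send_IPI_allbutself() does this internally, avoiding the
     * on-stack cpumask_t. */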
  
  static struct notifier_block crash_nmi_nb = {
diff --combined arch/x86/kernel/traps.c
index 961e26a69d55e38bc5fe32034d420eaab98dd24e,2d1f4c7e40524ba7c3d0847583f0b5041ccdba30..ce6650eb64e976e120cbefa87adae9b4675e94ed
@@@ -72,9 -72,6 +72,6 @@@
  
  #include "cpu/mcheck/mce.h"
  
- DECLARE_BITMAP(used_vectors, NR_VECTORS);
- EXPORT_SYMBOL_GPL(used_vectors);
  asmlinkage int system_call(void);
  
  /* Do we ignore FPU interrupts ? */
@@@ -89,6 -86,9 +86,9 @@@ gate_desc idt_table[256
        __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
  #endif
  
+ DECLARE_BITMAP(used_vectors, NR_VECTORS);
+ EXPORT_SYMBOL_GPL(used_vectors);
  static int ignore_nmis;
  
  static inline void conditional_sti(struct pt_regs *regs)
@@@ -292,10 -292,8 +292,10 @@@ dotraplinkage void do_double_fault(stru
        tsk->thread.error_code = error_code;
        tsk->thread.trap_no = 8;
  
 -      /* This is always a kernel trap and never fixable (and thus must
 -         never return). */
 +      /*
 +       * This is always a kernel trap and never fixable (and thus must
 +       * never return).
 +       */
        for (;;)
                die(str, regs, error_code);
  }
@@@ -522,11 -520,9 +522,11 @@@ dotraplinkage void __kprobes do_int3(st
  }
  
  #ifdef CONFIG_X86_64
 -/* Help handler running on IST stack to switch back to user stack
 -   for scheduling or signal handling. The actual stack switch is done in
 -   entry.S */
 +/*
 + * Help handler running on IST stack to switch back to user stack
 + * for scheduling or signal handling. The actual stack switch is done in
 + * entry.S
 + */
  asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
  {
        struct pt_regs *regs = eregs;
        /* Exception from user space */
        else if (user_mode(eregs))
                regs = task_pt_regs(current);
 -      /* Exception from kernel and interrupts are enabled. Move to
 -         kernel process stack. */
 +      /*
 +       * Exception from kernel and interrupts are enabled. Move to
 +       * kernel process stack.
 +       */
        else if (eregs->flags & X86_EFLAGS_IF)
                regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
        if (eregs != regs)
@@@ -691,7 -685,12 +691,7 @@@ void math_error(void __user *ip
        cwd = get_fpu_cwd(task);
        swd = get_fpu_swd(task);
  
 -      err = swd & ~cwd & 0x3f;
 -
 -#ifdef CONFIG_X86_32
 -      if (!err)
 -              return;
 -#endif
 +      err = swd & ~cwd;
  
        if (err & 0x001) {      /* Invalid op */
                /*
        } else if (err & 0x020) { /* Precision */
                info.si_code = FPE_FLTRES;
        } else {
 -              info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
 +              /*
 +               * If we're using IRQ 13, or supposedly even some trap 16
 +               * implementations, it's possible we get a spurious trap...
 +               */
 +              return;         /* Spurious trap, no error */
        }
        force_sig_info(SIGFPE, &info, task);
  }
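
A worked example of the simplified masking above (standard x87 semantics assumed):

    /* With the default control word 0x037f all exceptions are masked:
     *   swd = 0x0001 (invalid op)  ->  err = 0x0001 & ~0x037f = 0
     * so every branch misses and the trap is treated as spurious.
     * Unmask IE (cwd = 0x037e) and the same status word yields
     *   err = 0x0001  ->  FPE_FLTINV. */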
@@@ -946,9 -941,7 +946,7 @@@ dotraplinkage void do_iret_error(struc
  
  void __init trap_init(void)
  {
- #ifdef CONFIG_X86_32
        int i;
- #endif
  
  #ifdef CONFIG_EISA
        void __iomem *p = early_ioremap(0x0FFFD9, 4);
        }
  
        set_system_trap_gate(SYSCALL_VECTOR, &system_call);
+ #endif
  
        /* Reserve all the builtin and the syscall vector: */
        for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
                set_bit(i, used_vectors);
  
+ #ifdef CONFIG_X86_64
+       set_bit(IA32_SYSCALL_VECTOR, used_vectors);
+ #else
        set_bit(SYSCALL_VECTOR, used_vectors);
  #endif
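
The net effect of the reservation block (vector values assumed from irq_vectors.h):

    /* FIRST_EXTERNAL_VECTOR is 0x20, so vectors 0x00-0x1f (CPU
     * exceptions) are marked used on both 32- and 64-bit, plus the
     * syscall vector 0x80 (IA32_SYSCALL_VECTOR resp. SYSCALL_VECTOR).
     * Device vector allocation must skip every bit set here. */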
        /*