if (i == 0) {
                seq_printf(p, "           ");
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
                seq_putc(p, '\n');
        }
 
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
                seq_printf(p, " %14s", irq_desc[i].handler->typename);
                seq_printf(p, "  %s", action->name);
 
        switch (i) {
        case 0:
                seq_printf(p, "           ");
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
 
                seq_putc(p, '\n');
                break;
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]);
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i - 1]);
 #endif
 
                level = group->sources[ix]->level - frv_irq_levels;
 
 {
        unsigned int i, supported_cpus = 0;
 
-       for (i=0; i<NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
+       for_each_cpu(i) {
                if (check_supported_cpu(i))
                        supported_cpus++;
        }
 
 {
        int i, j;
        Dprintk("Rotating IRQs among CPUs.\n");
-       for (i = 0; i < NR_CPUS; i++) {
-               for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
+       for_each_online_cpu(i) {
+               for (j = 0; j < NR_IRQS; j++) {
                        if (!irq_desc[j].action)
                                continue;
                        /* Is it a significant load ?  */
        unsigned long imbalance = 0;
        cpumask_t allowed_mask, target_cpu_mask, tmp;
 
-       for (i = 0; i < NR_CPUS; i++) {
+       for_each_cpu(i) {
                int package_index;
                CPU_IRQ(i) = 0;
                if (!cpu_online(i))
                }
        }
        /* Find the least loaded processor package */
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
+       for_each_online_cpu(i) {
                if (i != CPU_TO_PACKAGEINDEX(i))
                        continue;
                if (min_cpu_irq > CPU_IRQ(i)) {
         */
        tmp_cpu_irq = 0;
        tmp_loaded = -1;
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
+       for_each_online_cpu(i) {
                if (i != CPU_TO_PACKAGEINDEX(i))
                        continue;
                if (max_cpu_irq <= CPU_IRQ(i)) 
        if (smp_num_siblings > 1 && !cpus_empty(tmp))
                physical_balance = 1;
 
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
+       for_each_online_cpu(i) {
                irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
                irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
                if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
        else 
                printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
 failed:
-       for (i = 0; i < NR_CPUS; i++) {
+       for_each_cpu(i) {
                kfree(irq_cpu_data[i].irq_delta);
+               irq_cpu_data[i].irq_delta = NULL;
                kfree(irq_cpu_data[i].last_irq);
+               irq_cpu_data[i].last_irq = NULL;
        }
        return 0;
 }
 
        local_irq_enable();
        mdelay((10*1000)/nmi_hz); // wait 10 ticks
 
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+       for_each_cpu(cpu) {
 #ifdef CONFIG_SMP
                /* Check cpu_callin_map here because that is set
                   after the timer is started. */
         * Just reset the alert counters, (other CPUs might be
         * spinning on locks we hold):
         */
-       for (i = 0; i < NR_CPUS; i++)
+       for_each_cpu(i)
                alert_counter[i] = 0;
 
        /*
 
 static void free_msrs(void)
 {
        int i;
-       for (i = 0; i < NR_CPUS; ++i) {
+       for_each_cpu(i) {
                kfree(cpu_msrs[i].counters);
                cpu_msrs[i].counters = NULL;
                kfree(cpu_msrs[i].controls);
        size_t counters_size = sizeof(struct op_msr) * model->num_counters;
 
        int i;
-       for (i = 0; i < NR_CPUS; ++i) {
-               if (!cpu_online(i))
-                       continue;
-
+       for_each_online_cpu(i) {
                cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
                if (!cpu_msrs[i].counters) {
                        success = 0;
 
 
        if (i == 0) {
                seq_printf(p, "           ");
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
                seq_putc(p, '\n');
        }
 
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
                seq_printf(p, " %14s", irq_desc[i].handler->typename);
                seq_printf(p, "  %s", action->name);
 
 
        if (i == 0) {
                seq_printf(p, "           ");
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
                seq_putc(p, '\n');
        }
 
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
                seq_printf(p, " %14s", irq_desc[i].handler->typename);
                seq_printf(p, "  %s", action->name);
 
        mb();
 
        /* Send a message to all other CPUs and wait for them to respond */
-       for (i = 0; i < NR_CPUS; i++)
-               if (cpu_online(i) && i != cpu)
+       for_each_online_cpu(i)
+               if (i != cpu)
                        core_send_ipi(i, SMP_CALL_FUNCTION);
 
        /* Wait for response */
 
 {
        int cpu, i;
 
-       for (cpu = 0; cpu <= NR_CPUS; cpu++) {
+       for_each_online_cpu(cpu) {
                struct slice_data *si = cpu_data[cpu].data;
 
-               if (!cpu_online(cpu))
-                       continue;
-
                for (i = BASE_PCI_IRQ; i < LEVELS_PER_SLICE; i++)
                        if (si->level_to_irq[i] == irq) {
                                *cpunum = cpu;
 
 {
        int i;
        
-       for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_online(i) && i != smp_processor_id())
+       for_each_online_cpu(i) {
+               if (i != smp_processor_id())
                        send_IPI_single(i, op);
        }
 }
        if ( argc == 1 ){
        
 #ifdef DUMP_MORE_STATE
-               for(i=0; i<NR_CPUS; i++) {
+               for_each_online_cpu(i) {
                        int cpus_per_line = 4;
-                       if(cpu_online(i)) {
-                               if (j++ % cpus_per_line)
-                                       printk(" %3d",i);
-                               else
-                                       printk("\n %3d",i);
-                       }
+
+                       if (j++ % cpus_per_line)
+                               printk(" %3d",i);
+                       else
+                               printk("\n %3d",i);
                }
                printk("\n"); 
 #else
        } else if((argc==2) && !(strcmp(argv[1],"-l"))) {
                printk("\nCPUSTATE  TASK CPUNUM CPUID HARDCPU(HPA)\n");
 #ifdef DUMP_MORE_STATE
-               for(i=0;i<NR_CPUS;i++) {
-                       if (!cpu_online(i))
-                               continue;
+               for_each_online_cpu(i) {
                        if (cpu_data[i].cpuid != NO_PROC_ID) {
                                switch(cpu_data[i].state) {
                                        case STATE_RENDEZVOUS:
        } else if ((argc==2) && !(strcmp(argv[1],"-s"))) { 
 #ifdef DUMP_MORE_STATE
                printk("\nCPUSTATE   CPUID\n");
-               for (i=0;i<NR_CPUS;i++) {
-                       if (!cpu_online(i))
-                               continue;
+               for_each_online_cpu(i) {
                        if (cpu_data[i].cpuid != NO_PROC_ID) {
                                switch(cpu_data[i].state) {
                                        case STATE_RENDEZVOUS:
 
 #ifdef CONFIG_TAU_INT
                if (tau_initialized){
                        seq_puts(p, "TAU: ");
-                       for (j = 0; j < NR_CPUS; j++)
-                               if (cpu_online(j))
-                                       seq_printf(p, "%10u ", tau_interrupts(j));
+                       for_each_online_cpu(j)
+                               seq_printf(p, "%10u ", tau_interrupts(j));
                        seq_puts(p, "  PowerPC             Thermal Assist (cpu temp)\n");
                }
 #endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
                unsigned long bogosum = 0;
                int i;
-               for (i = 0; i < NR_CPUS; ++i)
-                       if (cpu_online(i))
-                               bogosum += loops_per_jiffy;
+               for_each_online_cpu(i)
+                       bogosum += loops_per_jiffy;
                seq_printf(m, "total bogomips\t: %lu.%02lu\n",
                           bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
 #endif /* CONFIG_SMP && CONFIG_PPC32 */
 
        if ( ppc_md.progress ) ppc_md.progress("             ", 0xffff);
 
        /* register CPU devices */
-       for (i = 0; i < NR_CPUS; i++)
-               if (cpu_possible(i))
-                       register_cpu(&cpu_devices[i], i, NULL);
+       for_each_cpu(i)
+               register_cpu(&cpu_devices[i], i, NULL);
 
        /* call platform init */
        if (ppc_md.init != NULL) {
 
        if (num_online_cpus() < 2)
                return;
 
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
+       for_each_online_cpu(i) {
                if (target == MSG_ALL
                    || (target == MSG_ALL_BUT_SELF && i != smp_processor_id())
                    || target == i) {
 
                /* Show summary information */
 #ifdef CONFIG_SMP
                unsigned long bogosum = 0;
-               for (i = 0; i < NR_CPUS; ++i)
-                       if (cpu_online(i))
-                               bogosum += cpu_data[i].loops_per_jiffy;
+               for_each_online_cpu(i)
+                       bogosum += cpu_data[i].loops_per_jiffy;
                seq_printf(m, "total bogomips\t: %lu.%02lu\n",
                           bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
 #endif /* CONFIG_SMP */
        if ( ppc_md.progress ) ppc_md.progress("             ", 0xffff);
 
        /* register CPU devices */
-       for (i = 0; i < NR_CPUS; i++)
-               if (cpu_possible(i))
-                       register_cpu(&cpu_devices[i], i, NULL);
+       for_each_cpu(i)
+               register_cpu(&cpu_devices[i], i, NULL);
 
        /* call platform init */
        if (ppc_md.init != NULL) {
 
          */
        print_cpu_info(&S390_lowcore.cpu_data);
 
-        for(i = 0; i < NR_CPUS; i++) {
-               if (!cpu_possible(i))
-                       continue;
+        for_each_cpu(i) {
                lowcore_ptr[i] = (struct _lowcore *)
                        __get_free_pages(GFP_KERNEL|GFP_DMA, 
                                        sizeof(void*) == 8 ? 1 : 0);
 
 
        if (i == 0) {
                seq_puts(p, "           ");
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
                seq_putc(p, '\n');
        }
 
 
 {
        int cpu_id;
 
-       for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++)
-               if (cpu_possible(cpu_id))
-                       register_cpu(&cpu[cpu_id], cpu_id, NULL);
+       for_each_cpu(cpu_id)
+               register_cpu(&cpu[cpu_id], cpu_id, NULL);
 
        return 0;
 }
 
 
        if (i == 0) {
                seq_puts(p, "           ");
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
                seq_putc(p, '\n');
        }
 
 
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j = 0; j < NR_CPUS; j++) {
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ",
+               for_each_online_cpu(j) {
+                       seq_printf(p, "%10u ",
                                    kstat_cpu(cpu_logical_map(j)).irqs[i]);
                }
 #endif
 
                return -EINVAL;
 
        spin_lock_irqsave(&prof_setup_lock, flags);
-       for(i = 0; i < NR_CPUS; i++) {
-               if (cpu_possible(i))
-                       load_profile_irq(i, lvl14_resolution / multiplier);
+       for_each_cpu(i) {
+               load_profile_irq(i, lvl14_resolution / multiplier);
                prof_multiplier(i) = multiplier;
        }
        spin_unlock_irqrestore(&prof_setup_lock, flags);
 {
        int i;
        
-       for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_online(i))
-                       seq_printf(m,
-                                  "Cpu%dBogo\t: %lu.%02lu\n", 
-                                  i,
-                                  cpu_data(i).udelay_val/(500000/HZ),
-                                  (cpu_data(i).udelay_val/(5000/HZ))%100);
+       for_each_online_cpu(i) {
+               seq_printf(m,
+                          "Cpu%dBogo\t: %lu.%02lu\n",
+                          i,
+                          cpu_data(i).udelay_val/(500000/HZ),
+                          (cpu_data(i).udelay_val/(5000/HZ))%100);
        }
 }
 
        int i;
 
        seq_printf(m, "State:\n");
-       for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_online(i))
-                       seq_printf(m, "CPU%d\t\t: online\n", i);
-       }
+       for_each_online_cpu(i)
+               seq_printf(m, "CPU%d\t\t: online\n", i);
 }
 
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (x = 0; x < NR_CPUS; x++) {
-                       if (cpu_online(x))
-                               seq_printf(p, "%10u ",
-                                      kstat_cpu(cpu_logical_map(x)).irqs[i]);
-               }
+               for_each_online_cpu(x)
+                       seq_printf(p, "%10u ",
+                              kstat_cpu(cpu_logical_map(x)).irqs[i]);
 #endif
                seq_printf(p, "%c %s",
                        (action->flags & SA_INTERRUPT) ? '+' : ' ',
 
        } else {
                unsigned long bogosum = 0;
                
-               for(i = 0; i < NR_CPUS; i++) {
-                       if (cpu_isset(i, cpu_present_map)) {
-                               bogosum += cpu_data(i).udelay_val;
-                               smp_highest_cpu = i;
-                       }
+               for_each_present_cpu(i) {
+                       bogosum += cpu_data(i).udelay_val;
+                       smp_highest_cpu = i;
                }
                SMP_PRINTK(("Total of %d Processors activated (%lu.%02lu BogoMIPS).\n", cpucount + 1, bogosum/(500000/HZ), (bogosum/(5000/HZ))%100));
                printk("Total of %d Processors activated (%lu.%02lu BogoMIPS).\n",
 
                cpu_present_map = cpumask_of_cpu(smp_processor_id());
        } else {
                unsigned long bogosum = 0;
-               for(i = 0; i < NR_CPUS; i++) {
-                       if (cpu_isset(i, cpu_present_map))
-                               bogosum += cpu_data(i).udelay_val;
-               }
+               for_each_present_cpu(i)
+                       bogosum += cpu_data(i).udelay_val;
                printk("Total of %d Processors activated (%lu.%02lu BogoMIPS).\n",
                       cpucount + 1,
                       bogosum/(500000/HZ),
 
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j = 0; j < NR_CPUS; j++) {
-                       if (!cpu_online(j))
-                               continue;
+               for_each_online_cpu(j) {
                        seq_printf(p, "%10u ",
                                   kstat_cpu(j).irqs[i]);
                }
 
        int i;
        
        seq_printf(m, "State:\n");
-       for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_online(i))
-                       seq_printf(m,
-                                  "CPU%d:\t\tonline\n", i);
-       }
+       for_each_online_cpu(i)
+               seq_printf(m, "CPU%d:\t\tonline\n", i);
 }
 
 void smp_bogo(struct seq_file *m)
 {
        int i;
        
-       for (i = 0; i < NR_CPUS; i++)
-               if (cpu_online(i))
-                       seq_printf(m,
-                                  "Cpu%dBogo\t: %lu.%02lu\n"
-                                  "Cpu%dClkTck\t: %016lx\n",
-                                  i, cpu_data(i).udelay_val / (500000/HZ),
-                                  (cpu_data(i).udelay_val / (5000/HZ)) % 100,
-                                  i, cpu_data(i).clock_tick);
+       for_each_online_cpu(i)
+               seq_printf(m,
+                          "Cpu%dBogo\t: %lu.%02lu\n"
+                          "Cpu%dClkTck\t: %016lx\n",
+                          i, cpu_data(i).udelay_val / (500000/HZ),
+                          (cpu_data(i).udelay_val / (5000/HZ)) % 100,
+                          i, cpu_data(i).clock_tick);
 }
 
 void __init smp_store_cpu_info(int id)
                return -EINVAL;
 
        spin_lock_irqsave(&prof_setup_lock, flags);
-       for (i = 0; i < NR_CPUS; i++)
+       for_each_cpu(i)
                prof_multiplier(i) = multiplier;
        current_tick_offset = (timer_tick_offset / multiplier);
        spin_unlock_irqrestore(&prof_setup_lock, flags);
        unsigned long bogosum = 0;
        int i;
 
-       for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_online(i))
-                       bogosum += cpu_data(i).udelay_val;
-       }
+       for_each_online_cpu(i)
+               bogosum += cpu_data(i).udelay_val;
        printk("Total of %ld processors activated "
               "(%lu.%02lu BogoMIPS).\n",
               (long) num_online_cpus(),
 
 
        if (i == 0) {
                seq_printf(p, "           ");
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
                seq_putc(p, '\n');
        }
 
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                       seq_printf(p, "%10u ",
-                               kstat_cpu(j).irqs[i]);
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
                seq_printf(p, " %14s", irq_desc[i].handler->typename);
 
                spin_unlock_irqrestore(&irq_desc[i].lock, flags);
        } else if (i == NR_IRQS) {
                seq_printf(p, "NMI: ");
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
                seq_putc(p, '\n');
 #ifdef CONFIG_X86_LOCAL_APIC
                seq_printf(p, "LOC: ");
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
                seq_putc(p, '\n');
 #endif
                seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
 
        local_irq_enable();
        mdelay((10*1000)/nmi_hz); // wait 10 ticks
 
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
-               if (!cpu_online(cpu))
-                       continue;
+       for_each_online_cpu(cpu) {
                if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
                        endflag = 1;
                        printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
 
 
        if (i == 0) {
                seq_printf(p, "           ");
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
                seq_putc(p, '\n');
        }
 
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
                seq_printf(p, " %14s", irq_desc[i].handler->typename);
                seq_printf(p, "  %s", action->name);
                spin_unlock_irqrestore(&irq_desc[i].lock, flags);
        } else if (i == NR_IRQS) {
                seq_printf(p, "NMI: ");
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", nmi_count(j));
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", nmi_count(j));
                seq_putc(p, '\n');
                seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
        }
 
 
        memset(stats, 0, sizeof(struct net_device_stats));
 
-       for (i=0; i < NR_CPUS; i++) {
+       for_each_cpu(i) {
                struct net_device_stats *lb_stats;
 
-               if (!cpu_possible(i)) 
-                       continue;
                lb_stats = &per_cpu(loopback_stats, i);
                stats->rx_bytes   += lb_stats->rx_bytes;
                stats->tx_bytes   += lb_stats->tx_bytes;
 
 {
        int i;
  
-       for_each_online_cpu(i) {
+       for_each_online_cpu(i)
                vfree(cpu_buffer[i].buffer);
-       }
 }
 
 int alloc_cpu_buffers(void)
 
                while (j < xstats[i].endpoint) {
                        val = 0;
                        /* sum over all cpus */
-                       for (c = 0; c < NR_CPUS; c++) {
-                               if (!cpu_possible(c)) continue;
+                       for_each_cpu(c)
                                val += *(((__u32*)&per_cpu(xfsstats, c) + j));
-                       }
                        len += sprintf(buffer + len, " %u", val);
                        j++;
                }
                buffer[len++] = '\n';
        }
        /* extra precision counters */
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_possible(i)) continue;
+       for_each_cpu(i) {
                xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
                xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
                xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
 
 
        if (!ret && write && *valp) {
                printk("XFS Clearing xfsstats\n");
-               for (c = 0; c < NR_CPUS; c++) {
-                       if (!cpu_possible(c)) continue;
+               for_each_cpu(c) {
                        preempt_disable();
                        /* save vn_active, it's a universal truth! */
                        vn_active = per_cpu(xfsstats, c).vn_active;
 
 {
        int i;
 
-       for (i = 0; i < NR_CPUS; i++)
-               if (cpu_online(i))
-                       mm->context[i] = 0;
+       for_each_online_cpu(i)
+               mm->context[i] = 0;
        if (tsk != current)
                task_thread_info(tsk)->pcb.ptbr
                  = ((unsigned long)mm->pgd - IDENT_ADDR) >> PAGE_SHIFT;
 
        cpumask_t node_cpu_mask = CPU_MASK_NONE;
        int cpu;
 
-       for(cpu = 0; cpu < NR_CPUS; cpu++) {
-               if (cpu_online(cpu) && (cpu_to_node(cpu) == node))
+       for_each_online_cpu(cpu) {
+               if (cpu_to_node(cpu) == node)
                        cpu_set(cpu, node_cpu_mask);
        }
 
 
 #define percpu_modcopy(pcpudst, src, size)                     \
 do {                                                           \
        unsigned int __i;                                       \
-       for (__i = 0; __i < NR_CPUS; __i++)                     \
-               if (cpu_possible(__i))                          \
-                       memcpy((pcpudst)+__per_cpu_offset[__i], \
-                              (src), (size));                  \
+       for_each_cpu(__i)                                       \
+               memcpy((pcpudst)+__per_cpu_offset[__i],         \
+                      (src), (size));                          \
 } while (0)
 #else /* ! SMP */
 
 
 #define percpu_modcopy(pcpudst, src, size)                     \
 do {                                                           \
        unsigned int __i;                                       \
-       for (__i = 0; __i < NR_CPUS; __i++)                     \
-               if (cpu_possible(__i))                          \
-                       memcpy((pcpudst)+__per_cpu_offset(__i), \
-                              (src), (size));                  \
+       for_each_cpu(__i)                                       \
+               memcpy((pcpudst)+__per_cpu_offset(__i),         \
+                      (src), (size));                          \
 } while (0)
 
 extern void setup_per_cpu_areas(void);
 
 #define percpu_modcopy(pcpudst, src, size)                     \
 do {                                                           \
        unsigned int __i;                                       \
-       for (__i = 0; __i < NR_CPUS; __i++)                     \
-               if (cpu_possible(__i))                          \
-                       memcpy((pcpudst)+__per_cpu_offset[__i], \
-                              (src), (size));                  \
+       for_each_cpu(__i)                                       \
+               memcpy((pcpudst)+__per_cpu_offset[__i],         \
+                      (src), (size));                          \
 } while (0)
 
 #else /* ! SMP */
 
 #define percpu_modcopy(pcpudst, src, size)                     \
 do {                                                           \
        unsigned int __i;                                       \
-       for (__i = 0; __i < NR_CPUS; __i++)                     \
-               if (cpu_possible(__i))                          \
-                       memcpy((pcpudst)+__per_cpu_offset(__i), \
-                              (src), (size));                  \
+       for_each_cpu(__i)                                       \
+               memcpy((pcpudst)+__per_cpu_offset(__i),         \
+                      (src), (size));                          \
 } while (0)
 #else /* ! SMP */
 
 
 #define percpu_modcopy(pcpudst, src, size)                     \
 do {                                                           \
        unsigned int __i;                                       \
-       for (__i = 0; __i < NR_CPUS; __i++)                     \
-               if (cpu_possible(__i))                          \
-                       memcpy((pcpudst)+__per_cpu_offset(__i), \
-                              (src), (size));                  \
+       for_each_cpu(__i)                                       \
+               memcpy((pcpudst)+__per_cpu_offset(__i),         \
+                      (src), (size));                          \
 } while (0)
 
 extern void setup_per_cpu_areas(void);
 
 ({                                                                     \
        typeof(gendiskp->dkstats->field) res = 0;                       \
        int i;                                                          \
-       for (i=0; i < NR_CPUS; i++) {                                   \
-               if (!cpu_possible(i))                                   \
-                       continue;                                       \
+       for_each_cpu(i)                                                 \
                res += per_cpu_ptr(gendiskp->dkstats, i)->field;        \
-       }                                                               \
        res;                                                            \
 })
 
 static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)      {
        int i;
-       for (i=0; i < NR_CPUS; i++) {
-               if (cpu_possible(i)) {
-                       memset(per_cpu_ptr(gendiskp->dkstats, i), value,        
-                                       sizeof (struct disk_stats));
-               }
-       }
+       for_each_cpu(i)
+               memset(per_cpu_ptr(gendiskp->dkstats, i), value,
+                               sizeof (struct disk_stats));
 }              
                                
 #else