1 /*
2  *  arch/s390/kernel/smp.c
3  *
4  *    Copyright IBM Corp. 1999,2007
5  *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
6  *               Martin Schwidefsky (schwidefsky@de.ibm.com)
7  *               Heiko Carstens (heiko.carstens@de.ibm.com)
8  *
9  *  based on other smp stuff by
10  *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
11  *    (c) 1998 Ingo Molnar
12  *
13  * We work with logical cpu numbering everywhere we can. The only
14  * functions using the real cpu address (obtained from STAP) are the sigp
15  * functions. For all other functions we use the identity mapping.
16  * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is
17  * used e.g. to find the idle task belonging to a logical cpu. Every array
18  * in the kernel is sorted by the logical cpu number and not by the physical
19  * one, which is what causes all the confusion with __cpu_logical_map and
20  * cpu_number_map in other architectures.
21  */
22
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/mm.h>
26 #include <linux/err.h>
27 #include <linux/spinlock.h>
28 #include <linux/kernel_stat.h>
29 #include <linux/delay.h>
30 #include <linux/cache.h>
31 #include <linux/interrupt.h>
32 #include <linux/cpu.h>
33 #include <linux/timex.h>
34 #include <linux/bootmem.h>
35 #include <asm/ipl.h>
36 #include <asm/setup.h>
37 #include <asm/sigp.h>
38 #include <asm/pgalloc.h>
39 #include <asm/irq.h>
40 #include <asm/s390_ext.h>
41 #include <asm/cpcmd.h>
42 #include <asm/tlbflush.h>
43 #include <asm/timer.h>
44 #include <asm/lowcore.h>
45 #include <asm/sclp.h>
46 #include <asm/cpu.h>
47 #include "entry.h"
48
49 /*
50  * An array with a pointer to the lowcore of every CPU.
51  */
52 struct _lowcore *lowcore_ptr[NR_CPUS];
53 EXPORT_SYMBOL(lowcore_ptr);
54
55 static struct task_struct *current_set[NR_CPUS];
56
57 static u8 smp_cpu_type;
58 static int smp_use_sigp_detection;
59
60 enum s390_cpu_state {
61         CPU_STATE_STANDBY,
62         CPU_STATE_CONFIGURED,
63 };
64
65 DEFINE_MUTEX(smp_cpu_state_mutex);
66 int smp_cpu_polarization[NR_CPUS];
67 static int smp_cpu_state[NR_CPUS];
68 static int cpu_management;
69
70 static DEFINE_PER_CPU(struct cpu, cpu_devices);
71
72 static void smp_ext_bitcall(int, ec_bit_sig);
73
74 /*
75  * Structure and data for __smp_call_function_map(). This is designed to
76  * minimise static memory requirements. It also looks cleaner.
77  */
78 static DEFINE_SPINLOCK(call_lock);
79
80 struct call_data_struct {
81         void (*func) (void *info);
82         void *info;
83         cpumask_t started;
84         cpumask_t finished;
85         int wait;
86 };
87
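/*
 * Calling protocol: the initiator publishes a call_data_struct through
 * call_data and sends an ec_call_function external call to each target
 * cpu. Each target acknowledges in ->started before running ->func and,
 * if ->wait is set, in ->finished afterwards, so the initiator can spin
 * until every cpu in the map has checked in.
 */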
88 static struct call_data_struct *call_data;
89
90 /*
91  * 'Call function' interrupt callback
92  */
93 static void do_call_function(void)
94 {
95         void (*func) (void *info) = call_data->func;
96         void *info = call_data->info;
97         int wait = call_data->wait;
98
99         cpu_set(smp_processor_id(), call_data->started);
100         (*func)(info);
101         if (wait)
102                 cpu_set(smp_processor_id(), call_data->finished);
103 }
104
105 static void __smp_call_function_map(void (*func) (void *info), void *info,
106                                     int wait, cpumask_t map)
107 {
108         struct call_data_struct data;
109         int cpu, local = 0;
110
111         /*
112          * Can deadlock when interrupts are disabled or if in wrong context.
113          */
114         WARN_ON(irqs_disabled() || in_irq());
115
116         /*
117          * Check for local function call. We have to have the same call order
118          * as in on_each_cpu() because of machine_restart_smp().
119          */
120         if (cpu_isset(smp_processor_id(), map)) {
121                 local = 1;
122                 cpu_clear(smp_processor_id(), map);
123         }
124
125         cpus_and(map, map, cpu_online_map);
126         if (cpus_empty(map))
127                 goto out;
128
129         data.func = func;
130         data.info = info;
131         data.started = CPU_MASK_NONE;
132         data.wait = wait;
133         if (wait)
134                 data.finished = CPU_MASK_NONE;
135
136         call_data = &data;
137
138         for_each_cpu_mask(cpu, map)
139                 smp_ext_bitcall(cpu, ec_call_function);
140
141         /* Wait for response */
142         while (!cpus_equal(map, data.started))
143                 cpu_relax();
144         if (wait)
145                 while (!cpus_equal(map, data.finished))
146                         cpu_relax();
147 out:
148         if (local) {
149                 local_irq_disable();
150                 func(info);
151                 local_irq_enable();
152         }
153 }
154
155 /*
156  * smp_call_function:
157  * @func: the function to run; this must be fast and non-blocking
158  * @info: an arbitrary pointer to pass to the function
159  * @wait: if true, wait (atomically) until function has completed on other CPUs
160  *
161  * Run a function on all other CPUs.
162  *
163  * You must not call this function with disabled interrupts, from a
164  * hardware interrupt handler or from a bottom half.
165  */
166 int smp_call_function(void (*func) (void *info), void *info, int wait)
167 {
168         cpumask_t map;
169
170         spin_lock(&call_lock);
171         map = cpu_online_map;
172         cpu_clear(smp_processor_id(), map);
173         __smp_call_function_map(func, info, wait, map);
174         spin_unlock(&call_lock);
175         return 0;
176 }
177 EXPORT_SYMBOL(smp_call_function);
178
179 /*
180  * smp_call_function_single:
181  * @cpu: the CPU where func should run
182  * @func: the function to run; this must be fast and non-blocking
183  * @info: an arbitrary pointer to pass to the function
184  * @wait: if true, wait (atomically) until function has completed on the target CPU
185  *
186  * Run a function on one processor.
187  *
188  * You must not call this function with disabled interrupts, from a
189  * hardware interrupt handler or from a bottom half.
190  */
191 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
192                              int wait)
193 {
194         spin_lock(&call_lock);
195         __smp_call_function_map(func, info, wait, cpumask_of_cpu(cpu));
196         spin_unlock(&call_lock);
197         return 0;
198 }
199 EXPORT_SYMBOL(smp_call_function_single);
200
201 /**
202  * smp_call_function_mask(): Run a function on a set of other CPUs.
203  * @mask: The set of cpus to run on.  Must not include the current cpu.
204  * @func: The function to run. This must be fast and non-blocking.
205  * @info: An arbitrary pointer to pass to the function.
206  * @wait: If true, wait (atomically) until function has completed on other CPUs.
207  *
208  * Returns 0 on success, else a negative status code.
209  *
210  * If @wait is true, then returns once @func has returned; otherwise
211  * it returns just before the target cpu calls @func.
212  *
213  * You must not call this function with disabled interrupts or from a
214  * hardware interrupt handler or from a bottom half handler.
215  */
216 int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
217                            int wait)
218 {
219         spin_lock(&call_lock);
220         cpu_clear(smp_processor_id(), mask);
221         __smp_call_function_map(func, info, wait, mask);
222         spin_unlock(&call_lock);
223         return 0;
224 }
225 EXPORT_SYMBOL(smp_call_function_mask);
226
227 void smp_send_stop(void)
228 {
229         int cpu, rc;
230
231         /* Disable all interrupts/machine checks */
232         __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK);
233
234         /* write magic number to zero page (absolute 0) */
235         lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC;
236
237         /* stop all processors */
238         for_each_online_cpu(cpu) {
239                 if (cpu == smp_processor_id())
240                         continue;
241                 do {
242                         rc = signal_processor(cpu, sigp_stop);
243                 } while (rc == sigp_busy);
244
245                 while (!smp_cpu_not_running(cpu))
246                         cpu_relax();
247         }
248 }
249
250 /*
251  * This is the main routine where commands issued by other
252  * cpus are handled.
253  */
254
255 static void do_ext_call_interrupt(__u16 code)
256 {
257         unsigned long bits;
258
259         /*
260          * handle bit signal external calls
261          *
262          * For the ec_schedule signal we have to do nothing. All the work
263          * is done automatically when we return from the interrupt.
264          */
265         bits = xchg(&S390_lowcore.ext_call_fast, 0);
266
267         if (test_bit(ec_call_function, &bits))
268                 do_call_function();
269 }
270
271 /*
272  * Send an external call sigp to another cpu and return without waiting
273  * for its completion.
274  */
275 static void smp_ext_bitcall(int cpu, ec_bit_sig sig)
276 {
277         /*
278          * Set signaling bit in lowcore of target cpu and kick it
279          */
280         set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast);
281         while (signal_processor(cpu, sigp_emergency_signal) == sigp_busy)
282                 udelay(10);
283 }
284
285 #ifndef CONFIG_64BIT
286 /*
287  * Flush the TLB on the local cpu; smp_ptlb_all() runs this on every cpu.
288  */
289 static void smp_ptlb_callback(void *info)
290 {
291         __tlb_flush_local();
292 }
293
294 void smp_ptlb_all(void)
295 {
296         on_each_cpu(smp_ptlb_callback, NULL, 1);
297 }
298 EXPORT_SYMBOL(smp_ptlb_all);
299 #endif /* ! CONFIG_64BIT */
300
301 /*
302  * this function sends a 'reschedule' IPI to another CPU.
303  * it goes straight through and wastes no time serializing
304  * anything. Worst case is that we lose a reschedule ...
305  */
306 void smp_send_reschedule(int cpu)
307 {
308         smp_ext_bitcall(cpu, ec_schedule);
309 }
310
311 /*
312  * parameter area for the set/clear control bit callbacks
313  */
314 struct ec_creg_mask_parms {
315         unsigned long orvals[16];
316         unsigned long andvals[16];
317 };
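
/*
 * smp_ctl_bit_callback() below rewrites each control register as
 * (old & andvals[i]) | orvals[i]: orvals selects bits to set, cleared
 * bits in andvals select bits to clear, and all other bits are left
 * untouched.
 */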
318
319 /*
320  * callback for setting/clearing control bits
321  */
322 static void smp_ctl_bit_callback(void *info)
323 {
324         struct ec_creg_mask_parms *pp = info;
325         unsigned long cregs[16];
326         int i;
327
328         __ctl_store(cregs, 0, 15);
329         for (i = 0; i <= 15; i++)
330                 cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i];
331         __ctl_load(cregs, 0, 15);
332 }
333
334 /*
335  * Set a bit in a control register of all cpus
336  */
337 void smp_ctl_set_bit(int cr, int bit)
338 {
339         struct ec_creg_mask_parms parms;
340
341         memset(&parms.orvals, 0, sizeof(parms.orvals));
342         memset(&parms.andvals, 0xff, sizeof(parms.andvals));
343         parms.orvals[cr] = 1 << bit;
344         on_each_cpu(smp_ctl_bit_callback, &parms, 1);
345 }
346 EXPORT_SYMBOL(smp_ctl_set_bit);
347
348 /*
349  * Clear a bit in a control register of all cpus
350  */
351 void smp_ctl_clear_bit(int cr, int bit)
352 {
353         struct ec_creg_mask_parms parms;
354
355         memset(&parms.orvals, 0, sizeof(parms.orvals));
356         memset(&parms.andvals, 0xff, sizeof(parms.andvals));
357         parms.andvals[cr] = ~(1L << bit);
358         on_each_cpu(smp_ctl_bit_callback, &parms, 1);
359 }
360 EXPORT_SYMBOL(smp_ctl_clear_bit);
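
/*
 * Illustrative use: smp_ctl_set_bit(0, 13) ORs the mask 1 << 13 into
 * control register 0 on every online cpu; smp_ctl_clear_bit(0, 13)
 * ANDs the same bit back out again.
 */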
361
362 /*
363  * In early ipl state a temporary logical cpu number is needed, so that the
364  * sigp functions can be used to sense other cpus. Since NR_CPUS is >= 2 with
365  * CONFIG_SMP and the ipl cpu is logical cpu 0, logical cpu 1 is used.
366  */
367 #define CPU_INIT_NO     1
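
/*
 * The sigp wrappers translate a logical cpu number into a physical cpu
 * address through __cpu_logical_map[], so early detection code stores
 * the address it wants to probe in slot CPU_INIT_NO before signalling
 * (see smp_get_save_area() and smp_detect_cpus()).
 */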
368
369 #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
370
371 /*
372  * zfcpdump_prefix_array holds prefix registers for the following scenario:
373  * a 64 bit zfcpdump kernel dumping a 31 bit kernel. We have to save the
374  * prefix registers of the 31 bit kernel, since they get lost when switching
375  * from 31 bit to 64 bit.
376  */
377 unsigned int zfcpdump_prefix_array[NR_CPUS + 1] \
378         __attribute__((__section__(".data")));
379
380 static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu)
381 {
382         if (ipl_info.type != IPL_TYPE_FCP_DUMP)
383                 return;
384         if (cpu >= NR_CPUS) {
385                 printk(KERN_WARNING "Registers for cpu %i not saved since dump "
386                        "kernel was compiled with NR_CPUS=%i\n", cpu, NR_CPUS);
387                 return;
388         }
389         zfcpdump_save_areas[cpu] = kmalloc(sizeof(union save_area), GFP_KERNEL);
        if (!zfcpdump_save_areas[cpu])
                return;
390         __cpu_logical_map[CPU_INIT_NO] = (__u16) phy_cpu;
391         while (signal_processor(CPU_INIT_NO, sigp_stop_and_store_status) ==
392                sigp_busy)
393                 cpu_relax();
394         memcpy(zfcpdump_save_areas[cpu],
395                (void *)(unsigned long) store_prefix() + SAVE_AREA_BASE,
396                SAVE_AREA_SIZE);
397 #ifdef CONFIG_64BIT
398         /* copy original prefix register */
399         zfcpdump_save_areas[cpu]->s390x.pref_reg = zfcpdump_prefix_array[cpu];
400 #endif
401 }
402
403 union save_area *zfcpdump_save_areas[NR_CPUS + 1];
404 EXPORT_SYMBOL_GPL(zfcpdump_save_areas);
405
406 #else
407
408 static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { }
409
410 #endif /* CONFIG_ZFCPDUMP || CONFIG_ZFCPDUMP_MODULE */
411
412 static int cpu_stopped(int cpu)
413 {
414         __u32 status;
415
416         /* Check for stopped state */
417         if (signal_processor_ps(&status, 0, cpu, sigp_sense) ==
418             sigp_status_stored) {
419                 if (status & 0x40)
420                         return 1;
421         }
422         return 0;
423 }
424
425 static int cpu_known(int cpu_id)
426 {
427         int cpu;
428
429         for_each_present_cpu(cpu) {
430                 if (__cpu_logical_map[cpu] == cpu_id)
431                         return 1;
432         }
433         return 0;
434 }
435
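/*
 * Brute-force detection: probe every possible cpu address with sigp
 * sense and add each stopped, previously unknown cpu to the present
 * map. Used when the sclp interface is not available.
 */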
436 static int smp_rescan_cpus_sigp(cpumask_t avail)
437 {
438         int cpu_id, logical_cpu;
439
440         logical_cpu = first_cpu(avail);
441         if (logical_cpu == NR_CPUS)
442                 return 0;
443         for (cpu_id = 0; cpu_id <= 65535; cpu_id++) {
444                 if (cpu_known(cpu_id))
445                         continue;
446                 __cpu_logical_map[logical_cpu] = cpu_id;
447                 smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
448                 if (!cpu_stopped(logical_cpu))
449                         continue;
450                 cpu_set(logical_cpu, cpu_present_map);
451                 smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED;
452                 logical_cpu = next_cpu(logical_cpu, avail);
453                 if (logical_cpu == NR_CPUS)
454                         break;
455         }
456         return 0;
457 }
458
459 static int smp_rescan_cpus_sclp(cpumask_t avail)
460 {
461         struct sclp_cpu_info *info;
462         int cpu_id, logical_cpu, cpu;
463         int rc;
464
465         logical_cpu = first_cpu(avail);
466         if (logical_cpu == NR_CPUS)
467                 return 0;
468         info = kmalloc(sizeof(*info), GFP_KERNEL);
469         if (!info)
470                 return -ENOMEM;
471         rc = sclp_get_cpu_info(info);
472         if (rc)
473                 goto out;
474         for (cpu = 0; cpu < info->combined; cpu++) {
475                 if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type)
476                         continue;
477                 cpu_id = info->cpu[cpu].address;
478                 if (cpu_known(cpu_id))
479                         continue;
480                 __cpu_logical_map[logical_cpu] = cpu_id;
481                 smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
482                 cpu_set(logical_cpu, cpu_present_map);
483                 if (cpu >= info->configured)
484                         smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY;
485                 else
486                         smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED;
487                 logical_cpu = next_cpu(logical_cpu, avail);
488                 if (logical_cpu == NR_CPUS)
489                         break;
490         }
491 out:
492         kfree(info);
493         return rc;
494 }
495
496 static int __smp_rescan_cpus(void)
497 {
498         cpumask_t avail;
499
500         cpus_xor(avail, cpu_possible_map, cpu_present_map);
501         if (smp_use_sigp_detection)
502                 return smp_rescan_cpus_sigp(avail);
503         else
504                 return smp_rescan_cpus_sclp(avail);
505 }
506
507 static void __init smp_detect_cpus(void)
508 {
509         unsigned int cpu, c_cpus, s_cpus;
510         struct sclp_cpu_info *info;
511         u16 boot_cpu_addr, cpu_addr;
512
513         c_cpus = 1;
514         s_cpus = 0;
515         boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr;
516         info = kmalloc(sizeof(*info), GFP_KERNEL);
517         if (!info)
518                 panic("smp_detect_cpus failed to allocate memory\n");
519         /* Use sigp detection algorithm if sclp doesn't work. */
520         if (sclp_get_cpu_info(info)) {
521                 smp_use_sigp_detection = 1;
522                 for (cpu = 0; cpu <= 65535; cpu++) {
523                         if (cpu == boot_cpu_addr)
524                                 continue;
525                         __cpu_logical_map[CPU_INIT_NO] = cpu;
526                         if (!cpu_stopped(CPU_INIT_NO))
527                                 continue;
528                         smp_get_save_area(c_cpus, cpu);
529                         c_cpus++;
530                 }
531                 goto out;
532         }
533
534         if (info->has_cpu_type) {
535                 for (cpu = 0; cpu < info->combined; cpu++) {
536                         if (info->cpu[cpu].address == boot_cpu_addr) {
537                                 smp_cpu_type = info->cpu[cpu].type;
538                                 break;
539                         }
540                 }
541         }
542
543         for (cpu = 0; cpu < info->combined; cpu++) {
544                 if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type)
545                         continue;
546                 cpu_addr = info->cpu[cpu].address;
547                 if (cpu_addr == boot_cpu_addr)
548                         continue;
549                 __cpu_logical_map[CPU_INIT_NO] = cpu_addr;
550                 if (!cpu_stopped(CPU_INIT_NO)) {
551                         s_cpus++;
552                         continue;
553                 }
554                 smp_get_save_area(c_cpus, cpu_addr);
555                 c_cpus++;
556         }
557 out:
558         kfree(info);
559         printk(KERN_INFO "CPUs: %d configured, %d standby\n", c_cpus, s_cpus);
560         get_online_cpus();
561         __smp_rescan_cpus();
562         put_online_cpus();
563 }
564
565 /*
566  *      Activate a secondary processor.
567  */
568 int __cpuinit start_secondary(void *cpuvoid)
569 {
570         /* Setup the cpu */
571         cpu_init();
572         preempt_disable();
573         /* Enable TOD clock interrupts on the secondary cpu. */
574         init_cpu_timer();
575 #ifdef CONFIG_VIRT_TIMER
576         /* Enable cpu timer interrupts on the secondary cpu. */
577         init_cpu_vtimer();
578 #endif
579         /* Enable pfault pseudo page faults on this cpu. */
580         pfault_init();
581
582         /* call cpu notifiers */
583         notify_cpu_starting(smp_processor_id());
584         /* Mark this cpu as online */
585         spin_lock(&call_lock);
586         cpu_set(smp_processor_id(), cpu_online_map);
587         spin_unlock(&call_lock);
588         /* Switch on interrupts */
589         local_irq_enable();
590         /* Print info about this processor */
591         print_cpu_info(&S390_lowcore.cpu_data);
592         /* cpu_idle will call schedule for us */
593         cpu_idle();
594         return 0;
595 }
596
597 static void __init smp_create_idle(unsigned int cpu)
598 {
599         struct task_struct *p;
600
601         /*
602          *  don't care about the psw and regs settings since we'll never
603          *  reschedule the forked task.
604          */
605         p = fork_idle(cpu);
606         if (IS_ERR(p))
607                 panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
608         current_set[cpu] = p;
609 }
610
611 static int __cpuinit smp_alloc_lowcore(int cpu)
612 {
613         unsigned long async_stack, panic_stack;
614         struct _lowcore *lowcore;
615         int lc_order;
616
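        /* The lowcore occupies 8KB (order 1) on 64 bit, 4KB (order 0) on 31 bit. */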
617         lc_order = sizeof(long) == 8 ? 1 : 0;
618         lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
619         if (!lowcore)
620                 return -ENOMEM;
621         async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
622         panic_stack = __get_free_page(GFP_KERNEL);
623         if (!panic_stack || !async_stack)
624                 goto out;
625         memcpy(lowcore, &S390_lowcore, 512);
626         memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512);
627         lowcore->async_stack = async_stack + ASYNC_SIZE;
628         lowcore->panic_stack = panic_stack + PAGE_SIZE;
629
630 #ifndef CONFIG_64BIT
631         if (MACHINE_HAS_IEEE) {
632                 unsigned long save_area;
633
634                 save_area = get_zeroed_page(GFP_KERNEL);
635                 if (!save_area)
636                         goto out_save_area;
637                 lowcore->extended_save_area_addr = (u32) save_area;
638         }
639 #endif
640         lowcore_ptr[cpu] = lowcore;
641         return 0;
642
643 #ifndef CONFIG_64BIT
644 out_save_area:
645 #endif
646 out:
647         free_page(panic_stack);
648         free_pages(async_stack, ASYNC_ORDER);
649         free_pages((unsigned long) lowcore, lc_order);
650         return -ENOMEM;
651 }
652
653 #ifdef CONFIG_HOTPLUG_CPU
654 static void smp_free_lowcore(int cpu)
655 {
656         struct _lowcore *lowcore;
657         int lc_order;
658
659         lc_order = sizeof(long) == 8 ? 1 : 0;
660         lowcore = lowcore_ptr[cpu];
661 #ifndef CONFIG_64BIT
662         if (MACHINE_HAS_IEEE)
663                 free_page((unsigned long) lowcore->extended_save_area_addr);
664 #endif
665         free_page(lowcore->panic_stack - PAGE_SIZE);
666         free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
667         free_pages((unsigned long) lowcore, lc_order);
668         lowcore_ptr[cpu] = NULL;
669 }
670 #endif /* CONFIG_HOTPLUG_CPU */
671
672 /* Upping and downing of CPUs */
673 int __cpuinit __cpu_up(unsigned int cpu)
674 {
675         struct task_struct *idle;
676         struct _lowcore *cpu_lowcore;
677         struct stack_frame *sf;
678         sigp_ccode ccode;
679
680         if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED)
681                 return -EIO;
682         if (smp_alloc_lowcore(cpu))
683                 return -ENOMEM;
684
685         ccode = signal_processor_p((__u32)(unsigned long)(lowcore_ptr[cpu]),
686                                    cpu, sigp_set_prefix);
687         if (ccode) {
688                 printk("sigp_set_prefix failed for cpu %d "
689                        "with condition code %d\n",
690                        (int) cpu, (int) ccode);
691                 return -EIO;
692         }
693
694         idle = current_set[cpu];
695         cpu_lowcore = lowcore_ptr[cpu];
696         cpu_lowcore->kernel_stack = (unsigned long)
697                 task_stack_page(idle) + THREAD_SIZE;
698         cpu_lowcore->thread_info = (unsigned long) task_thread_info(idle);
699         sf = (struct stack_frame *) (cpu_lowcore->kernel_stack
700                                      - sizeof(struct pt_regs)
701                                      - sizeof(struct stack_frame));
702         memset(sf, 0, sizeof(struct stack_frame));
703         sf->gprs[9] = (unsigned long) sf;
704         cpu_lowcore->save_area[15] = (unsigned long) sf;
705         __ctl_store(cpu_lowcore->cregs_save_area, 0, 15);
706         asm volatile(
707                 "       stam    0,15,0(%0)"
708                 : : "a" (&cpu_lowcore->access_regs_save_area) : "memory");
709         cpu_lowcore->percpu_offset = __per_cpu_offset[cpu];
710         cpu_lowcore->current_task = (unsigned long) idle;
711         cpu_lowcore->cpu_data.cpu_nr = cpu;
712         cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce;
713         cpu_lowcore->ipl_device = S390_lowcore.ipl_device;
714         eieio();
715
716         while (signal_processor(cpu, sigp_restart) == sigp_busy)
717                 udelay(10);
718
719         while (!cpu_online(cpu))
720                 cpu_relax();
721         return 0;
722 }
723
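/*
 * "possible_cpus=<n>" on the kernel command line caps cpu_possible_map
 * at n cpus; cpus beyond that limit can never be brought online, even
 * if they are detected later.
 */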
724 static int __init setup_possible_cpus(char *s)
725 {
726         int pcpus, cpu;
727
728         pcpus = simple_strtoul(s, NULL, 0);
729         cpu_possible_map = cpumask_of_cpu(0);
730         for (cpu = 1; cpu < pcpus && cpu < NR_CPUS; cpu++)
731                 cpu_set(cpu, cpu_possible_map);
732         return 0;
733 }
734 early_param("possible_cpus", setup_possible_cpus);
735
736 #ifdef CONFIG_HOTPLUG_CPU
737
738 int __cpu_disable(void)
739 {
740         struct ec_creg_mask_parms cr_parms;
741         int cpu = smp_processor_id();
742
743         cpu_clear(cpu, cpu_online_map);
744
745         /* Disable pfault pseudo page faults on this cpu. */
746         pfault_fini();
747
748         memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals));
749         memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals));
750
751         /* disable all external interrupts */
752         cr_parms.orvals[0] = 0;
753         cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 12 |
754                                 1 << 11 | 1 << 10 | 1 <<  6 | 1 <<  4);
755         /* disable all I/O interrupts */
756         cr_parms.orvals[6] = 0;
757         cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 |
758                                 1 << 27 | 1 << 26 | 1 << 25 | 1 << 24);
759         /* disable most machine checks */
760         cr_parms.orvals[14] = 0;
761         cr_parms.andvals[14] = ~(1 << 28 | 1 << 27 | 1 << 26 |
762                                  1 << 25 | 1 << 24);
763
764         smp_ctl_bit_callback(&cr_parms);
765
766         return 0;
767 }
768
769 void __cpu_die(unsigned int cpu)
770 {
771         /* Wait until target cpu is down */
772         while (!smp_cpu_not_running(cpu))
773                 cpu_relax();
774         smp_free_lowcore(cpu);
775         printk(KERN_INFO "Processor %d spun down\n", cpu);
776 }
777
778 void cpu_die(void)
779 {
780         idle_task_exit();
781         signal_processor(smp_processor_id(), sigp_stop);
782         BUG();
783         for (;;);
784 }
785
786 #endif /* CONFIG_HOTPLUG_CPU */
787
788 void __init smp_prepare_cpus(unsigned int max_cpus)
789 {
790 #ifndef CONFIG_64BIT
791         unsigned long save_area = 0;
792 #endif
793         unsigned long async_stack, panic_stack;
794         struct _lowcore *lowcore;
795         unsigned int cpu;
796         int lc_order;
797
798         smp_detect_cpus();
799
800         /* request the 0x1201 emergency signal external interrupt */
801         if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
802                 panic("Couldn't request external interrupt 0x1201");
803         print_cpu_info(&S390_lowcore.cpu_data);
804
805         /* Reallocate current lowcore, but keep its contents. */
806         lc_order = sizeof(long) == 8 ? 1 : 0;
807         lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
808         panic_stack = __get_free_page(GFP_KERNEL);
809         async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
810 #ifndef CONFIG_64BIT
811         if (MACHINE_HAS_IEEE)
812                 save_area = get_zeroed_page(GFP_KERNEL);
813 #endif
814         local_irq_disable();
815         local_mcck_disable();
816         lowcore_ptr[smp_processor_id()] = lowcore;
817         *lowcore = S390_lowcore;
818         lowcore->panic_stack = panic_stack + PAGE_SIZE;
819         lowcore->async_stack = async_stack + ASYNC_SIZE;
820 #ifndef CONFIG_64BIT
821         if (MACHINE_HAS_IEEE)
822                 lowcore->extended_save_area_addr = (u32) save_area;
823 #endif
824         set_prefix((u32)(unsigned long) lowcore);
825         local_mcck_enable();
826         local_irq_enable();
827         for_each_possible_cpu(cpu)
828                 if (cpu != smp_processor_id())
829                         smp_create_idle(cpu);
830 }
831
832 void __init smp_prepare_boot_cpu(void)
833 {
834         BUG_ON(smp_processor_id() != 0);
835
836         current_thread_info()->cpu = 0;
837         cpu_set(0, cpu_present_map);
838         cpu_set(0, cpu_online_map);
839         S390_lowcore.percpu_offset = __per_cpu_offset[0];
840         current_set[0] = current;
841         smp_cpu_state[0] = CPU_STATE_CONFIGURED;
842         smp_cpu_polarization[0] = POLARIZATION_UNKNWN;
843 }
844
845 void __init smp_cpus_done(unsigned int max_cpus)
846 {
847 }
848
849 /*
850  * the frequency of the profiling timer can be changed
851  * by writing a multiplier value into /proc/profile.
852  *
853  * usually you want to run this on all CPUs ;)
854  */
855 int setup_profiling_timer(unsigned int multiplier)
856 {
857         return 0;
858 }
859
860 #ifdef CONFIG_HOTPLUG_CPU
861 static ssize_t cpu_configure_show(struct sys_device *dev,
862                                 struct sysdev_attribute *attr, char *buf)
863 {
864         ssize_t count;
865
866         mutex_lock(&smp_cpu_state_mutex);
867         count = sprintf(buf, "%d\n", smp_cpu_state[dev->id]);
868         mutex_unlock(&smp_cpu_state_mutex);
869         return count;
870 }
871
872 static ssize_t cpu_configure_store(struct sys_device *dev,
873                                   struct sysdev_attribute *attr,
874                                   const char *buf, size_t count)
875 {
876         int cpu = dev->id;
877         int val, rc;
878         char delim;
879
880         if (sscanf(buf, "%d %c", &val, &delim) != 1)
881                 return -EINVAL;
882         if (val != 0 && val != 1)
883                 return -EINVAL;
884
885         get_online_cpus();
886         mutex_lock(&smp_cpu_state_mutex);
887         rc = -EBUSY;
888         if (cpu_online(cpu))
889                 goto out;
890         rc = 0;
891         switch (val) {
892         case 0:
893                 if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) {
894                         rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]);
895                         if (!rc) {
896                                 smp_cpu_state[cpu] = CPU_STATE_STANDBY;
897                                 smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
898                         }
899                 }
900                 break;
901         case 1:
902                 if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) {
903                         rc = sclp_cpu_configure(__cpu_logical_map[cpu]);
904                         if (!rc) {
905                                 smp_cpu_state[cpu] = CPU_STATE_CONFIGURED;
906                                 smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
907                         }
908                 }
909                 break;
910         default:
911                 break;
912         }
913 out:
914         mutex_unlock(&smp_cpu_state_mutex);
915         put_online_cpus();
916         return rc ? rc : count;
917 }
918 static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
919 #endif /* CONFIG_HOTPLUG_CPU */
920
921 static ssize_t cpu_polarization_show(struct sys_device *dev,
922                                      struct sysdev_attribute *attr, char *buf)
923 {
924         int cpu = dev->id;
925         ssize_t count;
926
927         mutex_lock(&smp_cpu_state_mutex);
928         switch (smp_cpu_polarization[cpu]) {
929         case POLARIZATION_HRZ:
930                 count = sprintf(buf, "horizontal\n");
931                 break;
932         case POLARIZATION_VL:
933                 count = sprintf(buf, "vertical:low\n");
934                 break;
935         case POLARIZATION_VM:
936                 count = sprintf(buf, "vertical:medium\n");
937                 break;
938         case POLARIZATION_VH:
939                 count = sprintf(buf, "vertical:high\n");
940                 break;
941         default:
942                 count = sprintf(buf, "unknown\n");
943                 break;
944         }
945         mutex_unlock(&smp_cpu_state_mutex);
946         return count;
947 }
948 static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL);
949
950 static ssize_t show_cpu_address(struct sys_device *dev,
951                                 struct sysdev_attribute *attr, char *buf)
952 {
953         return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
954 }
955 static SYSDEV_ATTR(address, 0444, show_cpu_address, NULL);
956
957
958 static struct attribute *cpu_common_attrs[] = {
959 #ifdef CONFIG_HOTPLUG_CPU
960         &attr_configure.attr,
961 #endif
962         &attr_address.attr,
963         &attr_polarization.attr,
964         NULL,
965 };
966
967 static struct attribute_group cpu_common_attr_group = {
968         .attrs = cpu_common_attrs,
969 };
970
971 static ssize_t show_capability(struct sys_device *dev,
972                                 struct sysdev_attribute *attr, char *buf)
973 {
974         unsigned int capability;
975         int rc;
976
977         rc = get_cpu_capability(&capability);
978         if (rc)
979                 return rc;
980         return sprintf(buf, "%u\n", capability);
981 }
982 static SYSDEV_ATTR(capability, 0444, show_capability, NULL);
983
984 static ssize_t show_idle_count(struct sys_device *dev,
985                                 struct sysdev_attribute *attr, char *buf)
986 {
987         struct s390_idle_data *idle;
988         unsigned long long idle_count;
989
990         idle = &per_cpu(s390_idle, dev->id);
991         spin_lock_irq(&idle->lock);
992         idle_count = idle->idle_count;
993         spin_unlock_irq(&idle->lock);
994         return sprintf(buf, "%llu\n", idle_count);
995 }
996 static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
997
998 static ssize_t show_idle_time(struct sys_device *dev,
999                                 struct sysdev_attribute *attr, char *buf)
1000 {
1001         struct s390_idle_data *idle;
1002         unsigned long long new_time;
1003
1004         idle = &per_cpu(s390_idle, dev->id);
1005         spin_lock_irq(&idle->lock);
1006         if (idle->in_idle) {
1007                 new_time = get_clock();
1008                 idle->idle_time += new_time - idle->idle_enter;
1009                 idle->idle_enter = new_time;
1010         }
1011         new_time = idle->idle_time;
1012         spin_unlock_irq(&idle->lock);
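        /* get_clock() returns TOD clock format: 4096 units per microsecond. */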
1013         return sprintf(buf, "%llu\n", new_time >> 12);
1014 }
1015 static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
1016
1017 static struct attribute *cpu_online_attrs[] = {
1018         &attr_capability.attr,
1019         &attr_idle_count.attr,
1020         &attr_idle_time_us.attr,
1021         NULL,
1022 };
1023
1024 static struct attribute_group cpu_online_attr_group = {
1025         .attrs = cpu_online_attrs,
1026 };
1027
1028 static int __cpuinit smp_cpu_notify(struct notifier_block *self,
1029                                     unsigned long action, void *hcpu)
1030 {
1031         unsigned int cpu = (unsigned int)(long)hcpu;
1032         struct cpu *c = &per_cpu(cpu_devices, cpu);
1033         struct sys_device *s = &c->sysdev;
1034         struct s390_idle_data *idle;
1035
1036         switch (action) {
1037         case CPU_ONLINE:
1038         case CPU_ONLINE_FROZEN:
1039                 idle = &per_cpu(s390_idle, cpu);
1040                 spin_lock_irq(&idle->lock);
1041                 idle->idle_enter = 0;
1042                 idle->idle_time = 0;
1043                 idle->idle_count = 0;
1044                 spin_unlock_irq(&idle->lock);
1045                 if (sysfs_create_group(&s->kobj, &cpu_online_attr_group))
1046                         return NOTIFY_BAD;
1047                 break;
1048         case CPU_DEAD:
1049         case CPU_DEAD_FROZEN:
1050                 sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
1051                 break;
1052         }
1053         return NOTIFY_OK;
1054 }
1055
1056 static struct notifier_block __cpuinitdata smp_cpu_nb = {
1057         .notifier_call = smp_cpu_notify,
1058 };
1059
1060 static int __devinit smp_add_present_cpu(int cpu)
1061 {
1062         struct cpu *c = &per_cpu(cpu_devices, cpu);
1063         struct sys_device *s = &c->sysdev;
1064         int rc;
1065
1066         c->hotpluggable = 1;
1067         rc = register_cpu(c, cpu);
1068         if (rc)
1069                 goto out;
1070         rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
1071         if (rc)
1072                 goto out_cpu;
1073         if (!cpu_online(cpu))
1074                 goto out;
1075         rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
1076         if (!rc)
1077                 return 0;
1078         sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
1079 out_cpu:
1080 #ifdef CONFIG_HOTPLUG_CPU
1081         unregister_cpu(c);
1082 #endif
1083 out:
1084         return rc;
1085 }
1086
1087 #ifdef CONFIG_HOTPLUG_CPU
1088
1089 int __ref smp_rescan_cpus(void)
1090 {
1091         cpumask_t newcpus;
1092         int cpu;
1093         int rc;
1094
1095         get_online_cpus();
1096         mutex_lock(&smp_cpu_state_mutex);
1097         newcpus = cpu_present_map;
1098         rc = __smp_rescan_cpus();
1099         if (rc)
1100                 goto out;
1101         cpus_andnot(newcpus, cpu_present_map, newcpus);
1102         for_each_cpu_mask(cpu, newcpus) {
1103                 rc = smp_add_present_cpu(cpu);
1104                 if (rc)
1105                         cpu_clear(cpu, cpu_present_map);
1106         }
1107         rc = 0;
1108 out:
1109         mutex_unlock(&smp_cpu_state_mutex);
1110         put_online_cpus();
1111         if (!cpus_empty(newcpus))
1112                 topology_schedule_update();
1113         return rc;
1114 }
1115
1116 static ssize_t __ref rescan_store(struct sysdev_class *class, const char *buf,
1117                                   size_t count)
1118 {
1119         int rc;
1120
1121         rc = smp_rescan_cpus();
1122         return rc ? rc : count;
1123 }
1124 static SYSDEV_CLASS_ATTR(rescan, 0200, NULL, rescan_store);
1125 #endif /* CONFIG_HOTPLUG_CPU */
1126
1127 static ssize_t dispatching_show(struct sysdev_class *class, char *buf)
1128 {
1129         ssize_t count;
1130
1131         mutex_lock(&smp_cpu_state_mutex);
1132         count = sprintf(buf, "%d\n", cpu_management);
1133         mutex_unlock(&smp_cpu_state_mutex);
1134         return count;
1135 }
1136
1137 static ssize_t dispatching_store(struct sysdev_class *dev, const char *buf,
1138                                  size_t count)
1139 {
1140         int val, rc;
1141         char delim;
1142
1143         if (sscanf(buf, "%d %c", &val, &delim) != 1)
1144                 return -EINVAL;
1145         if (val != 0 && val != 1)
1146                 return -EINVAL;
1147         rc = 0;
1148         get_online_cpus();
1149         mutex_lock(&smp_cpu_state_mutex);
1150         if (cpu_management == val)
1151                 goto out;
1152         rc = topology_set_cpu_management(val);
1153         if (!rc)
1154                 cpu_management = val;
1155 out:
1156         mutex_unlock(&smp_cpu_state_mutex);
1157         put_online_cpus();
1158         return rc ? rc : count;
1159 }
1160 static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show,
1161                          dispatching_store);
1162
1163 static int __init topology_init(void)
1164 {
1165         int cpu;
1166         int rc;
1167
1168         register_cpu_notifier(&smp_cpu_nb);
1169
1170 #ifdef CONFIG_HOTPLUG_CPU
1171         rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_rescan);
1172         if (rc)
1173                 return rc;
1174 #endif
1175         rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_dispatching);
1176         if (rc)
1177                 return rc;
1178         for_each_present_cpu(cpu) {
1179                 rc = smp_add_present_cpu(cpu);
1180                 if (rc)
1181                         return rc;
1182         }
1183         return 0;
1184 }
1185 subsys_initcall(topology_init);