]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - arch/x86_64/kernel/process.c
[PATCH] Add the canary field to the PDA area and the task struct
[linux-2.6-omap-h63xx.git] / arch / x86_64 / kernel / process.c
1 /*
2  *  linux/arch/x86-64/kernel/process.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Pentium III FXSR, SSE support
7  *      Gareth Hughes <gareth@valinux.com>, May 2000
8  * 
9  *  X86-64 port
10  *      Andi Kleen.
11  *
12  *      CPU hotplug support - ashok.raj@intel.com
13  */
14
15 /*
16  * This file handles the architecture-dependent parts of process handling.
17  */
18
19 #include <stdarg.h>
20
21 #include <linux/cpu.h>
22 #include <linux/errno.h>
23 #include <linux/sched.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/elfcore.h>
27 #include <linux/smp.h>
28 #include <linux/slab.h>
29 #include <linux/user.h>
30 #include <linux/module.h>
31 #include <linux/a.out.h>
32 #include <linux/interrupt.h>
33 #include <linux/delay.h>
34 #include <linux/ptrace.h>
35 #include <linux/utsname.h>
36 #include <linux/random.h>
37 #include <linux/notifier.h>
38 #include <linux/kprobes.h>
39
40 #include <asm/uaccess.h>
41 #include <asm/pgtable.h>
42 #include <asm/system.h>
43 #include <asm/io.h>
44 #include <asm/processor.h>
45 #include <asm/i387.h>
46 #include <asm/mmu_context.h>
47 #include <asm/pda.h>
48 #include <asm/prctl.h>
49 #include <asm/kdebug.h>
50 #include <asm/desc.h>
51 #include <asm/proto.h>
52 #include <asm/ia32.h>
53 #include <asm/idle.h>
54
/* Forward declaration only; the definition is not visible in this file. */
55 asmlinkage extern void ret_from_fork(void);
56
/* NOTE(review): presumably the default clone flags for kernel threads - confirm against users. */
57 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
58
/* Set to 1 by idle_setup() whenever an "idle=" boot option has been parsed. */
59 unsigned long boot_option_idle_override = 0;
60 EXPORT_SYMBOL(boot_option_idle_override);
61
62 /*
63  * Powermanagement idle function, if any..
 *
 * cpu_idle() calls this pointer when it is non-NULL and falls back to
 * default_idle() when it is NULL.
64  */
65 void (*pm_idle)(void);
66 EXPORT_SYMBOL(pm_idle);
/* Per-CPU flag: set to 1 by cpu_idle_wait(), cleared by cpu_idle() on each pass of its loop. */
67 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
68
/* Notifier chain called with IDLE_START by enter_idle() and IDLE_END by __exit_idle(). */
69 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
70
71 void idle_notifier_register(struct notifier_block *n)
72 {
73         atomic_notifier_chain_register(&idle_notifier, n);
74 }
75 EXPORT_SYMBOL_GPL(idle_notifier_register);
76
77 void idle_notifier_unregister(struct notifier_block *n)
78 {
79         atomic_notifier_chain_unregister(&idle_notifier, n);
80 }
81 EXPORT_SYMBOL(idle_notifier_unregister);
82
83 enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
84 static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
85
86 void enter_idle(void)
87 {
88         __get_cpu_var(idle_state) = CPU_IDLE;
89         atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
90 }
91
92 static void __exit_idle(void)
93 {
94         __get_cpu_var(idle_state) = CPU_NOT_IDLE;
95         atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
96 }
97
98 /* Called from interrupts to signify idle end */
99 void exit_idle(void)
100 {
101         if (current->pid | read_pda(irqcount))
102                 return;
103         __exit_idle();
104 }
105
106 /*
107  * We use this if we don't have any better
108  * idle routine..
 *
 * Clears TS_POLLING in the thread status for the duration of the call,
 * halts via safe_halt() until need_resched() becomes true, and sets
 * TS_POLLING again before returning.
109  */
110 static void default_idle(void)
111 {
112         local_irq_enable();
113
114         current_thread_info()->status &= ~TS_POLLING;
        /* Barrier between clearing TS_POLLING and the first need_resched() test. */
115         smp_mb__after_clear_bit();
116         while (!need_resched()) {
                /*
                 * Re-test need_resched() with interrupts disabled before
                 * halting.
                 * NOTE(review): safe_halt() presumably re-enables
                 * interrupts as part of halting - confirm.
                 */
117                 local_irq_disable();
118                 if (!need_resched())
119                         safe_halt();
120                 else
121                         local_irq_enable();
122         }
123         current_thread_info()->status |= TS_POLLING;
124 }
125
126 /*
127  * On SMP it's slightly faster (but much more power-consuming!)
128  * to poll the ->need_resched flag instead of waiting for the
129  * cross-CPU IPI to arrive. Use this option with caution.
 *
 * Installed as pm_idle by idle_setup() when booting with "idle=poll".
130  */
131 static void poll_idle (void)
132 {
133         local_irq_enable();
134
        /*
         * Spin loop: "testl" checks _TIF_NEED_RESCHED in the thread flags,
         * "rep; nop" sits between test and branch, and "je 2b" jumps back
         * to the label for as long as the tested bit is clear.
         */
135         asm volatile(
136                 "2:"
137                 "testl %0,%1;"
138                 "rep; nop;"
139                 "je 2b;"
140                 : :
141                 "i" (_TIF_NEED_RESCHED),
142                 "m" (current_thread_info()->flags));
143 }
144
145 void cpu_idle_wait(void)
146 {
147         unsigned int cpu, this_cpu = get_cpu();
148         cpumask_t map;
149
150         set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
151         put_cpu();
152
153         cpus_clear(map);
154         for_each_online_cpu(cpu) {
155                 per_cpu(cpu_idle_state, cpu) = 1;
156                 cpu_set(cpu, map);
157         }
158
159         __get_cpu_var(cpu_idle_state) = 0;
160
161         wmb();
162         do {
163                 ssleep(1);
164                 for_each_online_cpu(cpu) {
165                         if (cpu_isset(cpu, map) &&
166                                         !per_cpu(cpu_idle_state, cpu))
167                                 cpu_clear(cpu, map);
168                 }
169                 cpus_and(map, map, cpu_online_map);
170         } while (!cpus_empty(map));
171 }
172 EXPORT_SYMBOL_GPL(cpu_idle_wait);
173
/*
 * play_dead() is called from cpu_idle() when the current CPU is offline.
 * With CONFIG_HOTPLUG_CPU it records CPU_DEAD in the per-CPU cpu_state
 * and then halts forever with interrupts disabled; without it, getting
 * here is a BUG().
 */
174 #ifdef CONFIG_HOTPLUG_CPU
175 DECLARE_PER_CPU(int, cpu_state);
176
177 #include <asm/nmi.h>
178 /* We halt the CPU with physical CPU hotplug */
179 static inline void play_dead(void)
180 {
181         idle_task_exit();
        /* wbinvd() and mb() are issued before the CPU_DEAD ack is written. */
182         wbinvd();
183         mb();
184         /* Ack it */
185         __get_cpu_var(cpu_state) = CPU_DEAD;
186
        /* Never returns: halt in a loop with local interrupts off. */
187         local_irq_disable();
188         while (1)
189                 halt();
190 }
191 #else
192 static inline void play_dead(void)
193 {
194         BUG();
195 }
196 #endif /* CONFIG_HOTPLUG_CPU */
197
198 /*
199  * The idle thread. There's no useful work to be
200  * done, so just try to conserve power and have a
201  * low exit latency (ie sit in a loop waiting for
202  * somebody to say that they'd like to reschedule)
 *
 * Never returns.  While need_resched() is false it runs the selected
 * idle routine (pm_idle, or default_idle when pm_idle is NULL) between
 * enter_idle() and __exit_idle(); otherwise it calls schedule().
203  */
204 void cpu_idle (void)
205 {
206         current_thread_info()->status |= TS_POLLING;
207         /* endless idle loop with no priority at all */
208         while (1) {
209                 while (!need_resched()) {
210                         void (*idle)(void);
211
                        /* Clearing the flag is what cpu_idle_wait() polls for. */
212                         if (__get_cpu_var(cpu_idle_state))
213                                 __get_cpu_var(cpu_idle_state) = 0;
214
                        /* Read barrier between the flag handling above and sampling pm_idle. */
215                         rmb();
216                         idle = pm_idle;
217                         if (!idle)
218                                 idle = default_idle;
                        /* An offline CPU goes to play_dead() instead of idling. */
219                         if (cpu_is_offline(smp_processor_id()))
220                                 play_dead();
221                         enter_idle();
222                         idle();
223                         __exit_idle();
224                 }
225
                /* schedule() is called with preemption enabled; it is disabled again afterwards. */
226                 preempt_enable_no_resched();
227                 schedule();
228                 preempt_disable();
229         }
230 }
231
232 /*
233  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
234  * which can obviate IPI to trigger checking of need_resched.
235  * We execute MONITOR against need_resched and enter optimized wait state
236  * through MWAIT. Whenever someone changes need_resched, we would be woken
237  * up from MWAIT (without an IPI).
 *
 * Installed as pm_idle by select_idle_routine() when the CPU has
 * X86_FEATURE_MWAIT and no other idle routine was chosen.
238  */
239 static void mwait_idle(void)
240 {
241         local_irq_enable();
242
243         while (!need_resched()) {
                /* Arm the monitor on the thread flags word. */
244                 __monitor((void *)&current_thread_info()->flags, 0, 0);
245                 smp_mb();
                /* Re-check after arming so a flag set in between is not slept through. */
246                 if (need_resched())
247                         break;
248                 __mwait(0, 0);
249         }
250 }
251
252 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
253 {
254         static int printed;
255         if (cpu_has(c, X86_FEATURE_MWAIT)) {
256                 /*
257                  * Skip, if setup has overridden idle.
258                  * One CPU supports mwait => All CPUs supports mwait
259                  */
260                 if (!pm_idle) {
261                         if (!printed) {
262                                 printk("using mwait in idle threads.\n");
263                                 printed = 1;
264                         }
265                         pm_idle = mwait_idle;
266                 }
267         }
268 }
269
270 static int __init idle_setup (char *str)
271 {
272         if (!strncmp(str, "poll", 4)) {
273                 printk("using polling idle threads.\n");
274                 pm_idle = poll_idle;
275         }
276
277         boot_option_idle_override = 1;
278         return 1;
279 }
280
281 __setup("idle=", idle_setup);
282
283 /* Prints also some state that isn't saved in the pt_regs */ 
/*
 * Output, in order: loaded modules, pid/comm/taint/kernel release and
 * version, then RIP, RSP, EFLAGS and the general purpose registers
 * taken from @regs.  The segment selectors, the FS/GS base MSRs and
 * CR0/CR2/CR3/CR4 printed at the end are read from the CPU at call
 * time, not from @regs.
 */
284 void __show_regs(struct pt_regs * regs)
285 {
286         unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
287         unsigned int fsindex,gsindex;
288         unsigned int ds,cs,es; 
289
290         printk("\n");
291         print_modules();
        /* Only the first space-delimited word of the version string is printed. */
292         printk("Pid: %d, comm: %.20s %s %s %.*s\n",
293                 current->pid, current->comm, print_tainted(),
294                 system_utsname.release,
295                 (int)strcspn(system_utsname.version, " "),
296                 system_utsname.version);
297         printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
298         printk_address(regs->rip); 
299         printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
300                 regs->eflags);
301         printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
302                regs->rax, regs->rbx, regs->rcx);
303         printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
304                regs->rdx, regs->rsi, regs->rdi); 
305         printk("RBP: %016lx R08: %016lx R09: %016lx\n",
306                regs->rbp, regs->r8, regs->r9); 
307         printk("R10: %016lx R11: %016lx R12: %016lx\n",
308                regs->r10, regs->r11, regs->r12); 
309         printk("R13: %016lx R14: %016lx R15: %016lx\n",
310                regs->r13, regs->r14, regs->r15); 
311
        /* Current segment selectors, read directly from the segment registers. */
312         asm("movl %%ds,%0" : "=r" (ds)); 
313         asm("movl %%cs,%0" : "=r" (cs)); 
314         asm("movl %%es,%0" : "=r" (es)); 
315         asm("movl %%fs,%0" : "=r" (fsindex));
316         asm("movl %%gs,%0" : "=r" (gsindex));
317
        /* FS/GS bases; the MSR_KERNEL_GS_BASE value is printed as "knlGS". */
318         rdmsrl(MSR_FS_BASE, fs);
319         rdmsrl(MSR_GS_BASE, gs); 
320         rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 
321
        /* Control registers. */
322         asm("movq %%cr0, %0": "=r" (cr0));
323         asm("movq %%cr2, %0": "=r" (cr2));
324         asm("movq %%cr3, %0": "=r" (cr3));
325         asm("movq %%cr4, %0": "=r" (cr4));
326
327         printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 
328                fs,fsindex,gs,gsindex,shadowgs); 
329         printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 
330         printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
331 }
332
333 void show_regs(struct pt_regs *regs)
334 {
335         printk("CPU %d:", smp_processor_id());
336         __show_regs(regs);
337         show_trace(NULL, regs, (void *)(regs + 1));
338 }
339
340 /*
341  * Free current thread data structures etc..
342  */
343 void exit_thread(void)
344 {
345         struct task_struct *me = current;
346         struct thread_struct *t = &me->thread;
347
348         if (me->thread.io_bitmap_ptr) { 
349                 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
350
351                 kfree(t->io_bitmap_ptr);
352                 t->io_bitmap_ptr = NULL;
353                 clear_thread_flag(TIF_IO_BITMAP);
354                 /*
355                  * Careful, clear this in the TSS too:
356                  */
357                 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
358                 t->io_bitmap_max = 0;
359                 put_cpu();
360         }
361 }
362
363 void flush_thread(void)
364 {
365         struct task_struct *tsk = current;
366         struct thread_info *t = current_thread_info();
367
368         if (t->flags & _TIF_ABI_PENDING) {
369                 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
370                 if (t->flags & _TIF_IA32)
371                         current_thread_info()->status |= TS_COMPAT;
372         }
373         t->flags &= ~_TIF_DEBUG;
374
375         tsk->thread.debugreg0 = 0;
376         tsk->thread.debugreg1 = 0;
377         tsk->thread.debugreg2 = 0;
378         tsk->thread.debugreg3 = 0;
379         tsk->thread.debugreg6 = 0;
380         tsk->thread.debugreg7 = 0;
381         memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));        
382         /*
383          * Forget coprocessor state..
384          */
385         clear_fpu(tsk);
386         clear_used_math();
387 }
388
389 void release_thread(struct task_struct *dead_task)
390 {
391         if (dead_task->mm) {
392                 if (dead_task->mm->context.size) {
393                         printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
394                                         dead_task->comm,
395                                         dead_task->mm->context.ldt,
396                                         dead_task->mm->context.size);
397                         BUG();
398                 }
399         }
400 }
401
402 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
403 {
404         struct user_desc ud = { 
405                 .base_addr = addr,
406                 .limit = 0xfffff,
407                 .seg_32bit = 1,
408                 .limit_in_pages = 1,
409                 .useable = 1,
410         };
411         struct n_desc_struct *desc = (void *)t->thread.tls_array;
412         desc += tls;
413         desc->a = LDT_entry_a(&ud); 
414         desc->b = LDT_entry_b(&ud); 
415 }
416
417 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
418 {
419         struct desc_struct *desc = (void *)t->thread.tls_array;
420         desc += tls;
421         return desc->base0 | 
422                 (((u32)desc->base1) << 16) | 
423                 (((u32)desc->base2) << 24);
424 }
425
/*
 * Called before a new thread is allocated and the current task is
 * copied into it.  Calls unlazy_fpu() on @tsk.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}
434
435 int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, 
436                 unsigned long unused,
437         struct task_struct * p, struct pt_regs * regs)
438 {
439         int err;
440         struct pt_regs * childregs;
441         struct task_struct *me = current;
442
443         childregs = ((struct pt_regs *)
444                         (THREAD_SIZE + task_stack_page(p))) - 1;
445         *childregs = *regs;
446
447         childregs->rax = 0;
448         childregs->rsp = rsp;
449         if (rsp == ~0UL)
450                 childregs->rsp = (unsigned long)childregs;
451
452         p->thread.rsp = (unsigned long) childregs;
453         p->thread.rsp0 = (unsigned long) (childregs+1);
454         p->thread.userrsp = me->thread.userrsp; 
455
456         set_tsk_thread_flag(p, TIF_FORK);
457
458         p->thread.fs = me->thread.fs;
459         p->thread.gs = me->thread.gs;
460
461         asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
462         asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
463         asm("mov %%es,%0" : "=m" (p->thread.es));
464         asm("mov %%ds,%0" : "=m" (p->thread.ds));
465
466         if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
467                 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
468                 if (!p->thread.io_bitmap_ptr) {
469                         p->thread.io_bitmap_max = 0;
470                         return -ENOMEM;
471                 }
472                 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
473                                 IO_BITMAP_BYTES);
474                 set_tsk_thread_flag(p, TIF_IO_BITMAP);
475         } 
476
477         /*
478          * Set a new TLS for the child thread?
479          */
480         if (clone_flags & CLONE_SETTLS) {
481 #ifdef CONFIG_IA32_EMULATION
482                 if (test_thread_flag(TIF_IA32))
483                         err = ia32_child_tls(p, childregs); 
484                 else                    
485 #endif   
486                         err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
487                 if (err) 
488                         goto out;
489         }
490         err = 0;
491 out:
492         if (err && p->thread.io_bitmap_ptr) {
493                 kfree(p->thread.io_bitmap_ptr);
494                 p->thread.io_bitmap_max = 0;
495         }
496         return err;
497 }
498
/*
 * This special macro can be used to load a debugging register:
 * it passes the debugreg<r> member of *@thread and the register
 * number @r to set_debugreg().
 *
 * @thread is parenthesized so that any pointer expression (not just a
 * plain identifier) expands correctly.
 */
#define loaddebug(thread,r) set_debugreg((thread)->debugreg ## r, r)
503
504 static inline void __switch_to_xtra(struct task_struct *prev_p,
505                                     struct task_struct *next_p,
506                                     struct tss_struct *tss)
507 {
508         struct thread_struct *prev, *next;
509
510         prev = &prev_p->thread,
511         next = &next_p->thread;
512
513         if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
514                 loaddebug(next, 0);
515                 loaddebug(next, 1);
516                 loaddebug(next, 2);
517                 loaddebug(next, 3);
518                 /* no 4 and 5 */
519                 loaddebug(next, 6);
520                 loaddebug(next, 7);
521         }
522
523         if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
524                 /*
525                  * Copy the relevant range of the IO bitmap.
526                  * Normally this is 128 bytes or less:
527                  */
528                 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
529                        max(prev->io_bitmap_max, next->io_bitmap_max));
530         } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
531                 /*
532                  * Clear any possible leftover bits:
533                  */
534                 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
535         }
536 }
537
538 /*
539  *      switch_to(x,y) should switch tasks from x to y.
540  *
541  * This could still be optimized: 
542  * - fold all the options into a flag word and test it with a single test.
543  * - could test fs/gs bitsliced
544  *
545  * Kprobes not supported here. Set the probe on schedule instead.
 *
 * @prev_p: task being switched away from
 * @next_p: task being switched to
 *
 * Saves the outgoing segment state into prev_p->thread, loads the
 * state of next_p->thread, updates the TSS rsp0 and the PDA fields
 * (oldrsp, pcurrent, kernelstack and, with CONFIG_CC_STACKPROTECTOR,
 * stack_canary), and returns @prev_p.
546  */
547 __kprobes struct task_struct *
548 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
549 {
550         struct thread_struct *prev = &prev_p->thread,
551                                  *next = &next_p->thread;
552         int cpu = smp_processor_id();  
553         struct tss_struct *tss = &per_cpu(init_tss, cpu);
554
555         /* we're going to use this soon, after a few expensive things */
556         if (next_p->fpu_counter>5)
557                 prefetch(&next->i387.fxsave);
558
559         /*
560          * Reload esp0, LDT and the page table pointer:
561          */
562         tss->rsp0 = next->rsp0;
563
564         /* 
565          * Switch DS and ES.
566          * This won't pick up thread selector changes, but I guess that is ok.
567          */
        /* The live selector is saved first; a reload is skipped only when both old and new are 0. */
568         asm volatile("mov %%es,%0" : "=m" (prev->es));
569         if (unlikely(next->es | prev->es))
570                 loadsegment(es, next->es); 
571         
572         asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
573         if (unlikely(next->ds | prev->ds))
574                 loadsegment(ds, next->ds);
575
576         load_TLS(next, cpu);
577
578         /* 
579          * Switch FS and GS.
580          */
581         { 
582                 unsigned fsindex;
583                 asm volatile("movl %%fs,%0" : "=r" (fsindex)); 
584                 /* segment register != 0 always requires a reload. 
585                    also reload when it has changed. 
586                    when prev process used 64bit base always reload
587                    to avoid an information leak. */
588                 if (unlikely(fsindex | next->fsindex | prev->fs)) {
589                         loadsegment(fs, next->fsindex);
590                         /* check if the user used a selector != 0
591                          * if yes clear 64bit base, since overloaded base
592                          * is always mapped to the Null selector
593                          */
                        /* NOTE: the assignment below is the body of this 'if', despite its indentation. */
594                         if (fsindex)
595                         prev->fs = 0;                           
596                 }
597                 /* when next process has a 64bit base use it */
598                 if (next->fs) 
599                         wrmsrl(MSR_FS_BASE, next->fs); 
600                 prev->fsindex = fsindex;
601         }
        /* Same scheme for GS, except that the base is written to MSR_KERNEL_GS_BASE. */
602         { 
603                 unsigned gsindex;
604                 asm volatile("movl %%gs,%0" : "=r" (gsindex)); 
605                 if (unlikely(gsindex | next->gsindex | prev->gs)) {
606                         load_gs_index(next->gsindex);
                        /* NOTE: the assignment below is the body of this 'if', despite its indentation. */
607                         if (gsindex)
608                         prev->gs = 0;                           
609                 }
610                 if (next->gs)
611                         wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 
612                 prev->gsindex = gsindex;
613         }
614
615         /* 
616          * Switch the PDA and FPU contexts.
617          */
618         prev->userrsp = read_pda(oldrsp); 
619         write_pda(oldrsp, next->userrsp); 
620         write_pda(pcurrent, next_p); 
621
622         /* This must be here to ensure both math_state_restore() and
623            kernel_fpu_begin() work consistently. 
624            And the AMD workaround requires it to be after DS reload. */
625         unlazy_fpu(prev_p);
626         write_pda(kernelstack,
627                   task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
628 #ifdef CONFIG_CC_STACKPROTECTOR
        /* Publish the next task's canary value in the PDA. */
629         write_pda(stack_canary, next_p->stack_canary);
630         /*
631          * Build time only check to make sure the stack_canary is at
632          * offset 40 in the pda; this is a gcc ABI requirement
633          */
634         BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
635 #endif
636
637         /*
638          * Now maybe reload the debug registers and handle I/O bitmaps
639          */
        /* Taken when next has any _TIF_WORK_CTXSW flag or prev used an I/O bitmap. */
640         if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
641             || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
642                 __switch_to_xtra(prev_p, next_p, tss);
643
644         /* If the task has used fpu the last 5 timeslices, just do a full
645          * restore of the math state immediately to avoid the trap; the
646          * chances of needing FPU soon are obviously high now
647          */
648         if (next_p->fpu_counter>5)
649                 math_state_restore();
650         return prev_p;
651 }
652
653 /*
654  * sys_execve() executes a new program.
655  */
656 asmlinkage 
657 long sys_execve(char __user *name, char __user * __user *argv,
658                 char __user * __user *envp, struct pt_regs regs)
659 {
660         long error;
661         char * filename;
662
663         filename = getname(name);
664         error = PTR_ERR(filename);
665         if (IS_ERR(filename)) 
666                 return error;
667         error = do_execve(filename, argv, envp, &regs); 
668         if (error == 0) {
669                 task_lock(current);
670                 current->ptrace &= ~PT_DTRACE;
671                 task_unlock(current);
672         }
673         putname(filename);
674         return error;
675 }
676
677 void set_personality_64bit(void)
678 {
679         /* inherit personality from parent */
680
681         /* Make sure to be in 64bit mode */
682         clear_thread_flag(TIF_IA32); 
683
684         /* TBD: overwrites user setup. Should have two bits.
685            But 64bit processes have always behaved this way,
686            so it's not too bad. The main problem is just that
687            32bit childs are affected again. */
688         current->personality &= ~READ_IMPLIES_EXEC;
689 }
690
691 asmlinkage long sys_fork(struct pt_regs *regs)
692 {
693         return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
694 }
695
696 asmlinkage long
697 sys_clone(unsigned long clone_flags, unsigned long newsp,
698           void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
699 {
700         if (!newsp)
701                 newsp = regs->rsp;
702         return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
703 }
704
705 /*
706  * This is trivial, and on the face of it looks like it
707  * could equally well be done in user mode.
708  *
709  * Not so, for quite unobvious reasons - register pressure.
710  * In user mode vfork() cannot have a stack frame, and if
711  * done by calling the "clone()" system call directly, you
712  * do not have enough call-clobbered registers to hold all
713  * the information you need.
714  */
715 asmlinkage long sys_vfork(struct pt_regs *regs)
716 {
717         return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
718                     NULL, NULL);
719 }
720
721 unsigned long get_wchan(struct task_struct *p)
722 {
723         unsigned long stack;
724         u64 fp,rip;
725         int count = 0;
726
727         if (!p || p == current || p->state==TASK_RUNNING)
728                 return 0; 
729         stack = (unsigned long)task_stack_page(p);
730         if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
731                 return 0;
732         fp = *(u64 *)(p->thread.rsp);
733         do { 
734                 if (fp < (unsigned long)stack ||
735                     fp > (unsigned long)stack+THREAD_SIZE)
736                         return 0; 
737                 rip = *(u64 *)(fp+8); 
738                 if (!in_sched_functions(rip))
739                         return rip; 
740                 fp = *(u64 *)fp; 
741         } while (count++ < 16); 
742         return 0;
743 }
744
745 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
746
747         int ret = 0; 
748         int doit = task == current;
749         int cpu;
750
751         switch (code) { 
752         case ARCH_SET_GS:
753                 if (addr >= TASK_SIZE_OF(task))
754                         return -EPERM; 
755                 cpu = get_cpu();
756                 /* handle small bases via the GDT because that's faster to 
757                    switch. */
758                 if (addr <= 0xffffffff) {  
759                         set_32bit_tls(task, GS_TLS, addr); 
760                         if (doit) { 
761                                 load_TLS(&task->thread, cpu);
762                                 load_gs_index(GS_TLS_SEL); 
763                         }
764                         task->thread.gsindex = GS_TLS_SEL; 
765                         task->thread.gs = 0;
766                 } else { 
767                         task->thread.gsindex = 0;
768                         task->thread.gs = addr;
769                         if (doit) {
770                                 load_gs_index(0);
771                                 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
772                         } 
773                 }
774                 put_cpu();
775                 break;
776         case ARCH_SET_FS:
777                 /* Not strictly needed for fs, but do it for symmetry
778                    with gs */
779                 if (addr >= TASK_SIZE_OF(task))
780                         return -EPERM; 
781                 cpu = get_cpu();
782                 /* handle small bases via the GDT because that's faster to 
783                    switch. */
784                 if (addr <= 0xffffffff) { 
785                         set_32bit_tls(task, FS_TLS, addr);
786                         if (doit) { 
787                                 load_TLS(&task->thread, cpu); 
788                                 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
789                         }
790                         task->thread.fsindex = FS_TLS_SEL;
791                         task->thread.fs = 0;
792                 } else { 
793                         task->thread.fsindex = 0;
794                         task->thread.fs = addr;
795                         if (doit) {
796                                 /* set the selector to 0 to not confuse
797                                    __switch_to */
798                                 asm volatile("movl %0,%%fs" :: "r" (0));
799                                 ret = checking_wrmsrl(MSR_FS_BASE, addr);
800                         }
801                 }
802                 put_cpu();
803                 break;
804         case ARCH_GET_FS: { 
805                 unsigned long base; 
806                 if (task->thread.fsindex == FS_TLS_SEL)
807                         base = read_32bit_tls(task, FS_TLS);
808                 else if (doit)
809                         rdmsrl(MSR_FS_BASE, base);
810                 else
811                         base = task->thread.fs;
812                 ret = put_user(base, (unsigned long __user *)addr); 
813                 break; 
814         }
815         case ARCH_GET_GS: { 
816                 unsigned long base;
817                 unsigned gsindex;
818                 if (task->thread.gsindex == GS_TLS_SEL)
819                         base = read_32bit_tls(task, GS_TLS);
820                 else if (doit) {
821                         asm("movl %%gs,%0" : "=r" (gsindex));
822                         if (gsindex)
823                                 rdmsrl(MSR_KERNEL_GS_BASE, base);
824                         else
825                                 base = task->thread.gs;
826                 }
827                 else
828                         base = task->thread.gs;
829                 ret = put_user(base, (unsigned long __user *)addr); 
830                 break;
831         }
832
833         default:
834                 ret = -EINVAL;
835                 break;
836         } 
837
838         return ret;     
839
840
841 long sys_arch_prctl(int code, unsigned long addr)
842 {
843         return do_arch_prctl(current, code, addr);
844
845
846 /* 
847  * Capture the user space registers if the task is not running (in user space)
848  */
849 int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
850 {
851         struct pt_regs *pp, ptregs;
852
853         pp = task_pt_regs(tsk);
854
855         ptregs = *pp; 
856         ptregs.cs &= 0xffff;
857         ptregs.ss &= 0xffff;
858
859         elf_core_copy_regs(regs, &ptregs);
860  
861         return 1;
862 }
863
864 unsigned long arch_align_stack(unsigned long sp)
865 {
866         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
867                 sp -= get_random_int() % 8192;
868         return sp & ~0xf;
869 }