]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/blob - arch/ppc64/kernel/vdso.c
1bbacac44988168e58ce2d06888f195055d2697e
[linux-2.6-omap-h63xx.git] / arch / ppc64 / kernel / vdso.c
1 /*
2  *  linux/arch/ppc64/kernel/vdso.c
3  *
4  *    Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
5  *                       <benh@kernel.crashing.org>
6  *
7  *  This program is free software; you can redistribute it and/or
8  *  modify it under the terms of the GNU General Public License
9  *  as published by the Free Software Foundation; either version
10  *  2 of the License, or (at your option) any later version.
11  */
12
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <linux/errno.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/mm.h>
19 #include <linux/smp.h>
20 #include <linux/smp_lock.h>
21 #include <linux/stddef.h>
22 #include <linux/unistd.h>
23 #include <linux/slab.h>
24 #include <linux/user.h>
25 #include <linux/elf.h>
26 #include <linux/security.h>
27 #include <linux/bootmem.h>
28
29 #include <asm/pgtable.h>
30 #include <asm/system.h>
31 #include <asm/processor.h>
32 #include <asm/mmu.h>
33 #include <asm/mmu_context.h>
34 #include <asm/machdep.h>
35 #include <asm/cputable.h>
36 #include <asm/sections.h>
37 #include <asm/systemcfg.h>
38 #include <asm/vdso.h>
39
/*
 * Debug tracing switch: with DEBUG defined, DBG() forwards its arguments
 * to printk(); with DEBUG undefined (the default below) DBG() expands to
 * nothing.
 */
#undef DEBUG

#ifdef DEBUG
#define DBG(fmt...) printk(fmt)
#else
#define DBG(fmt...)
#endif
47
48
49 /*
50  * The vDSOs themselves are here
51  */
/* Start/end markers of the two vDSO images (defined outside this file) */
extern char vdso64_start, vdso64_end;
extern char vdso32_start, vdso32_end;

/* Kernel addresses of the first byte of each image */
static void *vdso64_kbase = &vdso64_start;
static void *vdso32_kbase = &vdso32_start;

/*
 * Size of each image in pages, computed by vdso_init(). Both are reset
 * to 0 when setup fails, which makes arch_setup_additional_pages() skip
 * the mapping.
 */
unsigned int vdso64_pages;
unsigned int vdso32_pages;

/*
 * Signal trampolines user addresses, filled in by
 * vdso_setup_trampolines() (symbol value minus the library base).
 */
unsigned long vdso64_rt_sigtramp;
unsigned long vdso32_sigtramp;
unsigned long vdso32_rt_sigtramp;
66
67 /* Format of the patch table */
68 struct vdso_patch_def
69 {
70         u32             pvr_mask, pvr_value;
71         const char      *gen_name;
72         const char      *fix_name;
73 };
74
75 /* Table of functions to patch based on the CPU type/revision
76  *
77  * TODO: Improve by adding whole lists for each entry
78  */
79 static struct vdso_patch_def vdso_patches[] = {
80         {
81                 0xffff0000, 0x003a0000,         /* POWER5 */
82                 "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
83         },
84         {
85                 0xffff0000, 0x003b0000,         /* POWER5 */
86                 "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
87         },
88 };
89
90 /*
91  * Some infos carried around for each of them during parsing at
92  * boot time.
93  */
94 struct lib32_elfinfo
95 {
96         Elf32_Ehdr      *hdr;           /* ptr to ELF */
97         Elf32_Sym       *dynsym;        /* ptr to .dynsym section */
98         unsigned long   dynsymsize;     /* size of .dynsym section */
99         char            *dynstr;        /* ptr to .dynstr section */
100         unsigned long   text;           /* offset of .text section in .so */
101 };
102
103 struct lib64_elfinfo
104 {
105         Elf64_Ehdr      *hdr;
106         Elf64_Sym       *dynsym;
107         unsigned long   dynsymsize;
108         char            *dynstr;
109         unsigned long   text;
110 };
111
112
#ifdef __DEBUG
/*
 * Print address, reference count and flags of one kernel vDSO page and,
 * if upg is non-NULL, of the page found in the user mapping.
 */
static void dump_one_vdso_page(struct page *pg, struct page *upg)
{
	printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
	       page_count(pg),
	       pg->flags);
	if (upg) {
		printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) << PAGE_SHIFT),
		       page_count(upg),
		       upg->flags);
	}
	printk("\n");
}

/*
 * Dump every page of one vDSO image located at kbase. The user-side page
 * is looked up through follow_page() only when vma has an mm attached.
 */
static void dump_vdso_image(struct vm_area_struct *vma, void *kbase,
			    unsigned int npages)
{
	unsigned int n;

	for (n = 0; n < npages; n++) {
		struct page *kpage = virt_to_page(kbase + n*PAGE_SIZE);
		struct page *upage = NULL;

		if (vma && vma->vm_mm)
			upage = follow_page(vma->vm_mm,
					    vma->vm_start + n*PAGE_SIZE, 0);
		dump_one_vdso_page(kpage, upage);
	}
}

/*
 * Dump the vDSO pages. With a NULL vma both images are dumped; otherwise
 * only the image matching the current thread's TIF_32BIT flag.
 */
static void dump_vdso_pages(struct vm_area_struct * vma)
{
	if (!vma || test_thread_flag(TIF_32BIT)) {
		printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
		dump_vdso_image(vma, vdso32_kbase, vdso32_pages);
	}
	if (!vma || !test_thread_flag(TIF_32BIT)) {
		printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
		dump_vdso_image(vma, vdso64_kbase, vdso64_pages);
	}
}
#endif /* __DEBUG */
153
154 /*
155  * Keep a dummy vma_close for now, it will prevent VMA merging.
156  */
static void vdso_vma_close(struct vm_area_struct *vma)
{
	/* Nothing to do: the hook only needs to exist. */
}
160
161 /*
162  * Our nopage() function, maps in the actual vDSO kernel pages, they will
163  * be mapped read-only by do_no_page(), and eventually COW'ed, either
164  * right away for an initial write access, or by do_wp_page().
165  */
166 static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
167                                      unsigned long address, int *type)
168 {
169         unsigned long offset = address - vma->vm_start;
170         struct page *pg;
171         void *vbase = test_thread_flag(TIF_32BIT) ? vdso32_kbase : vdso64_kbase;
172
173         DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
174             current->comm, address, offset);
175
176         if (address < vma->vm_start || address > vma->vm_end)
177                 return NOPAGE_SIGBUS;
178
179         /*
180          * Last page is systemcfg.
181          */
182         if ((vma->vm_end - address) <= PAGE_SIZE)
183                 pg = virt_to_page(_systemcfg);
184         else
185                 pg = virt_to_page(vbase + offset);
186
187         get_page(pg);
188         DBG(" ->page count: %d\n", page_count(pg));
189
190         return pg;
191 }
192
193 static struct vm_operations_struct vdso_vmops = {
194         .close  = vdso_vma_close,
195         .nopage = vdso_vma_nopage,
196 };
197
198 /*
199  * This is called from binfmt_elf, we create the special vma for the
200  * vDSO and insert it into the mm struct tree
201  */
202 int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
203 {
204         struct mm_struct *mm = current->mm;
205         struct vm_area_struct *vma;
206         unsigned long vdso_pages;
207         unsigned long vdso_base;
208
209         if (test_thread_flag(TIF_32BIT)) {
210                 vdso_pages = vdso32_pages;
211                 vdso_base = VDSO32_MBASE;
212         } else {
213                 vdso_pages = vdso64_pages;
214                 vdso_base = VDSO64_MBASE;
215         }
216
217         current->thread.vdso_base = 0;
218
219         /* vDSO has a problem and was disabled, just don't "enable" it for the
220          * process
221          */
222         if (vdso_pages == 0)
223                 return 0;
224
225         vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
226         if (vma == NULL)
227                 return -ENOMEM;
228
229         memset(vma, 0, sizeof(*vma));
230
231         /*
232          * pick a base address for the vDSO in process space. We try to put it
233          * at vdso_base which is the "natural" base for it, but we might fail
234          * and end up putting it elsewhere.
235          */
236         vdso_base = get_unmapped_area(NULL, vdso_base,
237                                       vdso_pages << PAGE_SHIFT, 0, 0);
238         if (vdso_base & ~PAGE_MASK) {
239                 kmem_cache_free(vm_area_cachep, vma);
240                 return (int)vdso_base;
241         }
242
243         current->thread.vdso_base = vdso_base;
244
245         vma->vm_mm = mm;
246         vma->vm_start = current->thread.vdso_base;
247
248         /*
249          * the VMA size is one page more than the vDSO since systemcfg
250          * is mapped in the last one
251          */
252         vma->vm_end = vma->vm_start + ((vdso_pages + 1) << PAGE_SHIFT);
253
254         /*
255          * our vma flags don't have VM_WRITE so by default, the process isn't allowed
256          * to write those pages.
257          * gdb can break that with ptrace interface, and thus trigger COW on those
258          * pages but it's then your responsibility to never do that on the "data" page
259          * of the vDSO or you'll stop getting kernel updates and your nice userland
260          * gettimeofday will be totally dead. It's fine to use that for setting
261          * breakpoints in the vDSO code pages though
262          */
263         vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_RESERVED;
264         vma->vm_flags |= mm->def_flags;
265         vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
266         vma->vm_ops = &vdso_vmops;
267
268         down_write(&mm->mmap_sem);
269         if (insert_vm_struct(mm, vma)) {
270                 up_write(&mm->mmap_sem);
271                 kmem_cache_free(vm_area_cachep, vma);
272                 return -ENOMEM;
273         }
274         mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
275         up_write(&mm->mmap_sem);
276
277         return 0;
278 }
279
280 static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
281                                   unsigned long *size)
282 {
283         Elf32_Shdr *sechdrs;
284         unsigned int i;
285         char *secnames;
286
287         /* Grab section headers and strings so we can tell who is who */
288         sechdrs = (void *)ehdr + ehdr->e_shoff;
289         secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
290
291         /* Find the section they want */
292         for (i = 1; i < ehdr->e_shnum; i++) {
293                 if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
294                         if (size)
295                                 *size = sechdrs[i].sh_size;
296                         return (void *)ehdr + sechdrs[i].sh_offset;
297                 }
298         }
299         *size = 0;
300         return NULL;
301 }
302
303 static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
304                                   unsigned long *size)
305 {
306         Elf64_Shdr *sechdrs;
307         unsigned int i;
308         char *secnames;
309
310         /* Grab section headers and strings so we can tell who is who */
311         sechdrs = (void *)ehdr + ehdr->e_shoff;
312         secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
313
314         /* Find the section they want */
315         for (i = 1; i < ehdr->e_shnum; i++) {
316                 if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
317                         if (size)
318                                 *size = sechdrs[i].sh_size;
319                         return (void *)ehdr + sechdrs[i].sh_offset;
320                 }
321         }
322         if (size)
323                 *size = 0;
324         return NULL;
325 }
326
327 static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, const char *symname)
328 {
329         unsigned int i;
330         char name[32], *c;
331
332         for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
333                 if (lib->dynsym[i].st_name == 0)
334                         continue;
335                 strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, 32);
336                 c = strchr(name, '@');
337                 if (c)
338                         *c = 0;
339                 if (strcmp(symname, name) == 0)
340                         return &lib->dynsym[i];
341         }
342         return NULL;
343 }
344
345 static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, const char *symname)
346 {
347         unsigned int i;
348         char name[32], *c;
349
350         for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
351                 if (lib->dynsym[i].st_name == 0)
352                         continue;
353                 strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, 32);
354                 c = strchr(name, '@');
355                 if (c)
356                         *c = 0;
357                 if (strcmp(symname, name) == 0)
358                         return &lib->dynsym[i];
359         }
360         return NULL;
361 }
362
363 /* Note that we assume the section is .text and the symbol is relative to
364  * the library base
365  */
366 static unsigned long __init find_function32(struct lib32_elfinfo *lib, const char *symname)
367 {
368         Elf32_Sym *sym = find_symbol32(lib, symname);
369
370         if (sym == NULL) {
371                 printk(KERN_WARNING "vDSO32: function %s not found !\n", symname);
372                 return 0;
373         }
374         return sym->st_value - VDSO32_LBASE;
375 }
376
377 /* Note that we assume the section is .text and the symbol is relative to
378  * the library base
379  */
380 static unsigned long __init find_function64(struct lib64_elfinfo *lib, const char *symname)
381 {
382         Elf64_Sym *sym = find_symbol64(lib, symname);
383
384         if (sym == NULL) {
385                 printk(KERN_WARNING "vDSO64: function %s not found !\n", symname);
386                 return 0;
387         }
388 #ifdef VDS64_HAS_DESCRIPTORS
389         return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) - VDSO64_LBASE;
390 #else
391         return sym->st_value - VDSO64_LBASE;
392 #endif
393 }
394
395
396 static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
397                                         struct lib64_elfinfo *v64)
398 {
399         void *sect;
400
401         /*
402          * Locate symbol tables & text section
403          */
404
405         v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
406         v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
407         if (v32->dynsym == NULL || v32->dynstr == NULL) {
408                 printk(KERN_ERR "vDSO32: a required symbol section was not found\n");
409                 return -1;
410         }
411         sect = find_section32(v32->hdr, ".text", NULL);
412         if (sect == NULL) {
413                 printk(KERN_ERR "vDSO32: the .text section was not found\n");
414                 return -1;
415         }
416         v32->text = sect - vdso32_kbase;
417
418         v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
419         v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
420         if (v64->dynsym == NULL || v64->dynstr == NULL) {
421                 printk(KERN_ERR "vDSO64: a required symbol section was not found\n");
422                 return -1;
423         }
424         sect = find_section64(v64->hdr, ".text", NULL);
425         if (sect == NULL) {
426                 printk(KERN_ERR "vDSO64: the .text section was not found\n");
427                 return -1;
428         }
429         v64->text = sect - vdso64_kbase;
430
431         return 0;
432 }
433
434 static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
435                                           struct lib64_elfinfo *v64)
436 {
437         /*
438          * Find signal trampolines
439          */
440
441         vdso64_rt_sigtramp      = find_function64(v64, "__kernel_sigtramp_rt64");
442         vdso32_sigtramp         = find_function32(v32, "__kernel_sigtramp32");
443         vdso32_rt_sigtramp      = find_function32(v32, "__kernel_sigtramp_rt32");
444 }
445
446 static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
447                                        struct lib64_elfinfo *v64)
448 {
449         Elf32_Sym *sym32;
450         Elf64_Sym *sym64;
451
452         sym32 = find_symbol32(v32, "__kernel_datapage_offset");
453         if (sym32 == NULL) {
454                 printk(KERN_ERR "vDSO32: Can't find symbol __kernel_datapage_offset !\n");
455                 return -1;
456         }
457         *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
458                 (vdso32_pages << PAGE_SHIFT) - (sym32->st_value - VDSO32_LBASE);
459
460         sym64 = find_symbol64(v64, "__kernel_datapage_offset");
461         if (sym64 == NULL) {
462                 printk(KERN_ERR "vDSO64: Can't find symbol __kernel_datapage_offset !\n");
463                 return -1;
464         }
465         *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
466                 (vdso64_pages << PAGE_SHIFT) - (sym64->st_value - VDSO64_LBASE);
467
468         return 0;
469 }
470
471 static int vdso_do_func_patch32(struct lib32_elfinfo *v32,
472                                 struct lib64_elfinfo *v64,
473                                 const char *orig, const char *fix)
474 {
475         Elf32_Sym *sym32_gen, *sym32_fix;
476
477         sym32_gen = find_symbol32(v32, orig);
478         if (sym32_gen == NULL) {
479                 printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
480                 return -1;
481         }
482         sym32_fix = find_symbol32(v32, fix);
483         if (sym32_fix == NULL) {
484                 printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
485                 return -1;
486         }
487         sym32_gen->st_value = sym32_fix->st_value;
488         sym32_gen->st_size = sym32_fix->st_size;
489         sym32_gen->st_info = sym32_fix->st_info;
490         sym32_gen->st_other = sym32_fix->st_other;
491         sym32_gen->st_shndx = sym32_fix->st_shndx;
492
493         return 0;
494 }
495
496 static int vdso_do_func_patch64(struct lib32_elfinfo *v32,
497                                 struct lib64_elfinfo *v64,
498                                 const char *orig, const char *fix)
499 {
500         Elf64_Sym *sym64_gen, *sym64_fix;
501
502         sym64_gen = find_symbol64(v64, orig);
503         if (sym64_gen == NULL) {
504                 printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
505                 return -1;
506         }
507         sym64_fix = find_symbol64(v64, fix);
508         if (sym64_fix == NULL) {
509                 printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
510                 return -1;
511         }
512         sym64_gen->st_value = sym64_fix->st_value;
513         sym64_gen->st_size = sym64_fix->st_size;
514         sym64_gen->st_info = sym64_fix->st_info;
515         sym64_gen->st_other = sym64_fix->st_other;
516         sym64_gen->st_shndx = sym64_fix->st_shndx;
517
518         return 0;
519 }
520
521 static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
522                                        struct lib64_elfinfo *v64)
523 {
524         u32 pvr;
525         int i;
526
527         pvr = mfspr(SPRN_PVR);
528         for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
529                 struct vdso_patch_def *patch = &vdso_patches[i];
530                 int match = (pvr & patch->pvr_mask) == patch->pvr_value;
531
532                 DBG("patch %d (mask: %x, pvr: %x) : %s\n",
533                     i, patch->pvr_mask, patch->pvr_value, match ? "match" : "skip");
534
535                 if (!match)
536                         continue;
537
538                 DBG("replacing %s with %s...\n", patch->gen_name, patch->fix_name);
539
540                 /*
541                  * Patch the 32 bits and 64 bits symbols. Note that we do not patch
542                  * the "." symbol on 64 bits. It would be easy to do, but doesn't
543                  * seem to be necessary, patching the OPD symbol is enough.
544                  */
545                 vdso_do_func_patch32(v32, v64, patch->gen_name, patch->fix_name);
546                 vdso_do_func_patch64(v32, v64, patch->gen_name, patch->fix_name);
547         }
548
549         return 0;
550 }
551
552
553 static __init int vdso_setup(void)
554 {
555         struct lib32_elfinfo    v32;
556         struct lib64_elfinfo    v64;
557
558         v32.hdr = vdso32_kbase;
559         v64.hdr = vdso64_kbase;
560
561         if (vdso_do_find_sections(&v32, &v64))
562                 return -1;
563
564         if (vdso_fixup_datapage(&v32, &v64))
565                 return -1;
566
567         if (vdso_fixup_alt_funcs(&v32, &v64))
568                 return -1;
569
570         vdso_setup_trampolines(&v32, &v64);
571
572         return 0;
573 }
574
575 void __init vdso_init(void)
576 {
577         int i;
578
579         vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
580         vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
581
582         DBG("vdso64_kbase: %p, 0x%x pages, vdso32_kbase: %p, 0x%x pages\n",
583                vdso64_kbase, vdso64_pages, vdso32_kbase, vdso32_pages);
584
585         /*
586          * Initialize the vDSO images in memory, that is do necessary
587          * fixups of vDSO symbols, locate trampolines, etc...
588          */
589         if (vdso_setup()) {
590                 printk(KERN_ERR "vDSO setup failure, not enabled !\n");
591                 /* XXX should free pages here ? */
592                 vdso64_pages = vdso32_pages = 0;
593                 return;
594         }
595
596         /* Make sure pages are in the correct state */
597         for (i = 0; i < vdso64_pages; i++) {
598                 struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
599                 ClearPageReserved(pg);
600                 get_page(pg);
601         }
602         for (i = 0; i < vdso32_pages; i++) {
603                 struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
604                 ClearPageReserved(pg);
605                 get_page(pg);
606         }
607
608         get_page(virt_to_page(_systemcfg));
609 }
610
/* Gate-area query without a task: this implementation always answers 0. */
int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}
615
/* Gate-area query for a task: this implementation always answers 0. */
int in_gate_area(struct task_struct *task, unsigned long addr)
{
	return 0;
}
620
/* Gate vma lookup: this implementation always returns NULL. */
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
	return NULL;
}
625