arch/x86/mm/init_64.c (from linux-2.6-omap-h63xx.git)
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
 * physical space so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
        long i, total = 0, reserved = 0;
        long shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        /* this loop can take a while with 256 GB and 4k pages
                           so update the NMI watchdog */
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
                                touch_nmi_watchdog();
                        }
                        if (!pfn_valid(pgdat->node_start_pfn + i))
                                continue;
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk(KERN_INFO "%lu pages of RAM\n", total);
        printk(KERN_INFO "%lu reserved pages\n", reserved);
        printk(KERN_INFO "%lu pages shared\n", shared);
        printk(KERN_INFO "%lu pages swap cached\n", cached);
}

int after_bootmem;

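/*
 * Allocate a zeroed page for use as a kernel page table: from the bootmem
 * allocator during early boot, from the page allocator once it is up.
 */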
static __init void *spp_getpage(void)
{
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem ? "after bootmem" : "");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}

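/*
 * Install a single kernel PTE mapping vaddr -> phys in the init_mm page
 * tables, allocating intermediate levels as needed.  Used by __set_fixmap().
 */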
static __init void set_pte_phys(unsigned long vaddr,
                         unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

unsigned long __meminitdata table_start, table_end;

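/*
 * Allocate one zeroed page for the early direct-mapping page tables.
 * Before bootmem is up the page comes from the range reserved by
 * find_early_table_space() and is temporarily mapped with early_ioremap();
 * afterwards it comes straight from the page allocator.
 */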
static __meminit void *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;
        void *adr;

        if (after_bootmem) {
                adr = (void *)get_zeroed_page(GFP_ATOMIC);
                *phys = __pa(adr);
                return adr;
        }

        if (pfn >= end_pfn)
                panic("alloc_low_page: ran out of memory");

        adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
        *phys  = pfn * PAGE_SIZE;
        return adr;
}

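/* Undo the temporary early_ioremap() mapping set up by alloc_low_page(). */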
static __meminit void unmap_low_page(void *adr)
{

        if (after_bootmem)
                return;

        early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd, *last_pmd;
        int i, pmds;

        pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        vaddr = __START_KERNEL_map;
        pmd = level2_kernel_pgt;
        last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
        for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
                for (i = 0; i < pmds; i++) {
                        if (pmd_present(pmd[i]))
                                goto next;
                }
                vaddr += addr & ~PMD_MASK;
                addr &= PMD_MASK;
                for (i = 0; i < pmds; i++, addr += PMD_SIZE)
                        set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
                __flush_tlb();
                return (void *)vaddr;
        next:
                ;
        }
        printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
        return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd;
        int i, pmds;

        vaddr = (unsigned long)addr;
        pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        pmd = level2_kernel_pgt + pmd_index(vaddr);
        for (i = 0; i < pmds; i++)
                pmd_clear(pmd + i);
        __flush_tlb();
}

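/*
 * Fill one PMD page with 2MB kernel mappings for the range [address, end),
 * starting at the entry that covers address.  Entries that are already
 * populated are left untouched.
 */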
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
        int i = pmd_index(address);

        for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                unsigned long entry;
                pmd_t *pmd = pmd_page + pmd_index(address);

                if (address >= end) {
                        if (!after_bootmem)
                                for (; i < PTRS_PER_PMD; i++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                        break;
                }

                if (pmd_val(*pmd))
                        continue;

                entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
                entry &= __supported_pte_mask;
                set_pmd(pmd, __pmd(entry));
        }
}

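/*
 * Extend an already-populated PUD entry: take the page_table_lock and let
 * phys_pmd_init() fill in any missing 2MB entries, then flush the TLB.
 */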
static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, 0);
        spin_lock(&init_mm.page_table_lock);
        phys_pmd_init(pmd, address, end);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
}

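/*
 * Populate one PUD page for the range [addr, end): allocate a PMD page for
 * every 1GB slot that the e820 map says contains memory and map it with
 * 2MB pages via phys_pmd_init().
 */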
static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
        int i = pud_index(addr);

        for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;

                if (addr >= end)
                        break;

                if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }

                if (pud_val(*pud)) {
                        phys_pmd_update(pud, addr, end);
                        continue;
                }

                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
                phys_pmd_init(pmd, addr, end);
                spin_unlock(&init_mm.page_table_lock);
                unmap_low_page(pmd);
        }
        __flush_tlb();
}

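/*
 * Estimate how much space the direct-mapping page tables will need for
 * physical memory up to end and reserve a contiguous region for them in
 * the e820 map.  table_start/table_end are then consumed by alloc_low_page().
 */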
static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, tables, start;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
                 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

        /* RED-PEN putting page tables only on node 0 could
           cause a hotspot and fill up ZONE_DMA. The page tables
           need roughly 0.5KB per GB. */
        start = 0x8000;
        table_start = find_e820_area(start, end, tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                end, table_start << PAGE_SHIFT,
                (table_start << PAGE_SHIFT) + tables);
}

/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the memory
         * mapped.  Unfortunately this is done currently before the nodes are
         * discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end);

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pgd_t *pgd = pgd_offset_k(start);
                pud_t *pud;

                if (after_bootmem)
                        pud = pud_offset(pgd, start & PGDIR_MASK);
                else
                        pud = alloc_low_page(&pud_phys);

                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                if (!after_bootmem)
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
                unmap_low_page(pud);
        }

        if (!after_bootmem)
                mmu_cr4_features = read_cr4();
        __flush_tlb_all();
}

#ifndef CONFIG_NUMA
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        memory_present(0, 0, end_pfn);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;
                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR
               "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is added always to NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        init_memory_mapping(start, (start + size - 1));

        ret = __add_pages(zone, start_pfn, nr_pages);
        if (ret)
                goto error;

        return ret;
error:
        printk("%s: Problem encountered in __add_pages!\n", __func__);
        return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
        return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;

void __init mem_init(void)
{
        long codesize, reservedpages, datasize, initsize;

        pci_iommu_alloc();

        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
        totalram_pages = numa_free_all_bootmem();
#else
        totalram_pages = free_all_bootmem();
#endif
        reservedpages = end_pfn - totalram_pages -
                                        absent_pages_in_range(0, end_pfn);

        after_bootmem = 1;

        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                                 VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                end_pfn << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10);
}

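/*
 * Return the pages in [begin, end) to the page allocator, poisoning them
 * first so that stale uses of freed init memory are caught.
 */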
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        if (begin >= end)
                return;

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                        POISON_FREE_INITMEM, PAGE_SIZE);
                if (addr >= __START_KERNEL_map)
                        change_page_attr_addr(addr, 1, __pgprot(0));
                free_page(addr);
                totalram_pages++;
        }
        if (addr > __START_KERNEL_map)
                global_flush_tlb();
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
        unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() > 1)
                start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
        start = (unsigned long)__start_rodata;
#endif

        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
        if (end <= start)
                return;

        change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);

        /*
         * change_page_attr_addr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif

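/*
 * Reserve a physical range in the bootmem allocator (per node when NUMA is
 * enabled) and account ranges below the DMA limit against the ZONE_DMA
 * reserve.
 */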
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
#endif
        unsigned long pfn = phys >> PAGE_SHIFT;
        if (pfn >= end_pfn) {
                /* This can happen with kdump kernels when accessing firmware
                   tables. */
                if (pfn < end_pfn_map)
                        return;
                printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
                                phys, len);
                return;
        }

        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
        if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                dma_reserve += len / PAGE_SIZE;
                set_dma_reserve(dma_reserve);
        }
}

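/*
 * Walk the kernel page tables and return non-zero if addr is a canonical
 * address backed by a valid pfn, handling 2MB mappings at the PMD level.
 */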
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
        .vm_page_prot = PAGE_READONLY_EXEC,
        .vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);
        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
        if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
                return "[vdso]";
        if (vma == &gate_vma)
                return "[vsyscall]";
        return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
int __meminit vmemmap_populate(struct page *start_page,
                                                unsigned long size, int node)
{
        unsigned long addr = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        for (; addr < end; addr = next) {
                next = pmd_addr_end(addr, end);

                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;
                pud = vmemmap_pud_populate(pgd, addr, node);
                if (!pud)
                        return -ENOMEM;

                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        pte_t entry;
                        void *p = vmemmap_alloc_block(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;

                        entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
                        set_pmd(pmd, __pmd(pte_val(entry)));

                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
                                addr, addr + PMD_SIZE - 1, p, node);
                } else
                        vmemmap_verify((pte_t *)pmd, node, addr, next);
        }

        return 0;
}
#endif