/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space, so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
        long i, total = 0, reserved = 0;
        long shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        /* this loop can take a while with 256 GB and 4k pages
                           so update the NMI watchdog */
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
                                touch_nmi_watchdog();
                        }
                        if (!pfn_valid(pgdat->node_start_pfn + i))
                                continue;
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk(KERN_INFO "%ld pages of RAM\n", total);
        printk(KERN_INFO "%ld reserved pages\n", reserved);
        printk(KERN_INFO "%ld pages shared\n", shared);
        printk(KERN_INFO "%ld pages swap cached\n", cached);
}

int after_bootmem;

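/*
 * Return a zeroed page for page-table use: from the bootmem allocator
 * while still booting, from the page allocator afterwards.
 */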
static __init void *spp_getpage(void)
{
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}

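/*
 * Map a single kernel virtual address to a physical address, allocating
 * any missing intermediate page-table levels on the way down.  Used by
 * __set_fixmap() below.
 */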
static __init void set_pte_phys(unsigned long vaddr,
                         unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

unsigned long __meminitdata table_start, table_end;

static __meminit void *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;
        void *adr;

        if (after_bootmem) {
                adr = (void *)get_zeroed_page(GFP_ATOMIC);
                *phys = __pa(adr);
                return adr;
        }

        if (pfn >= end_pfn)
                panic("alloc_low_page: ran out of memory");

        adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
        *phys  = pfn * PAGE_SIZE;
        return adr;
}

static __meminit void unmap_low_page(void *adr)
{

        if (after_bootmem)
                return;

        early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd, *last_pmd;
        int i, pmds;

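        /*
         * Round the request up to whole 2MB PMD entries: the offset of
         * 'addr' within its first PMD plus 'size', divided by PMD_SIZE
         * rounding up.
         */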
        pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        vaddr = __START_KERNEL_map;
        pmd = level2_kernel_pgt;
        last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
        for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
                for (i = 0; i < pmds; i++) {
                        if (pmd_present(pmd[i]))
                                goto next;
                }
                vaddr += addr & ~PMD_MASK;
                addr &= PMD_MASK;
                for (i = 0; i < pmds; i++, addr += PMD_SIZE)
                        set_pmd(pmd + i, __pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
                __flush_tlb();
                return (void *)vaddr;
        next:
                ;
        }
        printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
        return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd;
        int i, pmds;

        vaddr = (unsigned long)addr;
        pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        pmd = level2_kernel_pgt + pmd_index(vaddr);
        for (i = 0; i < pmds; i++)
                pmd_clear(pmd + i);
        __flush_tlb();
}

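/*
 * Fill one PMD page with 2MB (PSE) kernel mappings for the direct map,
 * starting at 'address' and stopping at 'end'.  Entries that are already
 * populated are left alone.
 */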
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
        int i = pmd_index(address);

        for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                unsigned long entry;
                pmd_t *pmd = pmd_page + pmd_index(address);

                if (address >= end) {
                        if (!after_bootmem)
                                for (; i < PTRS_PER_PMD; i++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                        break;
                }

                if (pmd_val(*pmd))
                        continue;

                entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
                entry &= __supported_pte_mask;
                set_pmd(pmd, __pmd(entry));
        }
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, 0);
        spin_lock(&init_mm.page_table_lock);
        phys_pmd_init(pmd, address, end);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
}

static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
        int i = pud_index(addr);

        for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;

                if (addr >= end)
                        break;

                if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }

                if (pud_val(*pud)) {
                        phys_pmd_update(pud, addr, end);
                        continue;
                }

                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
                phys_pmd_init(pmd, addr, end);
                spin_unlock(&init_mm.page_table_lock);
                unmap_low_page(pmd);
        }
        __flush_tlb();
}

static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, tables, start;

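        /*
         * Worst-case estimate: one pud_t per PUD_SIZE and one pmd_t per
         * PMD_SIZE of mapped address space, each table rounded up to a
         * whole page.
         */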
        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
                 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

        /* RED-PEN putting page tables only on node 0 could
           cause a hotspot and fill up ZONE_DMA. The page tables
           need roughly 0.5KB per GB. */
        start = 0x8000;
        table_start = find_e820_area(start, end, tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                end, table_start << PAGE_SHIFT,
                (table_start << PAGE_SHIFT) + tables);
}

/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the memory
         * mapped.  Unfortunately this is done currently before the nodes are
         * discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end);

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

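        /*
         * Walk the range one pgd entry (PGDIR_SIZE) at a time, filling in
         * a pud page for each chunk.
         */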
        for (; start < end; start = next) {
                unsigned long pud_phys;
                pgd_t *pgd = pgd_offset_k(start);
                pud_t *pud;

                if (after_bootmem)
                        pud = pud_offset(pgd, start & PGDIR_MASK);
                else
                        pud = alloc_low_page(&pud_phys);

                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                if (!after_bootmem)
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
                unmap_low_page(pud);
        }

        if (!after_bootmem)
                mmu_cr4_features = read_cr4();
        __flush_tlb_all();
}

#ifndef CONFIG_NUMA
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        memory_present(0, 0, end_pfn);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;
                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR
               "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        init_memory_mapping(start, (start + size - 1));

        ret = __add_pages(zone, start_pfn, nr_pages);
        if (ret)
                goto error;

        return ret;
error:
        printk("%s: Problem encountered in __add_pages!\n", __func__);
        return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
        return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
 * Memory hotadd without sparsemem. The mem_maps have been allocated in advance,
 * just online the pages.
 */
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
        int err = -EIO;
        unsigned long pfn;
        unsigned long total = 0, mem = 0;
        for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
                if (pfn_valid(pfn)) {
                        online_page(pfn_to_page(pfn));
                        err = 0;
                        mem++;
                }
                total++;
        }
        if (!err) {
                z->spanned_pages += total;
                z->present_pages += mem;
                z->zone_pgdat->node_spanned_pages += total;
                z->zone_pgdat->node_present_pages += mem;
        }
        return err;
}
#endif

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;

void __init mem_init(void)
{
        long codesize, reservedpages, datasize, initsize;

        pci_iommu_alloc();

        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
        totalram_pages = numa_free_all_bootmem();
#else
        totalram_pages = free_all_bootmem();
#endif
        reservedpages = end_pfn - totalram_pages -
                                        absent_pages_in_range(0, end_pfn);

        after_bootmem = 1;

        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                                 VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                end_pfn << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10);
}

void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        if (begin >= end)
                return;

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
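                /* Poison the page so stale uses of freed init memory stand out. */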
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                        POISON_FREE_INITMEM, PAGE_SIZE);
                if (addr >= __START_KERNEL_map)
                        change_page_attr_addr(addr, 1, __pgprot(0));
                free_page(addr);
                totalram_pages++;
        }
        if (addr > __START_KERNEL_map)
                global_flush_tlb();
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
        unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() > 1)
                start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
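        /* Kprobes patches kernel text at runtime, so only protect .rodata. */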
        start = (unsigned long)__start_rodata;
#endif

        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
        if (end <= start)
                return;

        change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);

        /*
         * change_page_attr_addr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif

void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
#endif
        unsigned long pfn = phys >> PAGE_SHIFT;
        if (pfn >= end_pfn) {
                /* This can happen with kdump kernels when accessing firmware
                   tables. */
                if (pfn < end_pfn_map)
                        return;
                printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
                                phys, len);
                return;
        }

        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
        if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                dma_reserve += len / PAGE_SIZE;
                set_dma_reserve(dma_reserve);
        }
}

int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

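        /* Reject non-canonical addresses: the bits above the virtual
           address width must be all zeros or all ones (sign extension). */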
        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
        .vm_page_prot = PAGE_READONLY_EXEC,
        .vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);
        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
        if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
                return "[vdso]";
        if (vma == &gate_vma)
                return "[vsyscall]";
        return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
int __meminit vmemmap_populate(struct page *start_page,
                                                unsigned long size, int node)
{
        unsigned long addr = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        for (; addr < end; addr = next) {
                next = pmd_addr_end(addr, end);

                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;
                pud = vmemmap_pud_populate(pgd, addr, node);
                if (!pud)
                        return -ENOMEM;

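                /*
                 * Each missing PMD entry gets a node-local 2MB block and is
                 * mapped with a single huge-page PTE.
                 */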
                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        pte_t entry;
                        void *p = vmemmap_alloc_block(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;

                        entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
                        mk_pte_huge(entry);
                        set_pmd(pmd, __pmd(pte_val(entry)));

                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
                                addr, addr + PMD_SIZE - 1, p, node);
                } else
                        vmemmap_verify((pte_t *)pmd, node, addr, next);
        }

        return 0;
}
#endif