/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical space, so we can cache the location of the first one and
 * move around without checking the pgd every time.
 */

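/*
 * Dump a summary of memory usage to the kernel log: free areas, free
 * swap, and per-node counts of reserved, shared and swap-cached pages.
 */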
void show_mem(void)
{
        long i, total = 0, reserved = 0;
        long shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        /* this loop can take a while with 256 GB and 4k pages
                           so update the NMI watchdog */
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
                                touch_nmi_watchdog();
                        }
                        if (!pfn_valid(pgdat->node_start_pfn + i))
                                continue;
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk(KERN_INFO "%lu pages of RAM\n", total);
        printk(KERN_INFO "%lu reserved pages\n", reserved);
        printk(KERN_INFO "%lu pages shared\n", shared);
        printk(KERN_INFO "%lu pages swap cached\n", cached);
}

int after_bootmem;

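/*
 * Allocate a zeroed page for building page tables: from the bootmem
 * allocator during early boot, from the page allocator afterwards.
 */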
static __init void *spp_getpage(void)
{
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem ? "after bootmem" : "");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}

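/*
 * Install a single kernel PTE mapping vaddr to phys, allocating any
 * missing intermediate page-table levels along the way.
 */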
static __init void set_pte_phys(unsigned long vaddr,
                         unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

unsigned long __meminitdata table_start, table_end;

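/*
 * Allocate a zeroed page for the early direct-mapping page tables.
 * Before bootmem is up the page comes from the range reserved by
 * find_early_table_space() and is temporarily mapped with
 * early_ioremap(); unmap_low_page() undoes that mapping.
 */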
static __meminit void *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;
        void *adr;

        if (after_bootmem) {
                adr = (void *)get_zeroed_page(GFP_ATOMIC);
                *phys = __pa(adr);
                return adr;
        }

        if (pfn >= end_pfn)
                panic("alloc_low_page: ran out of memory");

        adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
        *phys = pfn * PAGE_SIZE;
        return adr;
}

static __meminit void unmap_low_page(void *adr)
{
        if (after_bootmem)
                return;

        early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd, *last_pmd;
        int i, pmds;

        pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        vaddr = __START_KERNEL_map;
        pmd = level2_kernel_pgt;
        last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
        for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
                for (i = 0; i < pmds; i++) {
                        if (pmd_present(pmd[i]))
                                goto next;
                }
                vaddr += addr & ~PMD_MASK;
                addr &= PMD_MASK;
                for (i = 0; i < pmds; i++, addr += PMD_SIZE)
                        set_pmd(pmd + i, __pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
                __flush_tlb();
                return (void *)vaddr;
        next:
                ;
        }
        printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
        return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd;
        int i, pmds;

        vaddr = (unsigned long)addr;
        pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        pmd = level2_kernel_pgt + pmd_index(vaddr);
        for (i = 0; i < pmds; i++)
                pmd_clear(pmd + i);
        __flush_tlb();
}

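/*
 * Create 2MB-page mappings in one PMD page table for the physical
 * range [address, end).  Already-present entries are left alone.
 */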
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
        int i = pmd_index(address);

        for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                unsigned long entry;
                pmd_t *pmd = pmd_page + pmd_index(address);

                if (address >= end) {
                        if (!after_bootmem)
                                for (; i < PTRS_PER_PMD; i++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                        break;
                }

                if (pmd_val(*pmd))
                        continue;

                entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
                entry &= __supported_pte_mask;
                set_pmd(pmd, __pmd(entry));
        }
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, 0);

        spin_lock(&init_mm.page_table_lock);
        phys_pmd_init(pmd, address, end);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
}

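/*
 * Populate one PUD page table for the physical range [addr, end),
 * allocating PMD tables as needed and skipping ranges that the e820
 * map does not report as usable memory.
 */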
static void __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
        int i = pud_index(addr);

        for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;

                if (addr >= end)
                        break;

                if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }

                if (pud_val(*pud)) {
                        phys_pmd_update(pud, addr, end);
                        continue;
                }

                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
                phys_pmd_init(pmd, addr, end);
                spin_unlock(&init_mm.page_table_lock);
                unmap_low_page(pmd);
        }
        __flush_tlb();
}

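/*
 * Estimate how much memory the direct-mapping page tables will need for
 * memory up to 'end' and reserve a physically contiguous area for them
 * from the e820 map.
 */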
static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, tables, start;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
                 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

        /* RED-PEN putting page tables only on node 0 could
           cause a hotspot and fill up ZONE_DMA. The page tables
           need roughly 0.5KB per GB. */
        start = 0x8000;
        table_start = find_e820_area(start, end, tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                end, table_start << PAGE_SHIFT,
                (table_start << PAGE_SHIFT) + tables);
}

/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from
   the physical memory. To access them they are temporarily mapped. */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the
         * memory mapped.  Unfortunately this is done currently before the
         * nodes are discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end);

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pgd_t *pgd = pgd_offset_k(start);
                pud_t *pud;

                if (after_bootmem)
                        pud = pud_offset(pgd, start & PGDIR_MASK);
                else
                        pud = alloc_low_page(&pud_phys);

                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                if (!after_bootmem)
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
                unmap_low_page(pud);
        }

        if (!after_bootmem)
                mmu_cr4_features = read_cr4();
        __flush_tlb_all();
}

#ifndef CONFIG_NUMA
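/*
 * Non-NUMA zone setup: fill in the zone size limits (DMA, DMA32, NORMAL)
 * and initialize sparsemem and the free-area data for the single node.
 */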
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];

        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        memory_present(0, 0, end_pfn);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;
                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR
               "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        init_memory_mapping(start, (start + size - 1));

        ret = __add_pages(zone, start_pfn, nr_pages);
        if (ret)
                goto error;

        return ret;
error:
        printk("%s: Problem encountered in __add_pages!\n", __func__);
        return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
        return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
 * Memory hotadd without sparsemem. The mem_maps have been allocated in
 * advance, just online the pages.
 */
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
        int err = -EIO;
        unsigned long pfn;
        unsigned long total = 0, mem = 0;

        for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
                if (pfn_valid(pfn)) {
                        online_page(pfn_to_page(pfn));
                        err = 0;
                        mem++;
                }
                total++;
        }
        if (!err) {
                z->spanned_pages += total;
                z->present_pages += mem;
                z->zone_pgdat->node_spanned_pages += total;
                z->zone_pgdat->node_present_pages += mem;
        }
        return err;
}
#endif

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;

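/*
 * Late memory setup: release bootmem to the page allocator, account for
 * reserved pages, register /proc/kcore regions and print the memory
 * summary line.
 */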
void __init mem_init(void)
{
        long codesize, reservedpages, datasize, initsize;

        pci_iommu_alloc();

        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
        totalram_pages = numa_free_all_bootmem();
#else
        totalram_pages = free_all_bootmem();
#endif
        reservedpages = end_pfn - totalram_pages -
                                        absent_pages_in_range(0, end_pfn);

        after_bootmem = 1;

        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                   VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                end_pfn << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10);
}

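/*
 * Return an already-mapped range of init pages to the page allocator,
 * poisoning the contents and dropping any kernel-text alias mapping.
 */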
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        if (begin >= end)
                return;

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                        POISON_FREE_INITMEM, PAGE_SIZE);
                if (addr >= __START_KERNEL_map)
                        change_page_attr_addr(addr, 1, __pgprot(0));
                free_page(addr);
                totalram_pages++;
        }
        if (addr > __START_KERNEL_map)
                global_flush_tlb();
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

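/*
 * Write-protect the kernel's read-only data (and, where possible, the
 * text) by remapping the range with PAGE_KERNEL_RO.
 */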
void mark_rodata_ro(void)
{
        unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() > 1)
                start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
        start = (unsigned long)__start_rodata;
#endif

        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
        if (end <= start)
                return;

        change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);

        /*
         * change_page_attr_addr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif

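/*
 * Reserve a physical range in the bootmem allocator (per node when NUMA
 * is enabled) and account any part of it that falls into the DMA zone.
 */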
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
#endif
        unsigned long pfn = phys >> PAGE_SHIFT;

        if (pfn >= end_pfn) {
                /* This can happen with kdump kernels when accessing firmware
                   tables. */
                if (pfn < end_pfn_map)
                        return;
                printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
                                phys, len);
                return;
        }

        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
        if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                dma_reserve += len / PAGE_SIZE;
                set_dma_reserve(dma_reserve);
        }
}

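/*
 * Walk the kernel page tables to check whether a virtual address is
 * actually mapped (handles 2MB large pages as well).
 */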
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
        .vm_page_prot = PAGE_READONLY_EXEC,
        .vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);
        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
        if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
                return "[vdso]";
        if (vma == &gate_vma)
                return "[vsyscall]";
        return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
int __meminit vmemmap_populate(struct page *start_page,
                                                unsigned long size, int node)
{
        unsigned long addr = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        for (; addr < end; addr = next) {
                next = pmd_addr_end(addr, end);

                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;
                pud = vmemmap_pud_populate(pgd, addr, node);
                if (!pud)
                        return -ENOMEM;

                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        pte_t entry;
                        void *p = vmemmap_alloc_block(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;

                        entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
                        mk_pte_huge(entry);
                        set_pmd(pmd, __pmd(pte_val(entry)));

                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
                                addr, addr + PMD_SIZE - 1, p, node);
                } else
                        vmemmap_verify((pte_t *)pmd, node, addr, next);
        }

        return 0;
}
#endif