From fe770bf0310d90b3b033c19044d45b7de5f2041c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 17 Apr 2008 17:40:45 +0200 Subject: [PATCH] x86: clean up the page table dumper and add 32-bit support Clean up the page table dumper (fix boundary conditions, table driven address ranges, some formatting changes since it is no longer using the kernel log but a separate virtual file), and generalize to 32 bits. [ mingo@elte.hu: x86: fix the pagetable dumper ] Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 2 +- arch/x86/mm/Makefile | 2 +- arch/x86/mm/dump_pagetables.c | 301 ++++++++++++++++++++-------------- 3 files changed, 179 insertions(+), 126 deletions(-) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index cb7002eca88..7ce8e702566 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -56,7 +56,7 @@ config DEBUG_PER_CPU_MAPS config X86_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" - depends on X86_64 + depends on DEBUG_KERNEL select DEBUG_FS help Say Y here if you want to show the kernel pagetable layout in a diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 28632f42ca6..9ab9889863f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -3,6 +3,7 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o obj-$(CONFIG_X86_32) += pgtable_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o @@ -12,5 +13,4 @@ else obj-$(CONFIG_NUMA) += numa_64.o obj-$(CONFIG_K8_NUMA) += k8topology_64.o obj-$(CONFIG_ACPI_NUMA) += srat_64.o -obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o endif diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 5e7f6430c27..6d840338f80 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -12,9 +12,10 @@ * of the License. */ +#include +#include #include #include -#include #include @@ -28,73 +29,107 @@ struct pg_state { pgprot_t current_prot; unsigned long start_address; unsigned long current_address; - int printed_vmalloc; - int printed_modules; - int printed_vmemmap; - int printed_highmap; + const struct addr_marker *marker; }; -/* Multipliers for offsets within the PTEs */ -#define LEVEL_4_MULT (PAGE_SIZE) -#define LEVEL_3_MULT (512UL * LEVEL_4_MULT) -#define LEVEL_2_MULT (512UL * LEVEL_3_MULT) -#define LEVEL_1_MULT (512UL * LEVEL_2_MULT) +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +/* Address space markers hints */ +static struct addr_marker address_markers[] = { + { 0, "User Space" }, +#ifdef CONFIG_X86_64 + { 0x8000000000000000UL, "Kernel Space" }, + { 0xffff810000000000UL, "Low Kernel Mapping" }, + { VMALLOC_START, "vmalloc() Area" }, + { MODULES_VADDR, "Modules" }, + { MODULES_END, "End Modules" }, + { VMEMMAP_START, "Vmemmap" }, + { __START_KERNEL_map, "High Kernel Mapping" }, +#else + { PAGE_OFFSET, "Kernel Mapping" }, + { 0/* VMALLOC_START */, "vmalloc() Area" }, + { 0/*VMALLOC_END*/, "vmalloc() End" }, +# ifdef CONFIG_HIGHMEM + { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, +# endif + { 0/*FIXADDR_START*/, "Fixmap Area" }, +#endif + { -1, NULL } /* End of list */ +}; +/* Multipliers for offsets within the PTEs */ +#define PTE_LEVEL_MULT (PAGE_SIZE) +#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) +#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT) +#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT) /* * Print a readable form of a pgprot_t to the seq_file */ static void printk_prot(struct seq_file *m, pgprot_t prot, int level) { - unsigned long pr = pgprot_val(prot); - - if (pr & _PAGE_USER) - seq_printf(m, "USR "); - else - seq_printf(m, " "); - if (pr & _PAGE_RW) - seq_printf(m, "RW "); - else - seq_printf(m, "ro "); - if (pr & _PAGE_PWT) - seq_printf(m, "PWT "); - else - seq_printf(m, " "); - if (pr & _PAGE_PCD) - seq_printf(m, "PCD "); - else - seq_printf(m, " "); - - /* Bit 9 has a different meaning on level 3 vs 4 */ - if (level <= 3) { - if (pr & _PAGE_PSE) - seq_printf(m, "PSE "); + pgprotval_t pr = pgprot_val(prot); + static const char * const level_name[] = + { "cr3", "pgd", "pud", "pmd", "pte" }; + + if (!pgprot_val(prot)) { + /* Not present */ + seq_printf(m, " "); + } else { + if (pr & _PAGE_USER) + seq_printf(m, "USR "); else seq_printf(m, " "); - } else { - if (pr & _PAGE_PAT) - seq_printf(m, "pat "); + if (pr & _PAGE_RW) + seq_printf(m, "RW "); + else + seq_printf(m, "ro "); + if (pr & _PAGE_PWT) + seq_printf(m, "PWT "); + else + seq_printf(m, " "); + if (pr & _PAGE_PCD) + seq_printf(m, "PCD "); else seq_printf(m, " "); + + /* Bit 9 has a different meaning on level 3 vs 4 */ + if (level <= 3) { + if (pr & _PAGE_PSE) + seq_printf(m, "PSE "); + else + seq_printf(m, " "); + } else { + if (pr & _PAGE_PAT) + seq_printf(m, "pat "); + else + seq_printf(m, " "); + } + if (pr & _PAGE_GLOBAL) + seq_printf(m, "GLB "); + else + seq_printf(m, " "); + if (pr & _PAGE_NX) + seq_printf(m, "NX "); + else + seq_printf(m, "x "); } - if (pr & _PAGE_GLOBAL) - seq_printf(m, "GLB "); - else - seq_printf(m, " "); - if (pr & _PAGE_NX) - seq_printf(m, "NX "); - else - seq_printf(m, "x "); + seq_printf(m, "%s\n", level_name[level]); } /* - * Sign-extend the 48 bit address to 64 bit + * On 64 bits, sign-extend the 48 bit address to 64 bit */ -static unsigned long sign_extend(unsigned long u) +static unsigned long normalize_addr(unsigned long u) { - if (u>>47) - u = u | (0xffffUL << 48); +#ifdef CONFIG_X86_64 + return (signed long)(u << 16) >> 16; +#else return u; +#endif } /* @@ -103,81 +138,62 @@ static unsigned long sign_extend(unsigned long u) * print what we collected so far. */ static void note_page(struct seq_file *m, struct pg_state *st, - pgprot_t new_prot, int level) + pgprot_t new_prot, int level) { - unsigned long prot, cur; + pgprotval_t prot, cur; + static const char units[] = "KMGTPE"; /* * If we have a "break" in the series, we need to flush the state that - * we have now. "break" is either changing perms or a different level. + * we have now. "break" is either changing perms, levels or + * address space marker. */ prot = pgprot_val(new_prot) & ~(PTE_MASK); cur = pgprot_val(st->current_prot) & ~(PTE_MASK); - if ((prot != cur || level != st->level) && - st->current_address != st->start_address) { - char unit = 'K'; + if (!st->level) { + /* First entry */ + st->current_prot = new_prot; + st->level = level; + st->marker = address_markers; + seq_printf(m, "---[ %s ]---\n", st->marker->name); + } else if (prot != cur || level != st->level || + st->current_address >= st->marker[1].start_address) { + const char *unit = units; unsigned long delta; - /* - * We print markers for special areas of address space, - * such as the start of vmalloc space etc. - * This helps in the interpretation. - */ - if (!st->printed_vmalloc && - st->start_address >= VMALLOC_START) { - seq_printf(m, "---[ VMALLOC SPACE ]---\n"); - st->printed_vmalloc = 1; - } - if (!st->printed_modules && - st->start_address >= MODULES_VADDR) { - seq_printf(m, "---[ MODULES SPACE ]---\n"); - st->printed_modules = 1; - } - if (st->printed_modules < 2 && - st->start_address >= MODULES_END) { - seq_printf(m, "---[ END MODULES SPACE ]---\n"); - st->printed_modules = 2; - } - if (!st->printed_vmemmap && - st->start_address >= VMEMMAP_START) { - seq_printf(m, "---[ VMMEMMAP SPACE ]---\n"); - st->printed_vmemmap = 1; - } - if (!st->printed_highmap && - st->start_address >= __START_KERNEL_map) { - seq_printf(m, "---[ HIGH KERNEL MAPPING ]---\n"); - st->printed_highmap = 1; - } - /* * Now print the actual finished series */ - seq_printf(m, "[ %016lx - %016lx ", - st->start_address, st->current_address); + seq_printf(m, "0x%p-0x%p ", + (void *)st->start_address, + (void *)st->current_address); delta = (st->current_address - st->start_address) >> 10; - if ((delta & 1023) == 0) { - delta = delta >> 10; - unit = 'M'; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; } - if (pgprot_val(st->current_prot)) { - seq_printf(m, "Size %9lu%cb ", delta, unit); - printk_prot(m, st->current_prot, st->level); - seq_printf(m, "L%i]\n", st->level); - } else { - /* don't print protections on non-present memory */ - seq_printf(m, "%14lu%cb", delta, unit); - seq_printf(m, " L%i]\n", - st->level); + seq_printf(m, "%9lu%c ", delta, *unit); + printk_prot(m, st->current_prot, st->level); + + /* + * We print markers for special areas of address space, + * such as the start of vmalloc space etc. + * This helps in the interpretation. + */ + if (st->current_address >= st->marker[1].start_address) { + st->marker++; + seq_printf(m, "---[ %s ]---\n", st->marker->name); } + st->start_address = st->current_address; st->current_prot = new_prot; st->level = level; - }; + } } -static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr, +static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P) { int i; @@ -187,14 +203,15 @@ static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr, for (i = 0; i < PTRS_PER_PTE; i++) { pgprot_t prot = pte_pgprot(*start); - st->current_address = sign_extend(P + i * LEVEL_4_MULT); + st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); note_page(m, st, prot, 4); start++; } } +#if PTRS_PER_PMD > 1 -static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr, +static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P) { int i; @@ -202,25 +219,30 @@ static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr, start = (pmd_t *) pud_page_vaddr(addr); for (i = 0; i < PTRS_PER_PMD; i++) { - st->current_address = sign_extend(P + i * LEVEL_3_MULT); + st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); if (!pmd_none(*start)) { - unsigned long prot; + pgprotval_t prot = pmd_val(*start) & ~PTE_MASK; - prot = pmd_val(*start) & ~(PTE_MASK); - /* Deal with 2Mb pages */ - if (pmd_large(*start)) + if (pmd_large(*start) || !pmd_present(*start)) note_page(m, st, __pgprot(prot), 3); else - walk_level_4(m, st, *start, - P + i * LEVEL_3_MULT); + walk_pte_level(m, st, *start, + P + i * PMD_LEVEL_MULT); } else note_page(m, st, __pgprot(0), 3); start++; } } +#else +#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p) +#define pud_large(a) pmd_large(__pmd(pud_val(a))) +#define pud_none(a) pmd_none(__pmd(pud_val(a))) +#endif -static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr, +#if PTRS_PER_PUD > 1 + +static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P) { int i; @@ -229,16 +251,15 @@ static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr, start = (pud_t *) pgd_page_vaddr(addr); for (i = 0; i < PTRS_PER_PUD; i++) { + st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); if (!pud_none(*start)) { - unsigned long prot; + pgprotval_t prot = pud_val(*start) & ~PTE_MASK; - prot = pud_val(*start) & ~(PTE_MASK); - /* Deal with 1Gb pages */ - if (pud_large(*start)) + if (pud_large(*start) || !pud_present(*start)) note_page(m, st, __pgprot(prot), 2); else - walk_level_3(m, st, *start, - P + i * LEVEL_2_MULT); + walk_pmd_level(m, st, *start, + P + i * PUD_LEVEL_MULT); } else note_page(m, st, __pgprot(0), 2); @@ -246,28 +267,48 @@ static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr, } } -static void walk_level_1(struct seq_file *m) +#else +#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p) +#define pgd_large(a) pud_large(__pud(pgd_val(a))) +#define pgd_none(a) pud_none(__pud(pgd_val(a))) +#endif + +static void walk_pgd_level(struct seq_file *m) { +#ifdef CONFIG_X86_64 pgd_t *start = (pgd_t *) &init_level4_pgt; +#else + pgd_t *start = swapper_pg_dir; +#endif int i; struct pg_state st; memset(&st, 0, sizeof(st)); - st.level = 1; for (i = 0; i < PTRS_PER_PGD; i++) { - if (!pgd_none(*start)) - walk_level_2(m, &st, *start, i * LEVEL_1_MULT); - else + st.current_address = normalize_addr(i * PGD_LEVEL_MULT); + if (!pgd_none(*start)) { + pgprotval_t prot = pgd_val(*start) & ~PTE_MASK; + + if (pgd_large(*start) || !pgd_present(*start)) + note_page(m, &st, __pgprot(prot), 1); + else + walk_pud_level(m, &st, *start, + i * PGD_LEVEL_MULT); + } else note_page(m, &st, __pgprot(0), 1); + start++; } + + /* Flush out the last page */ + st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); + note_page(m, &st, __pgprot(0), 0); } static int ptdump_show(struct seq_file *m, void *v) { - seq_puts(m, "Kernel pagetable dump\n"); - walk_level_1(m); + walk_pgd_level(m); return 0; } @@ -287,6 +328,18 @@ int pt_dump_init(void) { struct dentry *pe; +#ifdef CONFIG_X86_32 + /* Not a compile-time constant on x86-32 */ + address_markers[2].start_address = VMALLOC_START; + address_markers[3].start_address = VMALLOC_END; +# ifdef CONFIG_HIGHMEM + address_markers[4].start_address = PKMAP_BASE; + address_markers[5].start_address = FIXADDR_START; +# else + address_markers[4].start_address = FIXADDR_START; +# endif +#endif + pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, &ptdump_fops); if (!pe) -- 2.41.0