Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fe770bf0 authored by H. Peter Anvin, committed by Ingo Molnar
Browse files

x86: clean up the page table dumper and add 32-bit support



Clean up the page table dumper (fix boundary conditions, table driven
address ranges, some formatting changes since it is no longer using
the kernel log but a separate virtual file), and generalize to 32
bits.

[ mingo@elte.hu: x86: fix the pagetable dumper ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 926e5392
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -56,7 +56,7 @@ config DEBUG_PER_CPU_MAPS


config X86_PTDUMP
config X86_PTDUMP
	bool "Export kernel pagetable layout to userspace via debugfs"
	bool "Export kernel pagetable layout to userspace via debugfs"
	depends on X86_64
	depends on DEBUG_KERNEL
	select DEBUG_FS
	select DEBUG_FS
	help
	help
	  Say Y here if you want to show the kernel pagetable layout in a
	  Say Y here if you want to show the kernel pagetable layout in a
+1 −1
Original line number Original line Diff line number Diff line
@@ -3,6 +3,7 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o
obj-$(CONFIG_X86_32)		+= pgtable_32.o
obj-$(CONFIG_X86_32)		+= pgtable_32.o


obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
obj-$(CONFIG_X86_PTDUMP)	+= dump_pagetables.o


obj-$(CONFIG_HIGHMEM)		+= highmem_32.o
obj-$(CONFIG_HIGHMEM)		+= highmem_32.o


@@ -12,5 +13,4 @@ else
obj-$(CONFIG_NUMA)		+= numa_64.o
obj-$(CONFIG_NUMA)		+= numa_64.o
obj-$(CONFIG_K8_NUMA)		+= k8topology_64.o
obj-$(CONFIG_K8_NUMA)		+= k8topology_64.o
obj-$(CONFIG_ACPI_NUMA)		+= srat_64.o
obj-$(CONFIG_ACPI_NUMA)		+= srat_64.o
obj-$(CONFIG_X86_PTDUMP)	+= dump_pagetables.o
endif
endif
+177 −124
Original line number Original line Diff line number Diff line
@@ -12,9 +12,10 @@
 * of the License.
 * of the License.
 */
 */


#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>


#include <asm/pgtable.h>
#include <asm/pgtable.h>


@@ -28,26 +29,56 @@ struct pg_state {
	pgprot_t current_prot;
	pgprot_t current_prot;
	unsigned long start_address;
	unsigned long start_address;
	unsigned long current_address;
	unsigned long current_address;
	int printed_vmalloc;
	const struct addr_marker *marker;
	int printed_modules;
	int printed_vmemmap;
	int printed_highmap;
};
};


/* Multipliers for offsets within the PTEs */
struct addr_marker {
#define LEVEL_4_MULT (PAGE_SIZE)
	unsigned long start_address;
#define LEVEL_3_MULT (512UL * LEVEL_4_MULT)
	const char *name;
#define LEVEL_2_MULT (512UL * LEVEL_3_MULT)
};
#define LEVEL_1_MULT (512UL * LEVEL_2_MULT)


/* Address space marker hints */
static struct addr_marker address_markers[] = {
	{ 0, "User Space" },
#ifdef CONFIG_X86_64
	{ 0x8000000000000000UL, "Kernel Space" },
	{ 0xffff810000000000UL, "Low Kernel Mapping" },
	{ VMALLOC_START,        "vmalloc() Area" },
	{ MODULES_VADDR,        "Modules" },
	{ MODULES_END,          "End Modules" },
	{ VMEMMAP_START,        "Vmemmap" },
	{ __START_KERNEL_map,   "High Kernel Mapping" },
#else
	{ PAGE_OFFSET,          "Kernel Mapping" },
	{ 0/* VMALLOC_START */, "vmalloc() Area" },
	{ 0/*VMALLOC_END*/,     "vmalloc() End" },
# ifdef CONFIG_HIGHMEM
	{ 0/*PKMAP_BASE*/,      "Persisent kmap() Area" },
# endif
	{ 0/*FIXADDR_START*/,   "Fixmap Area" },
#endif
	{ -1, NULL }		/* End of list */
};

/* Multipliers for offsets within the PTEs */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)


/*
/*
 * Print a readable form of a pgprot_t to the seq_file
 * Print a readable form of a pgprot_t to the seq_file
 */
 */
static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
{
{
	unsigned long pr = pgprot_val(prot);
	pgprotval_t pr = pgprot_val(prot);
	static const char * const level_name[] =
		{ "cr3", "pgd", "pud", "pmd", "pte" };


	if (!pgprot_val(prot)) {
		/* Not present */
		seq_printf(m, "                          ");
	} else {
		if (pr & _PAGE_USER)
		if (pr & _PAGE_USER)
			seq_printf(m, "USR ");
			seq_printf(m, "USR ");
		else
		else
@@ -86,15 +117,19 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
		else
		else
			seq_printf(m, "x  ");
			seq_printf(m, "x  ");
	}
	}
	seq_printf(m, "%s\n", level_name[level]);
}


/*
/*
 * Sign-extend the 48 bit address to 64 bit
 * On 64 bits, sign-extend the 48 bit address to 64 bit
 */
 */
static unsigned long sign_extend(unsigned long u)
static unsigned long normalize_addr(unsigned long u)
{
{
	if (u>>47)
#ifdef CONFIG_X86_64
		u = u | (0xffffUL << 48);
	return (signed long)(u << 16) >> 16;
#else
	return u;
	return u;
#endif
}
}


/*
/*
@@ -105,79 +140,60 @@ static unsigned long sign_extend(unsigned long u)
static void note_page(struct seq_file *m, struct pg_state *st,
static void note_page(struct seq_file *m, struct pg_state *st,
		      pgprot_t new_prot, int level)
		      pgprot_t new_prot, int level)
{
{
	unsigned long prot, cur;
	pgprotval_t prot, cur;
	static const char units[] = "KMGTPE";


	/*
	/*
	 * If we have a "break" in the series, we need to flush the state that
	 * If we have a "break" in the series, we need to flush the state that
	 * we have now. "break" is either changing perms or a different level.
	 * we have now. "break" is either changing perms, levels or
	 * address space marker.
	 */
	 */
	prot = pgprot_val(new_prot) & ~(PTE_MASK);
	prot = pgprot_val(new_prot) & ~(PTE_MASK);
	cur = pgprot_val(st->current_prot) & ~(PTE_MASK);
	cur = pgprot_val(st->current_prot) & ~(PTE_MASK);


	if ((prot != cur || level != st->level) &&
	if (!st->level) {
				st->current_address != st->start_address) {
		/* First entry */
		char unit = 'K';
		st->current_prot = new_prot;
		st->level = level;
		st->marker = address_markers;
		seq_printf(m, "---[ %s ]---\n", st->marker->name);
	} else if (prot != cur || level != st->level ||
		   st->current_address >= st->marker[1].start_address) {
		const char *unit = units;
		unsigned long delta;
		unsigned long delta;


		/*
		 * We print markers for special areas of address space,
		 * such as the start of vmalloc space etc.
		 * This helps in the interpretation.
		 */
		if (!st->printed_vmalloc &&
				st->start_address >= VMALLOC_START) {
			seq_printf(m, "---[ VMALLOC SPACE ]---\n");
			st->printed_vmalloc = 1;
		}
		if (!st->printed_modules &&
				st->start_address >= MODULES_VADDR) {
			seq_printf(m, "---[ MODULES SPACE ]---\n");
			st->printed_modules = 1;
		}
		if (st->printed_modules < 2 &&
				st->start_address >= MODULES_END) {
			seq_printf(m, "---[ END MODULES SPACE ]---\n");
			st->printed_modules = 2;
		}
		if (!st->printed_vmemmap &&
				st->start_address >= VMEMMAP_START) {
			seq_printf(m, "---[ VMMEMMAP SPACE ]---\n");
			st->printed_vmemmap = 1;
		}
		if (!st->printed_highmap &&
				st->start_address >= __START_KERNEL_map) {
			seq_printf(m, "---[ HIGH KERNEL MAPPING ]---\n");
			st->printed_highmap = 1;
		}

		/*
		/*
		 * Now print the actual finished series
		 * Now print the actual finished series
		 */
		 */
		seq_printf(m, "[ %016lx -  %016lx   ",
		seq_printf(m, "0x%p-0x%p   ",
				st->start_address, st->current_address);
			   (void *)st->start_address,
			   (void *)st->current_address);


		delta = (st->current_address - st->start_address) >> 10;
		delta = (st->current_address - st->start_address) >> 10;
		if ((delta & 1023) == 0) {
		while (!(delta & 1023) && unit[1]) {
			delta = delta >> 10;
			delta >>= 10;
			unit = 'M';
			unit++;
		}
		}
		if (pgprot_val(st->current_prot)) {
		seq_printf(m, "%9lu%c ", delta, *unit);
			seq_printf(m, "Size %9lu%cb ", delta, unit);
		printk_prot(m, st->current_prot, st->level);
		printk_prot(m, st->current_prot, st->level);
			seq_printf(m, "L%i]\n", st->level);

		} else {
		/*
			/* don't print protections on non-present memory */
		 * We print markers for special areas of address space,
			seq_printf(m, "%14lu%cb", delta, unit);
		 * such as the start of vmalloc space etc.
			seq_printf(m, "                           L%i]\n",
		 * This helps in the interpretation.
					st->level);
		 */
		if (st->current_address >= st->marker[1].start_address) {
			st->marker++;
			seq_printf(m, "---[ %s ]---\n", st->marker->name);
		}
		}

		st->start_address = st->current_address;
		st->start_address = st->current_address;
		st->current_prot = new_prot;
		st->current_prot = new_prot;
		st->level = level;
		st->level = level;
	};
	}
}
}


static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr,
static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
							unsigned long P)
							unsigned long P)
{
{
	int i;
	int i;
@@ -187,14 +203,15 @@ static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr,
	for (i = 0; i < PTRS_PER_PTE; i++) {
	for (i = 0; i < PTRS_PER_PTE; i++) {
		pgprot_t prot = pte_pgprot(*start);
		pgprot_t prot = pte_pgprot(*start);


		st->current_address = sign_extend(P + i * LEVEL_4_MULT);
		st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
		note_page(m, st, prot, 4);
		note_page(m, st, prot, 4);
		start++;
		start++;
	}
	}
}
}


#if PTRS_PER_PMD > 1


static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr,
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
							unsigned long P)
							unsigned long P)
{
{
	int i;
	int i;
@@ -202,25 +219,30 @@ static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr,


	start = (pmd_t *) pud_page_vaddr(addr);
	start = (pmd_t *) pud_page_vaddr(addr);
	for (i = 0; i < PTRS_PER_PMD; i++) {
	for (i = 0; i < PTRS_PER_PMD; i++) {
		st->current_address = sign_extend(P + i * LEVEL_3_MULT);
		st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
		if (!pmd_none(*start)) {
		if (!pmd_none(*start)) {
			unsigned long prot;
			pgprotval_t prot = pmd_val(*start) & ~PTE_MASK;


			prot = pmd_val(*start) & ~(PTE_MASK);
			if (pmd_large(*start) || !pmd_present(*start))
			/* Deal with 2Mb pages */
			if (pmd_large(*start))
				note_page(m, st, __pgprot(prot), 3);
				note_page(m, st, __pgprot(prot), 3);
			else
			else
				walk_level_4(m, st, *start,
				walk_pte_level(m, st, *start,
							P + i * LEVEL_3_MULT);
					       P + i * PMD_LEVEL_MULT);
		} else
		} else
			note_page(m, st, __pgprot(0), 3);
			note_page(m, st, __pgprot(0), 3);
		start++;
		start++;
	}
	}
}
}


#else
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif


static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
#if PTRS_PER_PUD > 1

static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
							unsigned long P)
							unsigned long P)
{
{
	int i;
	int i;
@@ -229,16 +251,15 @@ static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
	start = (pud_t *) pgd_page_vaddr(addr);
	start = (pud_t *) pgd_page_vaddr(addr);


	for (i = 0; i < PTRS_PER_PUD; i++) {
	for (i = 0; i < PTRS_PER_PUD; i++) {
		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
		if (!pud_none(*start)) {
		if (!pud_none(*start)) {
			unsigned long prot;
			pgprotval_t prot = pud_val(*start) & ~PTE_MASK;


			prot = pud_val(*start) & ~(PTE_MASK);
			if (pud_large(*start) || !pud_present(*start))
			/* Deal with 1Gb pages */
			if (pud_large(*start))
				note_page(m, st, __pgprot(prot), 2);
				note_page(m, st, __pgprot(prot), 2);
			else
			else
				walk_level_3(m, st, *start,
				walk_pmd_level(m, st, *start,
					P + i * LEVEL_2_MULT);
					       P + i * PUD_LEVEL_MULT);
		} else
		} else
			note_page(m, st, __pgprot(0), 2);
			note_page(m, st, __pgprot(0), 2);


@@ -246,28 +267,48 @@ static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
	}
	}
}
}


static void walk_level_1(struct seq_file *m)
#else
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
#endif

static void walk_pgd_level(struct seq_file *m)
{
{
#ifdef CONFIG_X86_64
	pgd_t *start = (pgd_t *) &init_level4_pgt;
	pgd_t *start = (pgd_t *) &init_level4_pgt;
#else
	pgd_t *start = swapper_pg_dir;
#endif
	int i;
	int i;
	struct pg_state st;
	struct pg_state st;


	memset(&st, 0, sizeof(st));
	memset(&st, 0, sizeof(st));
	st.level = 1;


	for (i = 0; i < PTRS_PER_PGD; i++) {
	for (i = 0; i < PTRS_PER_PGD; i++) {
		if (!pgd_none(*start))
		st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
			walk_level_2(m, &st, *start, i * LEVEL_1_MULT);
		if (!pgd_none(*start)) {
			pgprotval_t prot = pgd_val(*start) & ~PTE_MASK;

			if (pgd_large(*start) || !pgd_present(*start))
				note_page(m, &st, __pgprot(prot), 1);
			else
			else
				walk_pud_level(m, &st, *start,
					       i * PGD_LEVEL_MULT);
		} else
			note_page(m, &st, __pgprot(0), 1);
			note_page(m, &st, __pgprot(0), 1);

		start++;
		start++;
	}
	}

	/* Flush out the last page */
	st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
	note_page(m, &st, __pgprot(0), 0);
}
}


static int ptdump_show(struct seq_file *m, void *v)
static int ptdump_show(struct seq_file *m, void *v)
{
{
	seq_puts(m, "Kernel pagetable dump\n");
	walk_pgd_level(m);
	walk_level_1(m);
	return 0;
	return 0;
}
}


@@ -287,6 +328,18 @@ int pt_dump_init(void)
{
{
	struct dentry *pe;
	struct dentry *pe;


#ifdef CONFIG_X86_32
	/* Not a compile-time constant on x86-32 */
	address_markers[2].start_address = VMALLOC_START;
	address_markers[3].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM
	address_markers[4].start_address = PKMAP_BASE;
	address_markers[5].start_address = FIXADDR_START;
# else
	address_markers[4].start_address = FIXADDR_START;
# endif
#endif

	pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
	pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
				 &ptdump_fops);
				 &ptdump_fops);
	if (!pe)
	if (!pe)