Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 551889a6 authored by Ian Campbell's avatar Ian Campbell Committed by Thomas Gleixner
Browse files

x86: construct 32-bit boot time page tables in native format.



Specifically the boot time page tables in a CONFIG_X86_PAE=y enabled
kernel are in PAE format.

early_ioremap is updated to use the standard page table accessors.

Clear any mappings beyond max_low_pfn from the boot page tables in
native_pagetable_setup_start because the initial mappings can extend
beyond the range of physical memory and into the vmalloc area.

Derived from patches by Eric Biederman and H. Peter Anvin.

[ jeremy@goop.org: PAE swapper_pg_dir needs to be page-sized fix ]

Signed-off-by: default avatarIan Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Mika Penttilä <mika.penttila@kolumbus.fi>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
parent 185c045c
Loading
Loading
Loading
Loading
+116 −35
Original line number Diff line number Diff line
@@ -19,6 +19,10 @@
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>

/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)

/*
 * References to members of the new_cpu_data structure.
@@ -80,10 +84,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
 */
.section .text.head,"ax",@progbits
ENTRY(startup_32)
	/* check to see if KEEP_SEGMENTS flag is meaningful */
	cmpw $0x207, BP_version(%esi)
	jb 1f

	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
		us to not reload segments */
	testb $(1<<6), BP_loadflags(%esi)
@@ -92,7 +92,7 @@ ENTRY(startup_32)
/*
 * Set segments to known values.
 */
1:	lgdt boot_gdt_descr - __PAGE_OFFSET
	lgdt pa(boot_gdt_descr)
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
@@ -105,8 +105,8 @@ ENTRY(startup_32)
 */
	cld
	xorl %eax,%eax
	movl $__bss_start - __PAGE_OFFSET,%edi
	movl $__bss_stop - __PAGE_OFFSET,%ecx
	movl $pa(__bss_start),%edi
	movl $pa(__bss_stop),%ecx
	subl %edi,%ecx
	shrl $2,%ecx
	rep ; stosl
@@ -118,31 +118,32 @@ ENTRY(startup_32)
 * (kexec on panic case). Hence copy out the parameters before initializing
 * page tables.
 */
	movl $(boot_params - __PAGE_OFFSET),%edi
	movl $pa(boot_params),%edi
	movl $(PARAM_SIZE/4),%ecx
	cld
	rep
	movsl
	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
	movl pa(boot_params) + NEW_CL_POINTER,%esi
	andl %esi,%esi
	jz 1f			# No comand line
	movl $(boot_command_line - __PAGE_OFFSET),%edi
	movl $pa(boot_command_line),%edi
	movl $(COMMAND_LINE_SIZE/4),%ecx
	rep
	movsl
1:

#ifdef CONFIG_PARAVIRT
	cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
	/* This is can only trip for a broken bootloader... */
	cmpw $0x207, pa(boot_params + BP_version)
	jb default_entry

	/* Paravirt-compatible boot parameters.  Look to see what architecture
		we're booting under. */
	movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
	movl pa(boot_params + BP_hardware_subarch), %eax
	cmpl $num_subarch_entries, %eax
	jae bad_subarch

	movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
	movl pa(subarch_entries)(,%eax,4), %eax
	subl $__PAGE_OFFSET, %eax
	jmp *%eax

@@ -170,17 +171,68 @@ num_subarch_entries = (. - subarch_entries) / 4
 * Mappings are created both at virtual address 0 (identity mapping)
 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
 *
 * Warning: don't use %esi or the stack in this code.  However, %esp
 * can be used as a GPR if you really need it...
 * Note that the stack is not yet set up!
 */
page_pde_offset = (__PAGE_OFFSET >> 20);
#define PTE_ATTR	0x007		/* PRESENT+RW+USER */
#define PDE_ATTR	0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */
#define PGD_ATTR	0x001		/* PRESENT (no other attributes) */

default_entry:
	movl $(pg0 - __PAGE_OFFSET), %edi
	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
#ifdef CONFIG_X86_PAE

	/*
	 * In PAE mode swapper_pg_dir is statically defined to contain enough
	 * entries to cover the VMSPLIT option (that is the top 1, 2 or 3
	 * entries). The identity mapping is handled by pointing two PGD
	 * entries to the first kernel PMD.
	 *
	 * Note the upper half of each PMD or PTE are always zero at
	 * this stage.
	 */

#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */

	xorl %ebx,%ebx				/* %ebx is kept at zero */

	movl $pa(pg0), %edi
	movl $pa(swapper_pg_pmd), %edx
	movl $PTE_ATTR, %eax
10:
	leal 0x007(%edi),%ecx			/* Create PDE entry */
	leal PDE_ATTR(%edi),%ecx		/* Create PMD entry */
	movl %ecx,(%edx)			/* Store PMD entry */
						/* Upper half already zero */
	addl $8,%edx
	movl $512,%ecx
11:
	stosl
	xchgl %eax,%ebx
	stosl
	xchgl %eax,%ebx
	addl $0x1000,%eax
	loop 11b

	/*
	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
	 * bytes beyond the end of our own page tables.
	 */
	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
	cmpl %ebp,%eax
	jb 10b
1:
	movl %edi,pa(init_pg_tables_end)

	/* Do early initialization of the fixmap area */
	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
	movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
#else	/* Not PAE */

page_pde_offset = (__PAGE_OFFSET >> 20);

	movl $pa(pg0), %edi
	movl $pa(swapper_pg_dir), %edx
	movl $PTE_ATTR, %eax
10:
	leal PDE_ATTR(%edi),%ecx		/* Create PDE entry */
	movl %ecx,(%edx)			/* Store identity PDE entry */
	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
	addl $4,%edx
@@ -189,19 +241,20 @@ default_entry:
	stosl
	addl $0x1000,%eax
	loop 11b
	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
	/*
	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
	 * bytes beyond the end of our own page tables; the +0x007 is
	 * the attribute bits
	 */
	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
	cmpl %ebp,%eax
	jb 10b
	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)

	/* Do an early initialization of the fixmap area */
	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
	addl $0x67, %eax			/* 0x67 == _PAGE_TABLE */
	movl %eax, 4092(%edx)
	movl %edi,pa(init_pg_tables_end)

	/* Do early initialization of the fixmap area */
	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
	movl %eax,pa(swapper_pg_dir+0xffc)
#endif
	jmp 3f
/*
 * Non-boot CPU entry point; entered from trampoline.S
@@ -241,7 +294,7 @@ ENTRY(startup_32_smp)
 *	NOTE! We have to correct for the fact that we're
 *	not yet offset PAGE_OFFSET..
 */
#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
#define cr4_bits pa(mmu_cr4_features)
	movl cr4_bits,%edx
	andl %edx,%edx
	jz 6f
@@ -276,10 +329,10 @@ ENTRY(startup_32_smp)
/*
 * Enable paging
 */
	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
	movl $pa(swapper_pg_dir),%eax
	movl %eax,%cr3		/* set the page table pointer.. */
	movl %cr0,%eax
	orl $0x80000000,%eax
	orl  $X86_CR0_PG,%eax
	movl %eax,%cr0		/* ..and set paging (PG) bit */
	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
1:
@@ -552,16 +605,44 @@ ENTRY(_stext)
 */
.section ".bss.page_aligned","wa"
	.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
ENTRY(swapper_pg_pmd)
	.fill 1024*KPMDS,4,0
#else
ENTRY(swapper_pg_dir)
	.fill 1024,4,0
ENTRY(swapper_pg_pmd)
#endif
ENTRY(swapper_pg_fixmap)
	.fill 1024,4,0
ENTRY(empty_zero_page)
	.fill 4096,1,0

/*
 * This starts the data section.
 */
#ifdef CONFIG_X86_PAE
.section ".data.page_aligned","wa"
	/* Page-aligned for the benefit of paravirt? */
	.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
	.long	pa(swapper_pg_pmd+PGD_ATTR),0		/* low identity map */
# if KPMDS == 3
	.long	pa(swapper_pg_pmd+PGD_ATTR),0
	.long	pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
	.long	pa(swapper_pg_pmd+PGD_ATTR+0x2000),0
# elif KPMDS == 2
	.long	0,0
	.long	pa(swapper_pg_pmd+PGD_ATTR),0
	.long	pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
# elif KPMDS == 1
	.long	0,0
	.long	0,0
	.long	pa(swapper_pg_pmd+PGD_ATTR),0
# else
#  error "Kernel PMDs should be 1, 2 or 3"
# endif
	.align PAGE_SIZE_asm		/* needs to be page-sized too */
#endif

.data
ENTRY(stack_start)
	.long init_thread_union+THREAD_SIZE
+4 −0
Original line number Diff line number Diff line
@@ -154,7 +154,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
EXPORT_SYMBOL(boot_cpu_data);

#ifndef CONFIG_X86_PAE
unsigned long mmu_cr4_features;
#else
unsigned long mmu_cr4_features = X86_CR4_PAE;
#endif

/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
+27 −45
Original line number Diff line number Diff line
@@ -46,6 +46,7 @@
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/paravirt.h>
#include <asm/setup.h>

unsigned int __VMALLOC_RESERVE = 128 << 20;

@@ -328,44 +329,38 @@ pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;

void __init native_pagetable_setup_start(pgd_t *base)
{
#ifdef CONFIG_X86_PAE
	int i;
	unsigned long pfn, va;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	/*
	 * Init entries of the first-level page table to the
	 * zero page, if they haven't already been set up.
	 *
	 * In a normal native boot, we'll be running on a
	 * pagetable rooted in swapper_pg_dir, but not in PAE
	 * mode, so this will end up clobbering the mappings
	 * for the lower 24Mbytes of the address space,
	 * without affecting the kernel address space.
	 * Remove any mappings which extend past the end of physical
	 * memory from the boot time page table:
	 */
	for (i = 0; i < USER_PTRS_PER_PGD; i++)
		set_pgd(&base[i],
			__pgd(__pa(empty_zero_page) | _PAGE_PRESENT));

	/* Make sure kernel address space is empty so that a pagetable
	   will be allocated for it. */
	memset(&base[USER_PTRS_PER_PGD], 0,
	       KERNEL_PGD_PTRS * sizeof(pgd_t));
#else
	for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
		va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
		pgd = base + pgd_index(va);
		if (!pgd_present(*pgd))
			break;

		pud = pud_offset(pgd, va);
		pmd = pmd_offset(pud, va);
		if (!pmd_present(*pmd))
			break;

		pte = pte_offset_kernel(pmd, va);
		if (!pte_present(*pte))
			break;

		pte_clear(NULL, va, pte);
	}
	paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
#endif
}

void __init native_pagetable_setup_done(pgd_t *base)
{
#ifdef CONFIG_X86_PAE
	/*
	 * Add low memory identity-mappings - SMP needs it when
	 * starting up on an AP from real-mode. In the non-PAE
	 * case we already have these mappings through head.S.
	 * All user-space mappings are explicitly cleared after
	 * SMP startup.
	 */
	set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
#endif
}

/*
@@ -374,9 +369,8 @@ void __init native_pagetable_setup_done(pgd_t *base)
 * the boot process.
 *
 * If we're booting on native hardware, this will be a pagetable
 * constructed in arch/i386/kernel/head.S, and not running in PAE mode
 * (even if we'll end up running in PAE).  The root of the pagetable
 * will be swapper_pg_dir.
 * constructed in arch/x86/kernel/head_32.S.  The root of the
 * pagetable will be swapper_pg_dir.
 *
 * If we're booting paravirtualized under a hypervisor, then there are
 * more options: we may already be running PAE, and the pagetable may
@@ -537,14 +531,6 @@ void __init paging_init(void)

	load_cr3(swapper_pg_dir);

#ifdef CONFIG_X86_PAE
	/*
	 * We will bail out later - printk doesn't work right now so
	 * the user would just see a hanging kernel.
	 */
	if (cpu_has_pae)
		set_in_cr4(X86_CR4_PAE);
#endif
	__flush_tlb_all();

	kmap_init();
@@ -675,10 +661,6 @@ void __init mem_init(void)
	BUG_ON((unsigned long)high_memory		> VMALLOC_START);
#endif /* double-sanity-check paranoia */

#ifdef CONFIG_X86_PAE
	if (!cpu_has_pae)
		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif
	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();

+31 −24
Original line number Diff line number Diff line
@@ -260,37 +260,42 @@ static int __init early_ioremap_debug_setup(char *str)
early_param("early_ioremap_debug", early_ioremap_debug_setup);

static __initdata int after_paging_init;
static __initdata unsigned long bm_pte[1024]
static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
				__attribute__((aligned(PAGE_SIZE)));

static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
	return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
	pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pmd;
}

static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
	return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
	return &bm_pte[pte_index(addr)];
}

void __init early_ioremap_init(void)
{
	unsigned long *pgd;
	pmd_t *pmd;

	if (early_ioremap_debug)
		printk(KERN_INFO "early_ioremap_init()\n");

	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
	*pgd = __pa(bm_pte) | _PAGE_TABLE;
	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	set_pmd(pmd, __pmd(__pa(bm_pte) | _PAGE_TABLE));

	/*
	 * The boot-ioremap range spans multiple pgds, for which
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pgd %p != %p\n",
		       pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
@@ -304,28 +309,29 @@ void __init early_ioremap_init(void)

void __init early_ioremap_clear(void)
{
	unsigned long *pgd;
	pmd_t *pmd;

	if (early_ioremap_debug)
		printk(KERN_INFO "early_ioremap_clear()\n");

	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
	*pgd = 0;
	paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT);
	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	pmd_clear(pmd);
	paravirt_release_pt(__pa(pmd) >> PAGE_SHIFT);
	__flush_tlb_all();
}

void __init early_ioremap_reset(void)
{
	enum fixed_addresses idx;
	unsigned long *pte, phys, addr;
	unsigned long addr, phys;
	pte_t *pte;

	after_paging_init = 1;
	for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
		addr = fix_to_virt(idx);
		pte = early_ioremap_pte(addr);
		if (*pte & _PAGE_PRESENT) {
			phys = *pte & PAGE_MASK;
		if (pte_present(*pte)) {
			phys = pte_val(*pte) & PAGE_MASK;
			set_fixmap(idx, phys);
		}
	}
@@ -334,7 +340,8 @@ void __init early_ioremap_reset(void)
static void __init __early_set_fixmap(enum fixed_addresses idx,
				   unsigned long phys, pgprot_t flags)
{
	unsigned long *pte, addr = __fix_to_virt(idx);
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
@@ -342,9 +349,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
	}
	pte = early_ioremap_pte(addr);
	if (pgprot_val(flags))
		*pte = (phys & PAGE_MASK) | pgprot_val(flags);
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		*pte = 0;
		pte_clear(NULL, addr, pte);
	__flush_tlb_one(addr);
}

+0 −1
Original line number Diff line number Diff line
@@ -48,7 +48,6 @@ typedef unsigned long pgprotval_t;
typedef unsigned long	phys_addr_t;

typedef union { pteval_t pte, pte_low; } pte_t;
typedef pte_t boot_pte_t;

#endif	/* __ASSEMBLY__ */
#endif	/* CONFIG_X86_PAE */
Loading