Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 032370b9 authored by Kirill A. Shutemov, committed by Ingo Molnar
Browse files

x86/boot/64: Add support of additional page table level during early boot



This patch adds support for 5-level paging during early boot.
It generalizes the boot code for 4- and 5-level paging on 64-bit systems, with a
compile-time switch between them.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170606113133.22974-10-kirill.shutemov@linux.intel.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 65ade2f8
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -14,6 +14,8 @@
#include <linux/bitops.h>
#include <linux/threads.h>

extern p4d_t level4_kernel_pgt[512];
extern p4d_t level4_ident_pgt[512];
extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
+2 −0
Original line number Diff line number Diff line
@@ -104,6 +104,8 @@
#define X86_CR4_OSFXSR		_BITUL(X86_CR4_OSFXSR_BIT)
#define X86_CR4_OSXMMEXCPT_BIT	10 /* enable unmasked SSE exceptions */
#define X86_CR4_OSXMMEXCPT	_BITUL(X86_CR4_OSXMMEXCPT_BIT)
#define X86_CR4_LA57_BIT	12 /* enable 5-level page tables */
#define X86_CR4_LA57		_BITUL(X86_CR4_LA57_BIT)
#define X86_CR4_VMXE_BIT	13 /* enable VMX virtualization */
#define X86_CR4_VMXE		_BITUL(X86_CR4_VMXE_BIT)
#define X86_CR4_SMXE_BIT	14 /* enable safer mode (TXT) */
+42 −6
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@ void __init __startup_64(unsigned long physaddr)
{
	unsigned long load_delta, *p;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	int i;
@@ -70,6 +71,11 @@ void __init __startup_64(unsigned long physaddr)
	pgd = fixup_pointer(&early_top_pgt, physaddr);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
		p4d[511] += load_delta;
	}

	pud = fixup_pointer(&level3_kernel_pgt, physaddr);
	pud[510] += load_delta;
	pud[511] += load_delta;
@@ -87,9 +93,21 @@ void __init __startup_64(unsigned long physaddr)
	pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
	pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + _KERNPG_TABLE;
		pgd[i + 1] = (pgdval_t)p4d + _KERNPG_TABLE;

		i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
		p4d[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
		p4d[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
		pgd[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
	}

	i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
	pud[i + 0] = (pudval_t)pmd + _KERNPG_TABLE;
@@ -134,6 +152,7 @@ int __init early_make_pgtable(unsigned long address)
{
	unsigned long physaddr = address - __PAGE_OFFSET;
	pgdval_t pgd, *pgd_p;
	p4dval_t p4d, *p4d_p;
	pudval_t pud, *pud_p;
	pmdval_t pmd, *pmd_p;

@@ -150,8 +169,25 @@ int __init early_make_pgtable(unsigned long address)
	 * critical -- __PAGE_OFFSET would point us back into the dynamic
	 * range and we might end up looping forever...
	 */
	if (pgd)
		pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
		p4d_p = pgd_p;
	else if (pgd)
		p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
		*pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	p4d_p += p4d_index(address);
	p4d = *p4d_p;

	if (p4d)
		pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
@@ -160,7 +196,7 @@ int __init early_make_pgtable(unsigned long address)

		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
		*p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	pud_p += pud_index(address);
	pud = *pud_p;
+20 −6
Original line number Diff line number Diff line
@@ -37,10 +37,11 @@
 *
 */

#define p4d_index(x)	(((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)

	.text
@@ -100,11 +101,14 @@ ENTRY(secondary_startup_64)
	movq	$(init_top_pgt - __START_KERNEL_map), %rax
1:

	/* Enable PAE mode and PGE */
	/* Enable PAE mode, PGE and LA57 */
	movl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx
#ifdef CONFIG_X86_5LEVEL
	orl	$X86_CR4_LA57, %ecx
#endif
	movq	%rcx, %cr4

	/* Setup early boot stage 4 level pagetables. */
	/* Setup early boot stage 4-/5-level pagetables. */
	addq	phys_base(%rip), %rax
	movq	%rax, %cr3

@@ -330,7 +334,11 @@ GLOBAL(name)
	__INITDATA
NEXT_PAGE(early_top_pgt)
	.fill	511,8,0
#ifdef CONFIG_X86_5LEVEL
	.quad	level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
#else
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
#endif

NEXT_PAGE(early_dynamic_pgts)
	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -343,9 +351,9 @@ NEXT_PAGE(init_top_pgt)
#else
NEXT_PAGE(init_top_pgt)
	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.org    init_top_pgt + L4_PAGE_OFFSET*8, 0
	.org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
	.org    init_top_pgt + L4_START_KERNEL*8, 0
	.org    init_top_pgt + PGD_START_KERNEL*8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE

@@ -359,6 +367,12 @@ NEXT_PAGE(level2_ident_pgt)
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#endif

#ifdef CONFIG_X86_5LEVEL
NEXT_PAGE(level4_kernel_pgt)
	.fill	511,8,0
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
#endif

NEXT_PAGE(level3_kernel_pgt)
	.fill	L3_START_KERNEL,8,0
	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */