Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d22fff81 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:

 - Extend the memmap= boot parameter syntax to allow the redeclaration
   and dropping of existing ranges, and to support all e820 range types
   (Jan H. Schönherr)

 - Improve the W+X boot time security checks to remove false positive
   warnings on Xen (Jan Beulich)

 - Support booting as Xen PVH guest (Juergen Gross)

 - Improved 5-level paging (LA57) support, in particular it's possible
   now to have a single kernel image for both 4-level and 5-level
   hardware (Kirill A. Shutemov)

 - AMD hardware RAM encryption support (SME/SEV) fixes (Tom Lendacky)

 - Preparatory commits for hardware-encrypted RAM support on Intel CPUs.
   (Kirill A. Shutemov)

 - Improved Intel-MID support (Andy Shevchenko)

 - Show EFI page tables in page_tables debug files (Andy Lutomirski)

 - ... plus misc fixes and smaller cleanups

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (56 commits)
  x86/cpu/tme: Fix spelling: "configuation" -> "configuration"
  x86/boot: Fix SEV boot failure from change to __PHYSICAL_MASK_SHIFT
  x86/mm: Update comment in detect_tme() regarding x86_phys_bits
  x86/mm/32: Remove unused node_memmap_size_bytes() & CONFIG_NEED_NODE_MEMMAP_SIZE logic
  x86/mm: Remove pointless checks in vmalloc_fault
  x86/platform/intel-mid: Add special handling for ACPI HW reduced platforms
  ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback
  ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export
  x86/pconfig: Provide defines and helper to run MKTME_KEY_PROG leaf
  x86/pconfig: Detect PCONFIG targets
  x86/tme: Detect if TME and MKTME is activated by BIOS
  x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
  x86/boot/compressed/64: Use page table in trampoline memory
  x86/boot/compressed/64: Use stack from trampoline memory
  x86/boot/compressed/64: Make sure we have a 32-bit code segment
  x86/mm: Do not use paravirtualized calls in native_set_p4d()
  kdump, vmcoreinfo: Export pgtable_l5_enabled value
  x86/boot/compressed/64: Prepare new top-level page table for trampoline
  x86/boot/compressed/64: Set up trampoline memory
  x86/boot/compressed/64: Save and restore trampoline memory
  ...
parents 986b37c0 eaeb8e76
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -2248,6 +2248,15 @@
			The memory region may be marked as e820 type 12 (0xc)
			and is NVDIMM or ADR memory.

	memmap=<size>%<offset>-<oldtype>+<newtype>
			[KNL,ACPI] Convert memory within the specified region
			from <oldtype> to <newtype>. If "-<oldtype>" is left
			out, the whole region will be marked as <newtype>,
			even if previously unavailable. If "+<newtype>" is left
			out, matching memory will be removed. Types are
			specified as e820 types, e.g., 1 = RAM, 2 = reserved,
			3 = ACPI, 12 = PRAM.

	memory_corruption_check=0/1 [X86]
			Some BIOSes seem to corrupt the first 64k of
			memory when doing things like suspend/resume.
+3 −6
Original line number Diff line number Diff line
@@ -20,12 +20,9 @@ Documentation/x86/x86_64/mm.txt

CONFIG_X86_5LEVEL=y enables the feature.

So far, a kernel compiled with the option enabled will be able to boot
only on machines that supports the feature -- see for 'la57' flag in
/proc/cpuinfo.

The plan is to implement boot-time switching between 4- and 5-level paging
in the future.
Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware.
In this case additional page table level -- p4d -- will be folded at
runtime.

== User-space and large virtual address space ==

+11 −6
Original line number Diff line number Diff line
@@ -1461,6 +1461,8 @@ config X86_PAE

config X86_5LEVEL
	bool "Enable 5-level page tables support"
	select DYNAMIC_MEMORY_LAYOUT
	select SPARSEMEM_VMEMMAP
	depends on X86_64
	---help---
	  5-level paging enables access to larger address space:
@@ -1469,8 +1471,8 @@ config X86_5LEVEL

	  It will be supported by future Intel CPUs.

	  Note: a kernel with this option enabled can only be booted
	  on machines that support the feature.
	  A kernel with the option enabled can be booted on machines that
	  support 4- or 5-level paging.

	  See Documentation/x86/x86_64/5level-paging.txt for more
	  information.
@@ -1595,10 +1597,6 @@ config ARCH_HAVE_MEMORY_PRESENT
	def_bool y
	depends on X86_32 && DISCONTIGMEM

config NEED_NODE_MEMMAP_SIZE
	def_bool y
	depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)

config ARCH_FLATMEM_ENABLE
	def_bool y
	depends on X86_32 && !NUMA
@@ -2174,10 +2172,17 @@ config PHYSICAL_ALIGN

	  Don't change this unless you know what you are doing.

config DYNAMIC_MEMORY_LAYOUT
	bool
	---help---
	  This option makes base addresses of vmalloc and vmemmap as well as
	  __PAGE_OFFSET movable during boot.

config RANDOMIZE_MEMORY
	bool "Randomize the kernel memory sections"
	depends on X86_64
	depends on RANDOMIZE_BASE
	select DYNAMIC_MEMORY_LAYOUT
	default RANDOMIZE_BASE
	---help---
	   Randomizes the base virtual address of kernel memory sections
+1 −1
Original line number Diff line number Diff line
@@ -78,7 +78,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
	vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
	vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o
	vmlinux-objs-y += $(obj)/mem_encrypt.o
	vmlinux-objs-y += $(obj)/pgtable_64.o
endif
+114 −54
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
#include "pgtable.h"

/*
 * Locally defined symbols should be marked hidden:
@@ -304,55 +305,77 @@ ENTRY(startup_64)
	/* Set up the stack */
	leaq	boot_stack_end(%rbx), %rsp

#ifdef CONFIG_X86_5LEVEL
	/*
	 * Check if we need to enable 5-level paging.
	 * RSI holds real mode data and need to be preserved across
	 * a function call.
	 * At this point we are in long mode with 4-level paging enabled,
	 * but we might want to enable 5-level paging or vice versa.
	 *
	 * The problem is that we cannot do it directly. Setting or clearing
	 * CR4.LA57 in long mode would trigger #GP. So we need to switch off
	 * long mode and paging first.
	 *
	 * We also need a trampoline in lower memory to switch over from
	 * 4- to 5-level paging for cases when the bootloader puts the kernel
	 * above 4G, but didn't enable 5-level paging for us.
	 *
	 * The same trampoline can be used to switch from 5- to 4-level paging
	 * mode, like when starting 4-level paging kernel via kexec() when
	 * original kernel worked in 5-level paging mode.
	 *
	 * For the trampoline, we need the top page table to reside in lower
	 * memory as we don't have a way to load 64-bit values into CR3 in
	 * 32-bit mode.
	 *
	 * We go though the trampoline even if we don't have to: if we're
	 * already in a desired paging mode. This way the trampoline code gets
	 * tested on every boot.
	 */
	pushq	%rsi
	call	l5_paging_required
	popq	%rsi

	/* If l5_paging_required() returned zero, we're done here. */
	cmpq	$0, %rax
	je	lvl5
	/* Make sure we have GDT with 32-bit code segment */
	leaq	gdt(%rip), %rax
	movq	%rax, gdt64+2(%rip)
	lgdt	gdt64(%rip)

	/*
	 * At this point we are in long mode with 4-level paging enabled,
	 * but we want to enable 5-level paging.
	 *
	 * The problem is that we cannot do it directly. Setting LA57 in
	 * long mode would trigger #GP. So we need to switch off long mode
	 * first.
	 * paging_prepare() sets up the trampoline and checks if we need to
	 * enable 5-level paging.
	 *
	 * NOTE: This is not going to work if bootloader put us above 4G
	 * limit.
	 * Address of the trampoline is returned in RAX.
	 * Non zero RDX on return means we need to enable 5-level paging.
	 *
	 * The first step is go into compatibility mode.
	 * RSI holds real mode data and needs to be preserved across
	 * this function call.
	 */
	pushq	%rsi
	call	paging_prepare
	popq	%rsi

	/* Clear additional page table */
	leaq	lvl5_pgtable(%rbx), %rdi
	xorq	%rax, %rax
	movq	$(PAGE_SIZE/8), %rcx
	rep	stosq
	/* Save the trampoline address in RCX */
	movq	%rax, %rcx

	/*
	 * Setup current CR3 as the first and only entry in a new top level
	 * page table.
	 * Load the address of trampoline_return() into RDI.
	 * It will be used by the trampoline to return to the main code.
	 */
	movq	%cr3, %rdi
	leaq	0x7 (%rdi), %rax
	movq	%rax, lvl5_pgtable(%rbx)
	leaq	trampoline_return(%rip), %rdi

	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
	pushq	$__KERNEL32_CS
	leaq	compatible_mode(%rip), %rax
	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
	pushq	%rax
	lretq
lvl5:
#endif
trampoline_return:
	/* Restore the stack, the 32-bit trampoline uses its own stack */
	leaq	boot_stack_end(%rbx), %rsp

	/*
	 * cleanup_trampoline() would restore trampoline memory.
	 *
	 * RSI holds real mode data and needs to be preserved across
	 * this function call.
	 */
	pushq	%rsi
	call	cleanup_trampoline
	popq	%rsi

	/* Zero EFLAGS */
	pushq	$0
@@ -490,46 +513,82 @@ relocated:
	jmp	*%rax

	.code32
#ifdef CONFIG_X86_5LEVEL
compatible_mode:
/*
 * This is the 32-bit trampoline that will be copied over to low memory.
 *
 * RDI contains the return address (might be above 4G).
 * ECX contains the base address of the trampoline memory.
 * Non zero RDX on return means we need to enable 5-level paging.
 */
ENTRY(trampoline_32bit_src)
	/* Set up data and stack segments */
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %ss

	/* Set up new stack */
	leal	TRAMPOLINE_32BIT_STACK_END(%ecx), %esp

	/* Disable paging */
	movl	%cr0, %eax
	btrl	$X86_CR0_PG_BIT, %eax
	movl	%eax, %cr0

	/* Point CR3 to 5-level paging */
	leal	lvl5_pgtable(%ebx), %eax
	movl	%eax, %cr3
	/* Check what paging mode we want to be in after the trampoline */
	cmpl	$0, %edx
	jz	1f

	/* Enable PAE and LA57 mode */
	/* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
	movl	%cr4, %eax
	testl	$X86_CR4_LA57, %eax
	jnz	3f
	jmp	2f
1:
	/* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
	movl	%cr4, %eax
	orl	$(X86_CR4_PAE | X86_CR4_LA57), %eax
	testl	$X86_CR4_LA57, %eax
	jz	3f
2:
	/* Point CR3 to the trampoline's new top level page table */
	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
	movl	%eax, %cr3
3:
	/* Enable PAE and LA57 (if required) paging modes */
	movl	$X86_CR4_PAE, %eax
	cmpl	$0, %edx
	jz	1f
	orl	$X86_CR4_LA57, %eax
1:
	movl	%eax, %cr4

	/* Calculate address we are running at */
	call	1f
1:	popl	%edi
	subl	$1b, %edi
	/* Calculate address of paging_enabled() once we are executing in the trampoline */
	leal	paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax

	/* Prepare stack for far return to Long Mode */
	/* Prepare the stack for far return to Long Mode */
	pushl	$__KERNEL_CS
	leal	lvl5(%edi), %eax
	push	%eax
	pushl	%eax

	/* Enable paging back */
	/* Enable paging again */
	movl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movl	%eax, %cr0

	lret
#endif

	.code64
paging_enabled:
	/* Return from the trampoline */
	jmp	*%rdi

	/*
         * The trampoline code has a size limit.
         * Make sure we fail to compile if the trampoline code grows
         * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
	 */
	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE

	.code32
no_longmode:
	/* This isn't an x86-64 CPU so hang */
	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
	hlt
	jmp     1b
@@ -537,6 +596,11 @@ no_longmode:
#include "../../kernel/verify_cpu.S"

	.data
gdt64:
	.word	gdt_end - gdt
	.long	0
	.word	0
	.quad   0
gdt:
	.word	gdt_end - gdt
	.long	gdt
@@ -585,7 +649,3 @@ boot_stack_end:
	.balign 4096
pgtable:
	.fill BOOT_PGT_SIZE, 1, 0
#ifdef CONFIG_X86_5LEVEL
lvl5_pgtable:
	.fill PAGE_SIZE, 1, 0
#endif
Loading