
Commit 6ebcb060 authored by Tom Lendacky, committed by Ingo Molnar

x86/mm: Add support to encrypt the kernel in-place



Add support to encrypt the kernel in place. This is done by creating
new page mappings for the kernel - a decrypted, write-protected mapping
and an encrypted mapping. The kernel is encrypted by copying it through
a temporary buffer.
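
For illustration only (not part of the patch): a minimal userspace
sketch of the in-place pattern, staging each 2MB chunk through a bounce
buffer. Here transform() is a hypothetical stand-in for the hardware's
transparent encryption; in the real patch there is no software
transform - reading through the decrypted mapping and writing back
through the encrypted mapping of the same physical pages is what
changes the stored ciphertext.

    #include <stdlib.h>
    #include <string.h>

    #define CHUNK (2UL * 1024 * 1024)    /* mirrors PMD_PAGE_SIZE */

    /* hypothetical stand-in for the memory controller's transparent AES */
    static void transform(unsigned char *buf, size_t len)
    {
            for (size_t i = 0; i < len; i++)
                    buf[i] ^= 0xff;
    }

    /* len must be a CHUNK multiple, as the PMD-aligned kernel image is */
    static void encrypt_in_place(unsigned char *image, size_t len,
                                 unsigned char *bounce)
    {
            for (size_t off = 0; off < len; off += CHUNK) {
                    memcpy(bounce, image + off, CHUNK);  /* read: "decrypted" view */
                    transform(bounce, CHUNK);
                    memcpy(image + off, bounce, CHUNK);  /* write: "encrypted" view */
            }
    }

    int main(void)
    {
            unsigned char *image = calloc(2, CHUNK);
            unsigned char *bounce = malloc(CHUNK);

            encrypt_in_place(image, 2 * CHUNK, bounce);
            free(bounce);
            free(image);
            return 0;
    }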

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: kasan-dev@googlegroups.com
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/c039bf9412ef95e1e6bf4fdf8facab95e00c717b.1500319216.git.thomas.lendacky@amd.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent db516997
arch/x86/include/asm/mem_encrypt.h  +6 −0
@@ -21,6 +21,12 @@

extern unsigned long sme_me_mask;

void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
			 unsigned long decrypted_kernel_vaddr,
			 unsigned long kernel_len,
			 unsigned long encryption_wa,
			 unsigned long encryption_pgd);

void __init sme_early_encrypt(resource_size_t paddr,
			      unsigned long size);
void __init sme_early_decrypt(resource_size_t paddr,
arch/x86/mm/Makefile  +1 −0
@@ -40,3 +40,4 @@ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o

obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt.o
obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_boot.o
arch/x86/mm/mem_encrypt.c  +310 −0
@@ -21,6 +21,8 @@
#include <asm/setup.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>

/*
 * Since SME-related variables are set early in the boot process they must
@@ -199,8 +201,316 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}

static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
				 unsigned long end)
{
	unsigned long pgd_start, pgd_end, pgd_size;
	pgd_t *pgd_p;

	pgd_start = start & PGDIR_MASK;
	pgd_end = end & PGDIR_MASK;

	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
	pgd_size *= sizeof(pgd_t);
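
	/* e.g. start and end inside one PGDIR_SIZE region: a single entry is cleared */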

	pgd_p = pgd_base + pgd_index(start);

	memset(pgd_p, 0, pgd_size);
}

#define PGD_FLAGS	_KERNPG_TABLE_NOENC
#define P4D_FLAGS	_KERNPG_TABLE_NOENC
#define PUD_FLAGS	_KERNPG_TABLE_NOENC
#define PMD_FLAGS	(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)

static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
				     unsigned long vaddr, pmdval_t pmd_val)
{
	pgd_t *pgd_p;
	p4d_t *p4d_p;
	pud_t *pud_p;
	pmd_t *pmd_p;

	pgd_p = pgd_base + pgd_index(vaddr);
	if (native_pgd_val(*pgd_p)) {
		if (IS_ENABLED(CONFIG_X86_5LEVEL))
			p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
		else
			pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
	} else {
		pgd_t pgd;

		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
			p4d_p = pgtable_area;
			memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
			pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;

			pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
		} else {
			pud_p = pgtable_area;
			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

			pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
		}
		native_set_pgd(pgd_p, pgd);
	}

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d_p += p4d_index(vaddr);
		if (native_p4d_val(*p4d_p)) {
			pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
		} else {
			p4d_t p4d;

			pud_p = pgtable_area;
			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

			p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
			native_set_p4d(p4d_p, p4d);
		}
	}

	pud_p += pud_index(vaddr);
	if (native_pud_val(*pud_p)) {
		if (native_pud_val(*pud_p) & _PAGE_PSE)
			goto out;

		pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
	} else {
		pud_t pud;

		pmd_p = pgtable_area;
		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
		pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;

		pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
		native_set_pud(pud_p, pud);
	}

	pmd_p += pmd_index(vaddr);
	if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
		native_set_pmd(pmd_p, native_make_pmd(pmd_val));

out:
	return pgtable_area;
}

static unsigned long __init sme_pgtable_calc(unsigned long len)
{
	unsigned long p4d_size, pud_size, pmd_size;
	unsigned long total;

	/*
	 * Perform a relatively simplistic calculation of the pagetable
	 * entries that are needed. The mappings will be covered by 2MB
	 * PMD entries so we can conservatively calculate the required
	 * number of P4D, PUD and PMD structures needed to perform the
	 * mappings. Incrementing the count for each covers the case where
	 * the addresses cross entries.
	 */
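	/*
	 * Worked example (4-level paging, illustrative numbers): for a
	 * 512MB range, ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE is 1, so two
	 * PUD pages (2 * 4KB) are reserved, and ALIGN(len, PUD_SIZE) /
	 * PUD_SIZE is 1, so two PMD pages are reserved - 16KB in total.
	 */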
	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
		p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
		pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	} else {
		p4d_size = 0;
		pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	}
	pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

	total = p4d_size + pud_size + pmd_size;

	/*
	 * Now calculate the added pagetable structures needed to populate
	 * the new pagetables.
	 */
	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
		p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
		pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	} else {
		p4d_size = 0;
		pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	}
	pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

	total += p4d_size + pud_size + pmd_size;

	return total;
}

void __init sme_encrypt_kernel(void)
{
	unsigned long workarea_start, workarea_end, workarea_len;
	unsigned long execute_start, execute_end, execute_len;
	unsigned long kernel_start, kernel_end, kernel_len;
	unsigned long pgtable_area_len;
	unsigned long paddr, pmd_flags;
	unsigned long decrypted_base;
	void *pgtable_area;
	pgd_t *pgd;

	if (!sme_active())
		return;

	/*
	 * Prepare for encrypting the kernel by building new pagetables with
	 * the necessary attributes needed to encrypt the kernel in place.
	 *
	 *   One range of virtual addresses will map the memory occupied
	 *   by the kernel as encrypted.
	 *
	 *   Another range of virtual addresses will map the memory occupied
	 *   by the kernel as decrypted and write-protected.
	 *
	 *     The use of the write-protect attribute will prevent any of the
	 *     memory from being cached.
	 */

	/* Physical addresses give us the identity-mapped virtual addresses */
	kernel_start = __pa_symbol(_text);
	kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
	kernel_len = kernel_end - kernel_start;
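
	/* The 2MB alignment lets __enc_copy process the image in whole PMD pages */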

	/* Set the encryption workarea to be immediately after the kernel */
	workarea_start = kernel_end;

	/*
	 * Calculate the required number of workarea bytes:
	 *   executable encryption area size:
	 *     stack page (PAGE_SIZE)
	 *     encryption routine page (PAGE_SIZE)
	 *     intermediate copy buffer (PMD_PAGE_SIZE)
	 *   pagetable structures for the encryption of the kernel
	 *   pagetable structures for workarea (in case not currently mapped)
	 */
	execute_start = workarea_start;
	execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
	execute_len = execute_end - execute_start;

	/*
	 * One PGD for both encrypted and decrypted mappings and a set of
	 * PUDs and PMDs for each of the encrypted and decrypted mappings.
	 */
	pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
	pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;

	/* PUDs and PMDs needed in the current pagetables for the workarea */
	pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);

	/*
	 * The total workarea includes the executable encryption area and
	 * the pagetable area.
	 */
	workarea_len = execute_len + pgtable_area_len;
	workarea_end = workarea_start + workarea_len;

	/*
	 * Set the address to the start of where newly created pagetable
	 * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
	 * structures are created when the workarea is added to the current
	 * pagetables and when the new encrypted and decrypted kernel
	 * mappings are populated.
	 */
	pgtable_area = (void *)execute_end;

	/*
	 * Make sure the current pagetable structure has entries for
	 * addressing the workarea.
	 */
	pgd = (pgd_t *)native_read_cr3_pa();
	paddr = workarea_start;
	while (paddr < workarea_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr,
						paddr + PMD_FLAGS);

		paddr += PMD_PAGE_SIZE;
	}

	/* Flush the TLB - no globals so cr3 is enough */
	native_write_cr3(__native_read_cr3());

	/*
	 * A new pagetable structure is being built to allow for the kernel
	 * to be encrypted. It starts with an empty PGD that will then be
	 * populated with new PUDs and PMDs as the encrypted and decrypted
	 * kernel mappings are created.
	 */
	pgd = pgtable_area;
	memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
	pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;

	/* Add encrypted kernel (identity) mappings */
	pmd_flags = PMD_FLAGS | _PAGE_ENC;
	paddr = kernel_start;
	while (paddr < kernel_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr,
						paddr + pmd_flags);

		paddr += PMD_PAGE_SIZE;
	}

	/*
	 * A different PGD index/entry must be used to get different
	 * pagetable entries for the decrypted mapping. Choose the next
	 * PGD index and convert it to a virtual address to be used as
	 * the base of the mapping.
	 */
	decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
	decrypted_base <<= PGDIR_SHIFT;
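
	/*
	 * With 4-level paging, PGDIR_SHIFT is 39 and a PGD entry covers
	 * 512GB, so the decrypted alias lands in the next 512GB-aligned
	 * slice of the virtual address space.
	 */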

	/* Add decrypted, write-protected kernel (non-identity) mappings */
	pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
	paddr = kernel_start;
	while (paddr < kernel_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr + decrypted_base,
						paddr + pmd_flags);

		paddr += PMD_PAGE_SIZE;
	}

	/* Add decrypted workarea mappings to both kernel mappings */
	paddr = workarea_start;
	while (paddr < workarea_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr,
						paddr + PMD_FLAGS);

		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr + decrypted_base,
						paddr + PMD_FLAGS);

		paddr += PMD_PAGE_SIZE;
	}

	/* Perform the encryption */
	sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
			    kernel_len, workarea_start, (unsigned long)pgd);

	/*
	 * At this point we are running encrypted.  Remove the mappings for
	 * the decrypted areas - all that is needed for this is to remove
	 * the PGD entry/entries.
	 */
	sme_clear_pgd(pgd, kernel_start + decrypted_base,
		      kernel_end + decrypted_base);

	sme_clear_pgd(pgd, workarea_start + decrypted_base,
		      workarea_end + decrypted_base);

	/* Flush the TLB - no globals so cr3 is enough */
	native_write_cr3(__native_read_cr3());
}

void __init sme_enable(void)
arch/x86/mm/mem_encrypt_boot.S (new file)  +149 −0
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/processor-flags.h>
#include <asm/msr-index.h>
#include <asm/frame.h>

	.text
	.code64
ENTRY(sme_encrypt_execute)

	/*
	 * Entry parameters:
	 *   RDI - virtual address for the encrypted kernel mapping
	 *   RSI - virtual address for the decrypted kernel mapping
	 *   RDX - length of kernel
	 *   RCX - virtual address of the encryption workarea, including:
	 *     - stack page (PAGE_SIZE)
	 *     - encryption routine page (PAGE_SIZE)
	 *     - intermediate copy buffer (PMD_PAGE_SIZE)
	 *    R8 - physical address of the pagetables to use for encryption
	 */

	FRAME_BEGIN			/* RBP now has original stack pointer */

	/* Set up a one page stack in the non-encrypted memory area */
	movq	%rcx, %rax		/* Workarea stack page */
	leaq	PAGE_SIZE(%rax), %rsp	/* Set new stack pointer */
	addq	$PAGE_SIZE, %rax	/* Workarea encryption routine */

	push	%r12
	movq	%rdi, %r10		/* Encrypted kernel */
	movq	%rsi, %r11		/* Decrypted kernel */
	movq	%rdx, %r12		/* Kernel length */

	/* Copy encryption routine into the workarea */
	movq	%rax, %rdi				/* Workarea encryption routine */
	leaq	__enc_copy(%rip), %rsi			/* Encryption routine */
	movq	$(.L__enc_copy_end - __enc_copy), %rcx	/* Encryption routine length */
	rep	movsb

	/* Setup registers for call */
	movq	%r10, %rdi		/* Encrypted kernel */
	movq	%r11, %rsi		/* Decrypted kernel */
	movq	%r8, %rdx		/* Pagetables used for encryption */
	movq	%r12, %rcx		/* Kernel length */
	movq	%rax, %r8		/* Workarea encryption routine */
	addq	$PAGE_SIZE, %r8		/* Workarea intermediate copy buffer */

	call	*%rax			/* Call the encryption routine */

	pop	%r12

	movq	%rbp, %rsp		/* Restore original stack pointer */
	FRAME_END

	ret
ENDPROC(sme_encrypt_execute)

ENTRY(__enc_copy)
/*
 * Routine used to encrypt the kernel.
 *   This routine must be run outside of the kernel proper since
 *   the kernel will be encrypted during the process. So this
 *   routine is defined here and then copied to an area outside
 *   of the kernel where it will remain and run decrypted
 *   during execution.
 *
 *   On entry the registers must be:
 *     RDI - virtual address for the encrypted kernel mapping
 *     RSI - virtual address for the decrypted kernel mapping
 *     RDX - address of the pagetables to use for encryption
 *     RCX - length of kernel
 *      R8 - intermediate copy buffer
 *
 *     RAX - points to this routine
 *
 * The kernel will be encrypted by copying from the non-encrypted
 * kernel space to an intermediate buffer and then copying from the
 * intermediate buffer back to the encrypted kernel space. The physical
 * addresses of the two kernel space mappings are the same which
 * results in the kernel being encrypted "in place".
 */
	/* Enable the new page tables */
	mov	%rdx, %cr3

	/* Flush any global TLBs */
	mov	%cr4, %rdx
	andq	$~X86_CR4_PGE, %rdx
	mov	%rdx, %cr4
	orq	$X86_CR4_PGE, %rdx
	mov	%rdx, %cr4

	/* Set the PAT register PA5 entry to write-protect */
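	/*
	 * The PAT MSR holds eight one-byte entries; PA5 occupies MSR
	 * bits 47:40, i.e. bits 15:8 of EDX after RDMSR. Memory type
	 * 0x05 is write-protect (WP).
	 */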
	push	%rcx
	movl	$MSR_IA32_CR_PAT, %ecx
	rdmsr
	push	%rdx			/* Save original PAT value */
	andl	$0xffff00ff, %edx	/* Clear PA5 */
	orl	$0x00000500, %edx	/* Set PA5 to WP */
	wrmsr
	pop	%rdx			/* RDX contains original PAT value */
	pop	%rcx

	movq	%rcx, %r9		/* Save kernel length */
	movq	%rdi, %r10		/* Save encrypted kernel address */
	movq	%rsi, %r11		/* Save decrypted kernel address */

	wbinvd				/* Write back and invalidate cached kernel data */

	/* Copy/encrypt 2MB at a time */
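	/*
	 * The length is a whole number of 2MB chunks (kernel_end was
	 * aligned to PMD_PAGE_SIZE by the caller), so the countdown
	 * in R9 reaches exactly zero.
	 */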
1:
	movq	%r11, %rsi		/* Source - decrypted kernel */
	movq	%r8, %rdi		/* Dest   - intermediate copy buffer */
	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
	rep	movsb

	movq	%r8, %rsi		/* Source - intermediate copy buffer */
	movq	%r10, %rdi		/* Dest   - encrypted kernel */
	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
	rep	movsb

	addq	$PMD_PAGE_SIZE, %r11
	addq	$PMD_PAGE_SIZE, %r10
	subq	$PMD_PAGE_SIZE, %r9	/* Kernel length decrement */
	jnz	1b			/* Kernel length not zero? */

	/* Restore PAT register */
	push	%rdx			/* Save original PAT value */
	movl	$MSR_IA32_CR_PAT, %ecx
	rdmsr
	pop	%rdx			/* Restore original PAT value */
	wrmsr

	ret
.L__enc_copy_end:
ENDPROC(__enc_copy)