Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d2f7cbe7 authored by Borislav Petkov's avatar Borislav Petkov Committed by Matt Fleming
Browse files

x86/efi: Runtime services virtual mapping



We map the EFI regions needed for runtime services non-contiguously,
with preserved alignment on virtual addresses starting from -4G down
for a total max space of 64G. This way, we provide for stable runtime
services addresses across kernels so that a kexec'd kernel can still use
them.

Thus, they're mapped in a separate pagetable so that we don't pollute
the kernel namespace.

Add an efi= kernel command line parameter for passing miscellaneous
options and chicken bits from the command line.

While at it, add a chicken bit called "efi=old_map" which can be used as
a fallback to the old runtime services mapping method in case there's
some b0rkage with a particular EFI implementation (haha, it is hard to
hold up the sarcasm here...).

Also, add the UEFI RT VA space to Documentation/x86/x86_64/mm.txt.

Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
Signed-off-by: default avatarMatt Fleming <matt.fleming@intel.com>
parent 82f0712c
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -835,6 +835,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
	edd=		[EDD]
			Format: {"off" | "on" | "skip[mbr]"}

	efi=		[EFI]
			Format: { "old_map" }
			old_map [X86-64]: switch to the old ioremap-based EFI
			runtime services mapping. 32-bit still uses this one by
			default.

	efi_no_storage_paranoia [EFI; X86]
			Using this parameter you can use more than 50% of
			your efi variable storage. Use this parameter only if
+7 −0
Original line number Diff line number Diff line
@@ -28,4 +28,11 @@ reference.
Current X86-64 implementations only support 40 bits of address space,
but we support up to 46 bits. This expands into MBZ space in the page tables.

->trampoline_pgd:

We map EFI runtime services in the aforementioned PGD in the virtual
range of 64Gb (arbitrarily set, can be raised if needed)

0xffffffef00000000 - 0xffffffff00000000

-Andi Kleen, Jul 2004
+47 −17
Original line number Diff line number Diff line
#ifndef _ASM_X86_EFI_H
#define _ASM_X86_EFI_H

/*
 * We map the EFI regions needed for runtime services non-contiguously,
 * with preserved alignment on virtual addresses starting from -4G down
 * for a total max space of 64G. This way, we provide for stable runtime
 * services addresses across kernels so that a kexec'd kernel can still
 * use them.
 *
 * This is the main reason why we're doing stable VA mappings for RT
 * services.
 *
 * This flag is used in conjuction with a chicken bit called
 * "efi=old_map" which can be used as a fallback to the old runtime
 * services mapping method in case there's some b0rkage with a
 * particular EFI implementation (haha, it is hard to hold up the
 * sarcasm here...).
 */
#define EFI_OLD_MEMMAP		EFI_ARCH_1

#ifdef CONFIG_X86_32

#define EFI_LOADER_SIGNATURE	"EL32"
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
	efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3),		\
		  (u64)(a4), (u64)(a5), (u64)(a6))

#define _efi_call_virtX(x, f, ...)					\
({									\
	efi_status_t __s;						\
									\
	efi_sync_low_kernel_mappings();					\
	preempt_disable();						\
	__s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__);	\
	preempt_enable();						\
	__s;								\
})

#define efi_call_virt0(f)				\
	efi_call0((efi.systab->runtime->f))
	_efi_call_virtX(0, f)
#define efi_call_virt1(f, a1)				\
	efi_call1((efi.systab->runtime->f), (u64)(a1))
	_efi_call_virtX(1, f, (u64)(a1))
#define efi_call_virt2(f, a1, a2)			\
	efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
	_efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
#define efi_call_virt3(f, a1, a2, a3)			\
	efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		  (u64)(a3))
	_efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
#define efi_call_virt4(f, a1, a2, a3, a4)		\
	efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		  (u64)(a3), (u64)(a4))
	_efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
#define efi_call_virt5(f, a1, a2, a3, a4, a5)		\
	efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		  (u64)(a3), (u64)(a4), (u64)(a5))
	_efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\
	efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
	_efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))

extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
				 u32 type, u64 attribute);
@@ -95,12 +120,17 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,

extern int add_efi_memmap;
extern unsigned long x86_efi_facility;
extern struct efi_scratch efi_scratch;
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
extern void efi_call_phys_epilog(void);
extern void efi_unmap_memmap(void);
extern void efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
extern void efi_sync_low_kernel_mappings(void);
extern void efi_setup_page_tables(void);
extern void __init old_map_region(efi_memory_desc_t *md);

#ifdef CONFIG_EFI

+2 −1
Original line number Diff line number Diff line
@@ -379,7 +379,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
 */
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern phys_addr_t slow_virt_to_phys(void *__address);

extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
				   unsigned numpages, unsigned long page_flags);
#endif	/* !__ASSEMBLY__ */

#endif /* _ASM_X86_PGTABLE_DEFS_H */
+66 −28
Original line number Diff line number Diff line
@@ -12,6 +12,8 @@
 *	Bibo Mao <bibo.mao@intel.com>
 *	Chandramouli Narayanan <mouli@linux.intel.com>
 *	Huang Ying <ying.huang@intel.com>
 * Copyright (C) 2013 SuSE Labs
 *	Borislav Petkov <bp@suse.de> - runtime services VA mapping
 *
 * Copied from efi_32.c to eliminate the duplicated code between EFI
 * 32/64 support code. --ying 2007-10-26
@@ -745,21 +747,56 @@ void efi_memory_uc(u64 addr, unsigned long size)
	set_memory_uc(addr, npages);
}

void __init old_map_region(efi_memory_desc_t *md)
{
	u64 start_pfn, end_pfn, end;
	unsigned long size;
	void *va;

	start_pfn = PFN_DOWN(md->phys_addr);
	size	  = md->num_pages << PAGE_SHIFT;
	end	  = md->phys_addr + size;
	end_pfn   = PFN_UP(end);

	if (pfn_range_is_mapped(start_pfn, end_pfn)) {
		va = __va(md->phys_addr);

		if (!(md->attribute & EFI_MEMORY_WB))
			efi_memory_uc((u64)(unsigned long)va, size);
	} else
		va = efi_ioremap(md->phys_addr, size,
				 md->type, md->attribute);

	md->virt_addr = (u64) (unsigned long) va;
	if (!va)
		pr_err("ioremap of 0x%llX failed!\n",
		       (unsigned long long)md->phys_addr);
}

/*
 * This function will switch the EFI runtime services to virtual mode.
 * Essentially, look through the EFI memmap and map every region that
 * has the runtime attribute bit set in its memory descriptor and update
 * that memory descriptor with the virtual address obtained from ioremap().
 * This enables the runtime services to be called without having to
 * Essentially, we look through the EFI memmap and map every region that
 * has the runtime attribute bit set in its memory descriptor into the
 * ->trampoline_pgd page table using a top-down VA allocation scheme.
 *
 * The old method which used to update that memory descriptor with the
 * virtual address obtained from ioremap() is still supported when the
 * kernel is booted with efi=old_map on its command line. Same old
 * method enabled the runtime services to be called without having to
 * thunk back into physical mode for every invocation.
 *
 * The new method does a pagetable switch in a preemption-safe manner
 * so that we're in a different address space when calling a runtime
 * function. For function arguments passing we do copy the PGDs of the
 * kernel page table into ->trampoline_pgd prior to each call.
 */
void __init efi_enter_virtual_mode(void)
{
	efi_memory_desc_t *md, *prev_md = NULL;
	efi_status_t status;
	void *p, *new_memmap = NULL;
	unsigned long size;
	u64 end, systab, start_pfn, end_pfn;
	void *p, *va, *new_memmap = NULL;
	efi_status_t status;
	u64 end, systab;
	int count = 0;

	efi.systab = NULL;
@@ -768,7 +805,6 @@ void __init efi_enter_virtual_mode(void)
	 * We don't do virtual mode, since we don't do runtime services, on
	 * non-native EFI
	 */

	if (!efi_is_native()) {
		efi_unmap_memmap();
		return;
@@ -799,6 +835,7 @@ void __init efi_enter_virtual_mode(void)
			continue;
		}
		prev_md = md;

	}

	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -808,33 +845,18 @@ void __init efi_enter_virtual_mode(void)
		    md->type != EFI_BOOT_SERVICES_DATA)
			continue;

		efi_map_region(md);

		size = md->num_pages << EFI_PAGE_SHIFT;
		end = md->phys_addr + size;

		start_pfn = PFN_DOWN(md->phys_addr);
		end_pfn = PFN_UP(end);
		if (pfn_range_is_mapped(start_pfn, end_pfn)) {
			va = __va(md->phys_addr);

			if (!(md->attribute & EFI_MEMORY_WB))
				efi_memory_uc((u64)(unsigned long)va, size);
		} else
			va = efi_ioremap(md->phys_addr, size,
					 md->type, md->attribute);

		md->virt_addr = (u64) (unsigned long) va;

		if (!va) {
			pr_err("ioremap of 0x%llX failed!\n",
			       (unsigned long long)md->phys_addr);
			continue;
		}

		systab = (u64) (unsigned long) efi_phys.systab;
		if (md->phys_addr <= systab && systab < end) {
			systab += md->virt_addr - md->phys_addr;

			efi.systab = (efi_system_table_t *) (unsigned long) systab;
		}

		new_memmap = krealloc(new_memmap,
				      (count + 1) * memmap.desc_size,
				      GFP_KERNEL);
@@ -845,6 +867,9 @@ void __init efi_enter_virtual_mode(void)

	BUG_ON(!efi.systab);

	efi_setup_page_tables();
	efi_sync_low_kernel_mappings();

	status = phys_efi_set_virtual_address_map(
		memmap.desc_size * count,
		memmap.desc_size,
@@ -877,7 +902,8 @@ void __init efi_enter_virtual_mode(void)
	efi.query_variable_info = virt_efi_query_variable_info;
	efi.update_capsule = virt_efi_update_capsule;
	efi.query_capsule_caps = virt_efi_query_capsule_caps;
	if (__supported_pte_mask & _PAGE_NX)

	if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
		runtime_code_page_mkexec();

	kfree(new_memmap);
@@ -1007,3 +1033,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
	return EFI_SUCCESS;
}
EXPORT_SYMBOL_GPL(efi_query_variable_store);

static int __init parse_efi_cmdline(char *str)
{
	if (*str == '=')
		str++;

	if (!strncmp(str, "old_map", 7))
		set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);

	return 0;
}
early_param("efi", parse_efi_cmdline);
Loading