Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 61d06697 authored by Ingo Molnar
Browse files

Merge tag 'efi-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi into x86/efi



Pull EFI virtual mapping changes from Matt Fleming:

  * New static EFI runtime services virtual mapping layout which is
    groundwork for kexec support on EFI. (Borislav Petkov)

Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents b975dc36 ee411430
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -890,6 +890,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
	edd=		[EDD]
			Format: {"off" | "on" | "skip[mbr]"}

	efi=		[EFI]
			Format: { "old_map" }
			old_map [X86-64]: switch to the old ioremap-based EFI
			runtime services mapping. 32-bit still uses this one by
			default.

	efi_no_storage_paranoia [EFI; X86]
			Using this parameter you can use more than 50% of
			your efi variable storage. Use this parameter only if
+7 −0
Original line number Diff line number Diff line
@@ -28,4 +28,11 @@ reference.
Current X86-64 implementations only support 40 bits of address space,
but we support up to 46 bits. This expands into MBZ space in the page tables.

->trampoline_pgd:

We map EFI runtime services in the aforementioned PGD in a virtual
address range of 64GB (arbitrarily set, can be raised if needed):

0xffffffef00000000 - 0xffffffff00000000

-Andi Kleen, Jul 2004
+47 −17
Original line number Diff line number Diff line
#ifndef _ASM_X86_EFI_H
#define _ASM_X86_EFI_H

/*
 * We map the EFI regions needed for runtime services non-contiguously,
 * with preserved alignment on virtual addresses starting from -4G down
 * for a total max space of 64G. This way, we provide for stable runtime
 * services addresses across kernels so that a kexec'd kernel can still
 * use them.
 *
 * This is the main reason why we're doing stable VA mappings for RT
 * services.
 *
 * This flag is used in conjunction with a chicken bit called
 * "efi=old_map" which can be used as a fallback to the old runtime
 * services mapping method in case there's some b0rkage with a
 * particular EFI implementation (haha, it is hard to hold up the
 * sarcasm here...).
 */
#define EFI_OLD_MEMMAP		EFI_ARCH_1

#ifdef CONFIG_X86_32

#define EFI_LOADER_SIGNATURE	"EL32"
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
	efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3),		\
		  (u64)(a4), (u64)(a5), (u64)(a6))

/*
 * Invoke the EFI runtime service @f through efi_call##x, passing the
 * remaining arguments, and evaluate to its efi_status_t result.
 *
 * efi_sync_low_kernel_mappings() is called before the service is invoked,
 * and preemption is disabled for the duration of the call itself.
 */
#define _efi_call_virtX(x, f, ...)					\
({									\
	efi_status_t __s;						\
									\
	efi_sync_low_kernel_mappings();					\
	preempt_disable();						\
	__s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__);	\
	preempt_enable();						\
	__s;								\
})

#define efi_call_virt0(f)				\
	efi_call0((efi.systab->runtime->f))
	_efi_call_virtX(0, f)
#define efi_call_virt1(f, a1)				\
	efi_call1((efi.systab->runtime->f), (u64)(a1))
	_efi_call_virtX(1, f, (u64)(a1))
#define efi_call_virt2(f, a1, a2)			\
	efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
	_efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
#define efi_call_virt3(f, a1, a2, a3)			\
	efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		  (u64)(a3))
	_efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
#define efi_call_virt4(f, a1, a2, a3, a4)		\
	efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		  (u64)(a3), (u64)(a4))
	_efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
#define efi_call_virt5(f, a1, a2, a3, a4, a5)		\
	efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		  (u64)(a3), (u64)(a4), (u64)(a5))
	_efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)	\
	efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
	_efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))

extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
				 u32 type, u64 attribute);
@@ -95,12 +120,17 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,

extern int add_efi_memmap;
extern unsigned long x86_efi_facility;
extern struct efi_scratch efi_scratch;
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
extern void efi_call_phys_epilog(void);
extern void efi_unmap_memmap(void);
extern void efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
extern void efi_sync_low_kernel_mappings(void);
extern void efi_setup_page_tables(void);
extern void __init old_map_region(efi_memory_desc_t *md);

#ifdef CONFIG_EFI

+2 −1
Original line number Diff line number Diff line
@@ -382,7 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
 */
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern phys_addr_t slow_virt_to_phys(void *__address);

/*
 * Set up mappings for @numpages pages starting at virtual @address in the
 * page table hierarchy rooted at @pgd, using @page_flags for the entries.
 * NOTE(review): @pfn appears to be consumed as a physical address rather
 * than a page frame number by the implementation - confirm with callers.
 */
extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
				   unsigned numpages, unsigned long page_flags);
#endif	/* !__ASSEMBLY__ */

#endif /* _ASM_X86_PGTABLE_DEFS_H */
+444 −17
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
 */
struct cpa_data {
	unsigned long	*vaddr;
	pgd_t		*pgd;
	pgprot_t	mask_set;
	pgprot_t	mask_clr;
	int		numpages;
@@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
	return prot;
}

/*
 * Lookup the page table entry for a virtual address. Return a pointer
 * to the entry and the level of the mapping.
 *
 * Note: We return pud and pmd either when the entry is marked large
 * or when the present bit is not set. Otherwise we would return a
 * pointer to a nonexisting mapping.
 */
pte_t *lookup_address(unsigned long address, unsigned int *level)
static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
				      unsigned int *level)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;

@@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)

	return pte_offset_kernel(pmd, address);
}

/*
 * Lookup the page table entry for a virtual address. Return a pointer
 * to the entry and the level of the mapping.
 *
 * Note: We return pud and pmd either when the entry is marked large
 * or when the present bit is not set. Otherwise we would return a
 * pointer to a nonexisting mapping.
 */
pte_t *lookup_address(unsigned long address, unsigned int *level)
{
        return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
}
EXPORT_SYMBOL_GPL(lookup_address);

static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
				  unsigned int *level)
{
        if (cpa->pgd)
		return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
					       address, level);

        return lookup_address(address, level);
}

/*
 * This is necessary because __pa() does not work on some
 * kinds of memory, like vmalloc() or the alloc_remap()
@@ -437,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
	 * Check for races, another CPU might have split this page
	 * up already:
	 */
	tmp = lookup_address(address, &level);
	tmp = _lookup_address_cpa(cpa, address, &level);
	if (tmp != kpte)
		goto out_unlock;

@@ -543,7 +559,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
}

static int
__split_large_page(pte_t *kpte, unsigned long address, struct page *base)
__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
		   struct page *base)
{
	pte_t *pbase = (pte_t *)page_address(base);
	unsigned long pfn, pfninc = 1;
@@ -556,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
	 * Check for races, another CPU might have split this page
	 * up for us already:
	 */
	tmp = lookup_address(address, &level);
	tmp = _lookup_address_cpa(cpa, address, &level);
	if (tmp != kpte) {
		spin_unlock(&pgd_lock);
		return 1;
@@ -632,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
	return 0;
}

static int split_large_page(pte_t *kpte, unsigned long address)
static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
			    unsigned long address)
{
	struct page *base;

@@ -644,15 +662,390 @@ static int split_large_page(pte_t *kpte, unsigned long address)
	if (!base)
		return -ENOMEM;

	if (__split_large_page(kpte, address, base))
	if (__split_large_page(cpa, kpte, address, base))
		__free_page(base);

	return 0;
}

/*
 * Free the PTE page at @pte if none of its PTRS_PER_PTE entries is in use.
 * Returns true when the page was freed, false when it was left alone.
 */
static bool try_to_free_pte_page(pte_t *pte)
{
	pte_t *entry = pte;
	pte_t *last = pte + PTRS_PER_PTE;

	while (entry < last) {
		if (!pte_none(*entry))
			return false;
		entry++;
	}

	free_page((unsigned long)pte);
	return true;
}

/*
 * Free the PMD page at @pmd if none of its PTRS_PER_PMD entries is in use.
 * Returns true when the page was freed, false when it was left alone.
 */
static bool try_to_free_pmd_page(pmd_t *pmd)
{
	pmd_t *entry = pmd;
	pmd_t *last = pmd + PTRS_PER_PMD;

	while (entry < last) {
		if (!pmd_none(*entry))
			return false;
		entry++;
	}

	free_page((unsigned long)pmd);
	return true;
}

/*
 * Clear every PTE covering [start, end) below @pmd, then try to release
 * the PTE page. Returns true if the PTE page was freed and @pmd cleared,
 * false if the page still holds entries.
 */
static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
{
	pte_t *pte = pte_offset_kernel(pmd, start);
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE, pte++)
		set_pte(pte, __pte(0));

	if (!try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd)))
		return false;

	pmd_clear(pmd);
	return true;
}

/*
 * Unmap the 4K pages in [start, end) below @pmd. If that released the PTE
 * page, also try to release the PMD page @pud points to and clear @pud on
 * success.
 */
static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
			      unsigned long start, unsigned long end)
{
	if (!unmap_pte_range(pmd, start, end))
		return;

	if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
		pud_clear(pud);
}

/*
 * Tear down the mappings for [start, end) below @pud.
 *
 * A leading chunk that does not start on a 2M boundary and any trailing 4K
 * leftovers are unmapped PTE-wise through __unmap_pmd_range(). Whole 2M
 * chunks in between are either cleared directly (large PMD) or unmapped
 * PTE-wise as well. When no 4K leftovers remain, one more attempt is made
 * to free the PMD page and clear @pud.
 */
static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
{
	pmd_t *pmd = pmd_offset(pud, start);

	/*
	 * Not on a 2MB page boundary?
	 */
	if (start & (PMD_SIZE - 1)) {
		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
		unsigned long pre_end = min_t(unsigned long, end, next_page);

		__unmap_pmd_range(pud, pmd, start, pre_end);

		start = pre_end;
		pmd++;
	}

	/*
	 * Try to unmap in 2M chunks.
	 */
	while (end - start >= PMD_SIZE) {
		if (pmd_large(*pmd))
			pmd_clear(pmd);
		else
			__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);

		start += PMD_SIZE;
		pmd++;
	}

	/*
	 * 4K leftovers? Unmap them and stop; the final PMD page check below
	 * is not reached on this path.
	 */
	if (start < end) {
		__unmap_pmd_range(pud, pmd, start, end);
		return;
	}

	/*
	 * Try again to free the PMD page if we haven't succeeded above.
	 */
	if (!pud_none(*pud))
		if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
			pud_clear(pud);
}

/*
 * Tear down the mappings for [start, end) below the PGD entry @pgd.
 *
 * The range is processed in three parts: a leading piece up to the next 1G
 * boundary, whole 1G chunks, and a trailing piece smaller than 1G.
 */
static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
{
	pud_t *pud = pud_offset(pgd, start);

	/* Leading piece that does not begin on a 1G boundary. */
	if (start & (PUD_SIZE - 1)) {
		unsigned long boundary = (start + PUD_SIZE) & PUD_MASK;
		unsigned long head_end = min_t(unsigned long, end, boundary);

		unmap_pmd_range(pud, start, head_end);

		start = head_end;
		pud++;
	}

	/* Whole 1G chunks: drop a large PUD directly, else descend. */
	for (; end - start >= PUD_SIZE; start += PUD_SIZE, pud++) {
		if (!pud_large(*pud))
			unmap_pmd_range(pud, start, start + PUD_SIZE);
		else
			pud_clear(pud);
	}

	/* Trailing piece smaller than 1G. */
	if (start < end)
		unmap_pmd_range(pud, start, end);

	/*
	 * The PUD page is deliberately not freed here: populate_pgd's
	 * error path takes care of it.
	 */
}

/*
 * Allocate a zeroed page to serve as a PTE page and hook it into @pmd with
 * _KERNPG_TABLE permissions. Returns 0 on success, -1 if the allocation
 * failed.
 */
static int alloc_pte_page(pmd_t *pmd)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);

	if (!page)
		return -1;

	set_pmd(pmd, __pmd(__pa(page) | _KERNPG_TABLE));
	return 0;
}

/*
 * Allocate a zeroed page to serve as a PMD page and hook it into @pud with
 * _KERNPG_TABLE permissions. Returns 0 on success, -1 if the allocation
 * failed.
 */
static int alloc_pmd_page(pud_t *pud)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);

	if (!page)
		return -1;

	set_pud(pud, __pud(__pa(page) | _KERNPG_TABLE));
	return 0;
}

/*
 * Fill in 4K PTEs below @pmd for at most @num_pages pages, stopping early
 * once @start reaches @end.
 *
 * cpa->pfn is shifted right by PAGE_SHIFT to form each entry and advanced
 * by PAGE_SIZE per page, i.e. it is handled as a byte address here.
 */
static void populate_pte(struct cpa_data *cpa,
			 unsigned long start, unsigned long end,
			 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
{
	pte_t *pte = pte_offset_kernel(pmd, start);
	const bool strip_nx = !(pgprot_val(pgprot) & _PAGE_NX);

	for (; num_pages && start < end; num_pages--) {

		/* deal with the NX bit */
		if (strip_nx)
			cpa->pfn &= ~_PAGE_NX;

		set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));

		cpa->pfn += PAGE_SIZE;
		start	 += PAGE_SIZE;
		pte++;
	}
}

/*
 * Map up to @num_pages pages covering [start, end) below @pud.
 *
 * A leading piece that does not start on a 2M boundary is mapped with 4K
 * PTEs, whole 2M chunks are mapped with large PMD entries, and whatever
 * remains is mapped with 4K PTEs again. PTE and PMD pages are allocated
 * on demand.
 *
 * Returns the number of pages handled, or -1 if a page table page could
 * not be allocated.
 */
static int populate_pmd(struct cpa_data *cpa,
			unsigned long start, unsigned long end,
			unsigned num_pages, pud_t *pud, pgprot_t pgprot)
{
	unsigned int cur_pages = 0;
	pmd_t *pmd;

	/*
	 * Not on a 2M boundary?
	 */
	if (start & (PMD_SIZE - 1)) {
		/*
		 * Widen before shifting: num_pages is 32 bits wide, so the
		 * byte count would otherwise be truncated for large regions.
		 */
		unsigned long pre_end = start +
					((unsigned long)num_pages << PAGE_SHIFT);
		unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;

		pre_end   = min_t(unsigned long, pre_end, next_page);
		cur_pages = (pre_end - start) >> PAGE_SHIFT;
		cur_pages = min_t(unsigned int, num_pages, cur_pages);

		/*
		 * Need a PTE page?
		 */
		pmd = pmd_offset(pud, start);
		if (pmd_none(*pmd))
			if (alloc_pte_page(pmd))
				return -1;

		populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);

		start = pre_end;
	}

	/*
	 * We mapped them all?
	 */
	if (num_pages == cur_pages)
		return cur_pages;

	while (end - start >= PMD_SIZE) {

		/*
		 * We cannot use a 1G page so allocate a PMD page if needed.
		 */
		if (pud_none(*pud))
			if (alloc_pmd_page(pud))
				return -1;

		pmd = pmd_offset(pud, start);

		set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));

		start	  += PMD_SIZE;
		cpa->pfn  += PMD_SIZE;
		cur_pages += PMD_SIZE >> PAGE_SHIFT;
	}

	/*
	 * Map trailing 4K pages.
	 */
	if (start < end) {
		pmd = pmd_offset(pud, start);
		if (pmd_none(*pmd))
			if (alloc_pte_page(pmd))
				return -1;

		populate_pte(cpa, start, end, num_pages - cur_pages,
			     pmd, pgprot);
	}
	return num_pages;
}

/*
 * Map cpa->numpages pages starting at virtual address @start below the PGD
 * entry @pgd.
 *
 * A leading piece that does not start on a 1G boundary is handed to
 * populate_pmd(), whole 1G chunks are mapped with large PUD entries, and
 * the remainder is handed to populate_pmd() again. PMD pages are allocated
 * on demand.
 *
 * Returns the number of pages mapped, or a negative value if the leading
 * piece or a PMD page allocation failed.
 */
static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
			pgprot_t pgprot)
{
	pud_t *pud;
	unsigned long end;
	int cur_pages = 0;

	/*
	 * Widen before shifting: cpa->numpages is an int, so shifting it by
	 * PAGE_SHIFT directly overflows for large regions.
	 */
	end = start + ((unsigned long)cpa->numpages << PAGE_SHIFT);

	/*
	 * Not on a Gb page boundary? => map everything up to it with
	 * smaller pages.
	 */
	if (start & (PUD_SIZE - 1)) {
		unsigned long pre_end;
		unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;

		pre_end   = min_t(unsigned long, end, next_page);
		cur_pages = (pre_end - start) >> PAGE_SHIFT;
		cur_pages = min_t(int, (int)cpa->numpages, cur_pages);

		pud = pud_offset(pgd, start);

		/*
		 * Need a PMD page?
		 */
		if (pud_none(*pud))
			if (alloc_pmd_page(pud))
				return -1;

		cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
					 pud, pgprot);
		if (cur_pages < 0)
			return cur_pages;

		start = pre_end;
	}

	/* We mapped them all? */
	if (cpa->numpages == cur_pages)
		return cur_pages;

	pud = pud_offset(pgd, start);

	/*
	 * Map everything starting from the Gb boundary, possibly with 1G pages
	 *
	 * NOTE(review): no check that the CPU supports 1G pages is visible
	 * here - confirm that callers guarantee it.
	 */
	while (end - start >= PUD_SIZE) {
		set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));

		start	  += PUD_SIZE;
		cpa->pfn  += PUD_SIZE;
		cur_pages += PUD_SIZE >> PAGE_SHIFT;
		pud++;
	}

	/* Map trailing leftover */
	if (start < end) {
		int tmp;

		pud = pud_offset(pgd, start);
		if (pud_none(*pud))
			if (alloc_pmd_page(pud))
				return -1;

		tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
				   pud, pgprot);
		/*
		 * NOTE(review): a failure here reports the pages mapped so
		 * far instead of an error - confirm this is what the caller
		 * expects.
		 */
		if (tmp < 0)
			return cur_pages;

		cur_pages += tmp;
	}
	return cur_pages;
}

/*
 * Populate the mapping for cpa->numpages pages at @addr in the alternate
 * PGD carried in cpa->pgd, allocating a PUD page when the PGD entry is
 * empty.
 *
 * Restrictions for kernel page table do not necessarily apply when mapping in
 * an alternate PGD.
 *
 * On success, cpa->numpages is set to the number of pages mapped and 0 is
 * returned. On failure, whatever was mapped is torn down again and a
 * negative value is returned.
 */
static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
{
	pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
	bool allocd_pgd = false;
	pgd_t *pgd_entry;
	pud_t *pud = NULL;	/* shut up gcc */
	int ret;

	pgd_entry = cpa->pgd + pgd_index(addr);

	/*
	 * Allocate a PUD page and hand it down for mapping.
	 */
	if (pgd_none(*pgd_entry)) {
		pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
		if (!pud)
			return -1;

		set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
		allocd_pgd = true;
	}

	pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
	pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);

	ret = populate_pud(cpa, addr, pgd_entry, pgprot);
	if (ret < 0) {
		/*
		 * Widen before shifting: cpa->numpages is an int, so
		 * shifting it by PAGE_SHIFT directly overflows for large
		 * regions and would yield a bogus end address.
		 */
		unmap_pud_range(pgd_entry, addr,
				addr + ((unsigned long)cpa->numpages << PAGE_SHIFT));

		if (allocd_pgd) {
			/*
			 * If I allocated this PUD page, I can just as well
			 * free it in this error path.
			 */
			pgd_clear(pgd_entry);
			free_page((unsigned long)pud);
		}
		return ret;
	}
	cpa->numpages = ret;
	return 0;
}

static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
			       int primary)
{
	if (cpa->pgd)
		return populate_pgd(cpa, vaddr);

	/*
	 * Ignore all non primary paths.
	 */
@@ -697,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
	else
		address = *cpa->vaddr;
repeat:
	kpte = lookup_address(address, &level);
	kpte = _lookup_address_cpa(cpa, address, &level);
	if (!kpte)
		return __cpa_process_fault(cpa, address, primary);

@@ -761,7 +1154,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
	/*
	 * We have to split the large page:
	 */
	err = split_large_page(kpte, address);
	err = split_large_page(cpa, kpte, address);
	if (!err) {
		/*
	 	 * Do a global flush tlb after splitting the large page
@@ -910,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
	int ret, cache, checkalias;
	unsigned long baddr = 0;

	memset(&cpa, 0, sizeof(cpa));

	/*
	 * Check, if we are requested to change a not supported
	 * feature:
@@ -1356,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages)
{
	unsigned long tempaddr = (unsigned long) page_address(page);
	struct cpa_data cpa = { .vaddr = &tempaddr,
				.pgd = NULL,
				.numpages = numpages,
				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
				.mask_clr = __pgprot(0),
@@ -1374,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages)
{
	unsigned long tempaddr = (unsigned long) page_address(page);
	struct cpa_data cpa = { .vaddr = &tempaddr,
				.pgd = NULL,
				.numpages = numpages,
				.mask_set = __pgprot(0),
				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
@@ -1434,6 +1831,36 @@ bool kernel_page_present(struct page *page)

#endif /* CONFIG_DEBUG_PAGEALLOC */

/*
 * Map @numpages pages at virtual @address into the page tables rooted at
 * @pgd, with _PAGE_PRESENT and @page_flags set on the entries. _PAGE_NX is
 * masked out when @page_flags does not request it.
 *
 * Returns -EINVAL when _PAGE_NX is not part of __supported_pte_mask,
 * otherwise the result of __change_page_attr_set_clr(). The TLB is flushed
 * after the page tables have been touched.
 */
int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
			    unsigned numpages, unsigned long page_flags)
{
	struct cpa_data cpa = {
		.vaddr = &address,
		.pfn = pfn,
		.pgd = pgd,
		.numpages = numpages,
		.mask_set = __pgprot(_PAGE_PRESENT | page_flags),
		.mask_clr = __pgprot(0),
		.flags = 0,
	};
	int retval;

	if (!(__supported_pte_mask & _PAGE_NX))
		return -EINVAL;

	if (!(page_flags & _PAGE_NX))
		cpa.mask_clr = __pgprot(_PAGE_NX);

	retval = __change_page_attr_set_clr(&cpa, 0);
	__flush_tlb_all();

	return retval;
}

/*
 * The testcases use internal knowledge of the implementation that shouldn't
 * be exposed to the rest of the kernel. Include these directly here.
Loading