Merge tag 'efi-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi into x86/efi (61d06697) · Commits · e / devices / android_kernel_oneplus_sm7250

Documentation/kernel-parameters.txt

+6 −0

Original line number	Diff line number	Diff line
		@@ -890,6 +890,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
		edd= [EDD]
		Format: {"off" | "on" | "skip[mbr]"}

		efi= [EFI]
		Format: { "old_map" }
		old_map [X86-64]: switch to the old ioremap-based EFI
		runtime services mapping. 32-bit still uses this one by
		default.

		efi_no_storage_paranoia [EFI; X86]
		Using this parameter you can use more than 50% of
		your efi variable storage. Use this parameter only if

Documentation/x86/x86_64/mm.txt

+7 −0

Original line number	Diff line number	Diff line
		@@ -28,4 +28,11 @@ reference.
		Current X86-64 implementations only support 40 bits of address space,
		but we support up to 46 bits. This expands into MBZ space in the page tables.

		->trampoline_pgd:

		We map EFI runtime services in the aforementioned PGD in the virtual
		range of 64Gb (arbitrarily set, can be raised if needed)

		0xffffffef00000000 - 0xffffffff00000000

		-Andi Kleen, Jul 2004

arch/x86/include/asm/efi.h

+47 −17

Original line number	Diff line number	Diff line
		#ifndef _ASM_X86_EFI_H
		#define _ASM_X86_EFI_H

		/*
		* We map the EFI regions needed for runtime services non-contiguously,
		* with preserved alignment on virtual addresses starting from -4G down
		* for a total max space of 64G. This way, we provide for stable runtime
		* services addresses across kernels so that a kexec'd kernel can still
		* use them.
		*
		* This is the main reason why we're doing stable VA mappings for RT
		* services.
		*
		* This flag is used in conjunction with a chicken bit called
		* "efi=old_map" which can be used as a fallback to the old runtime
		* services mapping method in case there's some b0rkage with a
		* particular EFI implementation (haha, it is hard to hold up the
		* sarcasm here...).
		*/
		#define EFI_OLD_MEMMAP EFI_ARCH_1

		#ifdef CONFIG_X86_32

		#define EFI_LOADER_SIGNATURE "EL32"
		@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
		efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \
		(u64)(a4), (u64)(a5), (u64)(a6))

		/*
		 * Common body for the efi_call_virt<N>() wrappers.
		 *
		 * @x:   number of arguments; pasted onto "efi_call" to select efi_call<x>()
		 * @f:   name of the member of efi.systab->runtime to invoke
		 * @...: the arguments handed to the runtime service
		 *
		 * Calls efi_sync_low_kernel_mappings(), then invokes the service with
		 * preemption disabled. The statement expression evaluates to the
		 * efi_status_t returned by the call.
		 *
		 * NOTE(review): when expanded with no variadic arguments (x == 0) the
		 * call is left with a trailing comma after the function pointer -
		 * confirm that case is never expanded, or use ##__VA_ARGS__.
		 */
		#define _efi_call_virtX(x, f, ...) \
		({ \
		efi_status_t __s; \
		\
		efi_sync_low_kernel_mappings(); \
		preempt_disable(); \
		__s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \
		preempt_enable(); \
		__s; \
		})

		#define efi_call_virt0(f) \
		efi_call0((efi.systab->runtime->f))
		_efi_call_virtX(0, f)
		#define efi_call_virt1(f, a1) \
		efi_call1((efi.systab->runtime->f), (u64)(a1))
		_efi_call_virtX(1, f, (u64)(a1))
		#define efi_call_virt2(f, a1, a2) \
		efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2))
		_efi_call_virtX(2, f, (u64)(a1), (u64)(a2))
		#define efi_call_virt3(f, a1, a2, a3) \
		efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		(u64)(a3))
		_efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3))
		#define efi_call_virt4(f, a1, a2, a3, a4) \
		efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		(u64)(a3), (u64)(a4))
		_efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4))
		#define efi_call_virt5(f, a1, a2, a3, a4, a5) \
		efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		(u64)(a3), (u64)(a4), (u64)(a5))
		_efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5))
		#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
		efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
		(u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
		_efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))

		extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
		u32 type, u64 attribute);
		@@ -95,12 +120,17 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,

		extern int add_efi_memmap;
		extern unsigned long x86_efi_facility;
		extern struct efi_scratch efi_scratch;
		extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
		extern int efi_memblock_x86_reserve_range(void);
		extern void efi_call_phys_prelog(void);
		extern void efi_call_phys_epilog(void);
		extern void efi_unmap_memmap(void);
		extern void efi_memory_uc(u64 addr, unsigned long size);
		extern void __init efi_map_region(efi_memory_desc_t *md);
		extern void efi_sync_low_kernel_mappings(void);
		extern void efi_setup_page_tables(void);
		extern void __init old_map_region(efi_memory_desc_t *md);

		#ifdef CONFIG_EFI

arch/x86/include/asm/pgtable_types.h

+2 −1

Original line number	Diff line number	Diff line
		@@ -382,7 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
		*/
		/* Returns a pointer to the entry; the mapping level is stored via @level. */
		extern pte_t *lookup_address(unsigned long address, unsigned int *level);
		extern phys_addr_t slow_virt_to_phys(void *__address);

		extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
		unsigned numpages, unsigned long page_flags);
		#endif /* !__ASSEMBLY__ */

		#endif /* _ASM_X86_PGTABLE_DEFS_H */

arch/x86/mm/pageattr.c

+444 −17

Original line number	Diff line number	Diff line
		@@ -30,6 +30,7 @@
		*/
		struct cpa_data {
		unsigned long *vaddr;
		pgd_t *pgd;
		pgprot_t mask_set;
		pgprot_t mask_clr;
		int numpages;
		@@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
		return prot;
		}

		/*
		* Lookup the page table entry for a virtual address. Return a pointer
		* to the entry and the level of the mapping.
		*
		* Note: We return pud and pmd either when the entry is marked large
		* or when the present bit is not set. Otherwise we would return a
		* pointer to a nonexisting mapping.
		*/
		pte_t lookup_address(unsigned long address, unsigned int level)
		static pte_t __lookup_address_in_pgd(pgd_t pgd, unsigned long address,
		unsigned int *level)
		{
		pgd_t *pgd = pgd_offset_k(address);
		pud_t *pud;
		pmd_t *pmd;

		@@ -361,8 +354,31 @@ pte_t lookup_address(unsigned long address, unsigned int level)

		return pte_offset_kernel(pmd, address);
		}

		/*
		 * Lookup the page table entry for a virtual address in the kernel
		 * page tables. Return a pointer to the entry and store the level of
		 * the mapping in *@level.
		 *
		 * Note: We return pud and pmd either when the entry is marked large
		 * or when the present bit is not set. Otherwise we would return a
		 * pointer to a nonexisting mapping.
		 */
		pte_t *lookup_address(unsigned long address, unsigned int *level)
		{
			return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
		}
		EXPORT_SYMBOL_GPL(lookup_address);

		static pte_t _lookup_address_cpa(struct cpa_data cpa, unsigned long address,
		unsigned int *level)
		{
		if (cpa->pgd)
		return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
		address, level);

		return lookup_address(address, level);
		}

		/*
		* This is necessary because __pa() does not work on some
		* kinds of memory, like vmalloc() or the alloc_remap()
		@@ -437,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
		* Check for races, another CPU might have split this page
		* up already:
		*/
		tmp = lookup_address(address, &level);
		tmp = _lookup_address_cpa(cpa, address, &level);
		if (tmp != kpte)
		goto out_unlock;

		@@ -543,7 +559,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
		}

		static int
		__split_large_page(pte_t kpte, unsigned long address, struct page base)
		__split_large_page(struct cpa_data cpa, pte_t kpte, unsigned long address,
		struct page *base)
		{
		pte_t pbase = (pte_t )page_address(base);
		unsigned long pfn, pfninc = 1;
		@@ -556,7 +573,7 @@ __split_large_page(pte_t kpte, unsigned long address, struct page base)
		* Check for races, another CPU might have split this page
		* up for us already:
		*/
		tmp = lookup_address(address, &level);
		tmp = _lookup_address_cpa(cpa, address, &level);
		if (tmp != kpte) {
		spin_unlock(&pgd_lock);
		return 1;
		@@ -632,7 +649,8 @@ __split_large_page(pte_t kpte, unsigned long address, struct page base)
		return 0;
		}

		static int split_large_page(pte_t *kpte, unsigned long address)
		static int split_large_page(struct cpa_data cpa, pte_t kpte,
		unsigned long address)
		{
		struct page *base;

		@@ -644,15 +662,390 @@ static int split_large_page(pte_t *kpte, unsigned long address)
		if (!base)
		return -ENOMEM;

		if (__split_large_page(kpte, address, base))
		if (__split_large_page(cpa, kpte, address, base))
		__free_page(base);

		return 0;
		}

		/*
		 * Free the PTE page @pte if every one of its PTRS_PER_PTE entries is
		 * none.
		 *
		 * Returns true when the page was freed, false when at least one entry
		 * is still in use (the page is then left untouched).
		 */
		static bool try_to_free_pte_page(pte_t *pte)
		{
			int idx = 0;

			while (idx < PTRS_PER_PTE) {
				if (!pte_none(pte[idx]))
					return false;
				idx++;
			}

			free_page((unsigned long)pte);
			return true;
		}

		/*
		 * Free the PMD page @pmd if every one of its PTRS_PER_PMD entries is
		 * none.
		 *
		 * Returns true when the page was freed, false when at least one entry
		 * is still in use (the page is then left untouched).
		 */
		static bool try_to_free_pmd_page(pmd_t *pmd)
		{
			int idx = 0;

			while (idx < PTRS_PER_PMD) {
				if (!pmd_none(pmd[idx]))
					return false;
				idx++;
			}

			free_page((unsigned long)pmd);
			return true;
		}

		/*
		 * Clear every PTE covering [@start, @end) below @pmd, one PAGE_SIZE
		 * step at a time, then try to release the PTE page itself.
		 *
		 * Returns true if the PTE page became empty and was freed (in which
		 * case @pmd has been cleared as well), false otherwise.
		 */
		static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
		{
			pte_t *pte = pte_offset_kernel(pmd, start);

			while (start < end) {
				set_pte(pte, __pte(0));

				start += PAGE_SIZE;
				pte++;
			}

			/* The PTE page is addressed through the PMD entry's value. */
			if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
				pmd_clear(pmd);
				return true;
			}
			return false;
		}

		/*
		 * Unmap the 4K pages in [@start, @end) below @pmd. If that released the
		 * PTE page, also try to release the PMD page referenced by @pud and
		 * clear @pud when it was freed.
		 */
		static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
					      unsigned long start, unsigned long end)
		{
			if (unmap_pte_range(pmd, start, end))
				if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
					pud_clear(pud);
		}

		/*
		 * Unmap [@start, @end) below @pud.
		 *
		 * The range is handled in three parts: an unaligned head up to the next
		 * 2M boundary, whole 2M chunks (large PMDs are simply cleared), and a
		 * trailing remainder smaller than 2M.
		 */
		static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
		{
			pmd_t *pmd = pmd_offset(pud, start);

			/*
			 * Not on a 2MB page boundary?
			 */
			if (start & (PMD_SIZE - 1)) {
				unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
				unsigned long pre_end = min_t(unsigned long, end, next_page);

				__unmap_pmd_range(pud, pmd, start, pre_end);

				start = pre_end;
				pmd++;
			}

			/*
			 * Try to unmap in 2M chunks.
			 */
			while (end - start >= PMD_SIZE) {
				if (pmd_large(*pmd))
					pmd_clear(pmd);
				else
					__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);

				start += PMD_SIZE;
				pmd++;
			}

			/*
			 * 4K leftovers? __unmap_pmd_range() already attempts to free the
			 * PMD page, so there is nothing left to do afterwards.
			 */
			if (start < end) {
				__unmap_pmd_range(pud, pmd, start, end);
				return;
			}

			/*
			 * Try again to free the PMD page if we haven't succeeded above.
			 */
			if (!pud_none(*pud))
				if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
					pud_clear(pud);
		}

		/*
		 * Unmap [@start, @end) below @pgd.
		 *
		 * Handles an unaligned head up to the next 1G boundary, then whole 1G
		 * chunks (large PUDs are simply cleared), then whatever is left.
		 */
		static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
		{
			pud_t *pud = pud_offset(pgd, start);

			/* Head: bring start up to a GB boundary first. */
			if (start & (PUD_SIZE - 1)) {
				unsigned long boundary = (start + PUD_SIZE) & PUD_MASK;
				unsigned long head_end = min_t(unsigned long, end, boundary);

				unmap_pmd_range(pud, start, head_end);

				start = head_end;
				pud++;
			}

			/* Body: one PUD entry per 1G chunk. */
			for (; end - start >= PUD_SIZE; start += PUD_SIZE, pud++) {
				if (pud_large(*pud))
					pud_clear(pud);
				else
					unmap_pmd_range(pud, start, start + PUD_SIZE);
			}

			/* Tail: less than 1G remains. */
			if (start < end)
				unmap_pmd_range(pud, start, end);

			/*
			 * The PUD page itself is not freed here; populate_pgd's error
			 * path takes care of it.
			 */
		}

		/*
		 * Allocate a zeroed PTE page and install it in @pmd as a kernel page
		 * table.
		 *
		 * Returns 0 on success, -1 if the page could not be allocated.
		 */
		static int alloc_pte_page(pmd_t *pmd)
		{
			pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
			if (!pte)
				return -1;

			set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
			return 0;
		}

		/*
		 * Allocate a zeroed PMD page and install it in @pud as a kernel page
		 * table.
		 *
		 * Returns 0 on success, -1 if the page could not be allocated.
		 */
		static int alloc_pmd_page(pud_t *pud)
		{
			pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
			if (!pmd)
				return -1;

			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
			return 0;
		}

		/*
		 * Install 4K mappings below @pmd for [@start, @end), writing at most
		 * @num_pages entries with protection @pgprot.
		 *
		 * cpa->pfn is shifted right by PAGE_SHIFT when the PTE is built and
		 * advanced by PAGE_SIZE per page, i.e. it is used as a byte address
		 * here.
		 */
		static void populate_pte(struct cpa_data *cpa,
					 unsigned long start, unsigned long end,
					 unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
		{
			pte_t *entry = pte_offset_kernel(pmd, start);
			/* deal with the NX bit: strip it from the address if not requested */
			bool strip_nx = !(pgprot_val(pgprot) & _PAGE_NX);

			for (; num_pages && start < end; num_pages--) {
				if (strip_nx)
					cpa->pfn &= ~_PAGE_NX;

				set_pte(entry, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));

				entry++;
				cpa->pfn += PAGE_SIZE;
				start += PAGE_SIZE;
			}
		}

		/*
		 * Map @num_pages pages covering [@start, @end) below @pud.
		 *
		 * An unaligned head is mapped with 4K pages up to the next 2M boundary,
		 * the aligned middle with 2M large pages, and the remainder with 4K
		 * pages again. PTE/PMD pages are allocated on demand.
		 *
		 * Returns the number of pages mapped, or -1 if a page table page could
		 * not be allocated.
		 */
		static int populate_pmd(struct cpa_data *cpa,
					unsigned long start, unsigned long end,
					unsigned num_pages, pud_t *pud, pgprot_t pgprot)
		{
			unsigned int cur_pages = 0;
			pmd_t *pmd;

			/*
			 * Not on a 2M boundary?
			 */
			if (start & (PMD_SIZE - 1)) {
				unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
				unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;

				pre_end = min_t(unsigned long, pre_end, next_page);
				cur_pages = (pre_end - start) >> PAGE_SHIFT;
				cur_pages = min_t(unsigned int, num_pages, cur_pages);

				/*
				 * Need a PTE page?
				 */
				pmd = pmd_offset(pud, start);
				if (pmd_none(*pmd))
					if (alloc_pte_page(pmd))
						return -1;

				populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);

				start = pre_end;
			}

			/*
			 * We mapped them all?
			 */
			if (num_pages == cur_pages)
				return cur_pages;

			while (end - start >= PMD_SIZE) {

				/*
				 * We cannot use a 1G page so allocate a PMD page if needed.
				 */
				if (pud_none(*pud))
					if (alloc_pmd_page(pud))
						return -1;

				pmd = pmd_offset(pud, start);

				set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));

				start += PMD_SIZE;
				cpa->pfn += PMD_SIZE;
				cur_pages += PMD_SIZE >> PAGE_SHIFT;
			}

			/*
			 * Map trailing 4K pages.
			 */
			if (start < end) {
				pmd = pmd_offset(pud, start);
				if (pmd_none(*pmd))
					if (alloc_pte_page(pmd))
						return -1;

				populate_pte(cpa, start, end, num_pages - cur_pages,
					     pmd, pgprot);
			}
			return num_pages;
		}

		/*
		 * Map cpa->numpages pages starting at @start below the PGD entry @pgd.
		 *
		 * An unaligned head is handed to populate_pmd() up to the next 1G
		 * boundary, the aligned middle is mapped with PUD-sized large pages,
		 * and the remainder goes through populate_pmd() again.
		 *
		 * Returns the number of pages mapped. A failure in the head or a failed
		 * PMD page allocation yields a negative value; if only the trailing
		 * populate_pmd() fails, the count mapped so far is returned instead.
		 *
		 * NOTE(review): the 1G loop installs large PUD entries without any
		 * visible check that the CPU supports them - confirm against callers.
		 */
		static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
					pgprot_t pgprot)
		{
			pud_t *pud;
			unsigned long end;
			int cur_pages = 0;

			end = start + (cpa->numpages << PAGE_SHIFT);

			/*
			 * Not on a Gb page boundary? => map everything up to it with
			 * smaller pages.
			 */
			if (start & (PUD_SIZE - 1)) {
				unsigned long pre_end;
				unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;

				pre_end = min_t(unsigned long, end, next_page);
				cur_pages = (pre_end - start) >> PAGE_SHIFT;
				cur_pages = min_t(int, (int)cpa->numpages, cur_pages);

				pud = pud_offset(pgd, start);

				/*
				 * Need a PMD page?
				 */
				if (pud_none(*pud))
					if (alloc_pmd_page(pud))
						return -1;

				cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
							 pud, pgprot);
				if (cur_pages < 0)
					return cur_pages;

				start = pre_end;
			}

			/* We mapped them all? */
			if (cpa->numpages == cur_pages)
				return cur_pages;

			pud = pud_offset(pgd, start);

			/*
			 * Map everything starting from the Gb boundary, possibly with 1G pages
			 */
			while (end - start >= PUD_SIZE) {
				set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));

				start += PUD_SIZE;
				cpa->pfn += PUD_SIZE;
				cur_pages += PUD_SIZE >> PAGE_SHIFT;
				pud++;
			}

			/* Map trailing leftover */
			if (start < end) {
				int tmp;

				pud = pud_offset(pgd, start);
				if (pud_none(*pud))
					if (alloc_pmd_page(pud))
						return -1;

				tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
						   pud, pgprot);
				if (tmp < 0)
					return cur_pages;

				cur_pages += tmp;
			}
			return cur_pages;
		}

		/*
		 * Populate the mapping for cpa->numpages pages at @addr in the alternate
		 * PGD cpa->pgd.
		 *
		 * Restrictions for kernel page table do not necessarily apply when mapping in
		 * an alternate PGD.
		 *
		 * The protection starts from _KERNPG_TABLE, with cpa->mask_clr removed
		 * and cpa->mask_set added. On success cpa->numpages is updated to the
		 * number of pages actually mapped and 0 is returned. On failure the
		 * range is unmapped again, a PUD page allocated here is freed, and a
		 * negative value is returned.
		 */
		static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
		{
			pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
			bool allocd_pgd = false;
			pgd_t *pgd_entry;
			pud_t *pud = NULL;	/* shut up gcc */
			int ret;

			pgd_entry = cpa->pgd + pgd_index(addr);

			/*
			 * Allocate a PUD page and hand it down for mapping.
			 */
			if (pgd_none(*pgd_entry)) {
				pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
				if (!pud)
					return -1;

				set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
				allocd_pgd = true;
			}

			pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
			pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);

			ret = populate_pud(cpa, addr, pgd_entry, pgprot);
			if (ret < 0) {
				unmap_pud_range(pgd_entry, addr,
						addr + (cpa->numpages << PAGE_SHIFT));

				if (allocd_pgd) {
					/*
					 * If I allocated this PUD page, I can just as well
					 * free it in this error path.
					 */
					pgd_clear(pgd_entry);
					free_page((unsigned long)pud);
				}
				return ret;
			}
			cpa->numpages = ret;
			return 0;
		}

		static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
		int primary)
		{
		if (cpa->pgd)
		return populate_pgd(cpa, vaddr);

		/*
		* Ignore all non primary paths.
		*/
		@@ -697,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
		else
		address = *cpa->vaddr;
		repeat:
		kpte = lookup_address(address, &level);
		kpte = _lookup_address_cpa(cpa, address, &level);
		if (!kpte)
		return __cpa_process_fault(cpa, address, primary);

		@@ -761,7 +1154,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
		/*
		* We have to split the large page:
		*/
		err = split_large_page(kpte, address);
		err = split_large_page(cpa, kpte, address);
		if (!err) {
		/*
		* Do a global flush tlb after splitting the large page
		@@ -910,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
		int ret, cache, checkalias;
		unsigned long baddr = 0;

		memset(&cpa, 0, sizeof(cpa));

		/*
		* Check, if we are requested to change a not supported
		* feature:
		@@ -1356,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages)
		{
		unsigned long tempaddr = (unsigned long) page_address(page);
		struct cpa_data cpa = { .vaddr = &tempaddr,
		.pgd = NULL,
		.numpages = numpages,
		.mask_set = __pgprot(_PAGE_PRESENT \| _PAGE_RW),
		.mask_clr = __pgprot(0),
		@@ -1374,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages)
		{
		unsigned long tempaddr = (unsigned long) page_address(page);
		struct cpa_data cpa = { .vaddr = &tempaddr,
		.pgd = NULL,
		.numpages = numpages,
		.mask_set = __pgprot(0),
		.mask_clr = __pgprot(_PAGE_PRESENT \| _PAGE_RW),
		@@ -1434,6 +1831,36 @@ bool kernel_page_present(struct page *page)

		#endif /* CONFIG_DEBUG_PAGEALLOC */

		/*
		 * Map @numpages pages at virtual @address in the page table rooted at
		 * @pgd, using @page_flags (plus _PAGE_PRESENT) as protection bits.
		 *
		 * @pfn is stored in cpa.pfn as the start of the backing memory.
		 * _PAGE_NX is cleared from the mapping when @page_flags does not
		 * request it.
		 *
		 * Returns -EINVAL without touching anything when _PAGE_NX is not part
		 * of __supported_pte_mask; otherwise returns the result of
		 * __change_page_attr_set_clr(). The TLB is flushed after the attempt,
		 * whether or not it succeeded.
		 */
		int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
					    unsigned numpages, unsigned long page_flags)
		{
			int retval = -EINVAL;

			struct cpa_data cpa = {
				.vaddr = &address,
				.pfn = pfn,
				.pgd = pgd,
				.numpages = numpages,
				.mask_set = __pgprot(0),
				.mask_clr = __pgprot(0),
				.flags = 0,
			};

			if (!(__supported_pte_mask & _PAGE_NX))
				goto out;

			if (!(page_flags & _PAGE_NX))
				cpa.mask_clr = __pgprot(_PAGE_NX);

			cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);

			retval = __change_page_attr_set_clr(&cpa, 0);
			__flush_tlb_all();

		out:
			return retval;
		}

		/*
		* The testcases use internal knowledge of the implementation that shouldn't
		* be exposed to the rest of the kernel. Include these directly here.