Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f5deb796 authored by Huang Ying's avatar Huang Ying Committed by H. Peter Anvin
Browse files

x86: kexec: Use one page table in x86_64 machine_kexec



Impact: reduce kernel BSS size by 7 pages, improve code readability

Two page tables are used in current x86_64 kexec implementation. One
is used to jump from kernel virtual address to identity map address,
the other is used to map all physical memory. In fact, on x86_64,
there is no conflict between kernel virtual address space and physical
memory space, so just one page table is sufficient. The page table
pages used to map control page are dynamically allocated to save
memory if kexec image is not loaded. ASM code used to map control page
is replaced by C code too.

Signed-off-by: default avatarHuang Ying <ying.huang@intel.com>
Signed-off-by: default avatarH. Peter Anvin <hpa@zytor.com>
parent c415b3dc
Loading
Loading
Loading
Loading
+9 −18
Original line number Original line Diff line number Diff line
@@ -9,23 +9,8 @@
# define PAGES_NR		4
# define PAGES_NR		4
#else
#else
# define PA_CONTROL_PAGE	0
# define PA_CONTROL_PAGE	0
# define VA_CONTROL_PAGE	1
# define PA_TABLE_PAGE		1
# define PA_PGD			2
# define PAGES_NR		2
# define VA_PGD			3
# define PA_PUD_0		4
# define VA_PUD_0		5
# define PA_PMD_0		6
# define VA_PMD_0		7
# define PA_PTE_0		8
# define VA_PTE_0		9
# define PA_PUD_1		10
# define VA_PUD_1		11
# define PA_PMD_1		12
# define VA_PMD_1		13
# define PA_PTE_1		14
# define VA_PTE_1		15
# define PA_TABLE_PAGE		16
# define PAGES_NR		17
#endif
#endif


#ifdef CONFIG_X86_32
#ifdef CONFIG_X86_32
@@ -157,9 +142,9 @@ relocate_kernel(unsigned long indirection_page,
		unsigned long start_address) ATTRIB_NORET;
		unsigned long start_address) ATTRIB_NORET;
#endif
#endif


#ifdef CONFIG_X86_32
#define ARCH_HAS_KIMAGE_ARCH
#define ARCH_HAS_KIMAGE_ARCH


#ifdef CONFIG_X86_32
struct kimage_arch {
struct kimage_arch {
	pgd_t *pgd;
	pgd_t *pgd;
#ifdef CONFIG_X86_PAE
#ifdef CONFIG_X86_PAE
@@ -169,6 +154,12 @@ struct kimage_arch {
	pte_t *pte0;
	pte_t *pte0;
	pte_t *pte1;
	pte_t *pte1;
};
};
#else
struct kimage_arch {
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
};
#endif
#endif


#endif /* __ASSEMBLY__ */
#endif /* __ASSEMBLY__ */
+55 −27
Original line number Original line Diff line number Diff line
@@ -18,15 +18,6 @@
#include <asm/mmu_context.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
#include <asm/io.h>


#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u64 kexec_pgd[512] PAGE_ALIGNED;
static u64 kexec_pud0[512] PAGE_ALIGNED;
static u64 kexec_pmd0[512] PAGE_ALIGNED;
static u64 kexec_pte0[512] PAGE_ALIGNED;
static u64 kexec_pud1[512] PAGE_ALIGNED;
static u64 kexec_pmd1[512] PAGE_ALIGNED;
static u64 kexec_pte1[512] PAGE_ALIGNED;

static void init_level2_page(pmd_t *level2p, unsigned long addr)
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
{
	unsigned long end_addr;
	unsigned long end_addr;
@@ -107,12 +98,65 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
	return result;
	return result;
}
}


static void free_transition_pgtable(struct kimage *image)
{
	free_page((unsigned long)image->arch.pud);
	free_page((unsigned long)image->arch.pmd);
	free_page((unsigned long)image->arch.pte);
}

static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
{
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long vaddr, paddr;
	int result = -ENOMEM;

	vaddr = (unsigned long)relocate_kernel;
	paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
	pgd += pgd_index(vaddr);
	if (!pgd_present(*pgd)) {
		pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
		if (!pud)
			goto err;
		image->arch.pud = pud;
		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
	}
	pud = pud_offset(pgd, vaddr);
	if (!pud_present(*pud)) {
		pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
		if (!pmd)
			goto err;
		image->arch.pmd = pmd;
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
	}
	pmd = pmd_offset(pud, vaddr);
	if (!pmd_present(*pmd)) {
		pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
		if (!pte)
			goto err;
		image->arch.pte = pte;
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
	}
	pte = pte_offset_kernel(pmd, vaddr);
	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
	return 0;
err:
	free_transition_pgtable(image);
	return result;
}



static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
{
	pgd_t *level4p;
	pgd_t *level4p;
	int result;
	level4p = (pgd_t *)__va(start_pgtable);
	level4p = (pgd_t *)__va(start_pgtable);
	return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
	result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
	if (result)
		return result;
	return init_transition_pgtable(image, level4p);
}
}


static void set_idt(void *newidt, u16 limit)
static void set_idt(void *newidt, u16 limit)
@@ -174,7 +218,7 @@ int machine_kexec_prepare(struct kimage *image)


void machine_kexec_cleanup(struct kimage *image)
void machine_kexec_cleanup(struct kimage *image)
{
{
	return;
	free_transition_pgtable(image);
}
}


/*
/*
@@ -195,22 +239,6 @@ void machine_kexec(struct kimage *image)
	memcpy(control_page, relocate_kernel, PAGE_SIZE);
	memcpy(control_page, relocate_kernel, PAGE_SIZE);


	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
	page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
	page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
	page_list[VA_PGD] = (unsigned long)kexec_pgd;
	page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
	page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
	page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
	page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
	page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
	page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
	page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
	page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
	page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
	page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;

	page_list[PA_TABLE_PAGE] =
	page_list[PA_TABLE_PAGE] =
	  (unsigned long)__pa(page_address(image->control_code_page));
	  (unsigned long)__pa(page_address(image->control_code_page));


+3 −122
Original line number Original line Diff line number Diff line
@@ -29,122 +29,6 @@ relocate_kernel:
	 * %rdx start address
	 * %rdx start address
	 */
	 */


	/* map the control page at its virtual address */

	movq	$0x0000ff8000000000, %r10        /* mask */
	mov	$(39 - 3), %cl                   /* bits to shift */
	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */

	movq	%r11, %r9
	andq	%r10, %r9
	shrq	%cl, %r9

	movq	PTR(VA_PGD)(%rsi), %r8
	addq	%r8, %r9
	movq	PTR(PA_PUD_0)(%rsi), %r8
	orq	$PAGE_ATTR, %r8
	movq	%r8, (%r9)

	shrq	$9, %r10
	sub	$9, %cl

	movq	%r11, %r9
	andq	%r10, %r9
	shrq	%cl, %r9

	movq	PTR(VA_PUD_0)(%rsi), %r8
	addq	%r8, %r9
	movq	PTR(PA_PMD_0)(%rsi), %r8
	orq	$PAGE_ATTR, %r8
	movq	%r8, (%r9)

	shrq	$9, %r10
	sub	$9, %cl

	movq	%r11, %r9
	andq	%r10, %r9
	shrq	%cl, %r9

	movq	PTR(VA_PMD_0)(%rsi), %r8
	addq	%r8, %r9
	movq	PTR(PA_PTE_0)(%rsi), %r8
	orq	$PAGE_ATTR, %r8
	movq	%r8, (%r9)

	shrq	$9, %r10
	sub	$9, %cl

	movq	%r11, %r9
	andq	%r10, %r9
	shrq	%cl, %r9

	movq	PTR(VA_PTE_0)(%rsi), %r8
	addq	%r8, %r9
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
	orq	$PAGE_ATTR, %r8
	movq	%r8, (%r9)

	/* identity map the control page at its physical address */

	movq	$0x0000ff8000000000, %r10        /* mask */
	mov	$(39 - 3), %cl                   /* bits to shift */
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */

	movq	%r11, %r9
	andq	%r10, %r9
	shrq	%cl, %r9

	movq	PTR(VA_PGD)(%rsi), %r8
	addq	%r8, %r9
	movq	PTR(PA_PUD_1)(%rsi), %r8
	orq	$PAGE_ATTR, %r8
	movq	%r8, (%r9)

	shrq	$9, %r10
	sub	$9, %cl

	movq	%r11, %r9
	andq	%r10, %r9
	shrq	%cl, %r9

	movq	PTR(VA_PUD_1)(%rsi), %r8
	addq	%r8, %r9
	movq	PTR(PA_PMD_1)(%rsi), %r8
	orq	$PAGE_ATTR, %r8
	movq	%r8, (%r9)

	shrq	$9, %r10
	sub	$9, %cl

	movq	%r11, %r9
	andq	%r10, %r9
	shrq	%cl, %r9

	movq	PTR(VA_PMD_1)(%rsi), %r8
	addq	%r8, %r9
	movq	PTR(PA_PTE_1)(%rsi), %r8
	orq	$PAGE_ATTR, %r8
	movq	%r8, (%r9)

	shrq	$9, %r10
	sub	$9, %cl

	movq	%r11, %r9
	andq	%r10, %r9
	shrq	%cl, %r9

	movq	PTR(VA_PTE_1)(%rsi), %r8
	addq	%r8, %r9
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
	orq	$PAGE_ATTR, %r8
	movq	%r8, (%r9)

relocate_new_kernel:
	/* %rdi indirection_page
	 * %rsi page_list
	 * %rdx start address
	 */

	/* zero out flags, and disable interrupts */
	/* zero out flags, and disable interrupts */
	pushq $0
	pushq $0
	popfq
	popfq
@@ -156,9 +40,8 @@ relocate_new_kernel:
	/* get physical address of page table now too */
	/* get physical address of page table now too */
	movq	PTR(PA_TABLE_PAGE)(%rsi), %rcx
	movq	PTR(PA_TABLE_PAGE)(%rsi), %rcx


	/* switch to new set of page tables */
	/* Switch to the identity mapped page tables */
	movq	PTR(PA_PGD)(%rsi), %r9
	movq	%rcx, %cr3
	movq	%r9, %cr3


	/* setup a new stack at the end of the physical control page */
	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%r8), %rsp
	lea	PAGE_SIZE(%r8), %rsp
@@ -194,9 +77,7 @@ identity_mapped:
	jmp 1f
	jmp 1f
1:
1:


	/* Switch to the identity mapped page tables,
	/* Flush the TLB (needed?) */
	 * and flush the TLB.
	*/
	movq	%rcx, %cr3
	movq	%rcx, %cr3


	/* Do the copies */
	/* Do the copies */