
Commit fee7b0d8 authored by Huang Ying, committed by H. Peter Anvin

x86, kexec: x86_64: add kexec jump support for x86_64



Impact: New major feature

This patch adds kexec jump support for x86_64. More information about
kexec jump can be found in the corresponding x86_32 support patch.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
parent 53594547
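
For orientation: the preserve-context path added below is driven from user space through the kexec_load() syscall's KEXEC_PRESERVE_CONTEXT flag, which is what makes image->preserve_context non-zero in the hunks that follow. A minimal loader sketch, assuming the modern uapi definitions are available (the helper name is hypothetical, segment contents and error handling are omitted):

#include <linux/kexec.h>	/* KEXEC_PRESERVE_CONTEXT, struct kexec_segment */
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical helper: load an image so that the kexec jump
 * (preserve_context) paths in this patch are exercised. */
static long load_preserve_context(unsigned long entry,
				  struct kexec_segment *segs,
				  unsigned long nr_segs)
{
	/* KEXEC_PRESERVE_CONTEXT sets image->preserve_context, which
	 * machine_kexec() below tests before save_processor_state(). */
	return syscall(SYS_kexec_load, entry, nr_segs, segs,
		       KEXEC_ARCH_DEFAULT | KEXEC_PRESERVE_CONTEXT);
}

Once the kexeced kernel jumps back, relocate_kernel()'s return value becomes the new image->start, so a later jump resumes the peer system where it left off (see machine_kexec_64.c below).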
arch/x86/Kconfig (+1 −1)

@@ -1431,7 +1431,7 @@ config CRASH_DUMP
 config KEXEC_JUMP
 	bool "kexec jump (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on KEXEC && HIBERNATION && X86_32
+	depends on KEXEC && HIBERNATION
 	---help---
 	  Jump between original kernel and kexeced kernel and invoke
 	  code in physical address mode via KEXEC
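
With the X86_32 restriction dropped, the option becomes selectable on x86_64 as well; a .config of that era would need, for example:

CONFIG_EXPERIMENTAL=y
CONFIG_KEXEC=y
CONFIG_HIBERNATION=y
CONFIG_KEXEC_JUMP=y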
arch/x86/include/asm/kexec.h (+7 −6)

@@ -9,13 +9,13 @@
 # define PAGES_NR		4
 #else
 # define PA_CONTROL_PAGE	0
-# define PA_TABLE_PAGE		1
-# define PAGES_NR		2
+# define VA_CONTROL_PAGE	1
+# define PA_TABLE_PAGE		2
+# define PA_SWAP_PAGE		3
+# define PAGES_NR		4
 #endif
 
-#ifdef CONFIG_X86_32
 # define KEXEC_CONTROL_CODE_MAX_SIZE	2048
-#endif
 
 #ifndef __ASSEMBLY__
 
@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
 		unsigned int has_pae,
 		unsigned int preserve_context);
 #else
-NORET_TYPE void
+unsigned long
 relocate_kernel(unsigned long indirection_page,
 		unsigned long page_list,
-		unsigned long start_address) ATTRIB_NORET;
+		unsigned long start_address,
+		unsigned int preserve_context);
 #endif
 
 #define ARCH_HAS_KIMAGE_ARCH
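
Together with the DATA() offsets defined in relocate_kernel_64.S below, these constants fix the control page layout: relocation code in the first KEXEC_CONTROL_CODE_MAX_SIZE (2048) bytes, jump-back state after it. As an illustration only (this struct does not exist in the tree):

/* Sketch of the data area at control_page + KEXEC_CONTROL_CODE_MAX_SIZE,
 * matching the DATA(0x0)..DATA(0x30) slots used by relocate_kernel_64.S;
 * the remainder of the page serves as the jump-back stack. */
struct kexec_jump_state {
	unsigned long rsp;			/* DATA(0x0)  */
	unsigned long cr0;			/* DATA(0x8)  */
	unsigned long cr3;			/* DATA(0x10) */
	unsigned long cr4;			/* DATA(0x18) */
	unsigned long cp_pa_table_page;		/* DATA(0x20) */
	unsigned long cp_pa_swap_page;		/* DATA(0x28) */
	unsigned long cp_pa_backup_pages_map;	/* DATA(0x30) */
};

The linker-script assert added at the end of this commit guarantees the relocation code never grows into this area.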
arch/x86/kernel/machine_kexec_64.c (+38 −4)

@@ -13,6 +13,7 @@
 #include <linux/numa.h>
 #include <linux/ftrace.h>
 #include <linux/io.h>
+#include <linux/suspend.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -270,19 +271,43 @@ void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
+	int save_ftrace_enabled;
 
-	tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		save_processor_state();
+#endif
+
+	save_ftrace_enabled = __ftrace_enabled_save();
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
+	if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+		/*
+		 * We need to put APICs in legacy mode so that we can
+		 * get timer interrupts in second kernel. kexec/kdump
+		 * paths already have calls to disable_IO_APIC() in
+		 * one form or other. kexec jump path also needs
+		 * one.
+		 */
+		disable_IO_APIC();
+#endif
+	}
+
 	control_page = page_address(image->control_code_page) + PAGE_SIZE;
-	memcpy(control_page, relocate_kernel, PAGE_SIZE);
+	memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
 
 	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
+	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
 	page_list[PA_TABLE_PAGE] =
 	  (unsigned long)__pa(page_address(image->control_code_page));
 
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+						<< PAGE_SHIFT);
+
 	/*
 	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
@@ -302,8 +327,17 @@ void machine_kexec(struct kimage *image)
 	set_idt(phys_to_virt(0), 0);
 
 	/* now call it */
-	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-			image->start);
+	image->start = relocate_kernel((unsigned long)image->head,
+				       (unsigned long)page_list,
+				       image->start,
+				       image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		restore_processor_state();
+#endif
+
+	__ftrace_enabled_restore(save_ftrace_enabled);
 }
 
 void arch_crash_save_vmcoreinfo(void)
arch/x86/kernel/relocate_kernel_64.S (+144 −33)

@@ -19,6 +19,24 @@
 #define PTR(x) (x << 3)
 #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
+ */
+#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define RSP			DATA(0x0)
+#define CR0			DATA(0x8)
+#define CR3			DATA(0x10)
+#define CR4			DATA(0x18)
+
+/* other data */
+#define CP_PA_TABLE_PAGE	DATA(0x20)
+#define CP_PA_SWAP_PAGE		DATA(0x28)
+#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
+
 	.text
 	.align PAGE_SIZE
 	.code64
@@ -28,8 +46,27 @@ relocate_kernel:
 	 * %rdi indirection_page
 	 * %rsi page_list
 	 * %rdx start address
+	 * %rcx preserve_context
 	 */
 
+	/* Save the CPU context, used for jumping back */
+	pushq %rbx
+	pushq %rbp
+	pushq %r12
+	pushq %r13
+	pushq %r14
+	pushq %r15
+	pushf
+
+	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
+	movq	%rsp, RSP(%r11)
+	movq	%cr0, %rax
+	movq	%rax, CR0(%r11)
+	movq	%cr3, %rax
+	movq	%rax, CR3(%r11)
+	movq	%cr4, %rax
+	movq	%rax, CR4(%r11)
+
 	/* zero out flags, and disable interrupts */
 	pushq $0
 	popfq
@@ -41,10 +78,18 @@ relocate_kernel:
 	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
 
 	/* get physical address of page table now too */
-	movq	PTR(PA_TABLE_PAGE)(%rsi), %rcx
+	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
+
+	/* get physical address of swap page now */
+	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
+
+	/* save some information for jumping back */
+	movq	%r9, CP_PA_TABLE_PAGE(%r11)
+	movq	%r10, CP_PA_SWAP_PAGE(%r11)
+	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
 
 	/* Switch to the identity mapped page tables */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
 
 	/* setup a new stack at the end of the physical control page */
 	lea	PAGE_SIZE(%r8), %rsp
@@ -83,9 +128,87 @@ identity_mapped:
 1:
 
 	/* Flush the TLB (needed?) */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
+
+	movq	%rcx, %r11
+	call	swap_pages
+
+	/*
+	 * To be certain of avoiding problems with self-modifying code
+	 * I need to execute a serializing instruction here.
+	 * So I flush the TLB by reloading %cr3 here, it's handy,
+	 * and not processor dependent.
+	 */
+	movq	%cr3, %rax
+	movq	%rax, %cr3
+
+	/*
+	 * set all of the registers to known values
+	 * leave %rsp alone
+	 */
+
+	testq	%r11, %r11
+	jnz 1f
+	xorq	%rax, %rax
+	xorq	%rbx, %rbx
+	xorq    %rcx, %rcx
+	xorq    %rdx, %rdx
+	xorq    %rsi, %rsi
+	xorq    %rdi, %rdi
+	xorq    %rbp, %rbp
+	xorq	%r8,  %r8
+	xorq	%r9,  %r9
+	xorq	%r10, %r10
+	xorq	%r11, %r11
+	xorq	%r12, %r12
+	xorq	%r13, %r13
+	xorq	%r14, %r14
+	xorq	%r15, %r15
+
+	ret
+
+1:
+	popq	%rdx
+	leaq	PAGE_SIZE(%r10), %rsp
+	call	*%rdx
+
+	/* get the re-entry point of the peer system */
+	movq	0(%rsp), %rbp
+	call	1f
+1:
+	popq	%r8
+	subq	$(1b - relocate_kernel), %r8
+	movq	CP_PA_SWAP_PAGE(%r8), %r10
+	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
+	movq	CP_PA_TABLE_PAGE(%r8), %rax
+	movq	%rax, %cr3
+	lea	PAGE_SIZE(%r8), %rsp
+	call	swap_pages
+	movq	$virtual_mapped, %rax
+	pushq	%rax
+	ret
+
+virtual_mapped:
+	movq	RSP(%r8), %rsp
+	movq	CR4(%r8), %rax
+	movq	%rax, %cr4
+	movq	CR3(%r8), %rax
+	movq	CR0(%r8), %r8
+	movq	%rax, %cr3
+	movq	%r8, %cr0
+	movq	%rbp, %rax
+
+	popf
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbp
+	popq	%rbx
+	ret
 
 	/* Do the copies */
+swap_pages:
 	movq	%rdi, %rcx 	/* Put the page_list in %rcx */
 	xorq	%rdi, %rdi
 	xorq	%rsi, %rsi
@@ -117,39 +240,27 @@ identity_mapped:
 	movq	%rcx,   %rsi  /* For every source page do a copy */
 	andq	$0xfffffffffffff000, %rsi
 
+	movq	%rdi, %rdx
+	movq	%rsi, %rax
+
+	movq	%r10, %rdi
 	movq	$512,   %rcx
 	rep ; movsq
-	jmp	0b
-3:
 
-	/*
-	 * To be certain of avoiding problems with self-modifying code
-	 * I need to execute a serializing instruction here.
-	 * So I flush the TLB by reloading %cr3 here, it's handy,
-	 * and not processor dependent.
-	 */
-	movq	%cr3, %rax
-	movq	%rax, %cr3
+	movq	%rax, %rdi
+	movq	%rdx, %rsi
+	movq	$512,   %rcx
+	rep ; movsq
 
-	/*
-	 * set all of the registers to known values
-	 * leave %rsp alone
-	 */
-
-	xorq	%rax, %rax
-	xorq	%rbx, %rbx
-	xorq    %rcx, %rcx
-	xorq    %rdx, %rdx
-	xorq    %rsi, %rsi
-	xorq    %rdi, %rdi
-	xorq    %rbp, %rbp
-	xorq	%r8,  %r8
-	xorq	%r9,  %r9
-	xorq	%r10, %r9
-	xorq	%r11, %r11
-	xorq	%r12, %r12
-	xorq	%r13, %r13
-	xorq	%r14, %r14
-	xorq	%r15, %r15
+	movq	%rdx, %rdi
+	movq	%r10, %rsi
+	movq	$512,   %rcx
+	rep ; movsq
 
+	lea	PAGE_SIZE(%rax), %rsi
+	jmp	0b
+3:
 	ret
+
+	.globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
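
The three rep;movsq blocks added to swap_pages (512 quadwords each, i.e. one 4096-byte page) exchange the contents of every source/destination pair through the swap page, so that whatever the kexeced kernel overwrites can be restored on the jump back. A rough C equivalent of one loop iteration, with a hypothetical helper name:

#include <string.h>

#define KEXEC_PAGE_SIZE 4096	/* one page, 512 quadwords */

/* dest is %rdx, src is %rax, swap is %r10 in the assembly above */
static void swap_one_page(void *dest, void *src, void *swap)
{
	memcpy(swap, src, KEXEC_PAGE_SIZE);	/* src -> swap page        */
	memcpy(src, dest, KEXEC_PAGE_SIZE);	/* dest -> src             */
	memcpy(dest, swap, KEXEC_PAGE_SIZE);	/* old src (via swap) -> dest */
}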
arch/x86/kernel/vmlinux_64.lds.S (+7 −0)

@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
 ASSERT((per_cpu__irq_stack_union == 0),
         "irq_stack_union is not at start of per-cpu area");
 #endif
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+       "kexec control code size is too big")
+#endif