
Commit fee7b0d8 authored by Huang Ying, committed by H. Peter Anvin

x86, kexec: x86_64: add kexec jump support for x86_64



Impact: New major feature

This patch adds kexec jump support for x86_64. More information about
kexec jump can be found in the corresponding x86_32 support patch.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
parent 53594547
arch/x86/Kconfig +1 −1
@@ -1431,7 +1431,7 @@ config CRASH_DUMP
 config KEXEC_JUMP
 	bool "kexec jump (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on KEXEC && HIBERNATION && X86_32
+	depends on KEXEC && HIBERNATION
 	---help---
 	  Jump between original kernel and kexeced kernel and invoke
 	  code in physical address mode via KEXEC
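
With the X86_32 dependency gone, the feature is reachable on x86_64 through the kexec_load(2) and reboot(2) syscalls. Below is a minimal, hypothetical userspace sketch; the empty segment and the 0x100000 entry point are placeholders (kexec-tools normally builds these), and error handling is elided:

#include <unistd.h>
#include <sys/syscall.h>
#include <sys/reboot.h>
#include <linux/kexec.h>
#include <linux/reboot.h>

int main(void)
{
	/* Normally populated by kexec-tools; left empty in this sketch. */
	struct kexec_segment seg = { 0 };

	/* KEXEC_PRESERVE_CONTEXT marks the image as jump-capable so the
	 * kernel saves enough state to come back. */
	if (syscall(SYS_kexec_load, 0x100000UL /* placeholder entry */, 1UL,
		    &seg, KEXEC_PRESERVE_CONTEXT | KEXEC_ARCH_DEFAULT) < 0)
		return 1;

	/* Jump into the loaded image; if the peer kernel jumps back,
	 * execution resumes here in the original kernel. */
	reboot(LINUX_REBOOT_CMD_KEXEC);
	return 0;
}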
arch/x86/include/asm/kexec.h +7 −6
@@ -9,13 +9,13 @@
 # define PAGES_NR		4
 #else
 # define PA_CONTROL_PAGE	0
-# define PA_TABLE_PAGE		1
-# define PAGES_NR		2
+# define VA_CONTROL_PAGE	1
+# define PA_TABLE_PAGE		2
+# define PA_SWAP_PAGE		3
+# define PAGES_NR		4
 #endif
 
-#ifdef CONFIG_X86_32
 # define KEXEC_CONTROL_CODE_MAX_SIZE	2048
-#endif
 
 #ifndef __ASSEMBLY__
 
@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
 		unsigned int has_pae,
 		unsigned int preserve_context);
 #else
-NORET_TYPE void
+unsigned long
 relocate_kernel(unsigned long indirection_page,
 		unsigned long page_list,
-		unsigned long start_address) ATTRIB_NORET;
+		unsigned long start_address,
+		unsigned int preserve_context);
 #endif
 
 #define ARCH_HAS_KIMAGE_ARCH
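
The x86_64 page_list now has the same four slots as x86_32: VA_CONTROL_PAGE lets the entry code save CPU state through the still-live kernel mapping before %cr3 is switched, and PA_SWAP_PAGE gives swap_pages a scratch page so displaced contents survive for the jump back. The header change also drops NORET_TYPE/ATTRIB_NORET; a sketch of the new contract (kexec_jump_once() is an invented name, not kernel code):

/* Illustrative only: the new 64-bit calling contract as machine_kexec()
 * (below) uses it.  With preserve_context == 0 the call never returns;
 * with preserve_context != 0 it returns once the peer kernel jumps
 * back, and the returned value is the peer's re-entry point. */
static void kexec_jump_once(struct kimage *image, unsigned long *page_list)
{
	image->start = relocate_kernel((unsigned long)image->head,
				       (unsigned long)page_list,
				       image->start,
				       image->preserve_context);
	/* image->start now holds where to resume the peer next time. */
}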
arch/x86/kernel/machine_kexec_64.c +38 −4
@@ -13,6 +13,7 @@
 #include <linux/numa.h>
 #include <linux/ftrace.h>
 #include <linux/io.h>
+#include <linux/suspend.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -270,19 +271,43 @@ void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
 	void *control_page;
+	int save_ftrace_enabled;
 
-	tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		save_processor_state();
+#endif
+
+	save_ftrace_enabled = __ftrace_enabled_save();
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
+	if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+		/*
+		 * We need to put APICs in legacy mode so that we can
+		 * get timer interrupts in second kernel. kexec/kdump
+		 * paths already have calls to disable_IO_APIC() in
+		 * one form or other. kexec jump path also need
+		 * one.
+		 */
+		disable_IO_APIC();
+#endif
+	}
+
 	control_page = page_address(image->control_code_page) + PAGE_SIZE;
-	memcpy(control_page, relocate_kernel, PAGE_SIZE);
+	memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
 
 	page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
+	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
 	page_list[PA_TABLE_PAGE] =
 	  (unsigned long)__pa(page_address(image->control_code_page));
 
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+						<< PAGE_SHIFT);
+
 	/*
 	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
@@ -302,8 +327,17 @@ void machine_kexec(struct kimage *image)
 	set_idt(phys_to_virt(0), 0);
 
 	/* now call it */
-	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
-			image->start);
+	image->start = relocate_kernel((unsigned long)image->head,
+				       (unsigned long)page_list,
+				       image->start,
+				       image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+	if (kexec_image->preserve_context)
+		restore_processor_state();
+#endif
 
 	__ftrace_enabled_restore(save_ftrace_enabled);
 }
 
 void arch_crash_save_vmcoreinfo(void)
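
Note that the memcpy() now copies only KEXEC_CONTROL_CODE_MAX_SIZE (2048) bytes: the rest of the control page is reserved for saved CPU state and the jump-back stack, laid out by the DATA() offsets in relocate_kernel_64.S below. An illustrative C picture of the page (struct and field names invented for clarity; the kernel uses raw offsets):

/* Assumes KEXEC_CONTROL_CODE_MAX_SIZE == 2048 and a 4 KiB page. */
struct control_page_layout {
	char code[KEXEC_CONTROL_CODE_MAX_SIZE];	/* copy of relocate_kernel */

	/* "Minimal CPU state", DATA(0x0)..DATA(0x18) */
	unsigned long rsp;			/* RSP  = DATA(0x0)  */
	unsigned long cr0;			/* CR0  = DATA(0x8)  */
	unsigned long cr3;			/* CR3  = DATA(0x10) */
	unsigned long cr4;			/* CR4  = DATA(0x18) */

	/* "other data", DATA(0x20)..DATA(0x30) */
	unsigned long pa_table_page;		/* CP_PA_TABLE_PAGE       */
	unsigned long pa_swap_page;		/* CP_PA_SWAP_PAGE        */
	unsigned long pa_backup_pages_map;	/* CP_PA_BACKUP_PAGES_MAP */

	/* remaining bytes up to PAGE_SIZE double as the jump-back stack */
};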
arch/x86/kernel/relocate_kernel_64.S +144 −33
@@ -19,6 +19,24 @@
 #define PTR(x) (x << 3)
 #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
 
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
+ */
+#define DATA(offset)		(KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define RSP			DATA(0x0)
+#define CR0			DATA(0x8)
+#define CR3			DATA(0x10)
+#define CR4			DATA(0x18)
+
+/* other data */
+#define CP_PA_TABLE_PAGE	DATA(0x20)
+#define CP_PA_SWAP_PAGE		DATA(0x28)
+#define CP_PA_BACKUP_PAGES_MAP	DATA(0x30)
+
 	.text
 	.align PAGE_SIZE
 	.code64
@@ -28,8 +46,27 @@ relocate_kernel:
 	 * %rdi indirection_page
 	 * %rsi page_list
 	 * %rdx start address
+	 * %rcx preserve_context
 	 */
 
+	/* Save the CPU context, used for jumping back */
+	pushq %rbx
+	pushq %rbp
+	pushq %r12
+	pushq %r13
+	pushq %r14
+	pushq %r15
+	pushf
+
+	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
+	movq	%rsp, RSP(%r11)
+	movq	%cr0, %rax
+	movq	%rax, CR0(%r11)
+	movq	%cr3, %rax
+	movq	%rax, CR3(%r11)
+	movq	%cr4, %rax
+	movq	%rax, CR4(%r11)
+
 	/* zero out flags, and disable interrupts */
 	pushq $0
 	popfq
@@ -41,10 +78,18 @@ relocate_kernel:
 	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8
 
 	/* get physical address of page table now too */
-	movq	PTR(PA_TABLE_PAGE)(%rsi), %rcx
+	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
+
+	/* get physical address of swap page now */
+	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
+
+	/* save some information for jumping back */
+	movq	%r9, CP_PA_TABLE_PAGE(%r11)
+	movq	%r10, CP_PA_SWAP_PAGE(%r11)
+	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
 
 	/* Switch to the identity mapped page tables */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
 
 	/* setup a new stack at the end of the physical control page */
 	lea	PAGE_SIZE(%r8), %rsp
@@ -83,9 +128,87 @@ identity_mapped:
 1:
 
 	/* Flush the TLB (needed?) */
-	movq	%rcx, %cr3
+	movq	%r9, %cr3
+
+	movq	%rcx, %r11
+	call	swap_pages
+
+	/*
+	 * To be certain of avoiding problems with self-modifying code
+	 * I need to execute a serializing instruction here.
+	 * So I flush the TLB by reloading %cr3 here, it's handy,
+	 * and not processor dependent.
+	 */
+	movq	%cr3, %rax
+	movq	%rax, %cr3
+
+	/*
+	 * set all of the registers to known values
+	 * leave %rsp alone
+	 */
+
+	testq	%r11, %r11
+	jnz 1f
+	xorq	%rax, %rax
+	xorq	%rbx, %rbx
+	xorq    %rcx, %rcx
+	xorq    %rdx, %rdx
+	xorq    %rsi, %rsi
+	xorq    %rdi, %rdi
+	xorq    %rbp, %rbp
+	xorq	%r8,  %r8
+	xorq	%r9,  %r9
+	xorq	%r10, %r9
+	xorq	%r11, %r11
+	xorq	%r12, %r12
+	xorq	%r13, %r13
+	xorq	%r14, %r14
+	xorq	%r15, %r15
+
+	ret
+
+1:
+	popq	%rdx
+	leaq	PAGE_SIZE(%r10), %rsp
+	call	*%rdx
+
+	/* get the re-entry point of the peer system */
+	movq	0(%rsp), %rbp
+	call	1f
+1:
+	popq	%r8
+	subq	$(1b - relocate_kernel), %r8
+	movq	CP_PA_SWAP_PAGE(%r8), %r10
+	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
+	movq	CP_PA_TABLE_PAGE(%r8), %rax
+	movq	%rax, %cr3
+	lea	PAGE_SIZE(%r8), %rsp
+	call	swap_pages
+	movq	$virtual_mapped, %rax
+	pushq	%rax
+	ret
+
+virtual_mapped:
+	movq	RSP(%r8), %rsp
+	movq	CR4(%r8), %rax
+	movq	%rax, %cr4
+	movq	CR3(%r8), %rax
+	movq	CR0(%r8), %r8
+	movq	%rax, %cr3
+	movq	%r8, %cr0
+	movq	%rbp, %rax
+
+	popf
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbp
+	popq	%rbx
+	ret
 
 	/* Do the copies */
+swap_pages:
 	movq	%rdi, %rcx 	/* Put the page_list in %rcx */
 	xorq	%rdi, %rdi
 	xorq	%rsi, %rsi
@@ -117,39 +240,27 @@ identity_mapped:
 	movq	%rcx,   %rsi  /* For every source page do a copy */
 	andq	$0xfffffffffffff000, %rsi
 
+	movq	%rdi, %rdx
+	movq	%rsi, %rax
+
+	movq	%r10, %rdi
 	movq	$512,   %rcx
 	rep ; movsq
-	jmp	0b
-3:
 
-	/*
-	 * To be certain of avoiding problems with self-modifying code
-	 * I need to execute a serializing instruction here.
-	 * So I flush the TLB by reloading %cr3 here, it's handy,
-	 * and not processor dependent.
-	 */
-	movq	%cr3, %rax
-	movq	%rax, %cr3
-
-	/*
-	 * set all of the registers to known values
-	 * leave %rsp alone
-	 */
-
-	xorq	%rax, %rax
-	xorq	%rbx, %rbx
-	xorq    %rcx, %rcx
-	xorq    %rdx, %rdx
-	xorq    %rsi, %rsi
-	xorq    %rdi, %rdi
-	xorq    %rbp, %rbp
-	xorq	%r8,  %r8
-	xorq	%r9,  %r9
-	xorq	%r10, %r9
-	xorq	%r11, %r11
-	xorq	%r12, %r12
-	xorq	%r13, %r13
-	xorq	%r14, %r14
-	xorq	%r15, %r15
-
+	movq	%rax, %rdi
+	movq	%rdx, %rsi
+	movq	$512,   %rcx
+	rep ; movsq
+
+	movq	%rdx, %rdi
+	movq	%r10, %rsi
+	movq	$512,   %rcx
+	rep ; movsq
+
+	lea	PAGE_SIZE(%rax), %rsi
+	jmp	0b
+3:
 	ret
+
+	.globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
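
swap_pages (the copy loop, now split out as a callable routine) no longer simply overwrites each destination page: using the scratch page from PA_SWAP_PAGE it performs a three-way exchange per 4 KiB page (each rep;movsq moves 512 quadwords), so the displaced contents end up back in the source list and can be restored on the jump back. The same per-page body in C (a descriptive sketch, not kernel code):

#include <string.h>

#define PAGE_SIZE 4096	/* x86 page size, matching the 512 movsq per copy */

/* Mirrors the three rep;movsq sequences above. */
static void exchange_page(void *dest, void *src, void *swap)
{
	memcpy(swap, src, PAGE_SIZE);	/* stash the incoming source page */
	memcpy(src, dest, PAGE_SIZE);	/* displaced contents -> source   */
	memcpy(dest, swap, PAGE_SIZE);	/* stashed page -> destination    */
}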
arch/x86/kernel/vmlinux_64.lds.S +7 −0
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
 ASSERT((per_cpu__irq_stack_union == 0),
         "irq_stack_union is not at start of per-cpu area");
 #endif
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+       "kexec control code size is too big")
+#endif
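
The .globl/.set pair at the end of relocate_kernel_64.S publishes the code size as an absolute symbol, and this new ASSERT fails the link if the routine ever outgrows the KEXEC_CONTROL_CODE_MAX_SIZE bytes that machine_kexec() memcpy()s, rather than silently truncating the code at runtime. If the value were ever needed from C, an absolute symbol is read by taking its address (an illustrative sketch, not part of the patch):

/* The symbol's "address" is the byte count computed by .set. */
extern const unsigned char kexec_control_code_size[];

static inline unsigned long relocate_kernel_code_bytes(void)
{
	return (unsigned long)kexec_control_code_size;
}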