Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 64a48099 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'WIP.x86-pti.entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 syscall entry code changes for PTI from Ingo Molnar:
 "The main changes here are Andy Lutomirski's changes to switch the
  x86-64 entry code to use the 'per CPU entry trampoline stack'. This,
  besides helping fix KASLR leaks (the pending Page Table Isolation
  (PTI) work), also robustifies the x86 entry code"

* 'WIP.x86-pti.entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  x86/cpufeatures: Make CPU bugs sticky
  x86/paravirt: Provide a way to check for hypervisors
  x86/paravirt: Dont patch flush_tlb_single
  x86/entry/64: Make cpu_entry_area.tss read-only
  x86/entry: Clean up the SYSENTER_stack code
  x86/entry/64: Remove the SYSENTER stack canary
  x86/entry/64: Move the IST stacks into struct cpu_entry_area
  x86/entry/64: Create a per-CPU SYSCALL entry trampoline
  x86/entry/64: Return to userspace from the trampoline stack
  x86/entry/64: Use a per-CPU trampoline stack for IDT entries
  x86/espfix/64: Stop assuming that pt_regs is on the entry stack
  x86/entry/64: Separate cpu_current_top_of_stack from TSS.sp0
  x86/entry: Remap the TSS into the CPU entry area
  x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct
  x86/dumpstack: Handle stack overflow on all stacks
  x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss
  x86/kasan/64: Teach KASAN about the cpu_entry_area
  x86/mm/fixmap: Generalize the GDT fixmap mechanism, introduce struct cpu_entry_area
  x86/entry/gdt: Put per-CPU GDT remaps in ascending order
  x86/dumpstack: Add get_stack_info() support for the SYSENTER stack
  ...
parents 1291a0d5 6cbd2171
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -941,7 +941,8 @@ ENTRY(debug)
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
	movl	PER_CPU_VAR(cpu_entry_area), %ecx
	addl	$CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	jb	.Ldebug_from_sysenter_stack
@@ -984,7 +985,8 @@ ENTRY(nmi)
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
	movl	PER_CPU_VAR(cpu_entry_area), %ecx
	addl	$CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	jb	.Lnmi_from_sysenter_stack
+164 −25
Original line number Diff line number Diff line
@@ -140,6 +140,64 @@ END(native_usergs_sysret64)
 * with them due to bugs in both AMD and Intel CPUs.
 */

	.pushsection .entry_trampoline, "ax"

/*
 * The code in here gets remapped into cpu_entry_area's trampoline.  This means
 * that the assembler and linker have the wrong idea as to where this code
 * lives (and, in fact, it's mapped more than once, so it's not even at a
 * fixed address).  So we can't reference any symbols outside the entry
 * trampoline and expect it to work.
 *
 * Instead, we carefully abuse %rip-relative addressing.
 * _entry_trampoline(%rip) refers to the start of the remapped) entry
 * trampoline.  We can thus find cpu_entry_area with this macro:
 */

#define CPU_ENTRY_AREA \
	_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)

/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
#define RSP_SCRATCH	CPU_ENTRY_AREA_SYSENTER_stack + \
			SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA

ENTRY(entry_SYSCALL_64_trampoline)
	UNWIND_HINT_EMPTY
	swapgs

	/* Stash the user RSP. */
	movq	%rsp, RSP_SCRATCH

	/* Load the top of the task stack into RSP */
	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp

	/* Start building the simulated IRET frame. */
	pushq	$__USER_DS			/* pt_regs->ss */
	pushq	RSP_SCRATCH			/* pt_regs->sp */
	pushq	%r11				/* pt_regs->flags */
	pushq	$__USER_CS			/* pt_regs->cs */
	pushq	%rcx				/* pt_regs->ip */

	/*
	 * x86 lacks a near absolute jump, and we can't jump to the real
	 * entry text with a relative jump.  We could push the target
	 * address and then use retq, but this destroys the pipeline on
	 * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
	 * spill RDI and restore it in a second-stage trampoline.
	 */
	pushq	%rdi
	movq	$entry_SYSCALL_64_stage2, %rdi
	jmp	*%rdi
END(entry_SYSCALL_64_trampoline)

	.popsection

ENTRY(entry_SYSCALL_64_stage2)
	UNWIND_HINT_EMPTY
	popq	%rdi
	jmp	entry_SYSCALL_64_after_hwframe
END(entry_SYSCALL_64_stage2)

ENTRY(entry_SYSCALL_64)
	UNWIND_HINT_EMPTY
	/*
@@ -330,8 +388,24 @@ syscall_return_via_sysret:
	popq	%rsi	/* skip rcx */
	popq	%rdx
	popq	%rsi

	/*
	 * Now all regs are restored except RSP and RDI.
	 * Save old stack pointer and switch to trampoline stack.
	 */
	movq	%rsp, %rdi
	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp

	pushq	RSP-RDI(%rdi)	/* RSP */
	pushq	(%rdi)		/* RDI */

	/*
	 * We are on the trampoline stack.  All regs except RDI are live.
	 * We can do future final exit work right here.
	 */

	popq	%rdi
	movq	RSP-ORIG_RAX(%rsp), %rsp
	popq	%rsp
	USERGS_SYSRET64
END(entry_SYSCALL_64)

@@ -466,12 +540,13 @@ END(irq_entries_start)

.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
	pushfq
	testl $X86_EFLAGS_IF, (%rsp)
	pushq %rax
	SAVE_FLAGS(CLBR_RAX)
	testl $X86_EFLAGS_IF, %eax
	jz .Lokay_\@
	ud2
.Lokay_\@:
	addq $8, %rsp
	popq %rax
#endif
.endm

@@ -563,6 +638,13 @@ END(irq_entries_start)
/* 0(%rsp): ~(interrupt number) */
	.macro interrupt func
	cld

	testb	$3, CS-ORIG_RAX(%rsp)
	jz	1f
	SWAPGS
	call	switch_to_thread_stack
1:

	ALLOC_PT_GPREGS_ON_STACK
	SAVE_C_REGS
	SAVE_EXTRA_REGS
@@ -572,12 +654,8 @@ END(irq_entries_start)
	jz	1f

	/*
	 * IRQ from user mode.  Switch to kernel gsbase and inform context
	 * tracking that we're in kernel mode.
	 */
	SWAPGS

	/*
	 * IRQ from user mode.
	 *
	 * We need to tell lockdep that IRQs are off.  We can't do this until
	 * we fix gsbase, and we should do it before enter_from_user_mode
	 * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +708,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
	ud2
1:
#endif
	SWAPGS
	POP_EXTRA_REGS
	POP_C_REGS
	addq	$8, %rsp	/* skip regs->orig_ax */
	popq	%r11
	popq	%r10
	popq	%r9
	popq	%r8
	popq	%rax
	popq	%rcx
	popq	%rdx
	popq	%rsi

	/*
	 * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
	 * Save old stack pointer and switch to trampoline stack.
	 */
	movq	%rsp, %rdi
	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp

	/* Copy the IRET frame to the trampoline stack. */
	pushq	6*8(%rdi)	/* SS */
	pushq	5*8(%rdi)	/* RSP */
	pushq	4*8(%rdi)	/* EFLAGS */
	pushq	3*8(%rdi)	/* CS */
	pushq	2*8(%rdi)	/* RIP */

	/* Push user RDI on the trampoline stack. */
	pushq	(%rdi)

	/*
	 * We are on the trampoline stack.  All regs except RDI are live.
	 * We can do future final exit work right here.
	 */

	/* Restore RDI. */
	popq	%rdi
	SWAPGS
	INTERRUPT_RETURN


@@ -829,7 +938,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
/*
 * Exception entry points.
 */
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)

/*
 * Switch to the thread stack.  This is called with the IRET frame and
 * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
 * space has not been allocated for them.)
 */
ENTRY(switch_to_thread_stack)
	UNWIND_HINT_FUNC

	pushq	%rdi
	movq	%rsp, %rdi
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI

	pushq	7*8(%rdi)		/* regs->ss */
	pushq	6*8(%rdi)		/* regs->rsp */
	pushq	5*8(%rdi)		/* regs->eflags */
	pushq	4*8(%rdi)		/* regs->cs */
	pushq	3*8(%rdi)		/* regs->ip */
	pushq	2*8(%rdi)		/* regs->orig_ax */
	pushq	8(%rdi)			/* return address */
	UNWIND_HINT_FUNC

	movq	(%rdi), %rdi
	ret
END(switch_to_thread_stack)

.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
@@ -848,11 +983,12 @@ ENTRY(\sym)

	ALLOC_PT_GPREGS_ON_STACK

	.if \paranoid
	.if \paranoid == 1
	.if \paranoid < 2
	testb	$3, CS(%rsp)			/* If coming from userspace, switch stacks */
	jnz	1f
	jnz	.Lfrom_usermode_switch_stack_\@
	.endif

	.if \paranoid
	call	paranoid_entry
	.else
	call	error_entry
@@ -894,20 +1030,15 @@ ENTRY(\sym)
	jmp	error_exit
	.endif

	.if \paranoid == 1
	.if \paranoid < 2
	/*
	 * Paranoid entry from userspace.  Switch stacks and treat it
	 * Entry from userspace.  Switch stacks and treat it
	 * as a normal entry.  This means that paranoid handlers
	 * run in real process context if user_mode(regs).
	 */
1:
.Lfrom_usermode_switch_stack_\@:
	call	error_entry


	movq	%rsp, %rdi			/* pt_regs pointer */
	call	sync_regs
	movq	%rax, %rsp			/* switch stack */

	movq	%rsp, %rdi			/* pt_regs pointer */

	.if \has_error_code
@@ -1170,6 +1301,14 @@ ENTRY(error_entry)
	SWAPGS

.Lerror_entry_from_usermode_after_swapgs:
	/* Put us onto the real thread stack. */
	popq	%r12				/* save return addr in %12 */
	movq	%rsp, %rdi			/* arg0 = pt_regs pointer */
	call	sync_regs
	movq	%rax, %rsp			/* switch stack */
	ENCODE_FRAME_POINTER
	pushq	%r12

	/*
	 * We need to tell lockdep that IRQs are off.  We can't do this until
	 * we fix gsbase, and we should do it before enter_from_user_mode
+5 −2
Original line number Diff line number Diff line
@@ -48,7 +48,7 @@
 */
ENTRY(entry_SYSENTER_compat)
	/* Interrupts are off on entry. */
	SWAPGS_UNSAFE_STACK
	SWAPGS
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	/*
@@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
	 */
	movl	%eax, %eax

	/* Construct struct pt_regs on stack (iret frame is already on stack) */
	pushq	%rax			/* pt_regs->orig_ax */

	/* switch to thread stack expects orig_ax to be pushed */
	call	switch_to_thread_stack

	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
+2 −0
Original line number Diff line number Diff line
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
	set_bit(bit, (unsigned long *)cpu_caps_set);	\
} while (0)

#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)

#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
 * Static testing of CPU features.  Used the same as boot_cpu_has().
+2 −9
Original line number Diff line number Diff line
@@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
	return this_cpu_ptr(&gdt_page)->gdt;
}

/* Get the fixmap index for a specific processor */
static inline unsigned int get_cpu_gdt_ro_index(int cpu)
{
	return FIX_GDT_REMAP_BEGIN + cpu;
}

/* Provide the fixmap address of the remapped GDT */
static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
{
	unsigned int idx = get_cpu_gdt_ro_index(cpu);
	return (struct desc_struct *)__fix_to_virt(idx);
	return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
}

/* Provide the current read-only GDT */
@@ -185,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
#endif
}

static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
{
	struct desc_struct *d = get_cpu_gdt_rw(cpu);
	tss_desc tss;
Loading