Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9b6e6a83 authored by Andy Lutomirski's avatar Andy Lutomirski Committed by Ingo Molnar
Browse files

x86/nmi/64: Switch stacks on userspace NMI entry



Returning to userspace is tricky: IRET can fail, and ESPFIX can
rearrange the stack prior to IRET.

The NMI nesting fixup relies on a precise stack layout and
atomic IRET.  Rather than trying to teach the NMI nesting fixup
to handle ESPFIX and failed IRET, punt: run NMIs that came from
user mode on the normal kernel stack.

This will make some nested NMIs visible to C code, but the C
code is okay with that.

As a side effect, this should speed up perf: it eliminates an
RDMSR when NMIs come from user mode.

Signed-off-by: default avatarAndy Lutomirski <luto@kernel.org>
Reviewed-by: default avatarSteven Rostedt <rostedt@goodmis.org>
Reviewed-by: default avatarBorislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 0e181bb5
Loading
Loading
Loading
Loading
+58 −4
Original line number Diff line number Diff line
@@ -1250,18 +1250,72 @@ ENTRY(nmi)
	 * a nested NMI that updated the copy interrupt stack frame, a
	 * jump will be made to the repeat_nmi code that will handle the second
	 * NMI.
	 *
	 * However, espfix prevents us from directly returning to userspace
	 * with a single IRET instruction.  Similarly, IRET to user mode
	 * can fault.  We therefore handle NMIs from user space like
	 * other IST entries.
	 */

	/* Use %rdx as our temp variable throughout */
	pushq	%rdx

	testb	$3, CS-RIP+8(%rsp)
	jz	.Lnmi_from_kernel

	/*
	 * If %cs was not the kernel segment, then the NMI triggered in user
	 * space, which means it is definitely not nested.
	 * NMI from user mode.  We need to run on the thread stack, but we
	 * can't go through the normal entry paths: NMIs are masked, and
	 * we don't want to enable interrupts, because then we'll end
	 * up in an awkward situation in which IRQs are on but NMIs
	 * are off.
	 */
	cmpl	$__KERNEL_CS, 16(%rsp)
	jne	first_nmi

	SWAPGS
	cld
	movq	%rsp, %rdx
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
	pushq	5*8(%rdx)	/* pt_regs->ss */
	pushq	4*8(%rdx)	/* pt_regs->rsp */
	pushq	3*8(%rdx)	/* pt_regs->flags */
	pushq	2*8(%rdx)	/* pt_regs->cs */
	pushq	1*8(%rdx)	/* pt_regs->rip */
	pushq   $-1		/* pt_regs->orig_ax */
	pushq   %rdi		/* pt_regs->di */
	pushq   %rsi		/* pt_regs->si */
	pushq   (%rdx)		/* pt_regs->dx */
	pushq   %rcx		/* pt_regs->cx */
	pushq   %rax		/* pt_regs->ax */
	pushq   %r8		/* pt_regs->r8 */
	pushq   %r9		/* pt_regs->r9 */
	pushq   %r10		/* pt_regs->r10 */
	pushq   %r11		/* pt_regs->r11 */
	pushq	%rbx		/* pt_regs->rbx */
	pushq	%rbp		/* pt_regs->rbp */
	pushq	%r12		/* pt_regs->r12 */
	pushq	%r13		/* pt_regs->r13 */
	pushq	%r14		/* pt_regs->r14 */
	pushq	%r15		/* pt_regs->r15 */

	/*
	 * At this point we no longer need to worry about stack damage
	 * due to nesting -- we're on the normal thread stack and we're
	 * done with the NMI stack.
	 */

	movq	%rsp, %rdi
	movq	$-1, %rsi
	call	do_nmi

	/*
	 * Return back to user mode.  We must *not* do the normal exit
	 * work, because we don't want to enable interrupts.  Fortunately,
	 * do_nmi doesn't modify pt_regs.
	 */
	SWAPGS
	jmp	restore_c_regs_and_iret

.Lnmi_from_kernel:
	/*
	 * Check the special variable on the stack to see if NMIs are
	 * executing.