Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 057e3a87 authored by Andy Lutomirski's avatar Andy Lutomirski Committed by Sasha Levin
Browse files

x86/nmi/64: Switch stacks on userspace NMI entry



[ Upstream commit 9b6e6a8334d56354853f9c255d1395c2ba570e0a ]

Returning to userspace is tricky: IRET can fail, and ESPFIX can
rearrange the stack prior to IRET.

The NMI nesting fixup relies on a precise stack layout and
atomic IRET.  Rather than trying to teach the NMI nesting fixup
to handle ESPFIX and failed IRET, punt: run NMIs that came from
user mode on the normal kernel stack.

This will make some nested NMIs visible to C code, but the C
code is okay with that.

As a side effect, this should speed up perf: it eliminates an
RDMSR when NMIs come from user mode.

Signed-off-by: default avatarAndy Lutomirski <luto@kernel.org>
Reviewed-by: default avatarSteven Rostedt <rostedt@goodmis.org>
Reviewed-by: default avatarBorislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
Signed-off-by: default avatarSasha Levin <sasha.levin@oracle.com>
parent 20c593d2
Loading
Loading
Loading
Loading
+61 −4
Original line number Diff line number Diff line
@@ -1459,19 +1459,76 @@ ENTRY(nmi)
	 * a nested NMI that updated the copy interrupt stack frame, a
	 * jump will be made to the repeat_nmi code that will handle the second
	 * NMI.
	 *
	 * However, espfix prevents us from directly returning to userspace
	 * with a single IRET instruction.  Similarly, IRET to user mode
	 * can fault.  We therefore handle NMIs from user space like
	 * other IST entries.
	 */

	/* Use %rdx as out temp variable throughout */
	pushq_cfi %rdx
	CFI_REL_OFFSET rdx, 0

	testb	$3, CS-RIP+8(%rsp)
	jz	.Lnmi_from_kernel

	/*
	 * NMI from user mode.  We need to run on the thread stack, but we
	 * can't go through the normal entry paths: NMIs are masked, and
	 * we don't want to enable interrupts, because then we'll end
	 * up in an awkward situation in which IRQs are on but NMIs
	 * are off.
	 */

	SWAPGS
	cld
	movq	%rsp, %rdx
	movq	PER_CPU_VAR(kernel_stack), %rsp
	addq	$KERNEL_STACK_OFFSET, %rsp
	pushq	5*8(%rdx)	/* pt_regs->ss */
	pushq	4*8(%rdx)	/* pt_regs->rsp */
	pushq	3*8(%rdx)	/* pt_regs->flags */
	pushq	2*8(%rdx)	/* pt_regs->cs */
	pushq	1*8(%rdx)	/* pt_regs->rip */
	pushq   $-1		/* pt_regs->orig_ax */
	pushq   %rdi		/* pt_regs->di */
	pushq   %rsi		/* pt_regs->si */
	pushq   (%rdx)		/* pt_regs->dx */
	pushq   %rcx		/* pt_regs->cx */
	pushq   %rax		/* pt_regs->ax */
	pushq   %r8		/* pt_regs->r8 */
	pushq   %r9		/* pt_regs->r9 */
	pushq   %r10		/* pt_regs->r10 */
	pushq   %r11		/* pt_regs->r11 */
	pushq	%rbx		/* pt_regs->rbx */
	pushq	%rbp		/* pt_regs->rbp */
	pushq	%r12		/* pt_regs->r12 */
	pushq	%r13		/* pt_regs->r13 */
	pushq	%r14		/* pt_regs->r14 */
	pushq	%r15		/* pt_regs->r15 */

	/*
	 * If %cs was not the kernel segment, then the NMI triggered in user
	 * space, which means it is definitely not nested.
	 * At this point we no longer need to worry about stack damage
	 * due to nesting -- we're on the normal thread stack and we're
	 * done with the NMI stack.
	 */
	cmpl $__KERNEL_CS, 16(%rsp)
	jne first_nmi

	movq	%rsp, %rdi
	movq	$-1, %rsi
	call	do_nmi

	/*
	 * Return back to user mode.  We must *not* do the normal exit
	 * work, because we don't want to enable interrupts.  Fortunately,
	 * do_nmi doesn't modify pt_regs.
	 */
	SWAPGS

	addq	$6*8, %rsp	/* skip bx, bp, and r12-r15 */
	jmp	restore_args

.Lnmi_from_kernel:
	/*
	 * Check the special variable on the stack to see if NMIs are
	 * executing.