Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1d3e53e8 authored by Andy Lutomirski's avatar Andy Lutomirski Committed by Ingo Molnar
Browse files

x86/entry/64: Refactor IRQ stacks and make them NMI-safe



This will allow IRQ stacks to nest inside NMIs or similar entries
that can happen during IRQ stack setup or teardown.

The new macros won't work correctly if they're invoked with IRQs on.
Add a check under CONFIG_DEBUG_ENTRY to detect that.

Signed-off-by: default avatarAndy Lutomirski <luto@kernel.org>
[ Use %r10 instead of %r11 in xen_do_hypervisor_callback to make objtool
  and ORC unwinder's lives a little easier. ]
Signed-off-by: default avatarJosh Poimboeuf <jpoimboe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/b0b2ff5fb97d2da2e1d7e1f380190c92545c8bb5.1499786555.git.jpoimboe@redhat.com


Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent cb8c65cc
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
	  Some of these sanity checks may slow down kernel entries and
	  exits or otherwise impact performance.

	  This is currently used to help test NMI code.

	  If unsure, say N.

config DEBUG_NMI_SELFTEST
+61 −24
Original line number Diff line number Diff line
@@ -447,6 +447,59 @@ ENTRY(irq_entries_start)
    .endr
END(irq_entries_start)

.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
	pushfq
	testl $X86_EFLAGS_IF, (%rsp)
	jz .Lokay_\@
	ud2
.Lokay_\@:
	addq $8, %rsp
#endif
.endm

/*
 * Enters the IRQ stack if we're not already using it.  NMI-safe.  Clobbers
 * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
 * Requires kernel GSBASE.
 *
 * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
 */
.macro ENTER_IRQ_STACK old_rsp
	DEBUG_ENTRY_ASSERT_IRQS_OFF
	movq	%rsp, \old_rsp
	incl	PER_CPU_VAR(irq_count)

	/*
	 * Right now, if we just incremented irq_count to zero, we've
	 * claimed the IRQ stack but we haven't switched to it yet.
	 *
	 * If anything is added that can interrupt us here without using IST,
	 * it must be *extremely* careful to limit its stack usage.  This
	 * could include kprobes and a hypothetical future IST-less #DB
	 * handler.
	 */

	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
	pushq	\old_rsp
.endm

/*
 * Undoes ENTER_IRQ_STACK.
 */
.macro LEAVE_IRQ_STACK
	DEBUG_ENTRY_ASSERT_IRQS_OFF
	/* We need to be off the IRQ stack before decrementing irq_count. */
	popq	%rsp

	/*
	 * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
	 * the irq stack but we're not on it.
	 */

	decl	PER_CPU_VAR(irq_count)
.endm

/*
 * Interrupt entry/exit.
 *
@@ -485,17 +538,7 @@ END(irq_entries_start)
	CALL_enter_from_user_mode

1:
	/*
	 * Save previous stack pointer, optionally switch to interrupt stack.
	 * irq_count is used to check if a CPU is already on an interrupt stack
	 * or not. While this is essentially redundant with preempt_count it is
	 * a little cheaper to use a separate counter in the PDA (short of
	 * moving irq_enter into assembly, which would be too much work)
	 */
	movq	%rsp, %rdi
	incl	PER_CPU_VAR(irq_count)
	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
	pushq	%rdi
	ENTER_IRQ_STACK old_rsp=%rdi
	/* We entered an interrupt context - irqs are off: */
	TRACE_IRQS_OFF

@@ -515,10 +558,8 @@ common_interrupt:
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	decl	PER_CPU_VAR(irq_count)

	/* Restore saved previous stack */
	popq	%rsp
	LEAVE_IRQ_STACK

	testb	$3, CS(%rsp)
	jz	retint_kernel
@@ -891,12 +932,10 @@ bad_gs:
ENTRY(do_softirq_own_stack)
	pushq	%rbp
	mov	%rsp, %rbp
	incl	PER_CPU_VAR(irq_count)
	cmove	PER_CPU_VAR(irq_stack_ptr), %rsp
	push	%rbp				/* frame pointer backlink */
	ENTER_IRQ_STACK old_rsp=%r11
	call	__do_softirq
	LEAVE_IRQ_STACK
	leaveq
	decl	PER_CPU_VAR(irq_count)
	ret
END(do_softirq_own_stack)

@@ -923,13 +962,11 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
 * see the correct pointer to the pt_regs
 */
	movq	%rdi, %rsp			/* we don't return, adjust the stack frame */
11:	incl	PER_CPU_VAR(irq_count)
	movq	%rsp, %rbp
	cmovzq	PER_CPU_VAR(irq_stack_ptr), %rsp
	pushq	%rbp				/* frame pointer backlink */

	ENTER_IRQ_STACK old_rsp=%r10
	call	xen_evtchn_do_upcall
	popq	%rsp
	decl	PER_CPU_VAR(irq_count)
	LEAVE_IRQ_STACK

#ifndef CONFIG_PREEMPT
	call	xen_maybe_preempt_hcall
#endif
+3 −0
Original line number Diff line number Diff line
@@ -279,6 +279,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
	unsigned prev_fsindex, prev_gsindex;

	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
		     this_cpu_read(irq_count) != -1);

	switch_fpu_prepare(prev_fpu, cpu);

	/* We must save %fs and %gs before load_TLS() because