
Commit b57c0b51 authored by Ingo Molnar


Merge tag 'pr-20150201-x86-entry' of git://git.kernel.org/pub/scm/linux/kernel/git/luto/linux into x86/asm

Pull "x86: Entry cleanups and a bugfix for 3.20" from Andy Lutomirski:

 " This fixes a bug in the RCU code I added in ist_enter.  It also includes
   the sysret stuff discussed here:

     http://lkml.kernel.org/g/cover.1421453410.git.luto%40amacapital.net

 "

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Parents: ad6e4686 96b6352c
arch/x86/kernel/entry_64.S  +59 −47
@@ -361,15 +361,12 @@ system_call_fastpath:
  * Has incomplete stack frame and undefined top of stack.
  */
 ret_from_sys_call:
-	movl $_TIF_ALLWORK_MASK,%edi
-	/* edi:	flagmask */
-sysret_check:
+	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	jnz int_ret_from_sys_call_fixup	/* Go to the slow path */
+
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
-	andl %edi,%edx
-	jnz  sysret_careful
 	CFI_REMEMBER_STATE
 	/*
 	 * sysretq will re-enable interrupts:
@@ -383,49 +380,10 @@ sysret_check:
 	USERGS_SYSRET64

 	CFI_RESTORE_STATE
-	/* Handle reschedules */
-	/* edx:	work, edi: workmask */
-sysret_careful:
-	bt $TIF_NEED_RESCHED,%edx
-	jnc sysret_signal
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	pushq_cfi %rdi
-	SCHEDULE_USER
-	popq_cfi %rdi
-	jmp sysret_check

-	/* Handle a signal */
-sysret_signal:
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-#ifdef CONFIG_AUDITSYSCALL
-	bt $TIF_SYSCALL_AUDIT,%edx
-	jc sysret_audit
-#endif
-	/*
-	 * We have a signal, or exit tracing or single-step.
-	 * These all wind up with the iret return path anyway,
-	 * so just join that path right now.
-	 */
+int_ret_from_sys_call_fixup:
 	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
-	jmp int_check_syscall_exit_work
-
-#ifdef CONFIG_AUDITSYSCALL
-	/*
-	 * Return fast path for syscall audit.  Call __audit_syscall_exit()
-	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
-	 * masked off.
-	 */
-sysret_audit:
-	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
-	cmpq $-MAX_ERRNO,%rsi	/* is it < -MAX_ERRNO? */
-	setbe %al		/* 1 if so, 0 if not */
-	movzbl %al,%edi		/* zero-extend that into %edi */
-	call __audit_syscall_exit
-	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
-	jmp sysret_check
-#endif	/* CONFIG_AUDITSYSCALL */
+	jmp int_ret_from_sys_call

 	/* Do syscall tracing */
 tracesys:
@@ -794,6 +752,60 @@ retint_swapgs:	/* return to user-space */
 	 */
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_IRETQ
+
+	/*
+	 * Try to use SYSRET instead of IRET if we're returning to
+	 * a completely clean 64-bit userspace context.
+	 */
+	movq (RCX-R11)(%rsp), %rcx
+	cmpq %rcx,(RIP-R11)(%rsp)		/* RCX == RIP */
+	jne opportunistic_sysret_failed
+
+	/*
+	 * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
+	 * in kernel space.  This essentially lets the user take over
+	 * the kernel, since userspace controls RSP.  It's not worth
+	 * testing for canonicalness exactly -- this check detects any
+	 * of the 17 high bits set, which is true for non-canonical
+	 * or kernel addresses.  (This will pessimize vsyscall=native.
+	 * Big deal.)
+	 *
+	 * If virtual addresses ever become wider, this will need
+	 * to be updated to remain correct on both old and new CPUs.
+	 */
+	.ifne __VIRTUAL_MASK_SHIFT - 47
+	.error "virtual address width changed -- sysret checks need update"
+	.endif
+	shr $__VIRTUAL_MASK_SHIFT, %rcx
+	jnz opportunistic_sysret_failed
+
+	cmpq $__USER_CS,(CS-R11)(%rsp)		/* CS must match SYSRET */
+	jne opportunistic_sysret_failed
+
+	movq (R11-ARGOFFSET)(%rsp), %r11
+	cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp)	/* R11 == RFLAGS */
+	jne opportunistic_sysret_failed
+
+	testq $X86_EFLAGS_RF,%r11		/* sysret can't restore RF */
+	jnz opportunistic_sysret_failed
+
+	/* nothing to check for RSP */
+
+	cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp)	/* SS must match SYSRET */
+	jne opportunistic_sysret_failed
+
+	/*
+	 * We win!  This label is here just for ease of understanding
+	 * perf profiles.  Nothing jumps here.
+	 */
+irq_return_via_sysret:
+	CFI_REMEMBER_STATE
+	RESTORE_ARGS 1,8,1
+	movq (RSP-RIP)(%rsp),%rsp
+	USERGS_SYSRET64
+	CFI_RESTORE_STATE
+
+opportunistic_sysret_failed:
 	SWAPGS
 	jmp restore_args

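The new opportunistic-SYSRET path above packs its eligibility rules into assembly plus comments. As a reading aid, here is a minimal C sketch of the same checks; struct regs, sysret_ok() and the constant names are illustrative stand-ins invented for this sketch (the real code tests the saved pt_regs slots through the RCX/RIP/CS/EFLAGS/SS offsets shown in the diff), not kernel APIs.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for the saved user-register frame (pt_regs). */
struct regs {
	uint64_t rcx, r11, rip, cs, eflags, ss;
};

#define VIRTUAL_MASK_SHIFT	47		/* mirrors __VIRTUAL_MASK_SHIFT on x86_64 */
#define USER_CS			0x33		/* __USER_CS: 64-bit user code selector */
#define USER_SS			0x2b		/* __USER_DS: user data/stack selector */
#define EFLAGS_RF		(1ULL << 16)	/* X86_EFLAGS_RF (resume flag) */

/* Same eligibility tests as the assembly above, in the same order. */
static bool sysret_ok(const struct regs *r)
{
	/* SYSRET reloads RIP from RCX, so the frame must already agree. */
	if (r->rcx != r->rip)
		return false;

	/*
	 * Canonicalness check: shifting out the low 47 bits leaves zero
	 * only for a canonical user address.  Any of the 17 high bits set
	 * means a non-canonical or kernel address, where SYSRET could #GP
	 * while still on the kernel stack.
	 */
	if (r->rcx >> VIRTUAL_MASK_SHIFT)
		return false;

	/* SYSRET hard-codes the user code and stack segments. */
	if (r->cs != USER_CS || r->ss != USER_SS)
		return false;

	/* SYSRET reloads RFLAGS from R11, and it cannot restore RF. */
	if (r->r11 != r->eflags || (r->r11 & EFLAGS_RF))
		return false;

	return true;	/* otherwise: restore registers and USERGS_SYSRET64 */
}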
arch/x86/kernel/traps.c  +18 −7
@@ -110,15 +110,11 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)

 enum ctx_state ist_enter(struct pt_regs *regs)
 {
-	/*
-	 * We are atomic because we're on the IST stack (or we're on x86_32,
-	 * in which case we still shouldn't schedule.
-	 */
-	preempt_count_add(HARDIRQ_OFFSET);
+	enum ctx_state prev_state;

 	if (user_mode_vm(regs)) {
 		/* Other than that, we're just an exception. */
-		return exception_enter();
+		prev_state = exception_enter();
 	} else {
 		/*
 		 * We might have interrupted pretty much anything.  In
@@ -127,12 +123,27 @@ enum ctx_state ist_enter(struct pt_regs *regs)
 		 * but we need to notify RCU.
 		 */
 		rcu_nmi_enter();
-		return IN_KERNEL;  /* the value is irrelevant. */
+		prev_state = IN_KERNEL;  /* the value is irrelevant. */
 	}
+
+	/*
+	 * We are atomic because we're on the IST stack (or we're on x86_32,
+	 * in which case we still shouldn't schedule).
+	 *
+	 * This must be after exception_enter(), because exception_enter()
+	 * won't do anything if in_interrupt() returns true.
+	 */
+	preempt_count_add(HARDIRQ_OFFSET);
+
+	/* This code is a bit fragile.  Test it. */
+	rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work");
+
+	return prev_state;
 }

 void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
 {
+	/* Must be before exception_exit. */
 	preempt_count_sub(HARDIRQ_OFFSET);

 	if (user_mode_vm(regs))
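
The traps.c fix hinges on the ordering rule stated in the new comment: exception_enter() does nothing when in_interrupt() returns true, so the old code, which raised the preempt count first, silently skipped the user-to-kernel context-tracking transition that RCU depends on. Below is a toy, self-contained C model of that interaction; every function here is a deliberately simplified stand-in for the kernel primitive of the same name, not the real implementation.

#include <assert.h>
#include <stdio.h>

enum ctx_state { IN_KERNEL, IN_USER };

static int preempt_count;		/* toy preempt counter */
#define HARDIRQ_OFFSET 1

static int in_interrupt(void)
{
	/* The real check masks the hardirq/softirq/NMI bits of preempt_count. */
	return preempt_count != 0;
}

static enum ctx_state context = IN_USER;	/* toy context-tracking state */

/* Like the real exception_enter(): a no-op when in_interrupt() is true. */
static enum ctx_state exception_enter(void)
{
	enum ctx_state prev = context;

	if (in_interrupt())
		return prev;		/* user->kernel transition silently skipped */
	context = IN_KERNEL;		/* tell RCU/context tracking we're in the kernel */
	return prev;
}

int main(void)
{
	/* Old (buggy) order: bump the count first, so the transition never happens. */
	preempt_count += HARDIRQ_OFFSET;
	exception_enter();
	printf("buggy order: context stays %s\n",
	       context == IN_USER ? "IN_USER (RCU not watching)" : "IN_KERNEL");
	preempt_count -= HARDIRQ_OFFSET;

	/* Fixed order: exception_enter() first, then mark ourselves atomic. */
	context = IN_USER;
	enum ctx_state prev = exception_enter();
	preempt_count += HARDIRQ_OFFSET;
	assert(context == IN_KERNEL);	/* RCU is now watching, as ist_enter asserts */
	preempt_count -= HARDIRQ_OFFSET;
	(void)prev;
	return 0;
}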