Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a75a3f6f authored by Linus Torvalds
Browse files

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm changes from Ingo Molnar:
 "The main change in this cycle is another step in the big x86 system
  call interface rework by Andy Lutomirski, which moves most of the low
  level x86 entry code from assembly to C, for all syscall entries
  except native 64-bit system calls:

    arch/x86/entry/entry_32.S        | 182 ++++------
    arch/x86/entry/entry_64_compat.S | 547 ++++++++-----------------------
    194 insertions(+), 535 deletions(-)

  ... our hope is that the final remaining step (converting native
  64-bit system calls) will be less painful than all the previous steps,
  given that most of the legacies and quirks are concentrated around
  native 32-bit and compat environments"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits)
  x86/entry/32: Fix FS and GS restore in opportunistic SYSEXIT
  x86/entry/32: Fix entry_INT80_32() to expect interrupts to be on
  um/x86: Fix build after x86 syscall changes
  x86/asm: Remove the xyz_cfi macros from dwarf2.h
  selftests/x86: Style fixes for the 'unwind_vdso' test
  x86/entry/64/compat: Document sysenter_fix_flags's reason for existence
  x86/entry: Split and inline syscall_return_slowpath()
  x86/entry: Split and inline prepare_exit_to_usermode()
  x86/entry: Use pt_regs_to_thread_info() in syscall entry tracing
  x86/entry: Hide two syscall entry assertions behind CONFIG_DEBUG_ENTRY
  x86/entry: Micro-optimize compat fast syscall arg fetch
  x86/entry: Force inlining of 32-bit syscall code
  x86/entry: Make irqs_disabled checks in exit code depend on lockdep
  x86/entry: Remove unnecessary IRQ twiddling in fast 32-bit syscalls
  x86/asm: Remove thread_info.sysenter_return
  x86/entry/32: Re-implement SYSENTER using the new C path
  x86/entry/32: Switch INT80 to the new C syscall path
  x86/entry/32: Open-code return tracking from fork and kthreads
  x86/entry/compat: Implement opportunistic SYSRETL for compat syscalls
  x86/vdso/compat: Wire up SYSENTER and SYSCALL for compat userspace
  ...
parents d2bea739 3bd29515
Loading
Loading
Loading
Loading
+49 −0
Original line number Diff line number Diff line
@@ -2027,6 +2027,55 @@ config COMPAT_VDSO
	  If unsure, say N: if you are compiling your own kernel, you
	  are unlikely to be using a buggy version of glibc.

choice
	prompt "vsyscall table for legacy applications"
	depends on X86_64
	default LEGACY_VSYSCALL_EMULATE
	help
	  Legacy user code that does not know how to find the vDSO expects
	  to be able to issue three syscalls by calling fixed addresses in
	  kernel space. Since this location is not randomized with ASLR,
	  it can be used to assist security vulnerability exploitation.

	  This setting can be changed at boot time via the kernel command
	  line parameter vsyscall=[native|emulate|none].

	  On a system with recent enough glibc (2.14 or newer) and no
	  static binaries, you can say None without a performance penalty
	  to improve security.

	  If unsure, select "Emulate".

	config LEGACY_VSYSCALL_NATIVE
		bool "Native"
		help
		  Actual executable code is located in the fixed vsyscall
		  address mapping, implementing time() efficiently. Since
		  this makes the mapping executable, it can be used during
		  security vulnerability exploitation (traditionally as
		  ROP gadgets). This configuration is not recommended.

	config LEGACY_VSYSCALL_EMULATE
		bool "Emulate"
		help
		  The kernel traps and emulates calls into the fixed
		  vsyscall address mapping. This makes the mapping
		  non-executable, but it still contains known contents,
		  which could be used in certain rare security vulnerability
		  exploits. This configuration is recommended when userspace
		  still uses the vsyscall area.

	config LEGACY_VSYSCALL_NONE
		bool "None"
		help
		  There will be no vsyscall mapping at all. This will
		  eliminate any risk of ASLR bypass due to the vsyscall
		  fixed address mapping. Attempts to use the vsyscalls
		  will be reported to dmesg, so that either old or
		  malicious userspace programs can be identified.

endchoice

config CMDLINE_BOOL
	bool "Built-in kernel command line"
	---help---
+8 −2
Original line number Diff line number Diff line
@@ -159,6 +159,12 @@ endif
sp-$(CONFIG_X86_32) := esp
sp-$(CONFIG_X86_64) := rsp

# do binutils support CFI?
cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
# is .cfi_signal_frame supported too?
cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)

# does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
@@ -166,8 +172,8 @@ asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)

KBUILD_AFLAGS += $(asinstr) $(avx_instr) $(avx2_instr)
KBUILD_CFLAGS += $(asinstr) $(avx_instr) $(avx2_instr)
KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)

LDFLAGS := -m elf_$(UTS_MACHINE)

+216 −48
Original line number Diff line number Diff line
@@ -24,10 +24,19 @@

#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/vdso.h>
#include <asm/uaccess.h>

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

/*
 * Map a pt_regs frame back to the thread_info of the task it belongs to.
 * regs + 1 is the address just past the saved register frame; adding
 * TOP_OF_KERNEL_STACK_PADDING yields the top of the kernel stack, and
 * thread_info sits THREAD_SIZE below that top.
 */
static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
{
	unsigned long top_of_stack =
		(unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
	return (struct thread_info *)(top_of_stack - THREAD_SIZE);
}

#ifdef CONFIG_CONTEXT_TRACKING
/* Called on entry from user mode with IRQs off. */
__visible void enter_from_user_mode(void)
@@ -66,13 +75,14 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
 */
unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
{
	struct thread_info *ti = pt_regs_to_thread_info(regs);
	unsigned long ret = 0;
	u32 work;

	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
		BUG_ON(regs != task_pt_regs(current));

	work = ACCESS_ONCE(current_thread_info()->flags) &
		_TIF_WORK_SYSCALL_ENTRY;
	work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;

#ifdef CONFIG_CONTEXT_TRACKING
	/*
@@ -154,10 +164,11 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
				unsigned long phase1_result)
{
	struct thread_info *ti = pt_regs_to_thread_info(regs);
	long ret = 0;
	u32 work = ACCESS_ONCE(current_thread_info()->flags) &
		_TIF_WORK_SYSCALL_ENTRY;
	u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;

	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
		BUG_ON(regs != task_pt_regs(current));

	/*
@@ -207,19 +218,12 @@ long syscall_trace_enter(struct pt_regs *regs)
		return syscall_trace_enter_phase2(regs, arch, phase1_result);
}

static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
{
	unsigned long top_of_stack =
		(unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
	return (struct thread_info *)(top_of_stack - THREAD_SIZE);
}
#define EXIT_TO_USERMODE_LOOP_FLAGS				\
	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |	\
	 _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)

/* Called with IRQs disabled. */
__visible void prepare_exit_to_usermode(struct pt_regs *regs)
static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
{
	if (WARN_ON(!irqs_disabled()))
		local_irq_disable();

	/*
	 * In order to return to user mode, we need to have IRQs off with
	 * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY,
@@ -229,14 +233,6 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
	 * work to clear some of the flags can sleep.
	 */
	while (true) {
		u32 cached_flags =
			READ_ONCE(pt_regs_to_thread_info(regs)->flags);

		if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME |
				      _TIF_UPROBE | _TIF_NEED_RESCHED |
				      _TIF_USER_RETURN_NOTIFY)))
			break;

		/* We have work to do. */
		local_irq_enable();

@@ -260,33 +256,42 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)

		/* Disable IRQs and retry */
		local_irq_disable();

		cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags);

		if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
			break;

	}
}

/*
 * Called with IRQs disabled.  Runs any pending exit-to-usermode work
 * (signals, reschedule, etc.) via exit_to_usermode_loop(), then tells
 * context tracking we are entering user mode.
 */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{
	u32 cached_flags;

	/* Under lockdep builds, verify the caller really disabled IRQs. */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
		local_irq_disable();

	lockdep_sys_exit();

	cached_flags =
		READ_ONCE(pt_regs_to_thread_info(regs)->flags);

	/* Take the slow loop only if some exit work flag is set. */
	if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
		exit_to_usermode_loop(regs, cached_flags);

	user_enter();
}

/*
 * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
 * state such that we can immediately switch to user mode.
 */
__visible void syscall_return_slowpath(struct pt_regs *regs)
#define SYSCALL_EXIT_WORK_FLAGS				\
	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |	\
	 _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)

static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
{
	struct thread_info *ti = pt_regs_to_thread_info(regs);
	u32 cached_flags = READ_ONCE(ti->flags);
	bool step;

	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);

	if (WARN(irqs_disabled(), "syscall %ld left IRQs disabled",
		 regs->orig_ax))
		local_irq_enable();

	/*
	 * First do one-time work.  If these work items are enabled, we
	 * want to run them exactly once per syscall exit with IRQs on.
	 */
	if (cached_flags & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |
			    _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)) {
	audit_syscall_exit(regs);

	if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
@@ -305,6 +310,28 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
		tracehook_report_syscall_exit(regs, step);
}

/*
 * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
 * state such that we can immediately switch to user mode.
 */
__visible inline void syscall_return_slowpath(struct pt_regs *regs)
{
	struct thread_info *ti = pt_regs_to_thread_info(regs);
	u32 cached_flags = READ_ONCE(ti->flags);

	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);

	if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
	    WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
		local_irq_enable();

	/*
	 * First do one-time work.  If these work items are enabled, we
	 * want to run them exactly once per syscall exit with IRQs on.
	 */
	if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
		syscall_slow_exit_work(regs, cached_flags);

#ifdef CONFIG_COMPAT
	/*
	 * Compat syscalls set TS_COMPAT.  Make sure we clear it before
@@ -316,3 +343,144 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
	local_irq_disable();
	prepare_exit_to_usermode(regs);
}

#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/*
 * Does a 32-bit syscall.  Called with IRQs on and does all entry and
 * exit work and returns with IRQs off.  This function is extremely hot
 * in workloads that use it, and it's usually called from
 * do_fast_syscall_32, so forcibly inline it to improve performance.
 */
#ifdef CONFIG_X86_32
/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
__visible
#else
/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
static
#endif
__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
	struct thread_info *ti = pt_regs_to_thread_info(regs);
	unsigned int nr = (unsigned int)regs->orig_ax;

#ifdef CONFIG_IA32_EMULATION
	ti->status |= TS_COMPAT;
#endif

	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
		/*
		 * Subtlety here: if ptrace pokes something larger than
		 * 2^32-1 into orig_ax, this truncates it.  This may or
		 * may not be necessary, but it matches the old asm
		 * behavior.
		 */
		nr = syscall_trace_enter(regs);
	}

	if (likely(nr < IA32_NR_syscalls)) {
		/*
		 * It's possible that a 32-bit syscall implementation
		 * takes a 64-bit parameter but nonetheless assumes that
		 * the high bits are zero.  Make sure we zero-extend all
		 * of the args.
		 */
		regs->ax = ia32_sys_call_table[nr](
			(unsigned int)regs->bx, (unsigned int)regs->cx,
			(unsigned int)regs->dx, (unsigned int)regs->si,
			(unsigned int)regs->di, (unsigned int)regs->bp);
	}

	syscall_return_slowpath(regs);
}

#ifdef CONFIG_X86_64
/* Handles INT80 on 64-bit kernels */
__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
{
	/* Entry arrived with IRQs off; enable them and dispatch. */
	local_irq_enable();
	do_syscall_32_irqs_on(regs);
}
#endif

/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
{
	/*
	 * Called using the internal vDSO SYSENTER/SYSCALL32 calling
	 * convention.  Adjust regs so it looks like we entered using int80.
	 */

	unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
		vdso_image_32.sym_int80_landing_pad;

	/*
	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
	 * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
	 * Fix it up.
	 */
	regs->ip = landing_pad;

	/*
	 * Fetch ECX from where the vDSO stashed it.
	 *
	 * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
	 */
	local_irq_enable();
	if (
#ifdef CONFIG_X86_64
		/*
		 * Micro-optimization: the pointer we're following is explicitly
		 * 32 bits, so it can't be out of range.
		 */
		__get_user(*(u32 *)&regs->cx,
			    (u32 __user __force *)(unsigned long)(u32)regs->sp)
#else
		get_user(*(u32 *)&regs->cx,
			 (u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif
		) {

		/* User code screwed up.  Fail the syscall with -EFAULT. */
		local_irq_disable();
		regs->ax = -EFAULT;
#ifdef CONFIG_CONTEXT_TRACKING
		enter_from_user_mode();
#endif
		prepare_exit_to_usermode(regs);
		return 0;	/* Keep it simple: use IRET. */
	}

	/* Now this is just like a normal syscall. */
	do_syscall_32_irqs_on(regs);

#ifdef CONFIG_X86_64
	/*
	 * Opportunistic SYSRETL: if possible, try to return using SYSRETL.
	 * SYSRETL is available on all 64-bit CPUs, so we don't need to
	 * bother with SYSEXIT.
	 *
	 * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
	 * because the ECX fixup above will ensure that this is essentially
	 * never the case.
	 */
	return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
		regs->ip == landing_pad &&
		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
#else
	/*
	 * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
	 *
	 * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
	 * because the ECX fixup above will ensure that this is essentially
	 * never the case.
	 *
	 * We don't allow syscalls at all from VM86 mode, but we still
	 * need to check VM, because we might be returning from sys_vm86.
	 */
	return static_cpu_has(X86_FEATURE_SEP) &&
		regs->cs == __USER_CS && regs->ss == __USER_DS &&
		regs->ip == landing_pad &&
		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
#endif
}
#endif
+60 −122
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
 *
 * entry_32.S contains the system-call and low-level fault and trap handling routines.
 *
 * Stack layout in 'syscall_exit':
 * Stack layout while running C code:
 *	ptrace needs to have all registers on the stack.
 *	If the order here is changed, it needs to be
 *	updated in fork.c:copy_process(), signal.c:do_signal(),
@@ -153,13 +153,13 @@

#endif /* CONFIG_X86_32_LAZY_GS */

.macro SAVE_ALL
.macro SAVE_ALL pt_regs_ax=%eax
	cld
	PUSH_GS
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	\pt_regs_ax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
@@ -211,7 +211,11 @@ ENTRY(ret_from_fork)
	popl	%eax
	pushl	$0x0202				# Reset kernel eflags
	popfl
	jmp	syscall_exit

	/* When we fork, we trace the syscall return in the child, too. */
	movl    %esp, %eax
	call    syscall_return_slowpath
	jmp     restore_all
END(ret_from_fork)

ENTRY(ret_from_kernel_thread)
@@ -224,7 +228,15 @@ ENTRY(ret_from_kernel_thread)
	movl	PT_EBP(%esp), %eax
	call	*PT_EBX(%esp)
	movl	$0, PT_EAX(%esp)
	jmp	syscall_exit

	/*
	 * Kernel threads return to userspace as if returning from a syscall.
	 * We should check whether anything actually uses this path and, if so,
	 * consider switching it over to ret_from_fork.
	 */
	movl    %esp, %eax
	call    syscall_return_slowpath
	jmp     restore_all
ENDPROC(ret_from_kernel_thread)

/*
@@ -255,7 +267,6 @@ ret_from_intr:
	jb	resume_kernel			# not returning to v8086 or userspace

ENTRY(resume_userspace)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl	%esp, %eax
@@ -276,76 +287,47 @@ need_resched:
END(resume_kernel)
#endif

/*
 * SYSENTER_RETURN points to after the SYSENTER instruction
 * in the vsyscall page.  See vsyscall-sysentry.S, which defines
 * the symbol.
 */

	# SYSENTER  call handler stub
ENTRY(entry_SYSENTER_32)
	movl	TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
	/*
	 * Interrupts are disabled here, but we can't trace it until
	 * enough kernel state to call TRACE_IRQS_OFF can be called - but
	 * we immediately enable interrupts at that point anyway.
	 */
	pushl	$__USER_DS
	pushl	%ebp
	pushfl
	orl	$X86_EFLAGS_IF, (%esp)
	pushl	$__USER_CS
	/*
	 * Push current_thread_info()->sysenter_return to the stack.
	 * A tiny bit of offset fixup is necessary: TI_sysenter_return
	 * is relative to thread_info, which is at the bottom of the
	 * kernel stack page.  4*4 means the 4 words pushed above;
	 * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
	 * and THREAD_SIZE takes us to the bottom.
	 */
	pushl	((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)

	pushl	%eax
	SAVE_ALL
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushl	$__USER_DS		/* pt_regs->ss */
	pushl	%ecx			/* pt_regs->cx */
	pushfl				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
	pushl	$__USER_CS		/* pt_regs->cs */
	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
 * Load the potential sixth argument from user stack.
 * Careful about security.
	 * User mode is traced as though IRQs are on, and SYSENTER
	 * turned them off.
	 */
	cmpl	$__PAGE_OFFSET-3, %ebp
	jae	syscall_fault
	ASM_STAC
1:	movl	(%ebp), %ebp
	ASM_CLAC
	movl	%ebp, PT_EBP(%esp)
	_ASM_EXTABLE(1b, syscall_fault)
	TRACE_IRQS_OFF

	GET_THREAD_INFO(%ebp)
	movl	%esp, %eax
	call	do_fast_syscall_32
	testl	%eax, %eax
	jz	.Lsyscall_32_done

	testl	$_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
	jnz	syscall_trace_entry
sysenter_do_call:
	cmpl	$(NR_syscalls), %eax
	jae	sysenter_badsys
	call	*sys_call_table(, %eax, 4)
sysenter_after_call:
	movl	%eax, PT_EAX(%esp)
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl	TI_flags(%ebp), %ecx
	testl	$_TIF_ALLWORK_MASK, %ecx
	jnz	syscall_exit_work_irqs_off
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
	movl	PT_EIP(%esp), %edx
	movl	PT_OLDESP(%esp), %ecx
	xorl	%ebp, %ebp
	TRACE_IRQS_ON
/* Opportunistic SYSEXIT */
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
1:	mov	PT_FS(%esp), %fs
	PTGS_TO_GS
	popl	%ebx			/* pt_regs->bx */
	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
	popl	%esi			/* pt_regs->si */
	popl	%edi			/* pt_regs->di */
	popl	%ebp			/* pt_regs->bp */
	popl	%eax			/* pt_regs->ax */

	/*
	 * Return back to the vDSO, which will pop ecx and edx.
	 * Don't bother with DS and ES (they already contain __USER_DS).
	 */
	ENABLE_INTERRUPTS_SYSEXIT

.pushsection .fixup, "ax"
@@ -359,21 +341,18 @@ ENDPROC(entry_SYSENTER_32)
	# system call handler stub
ENTRY(entry_INT80_32)
	ASM_CLAC
	pushl	%eax				# save orig_eax
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
						# system call tracing in operation / emulation
	testl	$_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
	jnz	syscall_trace_entry
	cmpl	$(NR_syscalls), %eax
	jae	syscall_badsys
syscall_call:
	call	*sys_call_table(, %eax, 4)
syscall_after_call:
	movl	%eax, PT_EAX(%esp)		# store the return value
syscall_exit:
	LOCKDEP_SYS_EXIT
	jmp	syscall_exit_work
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * User mode is traced as though IRQs are on.  Unlike the 64-bit
	 * case, INT80 is a trap gate on 32-bit kernels, so interrupts
	 * are already on (unless user code is messing around with iopl).
	 */

	movl	%esp, %eax
	call	do_syscall_32_irqs_on
.Lsyscall_32_done:

restore_all:
	TRACE_IRQS_IRET
@@ -450,47 +429,6 @@ ldt_ss:
#endif
ENDPROC(entry_INT80_32)

	# perform syscall exit tracing
	ALIGN
syscall_trace_entry:
	movl	$-ENOSYS, PT_EAX(%esp)
	movl	%esp, %eax
	call	syscall_trace_enter
	/* What it returned is what we'll actually use.  */
	cmpl	$(NR_syscalls), %eax
	jnae	syscall_call
	jmp	syscall_exit
END(syscall_trace_entry)

	# perform syscall exit tracing
	ALIGN
syscall_exit_work_irqs_off:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_ANY)

syscall_exit_work:
	movl	%esp, %eax
	call	syscall_return_slowpath
	jmp	restore_all
END(syscall_exit_work)

syscall_fault:
	ASM_CLAC
	GET_THREAD_INFO(%ebp)
	movl	$-EFAULT, PT_EAX(%esp)
	jmp	resume_userspace
END(syscall_fault)

syscall_badsys:
	movl	$-ENOSYS, %eax
	jmp	syscall_after_call
END(syscall_badsys)

sysenter_badsys:
	movl	$-ENOSYS, %eax
	jmp	sysenter_after_call
END(sysenter_badsys)

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back for ESPFIX stack to the normal zerobased stack
+2 −7
Original line number Diff line number Diff line
@@ -391,20 +391,16 @@ GLOBAL(stub_execveat)
	jmp	return_from_execve
END(stub_execveat)

#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
#if defined(CONFIG_X86_X32_ABI)
	.align	8
GLOBAL(stub_x32_execve)
GLOBAL(stub32_execve)
	call	compat_sys_execve
	jmp	return_from_execve
END(stub32_execve)
END(stub_x32_execve)
	.align	8
GLOBAL(stub_x32_execveat)
GLOBAL(stub32_execveat)
	call	compat_sys_execveat
	jmp	return_from_execve
END(stub32_execveat)
END(stub_x32_execveat)
#endif

@@ -557,7 +553,6 @@ ret_from_intr:
	jz	retint_kernel

	/* Interrupt came from user space */
	LOCKDEP_SYS_EXIT_IRQ
GLOBAL(retint_user)
	mov	%rsp,%rdi
	call	prepare_exit_to_usermode
@@ -587,7 +582,7 @@ retint_kernel:
 * At this label, code paths which return to kernel and to user,
 * which come from interrupts/exception and from syscalls, merge.
 */
restore_regs_and_iret:
GLOBAL(restore_regs_and_iret)
	RESTORE_EXTRA_REGS
restore_c_regs_and_iret:
	RESTORE_C_REGS
Loading