Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 302f5b26 authored by Andy Lutomirski, committed by Ingo Molnar
Browse files

x86/entry/64: Always run ptregs-using syscalls on the slow path

64-bit syscalls currently have an optimization in which they are
called with partial pt_regs.  A small handful require full
pt_regs.

In the 32-bit and compat cases, I cleaned this up by forcing
full pt_regs for all syscalls.  The performance hit doesn't
really matter as the affected system calls are fundamentally
heavy and this is the 32-bit compat case.

I want to clean up the 64-bit case as well, but I don't want to
hurt fast path performance.  To do that, I want to force the
syscalls that use pt_regs onto the slow path.  This will enable
us to make slow path syscalls be real ABI-compliant C functions.

Use the new syscall entry qualification machinery for this.
'stub_clone' is now 'stub_clone/ptregs'.

The next patch will eliminate the stubs, and we'll just have
'sys_clone/ptregs'.

As of this patch, two-phase entry tracing is no longer used.  It
has served its purpose (namely a huge speedup on some workloads
prior to more general opportunistic SYSRET support), and once
the dust settles I'll send patches to back it out.

The implementation is heavily based on a patch from Brian Gerst:

  http://lkml.kernel.org/g/1449666173-15366-1-git-send-email-brgerst@gmail.com



Originally-From: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/b9beda88460bcefec6e7d792bd44eca9b760b0c4.1454022279.git.luto@kernel.org


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent cfcbadb4
Loading
Loading
Loading
Loading
+42 −14
Original line number Diff line number Diff line
@@ -182,7 +182,15 @@ entry_SYSCALL_64_fastpath:
#endif
	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
	movq	%r10, %rcx

	/*
	 * This call instruction is handled specially in stub_ptregs_64.
	 * It might end up jumping to the slow path.  If it jumps, RAX is
	 * clobbered.
	 */
	call	*sys_call_table(, %rax, 8)
.Lentry_SYSCALL_64_after_fastpath_call:

	movq	%rax, RAX(%rsp)
1:
/*
@@ -235,25 +243,13 @@ GLOBAL(int_ret_from_sys_call_irqs_off)

	/* Do syscall entry tracing */
tracesys:
	movq	%rsp, %rdi
	movl	$AUDIT_ARCH_X86_64, %esi
	call	syscall_trace_enter_phase1
	test	%rax, %rax
	jnz	tracesys_phase2			/* if needed, run the slow path */
	RESTORE_C_REGS_EXCEPT_RAX		/* else restore clobbered regs */
	movq	ORIG_RAX(%rsp), %rax
	jmp	entry_SYSCALL_64_fastpath	/* and return to the fast path */

tracesys_phase2:
	SAVE_EXTRA_REGS
	movq	%rsp, %rdi
	movl	$AUDIT_ARCH_X86_64, %esi
	movq	%rax, %rdx
	call	syscall_trace_enter_phase2
	call	syscall_trace_enter

	/*
	 * Reload registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_entry_phase2() returned
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	RESTORE_C_REGS_EXCEPT_RAX
@@ -355,6 +351,38 @@ opportunistic_sysret_failed:
	jmp	restore_c_regs_and_iret
END(entry_SYSCALL_64)

/*
 * stub_ptregs_64 - common trampoline for syscalls that need full pt_regs.
 *
 * Entered by a jmp from a ptregs_<func> stub, so the word at (%rsp) is
 * still the return address of whoever called that stub.
 *
 * In:  RAX    = address of the C function implementing the syscall
 *      (%rsp) = return address of the caller
 */
ENTRY(stub_ptregs_64)
	/*
	 * Syscalls marked as needing ptregs land here.
	 * If we are on the fast path, we need to save the extra regs.
	 * If we are on the slow path, the extra regs are already saved.
	 *
	 * RAX stores a pointer to the C function implementing the syscall.
	 *
	 * The caller is identified by comparing the return address on the
	 * stack with the label placed right after the fast-path call.
	 */
	cmpq	$.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
	jne	1f

	/*
	 * Called from fast path -- pop return address and jump to slow path.
	 * The pop discards the return address into RAX, so the function
	 * pointer that was in RAX is clobbered from here on.
	 */
	popq	%rax
	jmp	tracesys	/* abandon the fast path; never returns to it */

1:
	/*
	 * Called from C: the return address is left on the stack, so this
	 * is a tail jump and the C function returns directly to our caller.
	 */
	jmp	*%rax				/* tail-jump to the syscall function */
END(stub_ptregs_64)

/*
 * ptregs_stub func - emit the entry point ptregs_<func>.
 *
 * The generated stub loads the address of <func> into RAX using
 * RIP-relative addressing and jumps (does not call) to stub_ptregs_64,
 * which expects the target function pointer in RAX.  Because it is a
 * jump, the stack seen by stub_ptregs_64 is exactly the stub caller's.
 */
.macro ptregs_stub func
ENTRY(ptregs_\func)
	leaq	\func(%rip), %rax	/* RAX = address of the real handler */
	jmp	stub_ptregs_64
END(ptregs_\func)
.endm

/*
 * Instantiate ptregs_stub for each ptregs-using syscall.
 *
 * __SYSCALL_64(nr, sym, qual) pastes the qualifier onto
 * __SYSCALL_64_QUAL_ and so dispatches on it:
 *   - empty qualifier:    expands to nothing, no stub is emitted;
 *   - "ptregs" qualifier: expands to "ptregs_stub sym", emitting the
 *                         ptregs_<sym> entry point.
 *
 * NOTE(review): <asm/syscalls_64.h> is presumably a generated list of
 * __SYSCALL_64() invocations, one per syscall table entry -- confirm.
 */
#define __SYSCALL_64_QUAL_(sym)
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
#include <asm/syscalls_64.h>

	.macro FORK_LIKE func
ENTRY(stub_\func)
+5 −2
Original line number Diff line number Diff line
@@ -6,11 +6,14 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>

#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#define __SYSCALL_64_QUAL_(sym) sym
#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym

#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64

#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),

extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);

+8 −8
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@
12	common	brk			sys_brk
13	64	rt_sigaction		sys_rt_sigaction
14	common	rt_sigprocmask		sys_rt_sigprocmask
15	64	rt_sigreturn		stub_rt_sigreturn
15	64	rt_sigreturn		stub_rt_sigreturn/ptregs
16	64	ioctl			sys_ioctl
17	common	pread64			sys_pread64
18	common	pwrite64		sys_pwrite64
@@ -62,10 +62,10 @@
53	common	socketpair		sys_socketpair
54	64	setsockopt		sys_setsockopt
55	64	getsockopt		sys_getsockopt
56	common	clone			stub_clone
57	common	fork			stub_fork
58	common	vfork			stub_vfork
59	64	execve			stub_execve
56	common	clone			stub_clone/ptregs
57	common	fork			stub_fork/ptregs
58	common	vfork			stub_vfork/ptregs
59	64	execve			stub_execve/ptregs
60	common	exit			sys_exit
61	common	wait4			sys_wait4
62	common	kill			sys_kill
@@ -328,7 +328,7 @@
319	common	memfd_create		sys_memfd_create
320	common	kexec_file_load		sys_kexec_file_load
321	common	bpf			sys_bpf
322	64	execveat		stub_execveat
322	64	execveat		stub_execveat/ptregs
323	common	userfaultfd		sys_userfaultfd
324	common	membarrier		sys_membarrier
325	common	mlock2			sys_mlock2
@@ -346,7 +346,7 @@
517	x32	recvfrom		compat_sys_recvfrom
518	x32	sendmsg			compat_sys_sendmsg
519	x32	recvmsg			compat_sys_recvmsg
520	x32	execve			stub_x32_execve
520	x32	execve			stub_x32_execve/ptregs
521	x32	ptrace			compat_sys_ptrace
522	x32	rt_sigpending		compat_sys_rt_sigpending
523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
@@ -371,4 +371,4 @@
542	x32	getsockopt		compat_sys_getsockopt
543	x32	io_setup		compat_sys_io_setup
544	x32	io_submit		compat_sys_io_submit
545	x32	execveat		stub_x32_execveat
545	x32	execveat		stub_x32_execveat/ptregs