Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fa697140 authored by Dominik Brodowski's avatar Dominik Brodowski Committed by Ingo Molnar
Browse files

syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls



Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems:

Each syscall defines a stub which takes struct pt_regs as its only
argument. It decodes just those parameters it needs, e.g:

	asmlinkage long sys_xyzzy(const struct pt_regs *regs)
	{
		return SyS_xyzzy(regs->di, regs->si, regs->dx);
	}

This approach avoids leaking random user-provided register content down
the call chain.

For example, for sys_recv() which is a 4-parameter syscall, the assembly
now is (in slightly reordered fashion):

	<sys_recv>:
		callq	<__fentry__>

		/* decode regs->di, ->si, ->dx and ->r10 */
		mov	0x70(%rdi),%rdi
		mov	0x68(%rdi),%rsi
		mov	0x60(%rdi),%rdx
		mov	0x38(%rdi),%rcx

		[ SyS_recv() is automatically inlined by the compiler,
		  as it is not [yet] used anywhere else ]
		/* clear %r9 and %r8, the 5th and 6th args */
		xor	%r9d,%r9d
		xor	%r8d,%r8d

		/* do the actual work */
		callq	__sys_recvfrom

		/* cleanup and return */
		cltq
		retq

The only valid place in an x86-64 kernel which rightfully calls
a syscall function on its own -- vsyscall -- needs to be modified
to pass struct pt_regs onwards as well.

To keep the syscall table generation working independent of
SYSCALL_PTREGS being enabled, the stubs are named the same as the
"original" syscall stubs, i.e. sys_*().

This patch is based on an original proof-of-concept

 | From: Linus Torvalds <torvalds@linux-foundation.org>
 | Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>

and was split up and heavily modified by me, in particular to base it on
ARCH_HAS_SYSCALL_WRAPPER, to limit it to 64-bit-only for the time being,
and to update the vsyscall to the new calling convention.

Signed-off-by: default avatarDominik Brodowski <linux@dominikbrodowski.net>
Acked-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180405095307.3730-4-linux@dominikbrodowski.net


Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 1bd21c6c
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -2954,3 +2954,8 @@ source "crypto/Kconfig"
source "arch/x86/kvm/Kconfig"

source "lib/Kconfig"

config SYSCALL_PTREGS
	def_bool y
	depends on X86_64 && !COMPAT
	select ARCH_HAS_SYSCALL_WRAPPER
+4 −0
Original line number Diff line number Diff line
@@ -284,9 +284,13 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
	nr &= __SYSCALL_MASK;
	if (likely(nr < NR_syscalls)) {
		nr = array_index_nospec(nr, NR_syscalls);
#ifdef CONFIG_SYSCALL_PTREGS
		regs->ax = sys_call_table[nr](regs);
#else
		regs->ax = sys_call_table[nr](
			regs->di, regs->si, regs->dx,
			regs->r10, regs->r8, regs->r9);
#endif
	}

	syscall_return_slowpath(regs);
+7 −2
Original line number Diff line number Diff line
@@ -7,14 +7,19 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>

#ifdef CONFIG_SYSCALL_PTREGS
/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
#else /* CONFIG_SYSCALL_PTREGS */
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#endif /* CONFIG_SYSCALL_PTREGS */
#include <asm/syscalls_64.h>
#undef __SYSCALL_64

#define __SYSCALL_64(nr, sym, qual) [nr] = sym,

extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);

asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
	/*
	 * Smells like a compiler bug -- it doesn't work
+22 −0
Original line number Diff line number Diff line
@@ -127,6 +127,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
	int vsyscall_nr, syscall_nr, tmp;
	int prev_sig_on_uaccess_err;
	long ret;
#ifdef CONFIG_SYSCALL_PTREGS
	unsigned long orig_dx;
#endif

	/*
	 * No point in checking CS -- the only way to get here is a user mode
@@ -227,19 +230,38 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
	ret = -EFAULT;
	switch (vsyscall_nr) {
	case 0:
#ifdef CONFIG_SYSCALL_PTREGS
		/* this decodes regs->di and regs->si on its own */
		ret = sys_gettimeofday(regs);
#else
		ret = sys_gettimeofday(
			(struct timeval __user *)regs->di,
			(struct timezone __user *)regs->si);
#endif /* CONFIG_SYSCALL_PTREGS */
		break;

	case 1:
#ifdef CONFIG_SYSCALL_PTREGS
		/* this decodes regs->di on its own */
		ret = sys_time(regs);
#else
		ret = sys_time((time_t __user *)regs->di);
#endif /* CONFIG_SYSCALL_PTREGS */
		break;

	case 2:
#ifdef CONFIG_SYSCALL_PTREGS
		/* while we could clobber regs->dx, we didn't in the past... */
		orig_dx = regs->dx;
		regs->dx = 0;
		/* this decodes regs->di, regs->si and regs->dx on its own */
		ret = sys_getcpu(regs);
		regs->dx = orig_dx;
#else
		ret = sys_getcpu((unsigned __user *)regs->di,
				 (unsigned __user *)regs->si,
				 NULL);
#endif /* CONFIG_SYSCALL_PTREGS */
		break;
	}

+4 −0
Original line number Diff line number Diff line
@@ -20,9 +20,13 @@
#include <asm/thread_info.h>	/* for TS_COMPAT */
#include <asm/unistd.h>

#ifdef CONFIG_SYSCALL_PTREGS
typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
#else
typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
					  unsigned long, unsigned long,
					  unsigned long, unsigned long);
#endif /* CONFIG_SYSCALL_PTREGS */
extern const sys_call_ptr_t sys_call_table[];

#if defined(CONFIG_X86_32)
Loading