Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6365b842 authored by Andy Lutomirski, committed by Thomas Gleixner
Browse files

x86/syscalls: Split the x32 syscalls into their own table



For unfortunate historical reasons, the x32 syscalls and the x86_64
syscalls are not all numbered the same.  As an example, ioctl() is nr 16 on
x86_64 but 514 on x32.

This has potentially nasty consequences, since it means that there are two
valid RAX values to do ioctl(2) and two invalid RAX values.  The valid
values are 16 (i.e. ioctl(2) using the x86_64 ABI) and (514 | 0x40000000)
(i.e. ioctl(2) using the x32 ABI).

The invalid values are 514 and (16 | 0x40000000).  514 will enter the
"COMPAT_SYSCALL_DEFINE3(ioctl, ...)" entry point with in_compat_syscall()
and in_x32_syscall() returning false, whereas (16 | 0x40000000) will enter
the native entry point with in_compat_syscall() and in_x32_syscall()
returning true.  Both are bogus, and both will exercise code paths in the
kernel and in any running seccomp filters that really ought to be
unreachable.

Splitting out the x32 syscalls into their own table allows both bogus
invocations to return -ENOSYS.  I've checked glibc, musl, and Bionic, and
all of them appear to call syscalls with their correct numbers, so this
change should have no effect on them.

There is an added benefit going forward: new syscalls that need special
handling on x32 can share the same number on x32 and x86_64.  This means
that the special syscall range 512-547 can be treated as a legacy wart
instead of something that may need to be extended in the future.

Also add a selftest to verify the new behavior.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/208024256b764312598f014ebfb0a42472c19354.1562185330.git.luto@kernel.org
parent f85a8573
Loading
Loading
Loading
Loading
+7 −6
Original line number Diff line number Diff line
@@ -285,15 +285,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
		nr = syscall_trace_enter(regs);

	/*
	 * NB: Native and x32 syscalls are dispatched from the same
	 * table.  The only functional difference is the x32 bit in
	 * regs->orig_ax, which changes the behavior of some syscalls.
	 */
	nr &= __SYSCALL_MASK;
	if (likely(nr < NR_syscalls)) {
		nr = array_index_nospec(nr, NR_syscalls);
		regs->ax = sys_call_table[nr](regs);
#ifdef CONFIG_X86_X32_ABI
	} else if (likely((nr & __X32_SYSCALL_BIT) &&
			  (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
		nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
					X32_NR_syscalls);
		regs->ax = x32_sys_call_table[nr](regs);
#endif
	}

	syscall_return_slowpath(regs);
+25 −0
Original line number Diff line number Diff line
@@ -10,10 +10,13 @@
/* this is a lie, but it does not hurt as sys_ni_syscall just returns -ENOSYS */
extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
#undef __SYSCALL_X32

#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
#define __SYSCALL_X32(nr, sym, qual)

asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
	/*
@@ -23,3 +26,25 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
	[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_64.h>
};

#undef __SYSCALL_64
#undef __SYSCALL_X32

#ifdef CONFIG_X86_X32_ABI

#define __SYSCALL_64(nr, sym, qual)
#define __SYSCALL_X32(nr, sym, qual) [nr] = sym,

/*
 * Dispatch table for x32 syscalls, indexed by syscall number.  Every slot
 * first defaults to sys_ni_syscall; the __SYSCALL_X32 entries pulled in from
 * <asm/syscalls_64.h> then override their own slots.  __SYSCALL_64 is defined
 * to expand to nothing at this point, so 64-bit-only entries are left out of
 * this table and keep the sys_ni_syscall default.
 */
asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
	/*
	 * Smells like a compiler bug -- it doesn't work
	 * when the & below is removed.
	 */
	[0 ... __NR_syscall_x32_max] = &sys_ni_syscall,
#include <asm/syscalls_64.h>
};

#undef __SYSCALL_64
#undef __SYSCALL_X32

#endif
+17 −14
Original line number Diff line number Diff line
#!/bin/sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

in="$1"
out="$2"

syscall_macro() {
    abi="$1"
    nr="$2"
    entry="$3"
    local abi="$1"
    local nr="$2"
    local entry="$3"

    # Entry can be either just a function name or "function/qualifier"
    real_entry="${entry%%/*}"
@@ -21,11 +21,11 @@ syscall_macro() {
}

emit() {
    abi="$1"
    nr="$2"
    entry="$3"
    compat="$4"
    umlentry=""
    local abi="$1"
    local nr="$2"
    local entry="$3"
    local compat="$4"
    local umlentry=""

    if [ "$abi" != "I386" -a -n "$compat" ]; then
	echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
@@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | (
    while read nr abi name entry compat; do
	abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
	if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
	    # COMMON is the same as 64, except that we don't expect X32
	    # programs to use it.  Our expectation has nothing to do with
	    # any generated code, so treat them the same.
	    emit 64 "$nr" "$entry" "$compat"
	    if [ "$abi" = "COMMON" ]; then
		# COMMON means that this syscall exists in the same form for
		# 64-bit and X32.
		echo "#ifdef CONFIG_X86_X32_ABI"
		emit X32 "$nr" "$entry" "$compat"
		echo "#endif"
	    fi
	elif [ "$abi" = "X32" ]; then
	    # X32 is equivalent to 64 on an X32-compatible kernel.
	    echo "#ifdef CONFIG_X86_X32_ABI"
	    emit 64 "$nr" "$entry" "$compat"
	    emit X32 "$nr" "$entry" "$compat"
	    echo "#endif"
	elif [ "$abi" = "I386" ]; then
	    emit "$abi" "$nr" "$entry" "$compat"
+4 −0
Original line number Diff line number Diff line
@@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[];
extern const sys_call_ptr_t ia32_sys_call_table[];
#endif

#ifdef CONFIG_X86_X32_ABI
extern const sys_call_ptr_t x32_sys_call_table[];
#endif

/*
 * Only the low 32 bits of orig_ax are meaningful, so we return int.
 * This importantly ignores the high bits on 64-bit, so comparisons
+0 −6
Original line number Diff line number Diff line
@@ -5,12 +5,6 @@
#include <uapi/asm/unistd.h>


# ifdef CONFIG_X86_X32_ABI
#  define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
# else
#  define __SYSCALL_MASK (~0)
# endif

# ifdef CONFIG_X86_32

#  include <asm/unistd_32.h>
Loading