Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 13aa72f0 authored by Andy Lutomirski, committed by Kees Cook
Browse files

seccomp: Refactor the filter callback and the API



The reason I did this is to add a seccomp API that will be usable
for an x86 fast path.  The x86 entry code needs to use a rather
expensive slow path for a syscall that might be visible to things
like ptrace.  By splitting seccomp into two phases, we can check
whether we need the slow path and then use the fast path if the
filter allows the syscall or just returns some errno.

As a side effect, I think the new code is much easier to understand
than the old code.

This has one user-visible effect: the audit record written for
SECCOMP_RET_TRACE is now a simple indication that SECCOMP_RET_TRACE
happened.  It used to depend in a complicated way on what the tracer
did.  I couldn't make much sense of it.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
parent a4412fc9
Loading
Loading
Loading
Loading
+6 −0
Original line number Original line Diff line number Diff line
@@ -35,6 +35,12 @@ static inline int secure_computing(void)
		return  __secure_computing();
		return  __secure_computing();
	return 0;
	return 0;
}
}

#define SECCOMP_PHASE1_OK	0
#define SECCOMP_PHASE1_SKIP	1

extern u32 seccomp_phase1(void);
int seccomp_phase2(u32 phase1_result);
#else
#else
extern void secure_computing_strict(int this_syscall);
extern void secure_computing_strict(int this_syscall);
#endif
#endif
+124 −66
Original line number Original line Diff line number Diff line
@@ -21,8 +21,6 @@
#include <linux/slab.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/syscalls.h>


/* #define SECCOMP_DEBUG 1 */

#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
#include <asm/syscall.h>
#endif
#endif
@@ -601,10 +599,21 @@ void secure_computing_strict(int this_syscall)
#else
#else
int __secure_computing(void)
int __secure_computing(void)
{
{
	struct pt_regs *regs = task_pt_regs(current);
	u32 phase1_result = seccomp_phase1();
	int this_syscall = syscall_get_nr(current, regs);

	int exit_sig = 0;
	if (likely(phase1_result == SECCOMP_PHASE1_OK))
	u32 ret;
		return 0;
	else if (likely(phase1_result == SECCOMP_PHASE1_SKIP))
		return -1;
	else
		return seccomp_phase2(phase1_result);
}

#ifdef CONFIG_SECCOMP_FILTER
static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs)
{
	u32 filter_ret, action;
	int data;


	/*
	/*
	 * Make sure that any changes to mode from another thread have
	 * Make sure that any changes to mode from another thread have
@@ -612,35 +621,108 @@ int __secure_computing(void)
	 */
	 */
	rmb();
	rmb();


	switch (current->seccomp.mode) {
	filter_ret = seccomp_run_filters();
	case SECCOMP_MODE_STRICT:
	data = filter_ret & SECCOMP_RET_DATA;
		__secure_computing_strict(this_syscall);
	action = filter_ret & SECCOMP_RET_ACTION;
		return 0;

#ifdef CONFIG_SECCOMP_FILTER
	switch (action) {
	case SECCOMP_MODE_FILTER: {
		int data;
		ret = seccomp_run_filters();
		data = ret & SECCOMP_RET_DATA;
		ret &= SECCOMP_RET_ACTION;
		switch (ret) {
	case SECCOMP_RET_ERRNO:
	case SECCOMP_RET_ERRNO:
		/* Set the low-order 16 bits as an errno. */
		/* Set the low-order 16 bits as an errno. */
		syscall_set_return_value(current, regs,
		syscall_set_return_value(current, regs,
					 -data, 0);
					 -data, 0);
		goto skip;
		goto skip;

	case SECCOMP_RET_TRAP:
	case SECCOMP_RET_TRAP:
		/* Show the handler the original registers. */
		/* Show the handler the original registers. */
		syscall_rollback(current, regs);
		syscall_rollback(current, regs);
		/* Let the filter pass back 16 bits of data. */
		/* Let the filter pass back 16 bits of data. */
		seccomp_send_sigsys(this_syscall, data);
		seccomp_send_sigsys(this_syscall, data);
		goto skip;
		goto skip;

	case SECCOMP_RET_TRACE:
	case SECCOMP_RET_TRACE:
		return filter_ret;  /* Save the rest for phase 2. */

	case SECCOMP_RET_ALLOW:
		return SECCOMP_PHASE1_OK;

	case SECCOMP_RET_KILL:
	default:
		audit_seccomp(this_syscall, SIGSYS, action);
		do_exit(SIGSYS);
	}

	unreachable();

skip:
	audit_seccomp(this_syscall, 0, action);
	return SECCOMP_PHASE1_SKIP;
}
#endif

/**
 * seccomp_phase1() - run fast path seccomp checks on the current syscall
 *
 * This only reads pt_regs via the syscall_xyz helpers.  The only change
 * it will make to pt_regs is via syscall_set_return_value, and it will
 * only do that if it returns SECCOMP_PHASE1_SKIP.
 *
 * It may also call do_exit or force a signal; these actions must be
 * safe.
 *
 * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should
 * be processed normally.
 *
 * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be
 * invoked.  In this case, seccomp_phase1 will have set the return value
 * using syscall_set_return_value.
 *
 * If it returns anything else, then the return value should be passed
 * to seccomp_phase2 from a context in which ptrace hooks are safe.
 */
u32 seccomp_phase1(void)
{
	/* Read the task's seccomp mode once; the switch below acts on this copy. */
	int mode = current->seccomp.mode;
	struct pt_regs *regs = task_pt_regs(current);
	/* Syscall number as reported by syscall_get_nr() for the current task. */
	int this_syscall = syscall_get_nr(current, regs);

	switch (mode) {
	case SECCOMP_MODE_STRICT:
		/* If the strict check returns at all, the syscall is allowed. */
		__secure_computing_strict(this_syscall);  /* may call do_exit */
		return SECCOMP_PHASE1_OK;
#ifdef CONFIG_SECCOMP_FILTER
	case SECCOMP_MODE_FILTER:
		/* The filter helper's result is handed back to the caller unchanged. */
		return __seccomp_phase1_filter(this_syscall, regs);
#endif
	default:
		/* Any other mode value reaching this point is treated as a kernel bug. */
		BUG();
	}
}

/**
 * seccomp_phase2() - finish slow path seccomp work for the current syscall
 * @phase1_result: The return value from seccomp_phase1()
 *
 * This must be called from a context in which ptrace hooks can be used.
 *
 * Returns 0 if the syscall should be processed or -1 to skip the syscall.
 */
int seccomp_phase2(u32 phase1_result)
{
	struct pt_regs *regs = task_pt_regs(current);
	u32 action = phase1_result & SECCOMP_RET_ACTION;
	int data = phase1_result & SECCOMP_RET_DATA;

	BUG_ON(action != SECCOMP_RET_TRACE);

	audit_seccomp(syscall_get_nr(current, regs), 0, action);

	/* Skip these calls if there is no tracer. */
	/* Skip these calls if there is no tracer. */
	if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
	if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
		syscall_set_return_value(current, regs,
		syscall_set_return_value(current, regs,
					 -ENOSYS, 0);
					 -ENOSYS, 0);
				goto skip;
		return -1;
	}
	}

	/* Allow the BPF to provide the event message */
	/* Allow the BPF to provide the event message */
	ptrace_event(PTRACE_EVENT_SECCOMP, data);
	ptrace_event(PTRACE_EVENT_SECCOMP, data);
	/*
	/*
@@ -650,35 +732,11 @@ int __secure_computing(void)
	 * call that may not be intended.
	 * call that may not be intended.
	 */
	 */
	if (fatal_signal_pending(current))
	if (fatal_signal_pending(current))
				break;
		do_exit(SIGSYS);
	if (syscall_get_nr(current, regs) < 0)
	if (syscall_get_nr(current, regs) < 0)
				goto skip;  /* Explicit request to skip. */
		return -1;  /* Explicit request to skip. */


	return 0;
	return 0;
		case SECCOMP_RET_ALLOW:
			return 0;
		case SECCOMP_RET_KILL:
		default:
			break;
		}
		exit_sig = SIGSYS;
		break;
	}
#endif
	default:
		BUG();
	}

#ifdef SECCOMP_DEBUG
	dump_stack();
#endif
	audit_seccomp(this_syscall, exit_sig, ret);
	do_exit(exit_sig);
#ifdef CONFIG_SECCOMP_FILTER
skip:
	audit_seccomp(this_syscall, exit_sig, ret);
	return -1;
#endif
}
}
#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */