Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 51f39a1f authored by David Drysdale, committed by Linus Torvalds
Browse files

syscalls: implement execveat() system call

This patchset adds execveat(2) for x86, and is derived from Meredydd
Luff's patch from Sept 2012 (https://lkml.org/lkml/2012/9/11/528).

The primary aim of adding an execveat syscall is to allow an
implementation of fexecve(3) that does not rely on the /proc filesystem,
at least for executables (rather than scripts).  The current glibc version
of fexecve(3) is implemented via /proc, which causes problems in sandboxed
or otherwise restricted environments.

Given the desire for a /proc-free fexecve() implementation, HPA suggested
(https://lkml.org/lkml/2006/7/11/556) that an execveat(2) syscall would be
an appropriate generalization.

Also, having a new syscall means that it can take a flags argument without
back-compatibility concerns.  The current implementation just defines the
AT_EMPTY_PATH and AT_SYMLINK_NOFOLLOW flags, but other flags could be
added in future -- for example, flags for new namespaces (as suggested at
https://lkml.org/lkml/2006/7/11/474).

Related history:
 - https://lkml.org/lkml/2006/12/27/123 is an example of someone
   realizing that fexecve() is likely to fail in a chroot environment.
 - http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=514043 covered
   documenting the /proc requirement of fexecve(3) in its manpage, to
   "prevent other people from wasting their time".
 - https://bugzilla.redhat.com/show_bug.cgi?id=241609 described a
   problem where a process that did setuid() could not fexecve()
   because it no longer had access to /proc/self/fd; this has since
   been fixed.

This patch (of 4):

Add a new execveat(2) system call.  execveat() is to execve() as openat()
is to open(): it takes a file descriptor that refers to a directory, and
resolves the filename relative to that.

In addition, if the filename is empty and AT_EMPTY_PATH is specified,
execveat() executes the file to which the file descriptor refers.  This
replicates the functionality of fexecve(), which is a system call in other
UNIXen, but in Linux glibc it depends on opening "/proc/self/fd/<fd>" (and
so relies on /proc being mounted).

The filename fed to the executed program as argv[0] (or the name of the
script fed to a script interpreter) will be of the form "/dev/fd/<fd>"
(for an empty filename) or "/dev/fd/<fd>/<filename>", effectively
reflecting how the executable was found.  This does however mean that
execution of a script in a /proc-less environment won't work; also, script
execution via an O_CLOEXEC file descriptor fails (as the file will not be
accessible after exec).

Based on patches by Meredydd Luff.

Signed-off-by: David Drysdale <drysdale@google.com>
Cc: Meredydd Luff <meredydd@senatehouse.org>
Cc: Shuah Khan <shuah.kh@samsung.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Rich Felker <dalias@aerifal.cx>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent c0ef0cc9
Loading
Loading
Loading
Loading
+4 −0
Original line number Original line Diff line number Diff line
@@ -42,6 +42,10 @@ static int load_em86(struct linux_binprm *bprm)
			return -ENOEXEC;
			return -ENOEXEC;
	}
	}


	/* Need to be able to load the file after exec */
	if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
		return -ENOENT;

	allow_write_access(bprm->file);
	allow_write_access(bprm->file);
	fput(bprm->file);
	fput(bprm->file);
	bprm->file = NULL;
	bprm->file = NULL;
+4 −0
Original line number Original line Diff line number Diff line
@@ -144,6 +144,10 @@ static int load_misc_binary(struct linux_binprm *bprm)
	if (!fmt)
	if (!fmt)
		goto ret;
		goto ret;


	/* Need to be able to load the file after exec */
	if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
		return -ENOENT;

	if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
	if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
		retval = remove_arg_zero(bprm);
		retval = remove_arg_zero(bprm);
		if (retval)
		if (retval)
+10 −0
Original line number Original line Diff line number Diff line
@@ -24,6 +24,16 @@ static int load_script(struct linux_binprm *bprm)


	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
		return -ENOEXEC;
		return -ENOEXEC;

	/*
	 * If the script filename will be inaccessible after exec, typically
	 * because it is a "/dev/fd/<fd>/.." path against an O_CLOEXEC fd, give
	 * up now (on the assumption that the interpreter will want to load
	 * this file).
	 */
	if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
		return -ENOENT;

	/*
	/*
	 * This section does the #! interpretation.
	 * This section does the #! interpretation.
	 * Sorta complicated, but hopefully it will work.  -TYT
	 * Sorta complicated, but hopefully it will work.  -TYT
+100 −13
Original line number Original line Diff line number Diff line
@@ -748,18 +748,25 @@ EXPORT_SYMBOL(setup_arg_pages);


#endif /* CONFIG_MMU */
#endif /* CONFIG_MMU */


static struct file *do_open_exec(struct filename *name)
static struct file *do_open_execat(int fd, struct filename *name, int flags)
{
{
	struct file *file;
	struct file *file;
	int err;
	int err;
	static const struct open_flags open_exec_flags = {
	struct open_flags open_exec_flags = {
		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
		.acc_mode = MAY_EXEC | MAY_OPEN,
		.acc_mode = MAY_EXEC | MAY_OPEN,
		.intent = LOOKUP_OPEN,
		.intent = LOOKUP_OPEN,
		.lookup_flags = LOOKUP_FOLLOW,
		.lookup_flags = LOOKUP_FOLLOW,
	};
	};


	file = do_filp_open(AT_FDCWD, name, &open_exec_flags);
	if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
		return ERR_PTR(-EINVAL);
	if (flags & AT_SYMLINK_NOFOLLOW)
		open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
	if (flags & AT_EMPTY_PATH)
		open_exec_flags.lookup_flags |= LOOKUP_EMPTY;

	file = do_filp_open(fd, name, &open_exec_flags);
	if (IS_ERR(file))
	if (IS_ERR(file))
		goto out;
		goto out;


@@ -770,12 +777,13 @@ static struct file *do_open_exec(struct filename *name)
	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
		goto exit;
		goto exit;


	fsnotify_open(file);

	err = deny_write_access(file);
	err = deny_write_access(file);
	if (err)
	if (err)
		goto exit;
		goto exit;


	if (name->name[0] != '\0')
		fsnotify_open(file);

out:
out:
	return file;
	return file;


@@ -787,7 +795,7 @@ static struct file *do_open_exec(struct filename *name)
struct file *open_exec(const char *name)
struct file *open_exec(const char *name)
{
{
	struct filename tmp = { .name = name };
	struct filename tmp = { .name = name };
	return do_open_exec(&tmp);
	return do_open_execat(AT_FDCWD, &tmp, 0);
}
}
EXPORT_SYMBOL(open_exec);
EXPORT_SYMBOL(open_exec);


@@ -1428,10 +1436,12 @@ static int exec_binprm(struct linux_binprm *bprm)
/*
/*
 * sys_execve() executes a new program.
 * sys_execve() executes a new program.
 */
 */
static int do_execve_common(struct filename *filename,
static int do_execveat_common(int fd, struct filename *filename,
			      struct user_arg_ptr argv,
			      struct user_arg_ptr argv,
				struct user_arg_ptr envp)
			      struct user_arg_ptr envp,
			      int flags)
{
{
	char *pathbuf = NULL;
	struct linux_binprm *bprm;
	struct linux_binprm *bprm;
	struct file *file;
	struct file *file;
	struct files_struct *displaced;
	struct files_struct *displaced;
@@ -1472,7 +1482,7 @@ static int do_execve_common(struct filename *filename,
	check_unsafe_exec(bprm);
	check_unsafe_exec(bprm);
	current->in_execve = 1;
	current->in_execve = 1;


	file = do_open_exec(filename);
	file = do_open_execat(fd, filename, flags);
	retval = PTR_ERR(file);
	retval = PTR_ERR(file);
	if (IS_ERR(file))
	if (IS_ERR(file))
		goto out_unmark;
		goto out_unmark;
@@ -1480,7 +1490,28 @@ static int do_execve_common(struct filename *filename,
	sched_exec();
	sched_exec();


	bprm->file = file;
	bprm->file = file;
	bprm->filename = bprm->interp = filename->name;
	if (fd == AT_FDCWD || filename->name[0] == '/') {
		bprm->filename = filename->name;
	} else {
		if (filename->name[0] == '\0')
			pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d", fd);
		else
			pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d/%s",
					    fd, filename->name);
		if (!pathbuf) {
			retval = -ENOMEM;
			goto out_unmark;
		}
		/*
		 * Record that a name derived from an O_CLOEXEC fd will be
		 * inaccessible after exec. Relies on having exclusive access to
		 * current->files (due to unshare_files above).
		 */
		if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
			bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
		bprm->filename = pathbuf;
	}
	bprm->interp = bprm->filename;


	retval = bprm_mm_init(bprm);
	retval = bprm_mm_init(bprm);
	if (retval)
	if (retval)
@@ -1521,6 +1552,7 @@ static int do_execve_common(struct filename *filename,
	acct_update_integrals(current);
	acct_update_integrals(current);
	task_numa_free(current);
	task_numa_free(current);
	free_bprm(bprm);
	free_bprm(bprm);
	kfree(pathbuf);
	putname(filename);
	putname(filename);
	if (displaced)
	if (displaced)
		put_files_struct(displaced);
		put_files_struct(displaced);
@@ -1538,6 +1570,7 @@ static int do_execve_common(struct filename *filename,


out_free:
out_free:
	free_bprm(bprm);
	free_bprm(bprm);
	kfree(pathbuf);


out_files:
out_files:
	if (displaced)
	if (displaced)
@@ -1553,7 +1586,18 @@ int do_execve(struct filename *filename,
{
{
	struct user_arg_ptr argv = { .ptr.native = __argv };
	struct user_arg_ptr argv = { .ptr.native = __argv };
	struct user_arg_ptr envp = { .ptr.native = __envp };
	struct user_arg_ptr envp = { .ptr.native = __envp };
	return do_execve_common(filename, argv, envp);
	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
}

int do_execveat(int fd, struct filename *filename,
		const char __user *const __user *__argv,
		const char __user *const __user *__envp,
		int flags)
{
	struct user_arg_ptr argv = { .ptr.native = __argv };
	struct user_arg_ptr envp = { .ptr.native = __envp };

	return do_execveat_common(fd, filename, argv, envp, flags);
}
}


#ifdef CONFIG_COMPAT
#ifdef CONFIG_COMPAT
@@ -1569,7 +1613,23 @@ static int compat_do_execve(struct filename *filename,
		.is_compat = true,
		.is_compat = true,
		.ptr.compat = __envp,
		.ptr.compat = __envp,
	};
	};
	return do_execve_common(filename, argv, envp);
	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
}

static int compat_do_execveat(int fd, struct filename *filename,
			      const compat_uptr_t __user *__argv,
			      const compat_uptr_t __user *__envp,
			      int flags)
{
	struct user_arg_ptr argv = {
		.is_compat = true,
		.ptr.compat = __argv,
	};
	struct user_arg_ptr envp = {
		.is_compat = true,
		.ptr.compat = __envp,
	};
	return do_execveat_common(fd, filename, argv, envp, flags);
}
}
#endif
#endif


@@ -1609,6 +1669,20 @@ SYSCALL_DEFINE3(execve,
{
{
	return do_execve(getname(filename), argv, envp);
	return do_execve(getname(filename), argv, envp);
}
}

SYSCALL_DEFINE5(execveat,
		int, fd, const char __user *, filename,
		const char __user *const __user *, argv,
		const char __user *const __user *, envp,
		int, flags)
{
	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;

	return do_execveat(fd,
			   getname_flags(filename, lookup_flags, NULL),
			   argv, envp, flags);
}

#ifdef CONFIG_COMPAT
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
	const compat_uptr_t __user *, argv,
	const compat_uptr_t __user *, argv,
@@ -1616,4 +1690,17 @@ COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
{
{
	return compat_do_execve(getname(filename), argv, envp);
	return compat_do_execve(getname(filename), argv, envp);
}
}

COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
		       const char __user *, filename,
		       const compat_uptr_t __user *, argv,
		       const compat_uptr_t __user *, envp,
		       int,  flags)
{
	int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;

	return compat_do_execveat(fd,
				  getname_flags(filename, lookup_flags, NULL),
				  argv, envp, flags);
}
#endif
#endif
+1 −1
Original line number Original line Diff line number Diff line
@@ -130,7 +130,7 @@ void final_putname(struct filename *name)


#define EMBEDDED_NAME_MAX	(PATH_MAX - sizeof(struct filename))
#define EMBEDDED_NAME_MAX	(PATH_MAX - sizeof(struct filename))


static struct filename *
struct filename *
getname_flags(const char __user *filename, int flags, int *empty)
getname_flags(const char __user *filename, int flags, int *empty)
{
{
	struct filename *result, *err;
	struct filename *result, *err;
Loading