Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c7c4591d authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull namespace changes from Eric Biederman:
 "This is an assorted mishmash of small cleanups, enhancements and bug
  fixes.

  The major theme is user namespace mount restrictions.  nsown_capable
  is killed as it encourages not thinking about details that need to be
  considered.  A very hard to hit pid namespace exiting bug was finally
  tracked and fixed.  A couple of cleanups to the basic namespace
  infrastructure.

  Finally there is an enhancement that makes per user namespace
  capabilities usable as capabilities, and an enhancement that allows
  the per userns root to nice other processes in the user namespace"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  userns:  Kill nsown_capable it makes the wrong thing easy
  capabilities: allow nice if we are privileged
  pidns: Don't have unshare(CLONE_NEWPID) imply CLONE_THREAD
  userns: Allow PR_CAPBSET_DROP in a user namespace.
  namespaces: Simplify copy_namespaces so it is clear what is going on.
  pidns: Fix hang in zap_pid_ns_processes by sending a potentially extra wakeup
  sysfs: Restrict mounting sysfs
  userns: Better restrictions on when proc and sysfs can be mounted
  vfs: Don't copy mount bind mounts of /proc/<pid>/ns/mnt between namespaces
  kernel/nsproxy.c: Improving a snippet of code.
  proc: Restrict mounting the proc filesystem
  vfs: Lock in place mounts from more privileged users
parents 11c7b03d c7b96acf
Loading
Loading
Loading
Loading
+95 −26
Original line number Diff line number Diff line
@@ -831,6 +831,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
	if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
		mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;

	/* Don't allow unprivileged users to reveal what is under a mount */
	if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
		mnt->mnt.mnt_flags |= MNT_LOCKED;

	atomic_inc(&sb->s_active);
	mnt->mnt.mnt_sb = sb;
	mnt->mnt.mnt_root = dget(root);
@@ -1327,6 +1331,8 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
		goto dput_and_out;
	if (!check_mnt(mnt))
		goto dput_and_out;
	if (mnt->mnt.mnt_flags & MNT_LOCKED)
		goto dput_and_out;

	retval = do_umount(mnt, flags);
dput_and_out:
@@ -1349,14 +1355,11 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)

#endif

static bool mnt_ns_loop(struct path *path)
static bool is_mnt_ns_file(struct dentry *dentry)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct inode *inode = path->dentry->d_inode;
	/* Is this a proxy for a mount namespace? */
	struct inode *inode = dentry->d_inode;
	struct proc_ns *ei;
	struct mnt_namespace *mnt_ns;

	if (!proc_ns_inode(inode))
		return false;
@@ -1365,7 +1368,19 @@ static bool mnt_ns_loop(struct path *path)
	if (ei->ns_ops != &mntns_operations)
		return false;

	mnt_ns = ei->ns;
	return true;
}

static bool mnt_ns_loop(struct dentry *dentry)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct mnt_namespace *mnt_ns;
	if (!is_mnt_ns_file(dentry))
		return false;

	mnt_ns = get_proc_ns(dentry->d_inode)->ns;
	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

@@ -1374,13 +1389,17 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt.mnt_flags &= ~MNT_LOCKED;
	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
@@ -1390,7 +1409,13 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
@@ -1696,6 +1721,19 @@ static int do_change_type(struct path *path, int flag)
	return err;
}

static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *child;
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (!is_subdir(child->mnt_mountpoint, dentry))
			continue;

		if (child->mnt.mnt_flags & MNT_LOCKED)
			return true;
	}
	return false;
}

/*
 * do loopback mount.
 */
@@ -1713,7 +1751,7 @@ static int do_loopback(struct path *path, const char *old_name,
		return err;

	err = -EINVAL;
	if (mnt_ns_loop(&old_path))
	if (mnt_ns_loop(old_path.dentry))
		goto out; 

	mp = lock_mount(path);
@@ -1731,8 +1769,11 @@ static int do_loopback(struct path *path, const char *old_name,
	if (!check_mnt(parent) || !check_mnt(old))
		goto out2;

	if (!recurse && has_locked_children(old, old_path.dentry))
		goto out2;

	if (recurse)
		mnt = copy_tree(old, old_path.dentry, 0);
		mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
	else
		mnt = clone_mnt(old, old_path.dentry, 0);

@@ -1741,6 +1782,8 @@ static int do_loopback(struct path *path, const char *old_name,
		goto out2;
	}

	mnt->mnt.mnt_flags &= ~MNT_LOCKED;

	err = graft_tree(mnt, parent, mp);
	if (err) {
		br_write_lock(&vfsmount_lock);
@@ -1853,6 +1896,9 @@ static int do_move_mount(struct path *path, const char *old_name)
	if (!check_mnt(p) || !check_mnt(old))
		goto out1;

	if (old->mnt.mnt_flags & MNT_LOCKED)
		goto out1;

	err = -EINVAL;
	if (old_path.dentry != old_path.mnt->mnt_root)
		goto out1;
@@ -2389,7 +2435,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,

	namespace_lock();
	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_ALL | CL_EXPIRE;
	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
	if (user_ns != mnt_ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
@@ -2424,6 +2470,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
		}
		p = next_mnt(p, old);
		q = next_mnt(q, new);
		if (!q)
			break;
		while (p->mnt.mnt_root != q->mnt.mnt_root)
			p = next_mnt(p, old);
	}
	namespace_unlock();

@@ -2630,6 +2680,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		goto out4;
	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
		goto out4;
	if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
		goto out4;
	error = -ENOENT;
	if (d_unlinked(new.dentry))
		goto out4;
@@ -2653,6 +2705,10 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
	br_write_lock(&vfsmount_lock);
	detach_mnt(new_mnt, &parent_path);
	detach_mnt(root_mnt, &root_parent);
	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
		new_mnt->mnt.mnt_flags |= MNT_LOCKED;
		root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
	}
	/* mount old root on put_old */
	attach_mnt(root_mnt, old_mnt, old_mp);
	/* mount new_root on / */
@@ -2811,25 +2867,38 @@ bool current_chrooted(void)
	return chrooted;
}

void update_mnt_policy(struct user_namespace *userns)
bool fs_fully_visible(struct file_system_type *type)
{
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	struct mount *mnt;
	bool visible = false;

	down_read(&namespace_sem);
	if (unlikely(!ns))
		return false;

	namespace_lock();
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		switch (mnt->mnt.mnt_sb->s_magic) {
		case SYSFS_MAGIC:
			userns->may_mount_sysfs = true;
			break;
		case PROC_SUPER_MAGIC:
			userns->may_mount_proc = true;
			break;
		struct mount *child;
		if (mnt->mnt.mnt_sb->s_type != type)
			continue;

		/* This mount is not fully visible if there are any child mounts
		 * that cover anything except for empty directories.
		 */
		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
			struct inode *inode = child->mnt_mountpoint->d_inode;
			if (!S_ISDIR(inode->i_mode))
				goto next;
			if (inode->i_nlink != 2)
				goto next;
		}
		if (userns->may_mount_sysfs && userns->may_mount_proc)
			break;
		visible = true;
		goto found;
	next:	;
	}
	up_read(&namespace_sem);
found:
	namespace_unlock();
	return visible;
}

static void *mntns_get(struct task_struct *task)
@@ -2860,8 +2929,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
	struct path root;

	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
	    !nsown_capable(CAP_SYS_CHROOT) ||
	    !nsown_capable(CAP_SYS_ADMIN))
	    !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	if (fs->users != 1)
+1 −1
Original line number Diff line number Diff line
@@ -443,7 +443,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
		goto dput_and_out;

	error = -EPERM;
	if (!nsown_capable(CAP_SYS_CHROOT))
	if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
		goto dput_and_out;
	error = security_path_chroot(&path);
	if (error)
+4 −1
Original line number Diff line number Diff line
@@ -19,11 +19,14 @@

#define CL_EXPIRE    		0x01
#define CL_SLAVE     		0x02
#define CL_COPY_ALL 		0x04
#define CL_COPY_UNBINDABLE	0x04
#define CL_MAKE_SHARED 		0x08
#define CL_PRIVATE 		0x10
#define CL_SHARED_TO_SLAVE	0x20
#define CL_UNPRIVILEGED		0x40
#define CL_COPY_MNT_NS_FILE	0x80

#define CL_COPY_ALL		(CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)

static inline void set_mnt_shared(struct mount *mnt)
{
+5 −1
Original line number Diff line number Diff line
@@ -110,7 +110,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
		ns = task_active_pid_ns(current);
		options = data;

		if (!current_user_ns()->may_mount_proc)
		if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
			return ERR_PTR(-EPERM);

		/* Does the mounter have privilege over the pid namespace? */
		if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
			return ERR_PTR(-EPERM);
	}

+9 −2
Original line number Diff line number Diff line
@@ -112,9 +112,16 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
	struct super_block *sb;
	int error;

	if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
	if (!(flags & MS_KERNMOUNT)) {
		if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
			return ERR_PTR(-EPERM);

		for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
			if (!kobj_ns_current_may_mount(type))
				return ERR_PTR(-EPERM);
		}
	}

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);
Loading