Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8f502d5b authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull usernamespace mount fixes from Eric Biederman:
 "Way back in October Andrey Vagin reported that umount(MNT_DETACH)
  could be used to defeat MNT_LOCKED.  As I worked to fix this I
  discovered that combined with mount propagation and an appropriate
  selection of shared subtrees a reference to a directory on an
  unmounted filesystem is not necessary.

  That MNT_DETACH is allowed in user namespace in a form that can break
  MNT_LOCKED comes from my early misunderstanding what MNT_DETACH does.

  To avoid breaking existing userspace the conflict between MNT_DETACH
  and MNT_LOCKED is fixed by leaving mounts that are locked to their
  parents in the mount hash table until the last reference goes away.

  While investigating this issue I also found an issue with
  __detach_mounts.  The code was unnecessarily and incorrectly
  triggering mount propagation.  Resulting in too many mounts going away
  when a directory is deleted, and too many cpu cycles are burned while
  doing that.

  Looking some more I realized that __detach_mounts by only keeping
  mounts connected that were MNT_LOCKED it had the potential to still
  leak information so I tweaked the code to keep everything locked
  together that possibly could be.

  This code was almost ready last cycle but Al invented fs_pin which
  slightly simplifies this code but required rewrites and retesting, and
  I have not been in top form for a while so it took me a while to get
  all of that done.  Similiarly this pull request is late because I have
  been feeling absolutely miserable all week.

  The issue of being able to escape a bind mount has not yet been
  addressed, as the fixes are not yet mature"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  mnt: Update detach_mounts to leave mounts connected
  mnt: Fix the error check in __detach_mounts
  mnt: Honor MNT_LOCKED when detaching mounts
  fs_pin: Allow for the possibility that m_list or s_list go unused.
  mnt: Factor umount_mnt from umount_tree
  mnt: Factor out unhash_mnt from detach_mnt and umount_tree
  mnt: Fail collect_mounts when applied to unmounted mounts
  mnt: Don't propagate unmounts to locked mounts
  mnt: On an unmount propagate clearing of MNT_LOCKED
  mnt: Delay removal from the mount hash.
  mnt: Add MNT_UMOUNT flag
  mnt: In umount_tree reuse mnt_list instead of mnt_hash
  mnt: Don't propagate umounts in __detach_mounts
  mnt: Improve the umount_tree flags
  mnt: Use hlist_move_list in namespace_unlock
parents 06a60dec e0c9c0af
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -9,8 +9,8 @@ static DEFINE_SPINLOCK(pin_lock);
void pin_remove(struct fs_pin *pin)
{
	spin_lock(&pin_lock);
	hlist_del(&pin->m_list);
	hlist_del(&pin->s_list);
	hlist_del_init(&pin->m_list);
	hlist_del_init(&pin->s_list);
	spin_unlock(&pin_lock);
	spin_lock_irq(&pin->wait.lock);
	pin->done = 1;
+96 −46
Original line number Diff line number Diff line
@@ -632,13 +632,16 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 */
struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
{
	struct mount *p, *res;
	res = p = __lookup_mnt(mnt, dentry);
	struct mount *p, *res = NULL;
	p = __lookup_mnt(mnt, dentry);
	if (!p)
		goto out;
	if (!(p->mnt.mnt_flags & MNT_UMOUNT))
		res = p;
	hlist_for_each_entry_continue(p, mnt_hash) {
		if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
			break;
		if (!(p->mnt.mnt_flags & MNT_UMOUNT))
			res = p;
	}
out:
@@ -795,10 +798,8 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
/*
 * vfsmount lock must be held for write
 */
static void detach_mnt(struct mount *mnt, struct path *old_path)
static void unhash_mnt(struct mount *mnt)
{
	old_path->dentry = mnt->mnt_mountpoint;
	old_path->mnt = &mnt->mnt_parent->mnt;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	list_del_init(&mnt->mnt_child);
@@ -808,6 +809,26 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
	mnt->mnt_mp = NULL;
}

/*
 * vfsmount lock must be held for write
 */
static void detach_mnt(struct mount *mnt, struct path *old_path)
{
	old_path->dentry = mnt->mnt_mountpoint;
	old_path->mnt = &mnt->mnt_parent->mnt;
	unhash_mnt(mnt);
}

/*
 * vfsmount lock must be held for write
 */
static void umount_mnt(struct mount *mnt)
{
	/* old mountpoint will be dropped when we can do that */
	mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
	unhash_mnt(mnt);
}

/*
 * vfsmount lock must be held for write
 */
@@ -1078,6 +1099,13 @@ static void mntput_no_expire(struct mount *mnt)
	rcu_read_unlock();

	list_del(&mnt->mnt_instance);

	if (unlikely(!list_empty(&mnt->mnt_mounts))) {
		struct mount *p, *tmp;
		list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts,  mnt_child) {
			umount_mnt(p);
		}
	}
	unlock_mount_hash();

	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
@@ -1298,17 +1326,15 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */

static void namespace_unlock(void)
{
	struct hlist_head head = unmounted;
	struct hlist_head head;

	if (likely(hlist_empty(&head))) {
		up_write(&namespace_sem);
		return;
	}
	hlist_move_list(&unmounted, &head);

	head.first->pprev = &head.first;
	INIT_HLIST_HEAD(&unmounted);
	up_write(&namespace_sem);

	if (likely(hlist_empty(&head)))
		return;

	synchronize_rcu();

	group_pin_kill(&head);
@@ -1319,49 +1345,63 @@ static inline void namespace_lock(void)
	down_write(&namespace_sem);
}

enum umount_tree_flags {
	UMOUNT_SYNC = 1,
	UMOUNT_PROPAGATE = 2,
	UMOUNT_CONNECTED = 4,
};
/*
 * mount_lock must be held
 * namespace_sem must be held for write
 * how = 0 => just this tree, don't propagate
 * how = 1 => propagate; we know that nobody else has reference to any victims
 * how = 2 => lazy umount
 */
void umount_tree(struct mount *mnt, int how)
static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
{
	HLIST_HEAD(tmp_list);
	LIST_HEAD(tmp_list);
	struct mount *p;

	if (how & UMOUNT_PROPAGATE)
		propagate_mount_unlock(mnt);

	/* Gather the mounts to umount */
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		hlist_del_init_rcu(&p->mnt_hash);
		hlist_add_head(&p->mnt_hash, &tmp_list);
		p->mnt.mnt_flags |= MNT_UMOUNT;
		list_move(&p->mnt_list, &tmp_list);
	}

	hlist_for_each_entry(p, &tmp_list, mnt_hash)
	/* Hide the mounts from mnt_mounts */
	list_for_each_entry(p, &tmp_list, mnt_list) {
		list_del_init(&p->mnt_child);
	}

	if (how)
	/* Add propogated mounts to the tmp_list */
	if (how & UMOUNT_PROPAGATE)
		propagate_umount(&tmp_list);

	while (!hlist_empty(&tmp_list)) {
		p = hlist_entry(tmp_list.first, struct mount, mnt_hash);
		hlist_del_init_rcu(&p->mnt_hash);
	while (!list_empty(&tmp_list)) {
		bool disconnect;
		p = list_first_entry(&tmp_list, struct mount, mnt_list);
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		__touch_mnt_namespace(p->mnt_ns);
		p->mnt_ns = NULL;
		if (how < 2)
		if (how & UMOUNT_SYNC)
			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;

		pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted);
		disconnect = !(((how & UMOUNT_CONNECTED) &&
				mnt_has_parent(p) &&
				(p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) ||
			       IS_MNT_LOCKED_AND_LAZY(p));

		pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
				 disconnect ? &unmounted : NULL);
		if (mnt_has_parent(p)) {
			hlist_del_init(&p->mnt_mp_list);
			put_mountpoint(p->mnt_mp);
			mnt_add_count(p->mnt_parent, -1);
			/* old mountpoint will be dropped when we can do that */
			p->mnt_ex_mountpoint = p->mnt_mountpoint;
			p->mnt_mountpoint = p->mnt.mnt_root;
			p->mnt_parent = p;
			p->mnt_mp = NULL;
			if (!disconnect) {
				/* Don't forget about p */
				list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
			} else {
				umount_mnt(p);
			}
		}
		change_mnt_propagation(p, MS_PRIVATE);
	}
@@ -1447,14 +1487,14 @@ static int do_umount(struct mount *mnt, int flags)

	if (flags & MNT_DETACH) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, 2);
			umount_tree(mnt, UMOUNT_PROPAGATE);
		retval = 0;
	} else {
		shrink_submounts(mnt);
		retval = -EBUSY;
		if (!propagate_mount_busy(mnt, 2)) {
			if (!list_empty(&mnt->mnt_list))
				umount_tree(mnt, 1);
				umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
			retval = 0;
		}
	}
@@ -1480,13 +1520,20 @@ void __detach_mounts(struct dentry *dentry)

	namespace_lock();
	mp = lookup_mountpoint(dentry);
	if (!mp)
	if (IS_ERR_OR_NULL(mp))
		goto out_unlock;

	lock_mount_hash();
	while (!hlist_empty(&mp->m_list)) {
		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
		umount_tree(mnt, 2);
		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
			struct mount *p, *tmp;
			list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts,  mnt_child) {
				hlist_add_head(&p->mnt_umount.s_list, &unmounted);
				umount_mnt(p);
			}
		}
		else umount_tree(mnt, UMOUNT_CONNECTED);
	}
	unlock_mount_hash();
	put_mountpoint(mp);
@@ -1648,7 +1695,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
out:
	if (res) {
		lock_mount_hash();
		umount_tree(res, 0);
		umount_tree(res, UMOUNT_SYNC);
		unlock_mount_hash();
	}
	return q;
@@ -1660,6 +1707,9 @@ struct vfsmount *collect_mounts(struct path *path)
{
	struct mount *tree;
	namespace_lock();
	if (!check_mnt(real_mount(path->mnt)))
		tree = ERR_PTR(-EINVAL);
	else
		tree = copy_tree(real_mount(path->mnt), path->dentry,
				 CL_COPY_ALL | CL_PRIVATE);
	namespace_unlock();
@@ -1672,7 +1722,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
{
	namespace_lock();
	lock_mount_hash();
	umount_tree(real_mount(mnt), 0);
	umount_tree(real_mount(mnt), UMOUNT_SYNC);
	unlock_mount_hash();
	namespace_unlock();
}
@@ -1855,7 +1905,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 out_cleanup_ids:
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		umount_tree(child, 0);
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
@@ -2035,7 +2085,7 @@ static int do_loopback(struct path *path, const char *old_name,
	err = graft_tree(mnt, parent, mp);
	if (err) {
		lock_mount_hash();
		umount_tree(mnt, 0);
		umount_tree(mnt, UMOUNT_SYNC);
		unlock_mount_hash();
	}
out2:
@@ -2406,7 +2456,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
	while (!list_empty(&graveyard)) {
		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
		touch_mnt_namespace(mnt->mnt_ns);
		umount_tree(mnt, 1);
		umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
	}
	unlock_mount_hash();
	namespace_unlock();
@@ -2477,7 +2527,7 @@ static void shrink_submounts(struct mount *mnt)
			m = list_first_entry(&graveyard, struct mount,
						mnt_expire);
			touch_mnt_namespace(m->mnt_ns);
			umount_tree(m, 1);
			umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
		}
	}
}
+53 −7
Original line number Diff line number Diff line
@@ -361,6 +361,46 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
	return ret;
}

/*
 * Clear MNT_LOCKED when it can be shown to be safe.
 *
 * mount_lock lock must be held for write
 */
void propagate_mount_unlock(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m, *child;

	BUG_ON(parent == mnt);

	for (m = propagation_next(parent, parent); m;
			m = propagation_next(m, parent)) {
		child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
		if (child)
			child->mnt.mnt_flags &= ~MNT_LOCKED;
	}
}

/*
 * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
 */
static void mark_umount_candidates(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m;

	BUG_ON(parent == mnt);

	for (m = propagation_next(parent, parent); m;
			m = propagation_next(m, parent)) {
		struct mount *child = __lookup_mnt_last(&m->mnt,
						mnt->mnt_mountpoint);
		if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
			SET_MNT_MARK(child);
		}
	}
}

/*
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
@@ -378,13 +418,16 @@ static void __propagate_umount(struct mount *mnt)
		struct mount *child = __lookup_mnt_last(&m->mnt,
						mnt->mnt_mountpoint);
		/*
		 * umount the child only if the child has no
		 * other children
		 * umount the child only if the child has no children
		 * and the child is marked safe to unmount.
		 */
		if (child && list_empty(&child->mnt_mounts)) {
		if (!child || !IS_MNT_MARKED(child))
			continue;
		CLEAR_MNT_MARK(child);
		if (list_empty(&child->mnt_mounts)) {
			list_del_init(&child->mnt_child);
			hlist_del_init_rcu(&child->mnt_hash);
			hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
			child->mnt.mnt_flags |= MNT_UMOUNT;
			list_move_tail(&child->mnt_list, &mnt->mnt_list);
		}
	}
}
@@ -396,11 +439,14 @@ static void __propagate_umount(struct mount *mnt)
 *
 * vfsmount lock must be held for write
 */
int propagate_umount(struct hlist_head *list)
int propagate_umount(struct list_head *list)
{
	struct mount *mnt;

	hlist_for_each_entry(mnt, list, mnt_hash)
	list_for_each_entry_reverse(mnt, list, mnt_list)
		mark_umount_candidates(mnt);

	list_for_each_entry(mnt, list, mnt_list)
		__propagate_umount(mnt);
	return 0;
}
+5 −2
Original line number Diff line number Diff line
@@ -19,6 +19,9 @@
#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
#define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED)
#define IS_MNT_LOCKED_AND_LAZY(m) \
	(((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED)

#define CL_EXPIRE    		0x01
#define CL_SLAVE     		0x02
@@ -40,14 +43,14 @@ static inline void set_mnt_shared(struct mount *mnt)
void change_mnt_propagation(struct mount *, int);
int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
		struct hlist_head *);
int propagate_umount(struct hlist_head *);
int propagate_umount(struct list_head *);
int propagate_mount_busy(struct mount *, int);
void propagate_mount_unlock(struct mount *);
void mnt_release_group_id(struct mount *);
int get_dominating_id(struct mount *mnt, const struct path *root);
unsigned int mnt_get_count(struct mount *mnt);
void mnt_set_mountpoint(struct mount *, struct mountpoint *,
			struct mount *);
void umount_tree(struct mount *, int);
struct mount *copy_tree(struct mount *, struct dentry *, int);
bool is_path_reachable(struct mount *, struct dentry *,
			 const struct path *root);
+2 −0
Original line number Diff line number Diff line
@@ -13,6 +13,8 @@ struct vfsmount;
static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
{
	init_waitqueue_head(&p->wait);
	INIT_HLIST_NODE(&p->s_list);
	INIT_HLIST_NODE(&p->m_list);
	p->kill = kill;
}

Loading