Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b12cea91 authored by Al Viro
Browse files

change the locking order for namespace_sem



Have it nested inside ->i_mutex.  Instead of using follow_down()
under namespace_sem, followed by grabbing i_mutex and checking that
the mountpoint-to-be is not dead, do the following:
	grab i_mutex
	check that it's not dead
	grab namespace_sem
	see if anything is mounted there
	if not, we've won
	otherwise
		drop locks
		put_path on what we had
		replace with what's mounted
		retry everything with new mountpoint to be

New helper (lock_mount()) does that.  do_add_mount(), do_move_mount(),
do_loopback() and pivot_root() switched to it; in case of the last
two that eliminates a race we used to have - original code didn't
do follow_down().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent 27cb1572
Loading
Loading
Loading
Loading
+73 −60
Original line number Original line Diff line number Diff line
@@ -1663,9 +1663,35 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
	return err;
	return err;
}
}


/*
 * lock_mount - lock the location described by @path for mounting.
 *
 * Lock order is ->i_mutex of the mountpoint's inode first, then
 * namespace_sem nested inside it.  If lookup_mnt() reports that something
 * is already mounted at @path, both locks are dropped, @path is switched
 * to the root of that mount and the whole sequence is repeated there.
 *
 * Returns 0 with both ->i_mutex (of the final path->dentry) and
 * namespace_sem held; the caller releases them with unlock_mount().
 * Returns -ENOENT with no locks held if cant_mount() rejects the dentry.
 *
 * Note that @path may be modified: on return it refers to the location
 * that was actually locked, which is not necessarily the one passed in.
 */
static int lock_mount(struct path *path)
{
	struct vfsmount *covering;

	for (;;) {
		mutex_lock(&path->dentry->d_inode->i_mutex);
		if (unlikely(cant_mount(path->dentry))) {
			mutex_unlock(&path->dentry->d_inode->i_mutex);
			return -ENOENT;
		}

		down_write(&namespace_sem);
		covering = lookup_mnt(path);
		if (likely(!covering))
			return 0;	/* nothing mounted here: keep both locks */

		/*
		 * Something is mounted on top of this location.  Let go of
		 * everything we hold for the old location and move on to the
		 * root of the covering mount.
		 * NOTE(review): path->mnt takes over the vfsmount returned by
		 * lookup_mnt() without an extra get, so that call presumably
		 * returns a reference - confirm against lookup_mnt().
		 */
		up_write(&namespace_sem);
		mutex_unlock(&path->dentry->d_inode->i_mutex);
		path_put(path);
		path->mnt = covering;
		path->dentry = dget(covering->mnt_root);
	}
}

/*
 * unlock_mount - release the locks held after a successful lock_mount().
 *
 * Drops namespace_sem and then ->i_mutex of path->dentry's inode.  @path
 * must be the same struct path that lock_mount() returned 0 for, since
 * lock_mount() may have redirected it to a different dentry and the mutex
 * released here is looked up through path->dentry.
 */
static void unlock_mount(struct path *path)
{
	up_write(&namespace_sem);
	mutex_unlock(&path->dentry->d_inode->i_mutex);
}

static int graft_tree(struct vfsmount *mnt, struct path *path)
static int graft_tree(struct vfsmount *mnt, struct path *path)
{
{
	int err;
	if (mnt->mnt_sb->s_flags & MS_NOUSER)
	if (mnt->mnt_sb->s_flags & MS_NOUSER)
		return -EINVAL;
		return -EINVAL;


@@ -1673,16 +1699,10 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
	      S_ISDIR(mnt->mnt_root->d_inode->i_mode))
	      S_ISDIR(mnt->mnt_root->d_inode->i_mode))
		return -ENOTDIR;
		return -ENOTDIR;


	err = -ENOENT;
	if (d_unlinked(path->dentry))
	mutex_lock(&path->dentry->d_inode->i_mutex);
		return -ENOENT;
	if (cant_mount(path->dentry))
		goto out_unlock;


	if (!d_unlinked(path->dentry))
	return attach_recursive_mnt(mnt, path, NULL);
		err = attach_recursive_mnt(mnt, path, NULL);
out_unlock:
	mutex_unlock(&path->dentry->d_inode->i_mutex);
	return err;
}
}


/*
/*
@@ -1745,6 +1765,7 @@ static int do_change_type(struct path *path, int flag)
static int do_loopback(struct path *path, char *old_name,
static int do_loopback(struct path *path, char *old_name,
				int recurse)
				int recurse)
{
{
	LIST_HEAD(umount_list);
	struct path old_path;
	struct path old_path;
	struct vfsmount *mnt = NULL;
	struct vfsmount *mnt = NULL;
	int err = mount_is_safe(path);
	int err = mount_is_safe(path);
@@ -1756,13 +1777,16 @@ static int do_loopback(struct path *path, char *old_name,
	if (err)
	if (err)
		return err;
		return err;


	down_write(&namespace_sem);
	err = lock_mount(path);
	if (err)
		goto out;

	err = -EINVAL;
	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old_path.mnt))
	if (IS_MNT_UNBINDABLE(old_path.mnt))
		goto out;
		goto out2;


	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
		goto out;
		goto out2;


	err = -ENOMEM;
	err = -ENOMEM;
	if (recurse)
	if (recurse)
@@ -1771,20 +1795,18 @@ static int do_loopback(struct path *path, char *old_name,
		mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);
		mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);


	if (!mnt)
	if (!mnt)
		goto out;
		goto out2;


	err = graft_tree(mnt, path);
	err = graft_tree(mnt, path);
	if (err) {
	if (err) {
		LIST_HEAD(umount_list);

		br_write_lock(vfsmount_lock);
		br_write_lock(vfsmount_lock);
		umount_tree(mnt, 0, &umount_list);
		umount_tree(mnt, 0, &umount_list);
		br_write_unlock(vfsmount_lock);
		br_write_unlock(vfsmount_lock);
		release_mounts(&umount_list);
	}
	}

out2:
	unlock_mount(path);
	release_mounts(&umount_list);
out:
out:
	up_write(&namespace_sem);
	path_put(&old_path);
	path_put(&old_path);
	return err;
	return err;
}
}
@@ -1873,18 +1895,12 @@ static int do_move_mount(struct path *path, char *old_name)
	if (err)
	if (err)
		return err;
		return err;


	down_write(&namespace_sem);
	err = lock_mount(path);
	err = follow_down(path, true);
	if (err < 0)
	if (err < 0)
		goto out;
		goto out;


	err = -EINVAL;
	err = -EINVAL;
	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
		goto out;

	err = -ENOENT;
	mutex_lock(&path->dentry->d_inode->i_mutex);
	if (cant_mount(path->dentry))
		goto out1;
		goto out1;


	if (d_unlinked(path->dentry))
	if (d_unlinked(path->dentry))
@@ -1926,9 +1942,8 @@ static int do_move_mount(struct path *path, char *old_name)
	 * automatically */
	 * automatically */
	list_del_init(&old_path.mnt->mnt_expire);
	list_del_init(&old_path.mnt->mnt_expire);
out1:
out1:
	mutex_unlock(&path->dentry->d_inode->i_mutex);
	unlock_mount(path);
out:
out:
	up_write(&namespace_sem);
	if (!err)
	if (!err)
		path_put(&parent_path);
		path_put(&parent_path);
	path_put(&old_path);
	path_put(&old_path);
@@ -1983,11 +1998,9 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag


	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);


	down_write(&namespace_sem);
	err = lock_mount(path);
	/* Something was mounted here while we slept */
	if (err)
	err = follow_down(path, true);
		return err;
	if (err < 0)
		goto unlock;


	err = -EINVAL;
	err = -EINVAL;
	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
@@ -2007,7 +2020,7 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag
	err = graft_tree(newmnt, path);
	err = graft_tree(newmnt, path);


unlock:
unlock:
	up_write(&namespace_sem);
	unlock_mount(path);
	return err;
	return err;
}
}


@@ -2575,55 +2588,53 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		goto out1;
		goto out1;


	error = security_sb_pivotroot(&old, &new);
	error = security_sb_pivotroot(&old, &new);
	if (error) {
	if (error)
		path_put(&old);
		goto out2;
		goto out1;
	}


	get_fs_root(current->fs, &root);
	get_fs_root(current->fs, &root);
	down_write(&namespace_sem);
	error = lock_mount(&old);
	mutex_lock(&old.dentry->d_inode->i_mutex);
	if (error)
		goto out3;

	error = -EINVAL;
	error = -EINVAL;
	if (IS_MNT_SHARED(old.mnt) ||
	if (IS_MNT_SHARED(old.mnt) ||
		IS_MNT_SHARED(new.mnt->mnt_parent) ||
		IS_MNT_SHARED(new.mnt->mnt_parent) ||
		IS_MNT_SHARED(root.mnt->mnt_parent))
		IS_MNT_SHARED(root.mnt->mnt_parent))
		goto out2;
		goto out4;
	if (!check_mnt(root.mnt) || !check_mnt(new.mnt))
	if (!check_mnt(root.mnt) || !check_mnt(new.mnt))
		goto out2;
		goto out4;
	error = -ENOENT;
	error = -ENOENT;
	if (cant_mount(old.dentry))
		goto out2;
	if (d_unlinked(new.dentry))
	if (d_unlinked(new.dentry))
		goto out2;
		goto out4;
	if (d_unlinked(old.dentry))
	if (d_unlinked(old.dentry))
		goto out2;
		goto out4;
	error = -EBUSY;
	error = -EBUSY;
	if (new.mnt == root.mnt ||
	if (new.mnt == root.mnt ||
	    old.mnt == root.mnt)
	    old.mnt == root.mnt)
		goto out2; /* loop, on the same file system  */
		goto out4; /* loop, on the same file system  */
	error = -EINVAL;
	error = -EINVAL;
	if (root.mnt->mnt_root != root.dentry)
	if (root.mnt->mnt_root != root.dentry)
		goto out2; /* not a mountpoint */
		goto out4; /* not a mountpoint */
	if (root.mnt->mnt_parent == root.mnt)
	if (root.mnt->mnt_parent == root.mnt)
		goto out2; /* not attached */
		goto out4; /* not attached */
	if (new.mnt->mnt_root != new.dentry)
	if (new.mnt->mnt_root != new.dentry)
		goto out2; /* not a mountpoint */
		goto out4; /* not a mountpoint */
	if (new.mnt->mnt_parent == new.mnt)
	if (new.mnt->mnt_parent == new.mnt)
		goto out2; /* not attached */
		goto out4; /* not attached */
	/* make sure we can reach put_old from new_root */
	/* make sure we can reach put_old from new_root */
	tmp = old.mnt;
	tmp = old.mnt;
	if (tmp != new.mnt) {
	if (tmp != new.mnt) {
		for (;;) {
		for (;;) {
			if (tmp->mnt_parent == tmp)
			if (tmp->mnt_parent == tmp)
				goto out2; /* already mounted on put_old */
				goto out4; /* already mounted on put_old */
			if (tmp->mnt_parent == new.mnt)
			if (tmp->mnt_parent == new.mnt)
				break;
				break;
			tmp = tmp->mnt_parent;
			tmp = tmp->mnt_parent;
		}
		}
		if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
		if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
			goto out2;
			goto out4;
	} else if (!is_subdir(old.dentry, new.dentry))
	} else if (!is_subdir(old.dentry, new.dentry))
		goto out2;
		goto out4;
	br_write_lock(vfsmount_lock);
	br_write_lock(vfsmount_lock);
	detach_mnt(new.mnt, &parent_path);
	detach_mnt(new.mnt, &parent_path);
	detach_mnt(root.mnt, &root_parent);
	detach_mnt(root.mnt, &root_parent);
@@ -2634,14 +2645,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	br_write_unlock(vfsmount_lock);
	br_write_unlock(vfsmount_lock);
	chroot_fs_refs(&root, &new);
	chroot_fs_refs(&root, &new);

	error = 0;
	error = 0;
out4:
	unlock_mount(&old);
	if (!error) {
		path_put(&root_parent);
		path_put(&root_parent);
		path_put(&parent_path);
		path_put(&parent_path);
out2:
	}
	mutex_unlock(&old.dentry->d_inode->i_mutex);
out3:
	up_write(&namespace_sem);
	path_put(&root);
	path_put(&root);
out2:
	path_put(&old);
	path_put(&old);
out1:
out1:
	path_put(&new);
	path_put(&new);