Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 51b38d92 authored by Serge Hallyn, committed by ivanmeler
Browse files

cgroup: mount cgroupns-root when inside non-init cgroupns



This patch enables cgroup mounting inside userns when a process
has appropriate privileges. The cgroup filesystem mounted is
rooted at the cgroupns-root. Thus, in a container-setup, only
the hierarchy under the cgroupns-root is exposed inside the container.
This allows container management tools to run inside the containers
without depending on any global state.

Signed-off-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent ec6e8f80
Loading
Loading
Loading
Loading
+47 −1
Original line number Diff line number Diff line
@@ -1990,6 +1990,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
{
	bool is_v2 = fs_type == &cgroup2_fs_type;
	struct super_block *pinned_sb = NULL;
	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
	struct cgroup_subsys *ss;
	struct cgroup_root *root;
	struct cgroup_sb_opts opts;
@@ -1998,6 +1999,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
	int i;
	bool new_sb;

	get_cgroup_ns(ns);

	/* Check if the caller has permission to mount. */
	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
		put_cgroup_ns(ns);
		return ERR_PTR(-EPERM);
	}

	/*
	 * The first time anyone tries to mount a cgroup, enable the list
	 * linking each css_set to its tasks and fix up all existing tasks.
@@ -2008,6 +2017,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
	if (is_v2) {
		if (data) {
			pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
			put_cgroup_ns(ns);
			return ERR_PTR(-EINVAL);
		}
		cgrp_dfl_root_visible = true;
@@ -2113,6 +2123,16 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
		goto out_unlock;
	}

	/*
	 * We know this subsystem has not yet been bound.  Users in a non-init
	 * user namespace may only mount hierarchies with no bound subsystems,
	 * i.e. 'none,name=user1'
	 */
	if (!opts.none && !capable(CAP_SYS_ADMIN)) {
		ret = -EPERM;
		goto out_unlock;
	}

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root) {
		ret = -ENOMEM;
@@ -2131,12 +2151,37 @@ out_free:
	kfree(opts.release_agent);
	kfree(opts.name);

	if (ret)
	if (ret) {
		put_cgroup_ns(ns);
		return ERR_PTR(ret);
	}
out_mount:
	dentry = kernfs_mount(fs_type, flags, root->kf_root,
			      is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC,
			      &new_sb);

	/*
	 * In non-init cgroup namespace, instead of root cgroup's
	 * dentry, we return the dentry corresponding to the
	 * cgroupns->root_cgrp.
	 */
	if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
		struct dentry *nsdentry;
		struct cgroup *cgrp;

		mutex_lock(&cgroup_mutex);
		spin_lock_bh(&css_set_lock);

		cgrp = cset_cgroup_from_root(ns->root_cset, root);

		spin_unlock_bh(&css_set_lock);
		mutex_unlock(&cgroup_mutex);

		nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
		dput(dentry);
		dentry = nsdentry;
	}

	if (IS_ERR(dentry) || !new_sb)
		cgroup_put(&root->cgrp);

@@ -2149,6 +2194,7 @@ out_mount:
		deactivate_super(pinned_sb);
	}

	put_cgroup_ns(ns);
	return dentry;
}