Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 65dff759 authored by Li Zefan's avatar Li Zefan Committed by Tejun Heo
Browse files

cgroup: fix cgroup_path() vs rename() race



rename() will change dentry->d_name. The result of this race can
be worse than seeing partially rewritten name, but we might access
a stale pointer because rename() will re-allocate memory to hold
a longer name.

As accessing dentry->name must be protected by dentry->d_lock or
parent inode's i_mutex, while on the other hand cgroup-path() can
be called with some irq-safe spinlocks held, we can't generate
cgroup path using dentry->d_name.

Alternatively we make a copy of dentry->d_name and save it in
cgrp->name when a cgroup is created, and update cgrp->name at
rename().

v5: use flexible array instead of zero-size array.
v4: - allocate root_cgroup_name and all root_cgroup->name points to it.
    - add cgroup_name() wrapper.
v3: use kfree_rcu() instead of synchronize_rcu() in user-visible path.
v2: make cgrp->name RCU safe.

Signed-off-by: default avatarLi Zefan <lizefan@huawei.com>
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
parent 6dbe51c2
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -247,9 +247,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	int ret;

	rcu_read_lock();
	ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
	rcu_read_unlock();
	if (ret)
		strncpy(buf, "<unavailable>", buflen);
	return ret;
+24 −0
Original line number Diff line number Diff line
@@ -150,6 +150,11 @@ enum {
	CGRP_CPUSET_CLONE_CHILDREN,
};

struct cgroup_name {
	struct rcu_head rcu_head;
	char name[];
};

struct cgroup {
	unsigned long flags;		/* "unsigned long" so bitops work */

@@ -172,6 +177,19 @@ struct cgroup {
	struct cgroup *parent;		/* my parent */
	struct dentry *dentry;		/* cgroup fs entry, RCU protected */

	/*
	 * This is a copy of dentry->d_name, and it's needed because
	 * we can't use dentry->d_name in cgroup_path().
	 *
	 * You must acquire rcu_read_lock() to access cgrp->name, and
	 * the only place that can change it is rename(), which is
	 * protected by parent dir's i_mutex.
	 *
	 * Normally you should use cgroup_name() wrapper rather than
	 * access it directly.
	 */
	struct cgroup_name __rcu *name;

	/* Private pointers for each registered subsystem */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

@@ -404,6 +422,12 @@ struct cgroup_scanner {
	void *data;
};

/* Caller should hold rcu_read_lock() */
static inline const char *cgroup_name(const struct cgroup *cgrp)
{
	return rcu_dereference(cgrp->name)->name;
}

int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);

+76 −30
Original line number Diff line number Diff line
@@ -238,6 +238,8 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)

static struct cgroup_name root_cgroup_name = { .name = "/" };

/* This flag indicates whether tasks in the fork and exit paths should
 * check for fork/exit handlers to call. This avoids us having to do
 * extra work in the fork/exit path if none of the subsystems need to
@@ -859,6 +861,17 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
	return inode;
}

static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
{
	struct cgroup_name *name;

	name = kmalloc(sizeof(*name) + dentry->d_name.len + 1, GFP_KERNEL);
	if (!name)
		return NULL;
	strcpy(name->name, dentry->d_name.name);
	return name;
}

static void cgroup_free_fn(struct work_struct *work)
{
	struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
@@ -889,6 +902,7 @@ static void cgroup_free_fn(struct work_struct *work)
	simple_xattrs_free(&cgrp->xattrs);

	ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
	kfree(rcu_dereference_raw(cgrp->name));
	kfree(cgrp);
}

@@ -1421,6 +1435,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
	INIT_LIST_HEAD(&root->allcg_list);
	root->number_of_cgroups = 1;
	cgrp->root = root;
	cgrp->name = &root_cgroup_name;
	cgrp->top_cgroup = cgrp;
	init_cgroup_housekeeping(cgrp);
	list_add_tail(&cgrp->allcg_node, &root->allcg_list);
@@ -1769,49 +1784,45 @@ static struct kobject *cgroup_kobj;
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Called with cgroup_mutex held or else with an RCU-protected cgroup
 * reference.  Writes path of cgroup into buf.  Returns 0 on success,
 * -errno on error.
 * Writes path of cgroup into buf.  Returns 0 on success, -errno on error.
 *
 * We can't generate cgroup path using dentry->d_name, as accessing
 * dentry->name must be protected by irq-unsafe dentry->d_lock or parent
 * inode's i_mutex, while on the other hand cgroup_path() can be called
 * with some irq-safe spinlocks held.
 */
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
	struct dentry *dentry = cgrp->dentry;
	int ret = -ENAMETOOLONG;
	char *start;

	rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
			   "cgroup_path() called without proper locking");

	if (cgrp == dummytop) {
		/*
		 * Inactive subsystems have no dentry for their root
		 * cgroup
		 */
		strcpy(buf, "/");
		return 0;
	}

	start = buf + buflen - 1;

	*start = '\0';
	for (;;) {
		int len = dentry->d_name.len;

	rcu_read_lock();
	while (cgrp) {
		const char *name = cgroup_name(cgrp);
		int len;

		len = strlen(name);
		if ((start -= len) < buf)
			return -ENAMETOOLONG;
		memcpy(start, dentry->d_name.name, len);
		cgrp = cgrp->parent;
		if (!cgrp)
			break;
			goto out;
		memcpy(start, name, len);

		dentry = cgrp->dentry;
		if (!cgrp->parent)
			continue;
			break;

		if (--start < buf)
			return -ENAMETOOLONG;
			goto out;
		*start = '/';

		cgrp = cgrp->parent;
	}
	ret = 0;
	memmove(buf, start, buf + buflen - start);
	return 0;
out:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path);

@@ -2537,13 +2548,40 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			    struct inode *new_dir, struct dentry *new_dentry)
{
	int ret;
	struct cgroup_name *name, *old_name;
	struct cgroup *cgrp;

	/*
	 * It's convinient to use parent dir's i_mutex to protected
	 * cgrp->name.
	 */
	lockdep_assert_held(&old_dir->i_mutex);

	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	if (old_dir != new_dir)
		return -EIO;
	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);

	cgrp = __d_cgrp(old_dentry);

	name = cgroup_alloc_name(new_dentry);
	if (!name)
		return -ENOMEM;

	ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);
	if (ret) {
		kfree(name);
		return ret;
	}

	old_name = cgrp->name;
	rcu_assign_pointer(cgrp->name, name);

	kfree_rcu(old_name, rcu_head);
	return 0;
}

static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
@@ -4158,6 +4196,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     umode_t mode)
{
	struct cgroup *cgrp;
	struct cgroup_name *name;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
@@ -4168,9 +4207,14 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
	if (!cgrp)
		return -ENOMEM;

	name = cgroup_alloc_name(dentry);
	if (!name)
		goto err_free_cgrp;
	rcu_assign_pointer(cgrp->name, name);

	cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
	if (cgrp->id < 0)
		goto err_free_cgrp;
		goto err_free_name;

	/*
	 * Only live parents can have children.  Note that the liveliness
@@ -4276,6 +4320,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
	deactivate_super(sb);
err_free_id:
	ida_simple_remove(&root->cgroup_ida, cgrp->id);
err_free_name:
	kfree(rcu_dereference_raw(cgrp->name));
err_free_cgrp:
	kfree(cgrp);
	return err;