Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e61734c5 authored by Tejun Heo
Browse files

cgroup: remove cgroup->name



cgroup->name handling became quite complicated over time involving
dedicated struct cgroup_name for RCU protection.  Now that cgroup is
on kernfs, we can drop all of it and simply use kernfs_name/path() and
friends.  Replace cgroup->name and all related code with kernfs
name/path constructs.

* Reimplement cgroup_name() and cgroup_path() as thin wrappers on top
  of kernfs counterparts, which involves semantic changes.
  pr_cont_cgroup_name() and pr_cont_cgroup_path() added.

* cgroup->name handling dropped from cgroup_rename().

* All users of cgroup_name/path() updated to the new semantics.  Users
  which were formatting the string just to printk them are converted
  to use pr_cont_cgroup_name/path() instead, which simplifies things
  quite a bit.  As cgroup_name() no longer requires RCU read lock
  around it, RCU lockings which were protecting only cgroup_name() are
  removed.

v2: Comment above oom_info_lock updated as suggested by Michal.

v3: dummy_top doesn't have a kn associated and
    pr_cont_cgroup_name/path() ended up calling the matching kernfs
    functions with NULL kn leading to oops.  Test for NULL kn and
    print "/" if so.  This issue was reported by Fengguang Wu.

v4: Rebased on top of 0ab02ca8 ("cgroup: protect modifications to
    cgroup_idr with cgroup_mutex").

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
parent 6f30558f
Loading
Loading
Loading
Loading
+8 −4
Original line number Diff line number Diff line
@@ -241,12 +241,16 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	int ret;
	char *p;

	ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
	if (ret)
	p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
	if (!p) {
		strncpy(buf, "<unavailable>", buflen);
	return ret;
		return -ENAMETOOLONG;
	}

	memmove(buf, p, buf + buflen - p);
	return 0;
}

/**
+1 −0
Original line number Diff line number Diff line
@@ -112,6 +112,7 @@ char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
	spin_unlock_irqrestore(&kernfs_rename_lock, flags);
	return p;
}
EXPORT_SYMBOL_GPL(kernfs_path);

/**
 * pr_cont_kernfs_name - pr_cont name of a kernfs_node
+36 −27
Original line number Diff line number Diff line
@@ -138,11 +138,6 @@ enum {
	CGRP_SANE_BEHAVIOR,
};

struct cgroup_name {
	struct rcu_head rcu_head;
	char name[];
};

struct cgroup {
	unsigned long flags;		/* "unsigned long" so bitops work */

@@ -179,19 +174,6 @@ struct cgroup {
	 */
	u64 serial_nr;

	/*
	 * This is a copy of dentry->d_name, and it's needed because
	 * we can't use dentry->d_name in cgroup_path().
	 *
	 * You must acquire rcu_read_lock() to access cgrp->name, and
	 * the only place that can change it is rename(), which is
	 * protected by parent dir's i_mutex.
	 *
	 * Normally you should use cgroup_name() wrapper rather than
	 * access it directly.
	 */
	struct cgroup_name __rcu *name;

	/* Private pointers for each registered subsystem */
	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];

@@ -479,12 +461,6 @@ static inline bool cgroup_sane_behavior(const struct cgroup *cgrp)
	return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR;
}

/* Caller should hold rcu_read_lock() */
static inline const char *cgroup_name(const struct cgroup *cgrp)
{
	return rcu_dereference(cgrp->name)->name;
}

/* returns ino associated with a cgroup, 0 indicates unmounted root */
static inline ino_t cgroup_ino(struct cgroup *cgrp)
{
@@ -503,14 +479,47 @@ static inline struct cftype *seq_cft(struct seq_file *seq)

struct cgroup_subsys_state *seq_css(struct seq_file *seq);

/*
 * Name / path handling functions.  All are thin wrappers around the kernfs
 * counterparts and can be called under any context.
 */

/*
 * cgroup_name - thin wrapper around kernfs_name() for a cgroup
 * @cgrp: cgroup whose kernfs node is queried
 * @buf: caller-supplied buffer handed to kernfs_name()
 * @buflen: size of @buf
 *
 * Forwards @cgrp->kn, @buf and @buflen to kernfs_name() and returns its
 * result unchanged.
 *
 * NOTE(review): @cgrp->kn is passed through without a NULL test, whereas
 * pr_cont_cgroup_name() special-cases a NULL kn (dummy_top) - confirm that
 * callers never hand such a cgroup to this helper.
 */
static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
{
	struct kernfs_node *kn = cgrp->kn;

	return kernfs_name(kn, buf, buflen);
}

/*
 * cgroup_path - thin wrapper around kernfs_path() for a cgroup
 * @cgrp: cgroup whose kernfs node is queried
 * @buf: caller-supplied buffer handed to kernfs_path()
 * @buflen: size of @buf
 *
 * Forwards @cgrp->kn, @buf and @buflen to kernfs_path() and returns its
 * result unchanged.  The result is marked __must_check: callers are
 * expected to use the returned pointer rather than assume the path starts
 * at @buf.
 *
 * NOTE(review): @cgrp->kn is passed through without a NULL test, whereas
 * pr_cont_cgroup_path() special-cases a NULL kn (dummy_top) - confirm that
 * callers never hand such a cgroup to this helper.
 */
static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
					      size_t buflen)
{
	struct kernfs_node *kn = cgrp->kn;

	return kernfs_path(kn, buf, buflen);
}

/*
 * pr_cont_cgroup_name - pr_cont the name of a cgroup
 * @cgrp: cgroup of interest
 *
 * Continues the current printk line with the cgroup's name via
 * pr_cont_kernfs_name().  A cgroup without a kernfs node is printed as "/".
 */
static inline void pr_cont_cgroup_name(struct cgroup *cgrp)
{
	struct kernfs_node *kn = cgrp->kn;

	/* dummy_top has no kn associated; fall back to "/" */
	if (!kn) {
		pr_cont("/");
		return;
	}

	pr_cont_kernfs_name(kn);
}

/*
 * pr_cont_cgroup_path - pr_cont the path of a cgroup
 * @cgrp: cgroup of interest
 *
 * Continues the current printk line with the cgroup's path via
 * pr_cont_kernfs_path().  A cgroup without a kernfs node is printed as "/".
 */
static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
{
	struct kernfs_node *kn = cgrp->kn;

	/* dummy_top has no kn associated; fall back to "/" */
	if (!kn) {
		pr_cont("/");
		return;
	}

	pr_cont_kernfs_path(kn);
}

char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);

int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);

bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);

int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);

int cgroup_task_count(const struct cgroup *cgrp);

/*
+33 −113
Original line number Diff line number Diff line
@@ -145,8 +145,6 @@ static int cgroup_root_count;
/* hierarchy ID allocation and mapping, protected by cgroup_mutex */
static DEFINE_IDR(cgroup_hierarchy_idr);

static struct cgroup_name root_cgroup_name = { .name = "/" };

/*
 * Assign a monotonically increasing serial number to cgroups.  It
 * guarantees cgroups with bigger numbers are newer than those with smaller
@@ -888,17 +886,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
static const struct file_operations proc_cgroupstats_operations;

static struct cgroup_name *cgroup_alloc_name(const char *name_str)
{
	struct cgroup_name *name;

	name = kmalloc(sizeof(*name) + strlen(name_str) + 1, GFP_KERNEL);
	if (!name)
		return NULL;
	strcpy(name->name, name_str);
	return name;
}

static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
			      char *buf)
{
@@ -958,8 +945,6 @@ static void cgroup_free_fn(struct work_struct *work)
	cgroup_pidlist_destroy_all(cgrp);

	kernfs_put(cgrp->kn);

	kfree(rcu_dereference_raw(cgrp->name));
	kfree(cgrp);
}

@@ -1377,7 +1362,6 @@ static void init_cgroup_root(struct cgroupfs_root *root)
	INIT_LIST_HEAD(&root->root_list);
	root->number_of_cgroups = 1;
	cgrp->root = root;
	RCU_INIT_POINTER(cgrp->name, &root_cgroup_name);
	init_cgroup_housekeeping(cgrp);
	idr_init(&root->cgroup_idr);
}
@@ -1597,57 +1581,6 @@ static struct file_system_type cgroup_fs_type = {

static struct kobject *cgroup_kobj;

/**
 * cgroup_path - generate the path of a cgroup
 * @cgrp: the cgroup in question
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Writes path of cgroup into buf.  Returns 0 on success, -errno on error.
 *
 * We can't generate cgroup path using dentry->d_name, as accessing
 * dentry->name must be protected by irq-unsafe dentry->d_lock or parent
 * inode's i_mutex, while on the other hand cgroup_path() can be called
 * with some irq-safe spinlocks held.
 */
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
	int ret = -ENAMETOOLONG;
	char *start;

	if (!cgrp->parent) {
		if (strlcpy(buf, "/", buflen) >= buflen)
			return -ENAMETOOLONG;
		return 0;
	}

	start = buf + buflen - 1;
	*start = '\0';

	rcu_read_lock();
	do {
		const char *name = cgroup_name(cgrp);
		int len;

		len = strlen(name);
		if ((start -= len) < buf)
			goto out;
		memcpy(start, name, len);

		if (--start < buf)
			goto out;
		*start = '/';

		cgrp = cgrp->parent;
	} while (cgrp->parent);
	ret = 0;
	memmove(buf, start, buf + buflen - start);
out:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path);

/**
 * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
 * @task: target task
@@ -1659,16 +1592,14 @@ EXPORT_SYMBOL_GPL(cgroup_path);
 * function grabs cgroup_mutex and shouldn't be used inside locks used by
 * cgroup controller callbacks.
 *
 * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short.
 * Return value is the same as kernfs_path().
 */
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
	struct cgroupfs_root *root;
	struct cgroup *cgrp;
	int hierarchy_id = 1, ret = 0;

	if (buflen < 2)
		return -ENAMETOOLONG;
	int hierarchy_id = 1;
	char *path = NULL;

	mutex_lock(&cgroup_mutex);

@@ -1676,14 +1607,15 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)

	if (root) {
		cgrp = task_cgroup_from_root(task, root);
		ret = cgroup_path(cgrp, buf, buflen);
		path = cgroup_path(cgrp, buf, buflen);
	} else {
		/* if no hierarchy exists, everyone is in "/" */
		memcpy(buf, "/", 2);
		if (strlcpy(buf, "/", buflen) < buflen)
			path = buf;
	}

	mutex_unlock(&cgroup_mutex);
	return ret;
	return path;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);

@@ -2211,7 +2143,6 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
			 const char *new_name_str)
{
	struct cgroup *cgrp = kn->priv;
	struct cgroup_name *name, *old_name;
	int ret;

	if (kernfs_type(kn) != KERNFS_DIR)
@@ -2226,25 +2157,13 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
	if (cgroup_sane_behavior(cgrp))
		return -EPERM;

	name = cgroup_alloc_name(new_name_str);
	if (!name)
		return -ENOMEM;

	mutex_lock(&cgroup_tree_mutex);
	mutex_lock(&cgroup_mutex);

	ret = kernfs_rename(kn, new_parent, new_name_str);
	if (!ret) {
		old_name = rcu_dereference_protected(cgrp->name, true);
		rcu_assign_pointer(cgrp->name, name);
	} else {
		old_name = name;
	}

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgroup_tree_mutex);

	kfree_rcu(old_name, rcu_head);
	return ret;
}

@@ -3719,14 +3638,13 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
/**
 * cgroup_create - create a cgroup
 * @parent: cgroup that will be parent of the new cgroup
 * @name_str: name of the new cgroup
 * @name: name of the new cgroup
 * @mode: mode to set on new cgroup
 */
static long cgroup_create(struct cgroup *parent, const char *name_str,
static long cgroup_create(struct cgroup *parent, const char *name,
			  umode_t mode)
{
	struct cgroup *cgrp;
	struct cgroup_name *name;
	struct cgroupfs_root *root = parent->root;
	int ssid, err;
	struct cgroup_subsys *ss;
@@ -3737,13 +3655,6 @@ static long cgroup_create(struct cgroup *parent, const char *name_str,
	if (!cgrp)
		return -ENOMEM;

	name = cgroup_alloc_name(name_str);
	if (!name) {
		err = -ENOMEM;
		goto err_free_cgrp;
	}
	rcu_assign_pointer(cgrp->name, name);

	mutex_lock(&cgroup_tree_mutex);

	/*
@@ -3781,7 +3692,7 @@ static long cgroup_create(struct cgroup *parent, const char *name_str,
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);

	/* create the directory */
	kn = kernfs_create_dir(parent->kn, name->name, mode, cgrp);
	kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
	if (IS_ERR(kn)) {
		err = PTR_ERR(kn);
		goto err_free_id;
@@ -3839,8 +3750,6 @@ static long cgroup_create(struct cgroup *parent, const char *name_str,
	mutex_unlock(&cgroup_mutex);
err_unlock_tree:
	mutex_unlock(&cgroup_tree_mutex);
	kfree(rcu_dereference_raw(cgrp->name));
err_free_cgrp:
	kfree(cgrp);
	return err;

@@ -4304,12 +4213,12 @@ int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	char *buf, *path;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

@@ -4337,10 +4246,12 @@ int proc_cgroup_show(struct seq_file *m, void *v)
				   root->name);
		seq_putc(m, ':');
		cgrp = task_cgroup_from_root(tsk, root);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
		path = cgroup_path(cgrp, buf, PATH_MAX);
		if (!path) {
			retval = -ENAMETOOLONG;
			goto out_unlock;
		seq_puts(m, buf);
		}
		seq_puts(m, path);
		seq_putc(m, '\n');
	}

@@ -4588,16 +4499,17 @@ static void cgroup_release_agent(struct work_struct *work)
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		char *pathbuf = NULL, *agentbuf = NULL, *path;
		struct cgroup *cgrp = list_entry(release_list.next,
						    struct cgroup,
						    release_list);
		list_del_init(&cgrp->release_list);
		raw_spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
		path = cgroup_path(cgrp, pathbuf, PATH_MAX);
		if (!path)
			goto continue_free;
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
@@ -4605,7 +4517,7 @@ static void cgroup_release_agent(struct work_struct *work)

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i++] = path;
		argv[i] = NULL;

		i = 0;
@@ -4755,6 +4667,11 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
{
	struct cgrp_cset_link *link;
	struct css_set *cset;
	char *name_buf;

	name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
	if (!name_buf)
		return -ENOMEM;

	read_lock(&css_set_lock);
	rcu_read_lock();
@@ -4763,14 +4680,17 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
		struct cgroup *c = link->cgrp;
		const char *name = "?";

		if (c != cgroup_dummy_top)
			name = cgroup_name(c);
		if (c != cgroup_dummy_top) {
			cgroup_name(c, name_buf, NAME_MAX + 1);
			name = name_buf;
		}

		seq_printf(seq, "Root %d group %s\n",
			   c->root->hierarchy_id, name);
	}
	rcu_read_unlock();
	read_unlock(&css_set_lock);
	kfree(name_buf);
	return 0;
}

+13 −14
Original line number Diff line number Diff line
@@ -2088,10 +2088,9 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
		parent = parent_cs(parent);

	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
		rcu_read_lock();
		printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
		       cgroup_name(cs->css.cgroup));
		rcu_read_unlock();
		printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset ");
		pr_cont_cgroup_name(cs->css.cgroup);
		pr_cont("\n");
	}
}

@@ -2619,19 +2618,17 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk)
	 /* Statically allocated to prevent using excess stack. */
	static char cpuset_nodelist[CPUSET_NODELIST_LEN];
	static DEFINE_SPINLOCK(cpuset_buffer_lock);

	struct cgroup *cgrp = task_cs(tsk)->css.cgroup;

	rcu_read_lock();
	spin_lock(&cpuset_buffer_lock);

	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
			   tsk->mems_allowed);
	printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
	       tsk->comm, cgroup_name(cgrp), cpuset_nodelist);
	printk(KERN_INFO "%s cpuset=", tsk->comm);
	pr_cont_cgroup_name(cgrp);
	pr_cont(" mems_allowed=%s\n", cpuset_nodelist);

	spin_unlock(&cpuset_buffer_lock);
	rcu_read_unlock();
}

/*
@@ -2681,12 +2678,12 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	char *buf, *p;
	struct cgroup_subsys_state *css;
	int retval;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

@@ -2696,14 +2693,16 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
	if (!tsk)
		goto out_free;

	retval = -ENAMETOOLONG;
	rcu_read_lock();
	css = task_css(tsk, cpuset_cgrp_id);
	retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
	p = cgroup_path(css->cgroup, buf, PATH_MAX);
	rcu_read_unlock();
	if (retval < 0)
	if (!p)
		goto out_put_task;
	seq_puts(m, buf);
	seq_puts(m, p);
	seq_putc(m, '\n');
	retval = 0;
out_put_task:
	put_task_struct(tsk);
out_free:
Loading