Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a9986464 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull cgroup changes from Tejun Heo:
 "Not too much activity this time around.  css_id is finally killed and
  a minor update to device_cgroup"

* 'for-3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  device_cgroup: remove can_attach
  cgroup: kill css_id
  memcg: stop using css id
  memcg: fail to create cgroup if the cgroup id is too big
  memcg: convert to use cgroup id
  memcg: convert to use cgroup_is_descendant()
parents 13aa7e0b 73ba3534
Loading
Loading
Loading
Loading
+0 −37
Original line number Diff line number Diff line
@@ -612,11 +612,6 @@ struct cgroup_subsys {
	int subsys_id;
	int disabled;
	int early_init;
	/*
	 * True if this subsys uses ID. ID is not available before cgroup_init()
	 * (not available in early_init time.)
	 */
	bool use_id;

	/*
	 * If %false, this subsystem is properly hierarchical -
@@ -642,9 +637,6 @@ struct cgroup_subsys {
	 */
	struct cgroupfs_root *root;
	struct list_head sibling;
	/* used when use_id == true */
	struct idr idr;
	spinlock_t id_lock;

	/* list of cftype_sets */
	struct list_head cftsets;
@@ -875,35 +867,6 @@ int css_scan_tasks(struct cgroup_subsys_state *css,
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);

/*
 * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
 * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
 * CSS ID is assigned at cgroup allocation (create) automatically
 * and removed when subsys calls free_css_id() function. This is because
 * the lifetime of cgroup_subsys_state is subsys's matter.
 *
 * Looking up and scanning function should be called under rcu_read_lock().
 * Taking cgroup_mutex is not necessary for following calls.
 * But the css returned by this routine can be "not populated yet" or "being
 * destroyed". The caller should check css and cgroup's status.
 */

/*
 * Typically Called at ->destroy(), or somewhere the subsys frees
 * cgroup_subsys_state.
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);

/* Find a cgroup_subsys_state which has given ID */

struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);

/* Returns true if root is ancestor of cg */
bool css_is_ancestor(struct cgroup_subsys_state *cg,
		     const struct cgroup_subsys_state *root);

/* Get id and depth of css */
unsigned short css_id(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
					 struct cgroup_subsys *ss);

+1 −247
Original line number Diff line number Diff line
@@ -124,38 +124,6 @@ struct cfent {
	struct simple_xattrs		xattrs;
};

/*
 * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
 * cgroup_subsys->use_id != 0.
 */
#define CSS_ID_MAX	(65535)
struct css_id {
	/*
	 * The css to which this ID points. This pointer is set to valid value
	 * after cgroup is populated. If cgroup is removed, this will be NULL.
	 * This pointer is expected to be RCU-safe because destroy()
	 * is called after synchronize_rcu(). But for safe use, css_tryget()
	 * should be used for avoiding race.
	 */
	struct cgroup_subsys_state __rcu *css;
	/*
	 * ID of this css.
	 */
	unsigned short id;
	/*
	 * Depth in hierarchy which this ID belongs to.
	 */
	unsigned short depth;
	/*
	 * ID is freed by RCU. (and lookup routine is RCU safe.)
	 */
	struct rcu_head rcu_head;
	/*
	 * Hierarchy of CSS ID belongs to.
	 */
	unsigned short stack[0]; /* Array of Length (depth+1) */
};

/*
 * cgroup_event represents events which userspace want to receive.
 */
@@ -387,9 +355,6 @@ struct cgrp_cset_link {
static struct css_set init_css_set;
static struct cgrp_cset_link init_cgrp_cset_link;

static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);

/*
 * css_set_lock protects the list of css_set objects, and the chain of
 * tasks off each css_set.  Nests outside task->alloc_lock due to
@@ -841,8 +806,6 @@ static struct backing_dev_info cgroup_backing_dev_info = {
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys_state *child_css);

static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
{
	struct inode *inode = new_inode(sb);
@@ -4240,21 +4203,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
				goto err;
		}
	}

	/* This cgroup is ready now */
	for_each_root_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
		struct css_id *id = rcu_dereference_protected(css->id, true);

		/*
		 * Update id->css pointer and make this css visible from
		 * CSS ID functions. This pointer will be dereferened
		 * from RCU-read-side without locks.
		 */
		if (id)
			rcu_assign_pointer(id->css, css);
	}

	return 0;
err:
	cgroup_clear_dir(cgrp, subsys_mask);
@@ -4323,7 +4271,6 @@ static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
	css->cgroup = cgrp;
	css->ss = ss;
	css->flags = 0;
	css->id = NULL;

	if (cgrp->parent)
		css->parent = cgroup_css(cgrp->parent, ss);
@@ -4455,12 +4402,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			goto err_free_all;

		init_css(css, ss, cgrp);

		if (ss->use_id) {
			err = alloc_css_id(css);
			if (err)
				goto err_free_all;
		}
	}

	/*
@@ -4925,12 +4866,6 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)

	/* our new subsystem will be attached to the dummy hierarchy. */
	init_css(css, ss, cgroup_dummy_top);
	/* init_idr must be after init_css() because it sets css->id. */
	if (ss->use_id) {
		ret = cgroup_init_idr(ss, css);
		if (ret)
			goto err_unload;
	}

	/*
	 * Now we need to entangle the css into the existing css_sets. unlike
@@ -4996,9 +4931,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)

	offline_css(cgroup_css(cgroup_dummy_top, ss));

	if (ss->use_id)
		idr_destroy(&ss->idr);

	/* deassign the subsys_id */
	cgroup_subsys[ss->subsys_id] = NULL;

@@ -5025,8 +4957,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
	/*
	 * remove subsystem's css from the cgroup_dummy_top and free it -
	 * need to free before marking as null because ss->css_free needs
	 * the cgrp->subsys pointer to find their state. note that this
	 * also takes care of freeing the css_id.
	 * the cgrp->subsys pointer to find their state.
	 */
	ss->css_free(cgroup_css(cgroup_dummy_top, ss));
	RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
@@ -5097,8 +5028,6 @@ int __init cgroup_init(void)
	for_each_builtin_subsys(ss, i) {
		if (!ss->early_init)
			cgroup_init_subsys(ss);
		if (ss->use_id)
			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
	}

	/* allocate id for the dummy hierarchy */
@@ -5518,181 +5447,6 @@ static int __init cgroup_disable(char *str)
}
__setup("cgroup_disable=", cgroup_disable);

/*
 * Functons for CSS ID.
 */

/* to get ID other than 0, this should be called when !cgroup_is_dead() */
unsigned short css_id(struct cgroup_subsys_state *css)
{
	struct css_id *cssid;

	/*
	 * This css_id() can return correct value when somone has refcnt
	 * on this or this is under rcu_read_lock(). Once css->id is allocated,
	 * it's unchanged until freed.
	 */
	cssid = rcu_dereference_raw(css->id);

	if (cssid)
		return cssid->id;
	return 0;
}
EXPORT_SYMBOL_GPL(css_id);

/**
 *  css_is_ancestor - test "root" css is an ancestor of "child"
 * @child: the css to be tested.
 * @root: the css supporsed to be an ancestor of the child.
 *
 * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
 * this function reads css->id, the caller must hold rcu_read_lock().
 * But, considering usual usage, the csses should be valid objects after test.
 * Assuming that the caller will do some action to the child if this returns
 * returns true, the caller must take "child";s reference count.
 * If "child" is valid object and this returns true, "root" is valid, too.
 */

bool css_is_ancestor(struct cgroup_subsys_state *child,
		    const struct cgroup_subsys_state *root)
{
	struct css_id *child_id;
	struct css_id *root_id;

	child_id  = rcu_dereference(child->id);
	if (!child_id)
		return false;
	root_id = rcu_dereference(root->id);
	if (!root_id)
		return false;
	if (child_id->depth < root_id->depth)
		return false;
	if (child_id->stack[root_id->depth] != root_id->id)
		return false;
	return true;
}

void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = rcu_dereference_protected(css->id, true);

	/* When this is called before css_id initialization, id can be NULL */
	if (!id)
		return;

	BUG_ON(!ss->use_id);

	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	spin_unlock(&ss->id_lock);
	kfree_rcu(id, rcu_head);
}
EXPORT_SYMBOL_GPL(free_css_id);

/*
 * This is called by init or create(). Then, calls to this function are
 * always serialized (By cgroup_mutex() at create()).
 */

static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
	struct css_id *newid;
	int ret, size;

	BUG_ON(!ss->use_id);

	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
	newid = kzalloc(size, GFP_KERNEL);
	if (!newid)
		return ERR_PTR(-ENOMEM);

	idr_preload(GFP_KERNEL);
	spin_lock(&ss->id_lock);
	/* Don't use 0. allocates an ID of 1-65535 */
	ret = idr_alloc(&ss->idr, newid, 1, CSS_ID_MAX + 1, GFP_NOWAIT);
	spin_unlock(&ss->id_lock);
	idr_preload_end();

	/* Returns error when there are no free spaces for new ID.*/
	if (ret < 0)
		goto err_out;

	newid->id = ret;
	newid->depth = depth;
	return newid;
err_out:
	kfree(newid);
	return ERR_PTR(ret);

}

static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
					    struct cgroup_subsys_state *rootcss)
{
	struct css_id *newid;

	spin_lock_init(&ss->id_lock);
	idr_init(&ss->idr);

	newid = get_new_cssid(ss, 0);
	if (IS_ERR(newid))
		return PTR_ERR(newid);

	newid->stack[0] = newid->id;
	RCU_INIT_POINTER(newid->css, rootcss);
	RCU_INIT_POINTER(rootcss->id, newid);
	return 0;
}

static int alloc_css_id(struct cgroup_subsys_state *child_css)
{
	struct cgroup_subsys_state *parent_css = css_parent(child_css);
	struct css_id *child_id, *parent_id;
	int i, depth;

	parent_id = rcu_dereference_protected(parent_css->id, true);
	depth = parent_id->depth + 1;

	child_id = get_new_cssid(child_css->ss, depth);
	if (IS_ERR(child_id))
		return PTR_ERR(child_id);

	for (i = 0; i < depth; i++)
		child_id->stack[i] = parent_id->stack[i];
	child_id->stack[depth] = child_id->id;
	/*
	 * child_id->css pointer will be set after this cgroup is available
	 * see cgroup_populate_dir()
	 */
	rcu_assign_pointer(child_css->id, child_id);

	return 0;
}

/**
 * css_lookup - lookup css by id
 * @ss: cgroup subsys to be looked into.
 * @id: the id
 *
 * Returns pointer to cgroup_subsys_state if there is valid one with id.
 * NULL if not. Should be called under rcu_read_lock()
 */
struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
{
	struct css_id *cssid = NULL;

	BUG_ON(!ss->use_id);
	cssid = idr_find(&ss->idr, id);

	if (unlikely(!cssid))
		return NULL;

	return rcu_dereference(cssid->css);
}
EXPORT_SYMBOL_GPL(css_lookup);

/**
 * css_from_dir - get corresponding css from the dentry of a cgroup dir
 * @dentry: directory dentry of interest
+40 −26
Original line number Diff line number Diff line
@@ -499,6 +499,29 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
	return (memcg == root_mem_cgroup);
}

/*
 * We restrict the id in the range of [1, 65535], so it can fit into
 * an unsigned short.
 */
#define MEM_CGROUP_ID_MAX	USHRT_MAX

static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
	/*
	 * The ID of the root cgroup is 0, but memcg treat 0 as an
	 * invalid ID, so we return (cgroup_id + 1).
	 */
	return memcg->css.cgroup->id + 1;
}

static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
{
	struct cgroup_subsys_state *css;

	css = css_from_id(id - 1, &mem_cgroup_subsys);
	return mem_cgroup_from_css(css);
}

/* Writing them here to avoid exposing memcg's inner layout */
#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)

@@ -570,17 +593,12 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
#ifdef CONFIG_MEMCG_KMEM
/*
 * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
 * There are two main reasons for not using the css_id for this:
 *  1) this works better in sparse environments, where we have a lot of memcgs,
 * The main reason for not using cgroup id for this:
 *  this works better in sparse environments, where we have a lot of memcgs,
 *  but only a few kmem-limited. Or also, if we have, for instance, 200
 *  memcgs, and none but the 200th is kmem-limited, we'd have to have a
 *  200 entry array for that.
 *
 *  2) In order not to violate the cgroup API, we would like to do all memory
 *     allocation in ->create(). At that point, we haven't yet allocated the
 *     css_id. Having a separate index prevents us from messing with the cgroup
 *     core for this
 *
 * The current size of the caches array is stored in
 * memcg_limited_groups_array_size.  It will double each time we have to
 * increase it.
@@ -594,14 +612,14 @@ int memcg_limited_groups_array_size;
 * cgroups is a reasonable guess. In the future, it could be a parameter or
 * tunable, but that is strictly not necessary.
 *
 * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
 * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
 * this constant directly from cgroup, but it is understandable that this is
 * better kept as an internal representation in cgroup.c. In any case, the
 * css_id space is not getting any smaller, and we don't have to necessarily
 * cgrp_id space is not getting any smaller, and we don't have to necessarily
 * increase ours as well if it increases.
 */
#define MEMCG_CACHES_MIN_SIZE 4
#define MEMCG_CACHES_MAX_SIZE 65535
#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX

/*
 * A lot of the calls to the cache allocation functions are expected to be
@@ -1408,7 +1426,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
		return true;
	if (!root_memcg->use_hierarchy || !memcg)
		return false;
	return css_is_ancestor(&memcg->css, &root_memcg->css);
	return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
}

static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
@@ -2826,15 +2844,10 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 */
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
	struct cgroup_subsys_state *css;

	/* ID 0 is unused ID */
	if (!id)
		return NULL;
	css = css_lookup(&mem_cgroup_subsys, id);
	if (!css)
		return NULL;
	return mem_cgroup_from_css(css);
	return mem_cgroup_from_id(id);
}

struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -4350,7 +4363,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
	 * css_get() was called in uncharge().
	 */
	if (do_swap_account && swapout && memcg)
		swap_cgroup_record(ent, css_id(&memcg->css));
		swap_cgroup_record(ent, mem_cgroup_id(memcg));
}
#endif

@@ -4402,8 +4415,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
{
	unsigned short old_id, new_id;

	old_id = css_id(&from->css);
	new_id = css_id(&to->css);
	old_id = mem_cgroup_id(from);
	new_id = mem_cgroup_id(to);

	if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
		mem_cgroup_swap_statistics(from, false);
@@ -6166,7 +6179,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
	size_t size = memcg_size();

	mem_cgroup_remove_from_trees(memcg);
	free_css_id(&mem_cgroup_subsys, &memcg->css);

	for_each_node(node)
		free_mem_cgroup_per_zone_info(memcg, node);
@@ -6269,6 +6281,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
	int error = 0;

	if (css->cgroup->id > MEM_CGROUP_ID_MAX)
		return -ENOSPC;

	if (!parent)
		return 0;

@@ -6540,7 +6555,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
	}
	/* There is a swap entry and a page doesn't exist or isn't charged */
	if (ent.val && !ret &&
			css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) {
	    mem_cgroup_id(mc.from) == lookup_swap_cgroup_id(ent)) {
		ret = MC_TARGET_SWAP;
		if (target)
			target->ent = ent;
@@ -6960,7 +6975,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
	.bind = mem_cgroup_bind,
	.base_cftypes = mem_cgroup_files,
	.early_init = 0,
	.use_id = 1,
};

#ifdef CONFIG_MEMCG_SWAP
+0 −11
Original line number Diff line number Diff line
@@ -63,16 +63,6 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)

struct cgroup_subsys devices_subsys;

static int devcgroup_can_attach(struct cgroup_subsys_state *new_css,
				struct cgroup_taskset *set)
{
	struct task_struct *task = cgroup_taskset_first(set);

	if (current != task && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	return 0;
}

/*
 * called under devcgroup_mutex
 */
@@ -697,7 +687,6 @@ static struct cftype dev_cgroup_files[] = {

struct cgroup_subsys devices_subsys = {
	.name = "devices",
	.can_attach = devcgroup_can_attach,
	.css_alloc = devcgroup_css_alloc,
	.css_free = devcgroup_css_free,
	.css_online = devcgroup_online,