Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 817929ec authored by Paul Menage's avatar Paul Menage Committed by Linus Torvalds
Browse files

Task Control Groups: shared cgroup subsystem group arrays



Replace the struct css_set embedded in task_struct with a pointer; all tasks
that have the same set of memberships across all hierarchies will share a
css_set object, and will be linked via their css_sets field to the "tasks"
list_head in the css_set.

Assuming that many tasks share the same cgroup assignments, this reduces
overall space usage and keeps the size of the task_struct down (three pointers
added to task_struct compared to a non-cgroups kernel, no matter how many
subsystems are registered).

[akpm@linux-foundation.org: fix a printk]
[akpm@linux-foundation.org: build fix]
Signed-off-by: default avatarPaul Menage <menage@google.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent a424316c
Loading
Loading
Loading
Loading
+13 −1
Original line number Diff line number Diff line
@@ -176,7 +176,9 @@ Control Groups extends the kernel as follows:
   subsystem state is something that's expected to happen frequently
   and in performance-critical code, whereas operations that require a
   task's actual cgroup assignments (in particular, moving between
   cgroups) are less common.
   cgroups) are less common. A linked list runs through the cg_list
   field of each task_struct using the css_set, anchored at
   css_set->tasks.

 - A cgroup hierarchy filesystem can be mounted  for browsing and
   manipulation from user space.
@@ -252,6 +254,16 @@ linear search to locate an appropriate existing css_set, so isn't
very efficient. A future version will use a hash table for better
performance.

To allow access from a cgroup to the css_sets (and hence tasks)
that comprise it, a set of cg_cgroup_link objects form a lattice;
each cg_cgroup_link is linked into a list of cg_cgroup_links for
a single cgroup on its cont_link_list field, and a list of
cg_cgroup_links for a single css_set on its cg_link_list.

Thus the set of tasks in a cgroup can be listed by iterating over
each css_set that references the cgroup, and sub-iterating over
each css_set's task set.

The use of a Linux virtual file system (vfs) to represent the
cgroup hierarchy provides for a familiar permission and name space
for cgroups, with a minimum of additional kernel code.
+79 −10
Original line number Diff line number Diff line
@@ -27,10 +27,19 @@ extern void cgroup_lock(void);
extern void cgroup_unlock(void);
extern void cgroup_fork(struct task_struct *p);
extern void cgroup_fork_callbacks(struct task_struct *p);
extern void cgroup_post_fork(struct task_struct *p);
extern void cgroup_exit(struct task_struct *p, int run_callbacks);

extern struct file_operations proc_cgroup_operations;

/* Define the enumeration of all cgroup subsystems */
#define SUBSYS(_x) _x ## _subsys_id,
enum cgroup_subsys_id {
#include <linux/cgroup_subsys.h>
	CGROUP_SUBSYS_COUNT
};
#undef SUBSYS

/* Per-subsystem/per-cgroup state maintained by the system. */
struct cgroup_subsys_state {
	/* The cgroup that this subsystem is attached to. Useful
@@ -97,6 +106,52 @@ struct cgroup {

	struct cgroupfs_root *root;
	struct cgroup *top_cgroup;

	/*
	 * List of cg_cgroup_links pointing at css_sets with
	 * tasks in this cgroup. Protected by css_set_lock
	 */
	struct list_head css_sets;
};

/* A css_set is a structure holding pointers to a set of
 * cgroup_subsys_state objects. This saves space in the task struct
 * object and speeds up fork()/exit(), since a single inc/dec and a
 * list_add()/del() can bump the reference count on the entire
 * cgroup set for a task.
 */

struct css_set {

	/* Reference count */
	struct kref ref;

	/*
	 * List running through all cgroup groups. Protected by
	 * css_set_lock
	 */
	struct list_head list;

	/*
	 * List running through all tasks using this cgroup
	 * group. Protected by css_set_lock
	 */
	struct list_head tasks;

	/*
	 * List of cg_cgroup_link objects on link chains from
	 * cgroups referenced from this css_set. Protected by
	 * css_set_lock
	 */
	struct list_head cg_links;

	/*
	 * Set of subsystem states, one for each subsystem. This array
	 * is immutable after creation apart from the init_css_set
	 * during subsystem registration (at boot time).
	 */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

};

/* struct cftype:
@@ -157,15 +212,7 @@ int cgroup_is_removed(const struct cgroup *cont);

int cgroup_path(const struct cgroup *cont, char *buf, int buflen);

int __cgroup_task_count(const struct cgroup *cont);
static inline int cgroup_task_count(const struct cgroup *cont)
{
	int task_count;
	rcu_read_lock();
	task_count = __cgroup_task_count(cont);
	rcu_read_unlock();
	return task_count;
}
int cgroup_task_count(const struct cgroup *cont);

/* Return true if the cgroup is a descendant of the current cgroup */
int cgroup_is_descendant(const struct cgroup *cont);
@@ -213,7 +260,7 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
static inline struct cgroup_subsys_state *task_subsys_state(
	struct task_struct *task, int subsys_id)
{
	return rcu_dereference(task->cgroups.subsys[subsys_id]);
	return rcu_dereference(task->cgroups->subsys[subsys_id]);
}

static inline struct cgroup* task_cgroup(struct task_struct *task,
@@ -226,6 +273,27 @@ int cgroup_path(const struct cgroup *cont, char *buf, int buflen);

int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);

/* A cgroup_iter should be treated as an opaque object */
struct cgroup_iter {
	struct list_head *cg_link;
	struct list_head *task;
};

/* To iterate across the tasks in a cgroup:
 *
 * 1) call cgroup_iter_start to intialize an iterator
 *
 * 2) call cgroup_iter_next() to retrieve member tasks until it
 *    returns NULL or until you want to end the iteration
 *
 * 3) call cgroup_iter_end() to destroy the iterator.
 */
void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it);
struct task_struct *cgroup_iter_next(struct cgroup *cont,
					struct cgroup_iter *it);
void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it);


#else /* !CONFIG_CGROUPS */

static inline int cgroup_init_early(void) { return 0; }
@@ -233,6 +301,7 @@ static inline int cgroup_init(void) { return 0; }
static inline void cgroup_init_smp(void) {}
static inline void cgroup_fork(struct task_struct *p) {}
static inline void cgroup_fork_callbacks(struct task_struct *p) {}
static inline void cgroup_post_fork(struct task_struct *p) {}
static inline void cgroup_exit(struct task_struct *p, int callbacks) {}

static inline void cgroup_lock(void) {}
+4 −29
Original line number Diff line number Diff line
@@ -894,34 +894,6 @@ struct sched_entity {
#endif
};

#ifdef CONFIG_CGROUPS

#define SUBSYS(_x) _x ## _subsys_id,
enum cgroup_subsys_id {
#include <linux/cgroup_subsys.h>
	CGROUP_SUBSYS_COUNT
};
#undef SUBSYS

/* A css_set is a structure holding pointers to a set of
 * cgroup_subsys_state objects.
 */

struct css_set {

	/* Set of subsystem states, one for each subsystem. NULL for
	 * subsystems that aren't part of this hierarchy. These
	 * pointers reduce the number of dereferences required to get
	 * from a task to its state for a given cgroup, but result
	 * in increased space usage if tasks are in wildly different
	 * groupings across different hierarchies. This array is
	 * immutable after creation */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

};

#endif /* CONFIG_CGROUPS */

struct task_struct {
	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
	void *stack;
@@ -1159,7 +1131,10 @@ struct task_struct {
	int cpuset_mem_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
	struct css_set cgroups;
	/* Control Group info protected by css_set_lock */
	struct css_set *cgroups;
	/* cg_list protected by css_set_lock and tsk->alloc_lock */
	struct list_head cg_list;
#endif
#ifdef CONFIG_FUTEX
	struct robust_list_head __user *robust_list;
+535 −114

File changed.

Preview size limit exceeded, changes collapsed.

+1 −0
Original line number Diff line number Diff line
@@ -1301,6 +1301,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
	spin_unlock(&current->sighand->siglock);
	write_unlock_irq(&tasklist_lock);
	proc_fork_connector(p);
	cgroup_post_fork(p);
	return p;

bad_fork_cleanup_namespaces: