Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit edab9510 authored by Tejun Heo
Browse files

cgroup: Merge branch 'memcg_event' into for-3.14



Merge v3.12 based patch series to move cgroup_event implementation to
memcg into for-3.14.  The following two commits cause a conflict in
kernel/cgroup.c

  2ff2a7d0 ("cgroup: kill css_id")
  79bd9814 ("cgroup, memcg: move cgroup_event implementation to memcg")

Each patch removes a struct definition from kernel/cgroup.c.  As the
two are adjacent, they cause a context conflict.  Easily resolved by
removing both structs.

Signed-off-by: Tejun Heo <tj@kernel.org>
parents e5fca243 b36824c7
Loading
Loading
Loading
Loading
+0 −20
Original line number Original line Diff line number Diff line
@@ -24,7 +24,6 @@ CONTENTS:
  2.1 Basic Usage
  2.1 Basic Usage
  2.2 Attaching processes
  2.2 Attaching processes
  2.3 Mounting hierarchies by name
  2.3 Mounting hierarchies by name
  2.4 Notification API
3. Kernel API
3. Kernel API
  3.1 Overview
  3.1 Overview
  3.2 Synchronization
  3.2 Synchronization
@@ -472,25 +471,6 @@ you give a subsystem a name.
The name of the subsystem appears as part of the hierarchy description
The name of the subsystem appears as part of the hierarchy description
in /proc/mounts and /proc/<pid>/cgroups.
in /proc/mounts and /proc/<pid>/cgroups.


2.4 Notification API
--------------------

There is mechanism which allows to get notifications about changing
status of a cgroup.

To register a new notification handler you need to:
 - create a file descriptor for event notification using eventfd(2);
 - open a control file to be monitored (e.g. memory.usage_in_bytes);
 - write "<event_fd> <control_fd> <args>" to cgroup.event_control.
   Interpretation of args is defined by control file implementation;

eventfd will be woken up by control file implementation or when the
cgroup is removed.

To unregister a notification handler just close eventfd.

NOTE: Support of notifications should be implemented for the control
file. See documentation for the subsystem.


3. Kernel API
3. Kernel API
=============
=============
+0 −24
Original line number Original line Diff line number Diff line
@@ -29,7 +29,6 @@ struct cgroup_subsys;
struct inode;
struct inode;
struct cgroup;
struct cgroup;
struct css_id;
struct css_id;
struct eventfd_ctx;


extern int cgroup_init_early(void);
extern int cgroup_init_early(void);
extern int cgroup_init(void);
extern int cgroup_init(void);
@@ -239,10 +238,6 @@ struct cgroup {
	struct rcu_head rcu_head;
	struct rcu_head rcu_head;
	struct work_struct destroy_work;
	struct work_struct destroy_work;


	/* List of events which userspace want to receive */
	struct list_head event_list;
	spinlock_t event_list_lock;

	/* directory xattrs */
	/* directory xattrs */
	struct simple_xattrs xattrs;
	struct simple_xattrs xattrs;
};
};
@@ -506,25 +501,6 @@ struct cftype {
	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);


	int (*release)(struct inode *inode, struct file *file);
	int (*release)(struct inode *inode, struct file *file);

	/*
	 * register_event() callback will be used to add new userspace
	 * waiter for changes related to the cftype. Implement it if
	 * you want to provide this functionality. Use eventfd_signal()
	 * on eventfd to send notification to userspace.
	 */
	int (*register_event)(struct cgroup_subsys_state *css,
			      struct cftype *cft, struct eventfd_ctx *eventfd,
			      const char *args);
	/*
	 * unregister_event() callback will be called when userspace
	 * closes the eventfd or on cgroup removing.
	 * This callback must be implemented, if you want provide
	 * notification functionality.
	 */
	void (*unregister_event)(struct cgroup_subsys_state *css,
				 struct cftype *cft,
				 struct eventfd_ctx *eventfd);
};
};


/*
/*
+3 −5
Original line number Original line Diff line number Diff line
@@ -7,6 +7,7 @@
#include <linux/gfp.h>
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/types.h>
#include <linux/cgroup.h>
#include <linux/cgroup.h>
#include <linux/eventfd.h>


struct vmpressure {
struct vmpressure {
	unsigned long scanned;
	unsigned long scanned;
@@ -33,13 +34,10 @@ extern void vmpressure_init(struct vmpressure *vmpr);
extern void vmpressure_cleanup(struct vmpressure *vmpr);
extern void vmpressure_cleanup(struct vmpressure *vmpr);
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
extern int vmpressure_register_event(struct mem_cgroup *memcg,
extern int vmpressure_register_event(struct cgroup_subsys_state *css,
				     struct cftype *cft,
				     struct eventfd_ctx *eventfd,
				     struct eventfd_ctx *eventfd,
				     const char *args);
				     const char *args);
extern void vmpressure_unregister_event(struct cgroup_subsys_state *css,
extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
					struct cftype *cft,
					struct eventfd_ctx *eventfd);
					struct eventfd_ctx *eventfd);
#else
#else
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
+1 −2
Original line number Original line Diff line number Diff line
@@ -848,7 +848,6 @@ config NUMA_BALANCING


menuconfig CGROUPS
menuconfig CGROUPS
	boolean "Control Group support"
	boolean "Control Group support"
	depends on EVENTFD
	help
	help
	  This option adds support for grouping sets of processes together, for
	  This option adds support for grouping sets of processes together, for
	  use with process control subsystems such as Cpusets, CFS, memory
	  use with process control subsystems such as Cpusets, CFS, memory
@@ -915,6 +914,7 @@ config MEMCG
	bool "Memory Resource Controller for Control Groups"
	bool "Memory Resource Controller for Control Groups"
	depends on RESOURCE_COUNTERS
	depends on RESOURCE_COUNTERS
	select MM_OWNER
	select MM_OWNER
	select EVENTFD
	help
	help
	  Provides a memory resource controller that manages both anonymous
	  Provides a memory resource controller that manages both anonymous
	  memory and page cache. (See Documentation/cgroups/memory.txt)
	  memory and page cache. (See Documentation/cgroups/memory.txt)
@@ -1154,7 +1154,6 @@ config UIDGID_STRICT_TYPE_CHECKS


config SCHED_AUTOGROUP
config SCHED_AUTOGROUP
	bool "Automatic process group scheduling"
	bool "Automatic process group scheduling"
	select EVENTFD
	select CGROUPS
	select CGROUPS
	select CGROUP_SCHED
	select CGROUP_SCHED
	select FAIR_GROUP_SCHED
	select FAIR_GROUP_SCHED
+0 −259
Original line number Original line Diff line number Diff line
@@ -56,11 +56,8 @@
#include <linux/pid_namespace.h>
#include <linux/pid_namespace.h>
#include <linux/idr.h>
#include <linux/idr.h>
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
#include <linux/eventfd.h>
#include <linux/poll.h>
#include <linux/flex_array.h> /* used in cgroup_attach_task */
#include <linux/flex_array.h> /* used in cgroup_attach_task */
#include <linux/kthread.h>
#include <linux/kthread.h>
#include <linux/file.h>


#include <linux/atomic.h>
#include <linux/atomic.h>


@@ -132,36 +129,6 @@ struct cfent {
	struct simple_xattrs		xattrs;
	struct simple_xattrs		xattrs;
};
};


/*
 * cgroup_event represents events which userspace want to receive.
 */
struct cgroup_event {
	/*
	 * css which the event belongs to.
	 */
	struct cgroup_subsys_state *css;
	/*
	 * Control file which the event associated.
	 */
	struct cftype *cft;
	/*
	 * eventfd to signal userspace about the event.
	 */
	struct eventfd_ctx *eventfd;
	/*
	 * Each of these stored in a list by the cgroup.
	 */
	struct list_head list;
	/*
	 * All fields below needed to unregister event when
	 * userspace closes eventfd.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};

/* The list of hierarchy roots */
/* The list of hierarchy roots */


static LIST_HEAD(cgroup_roots);
static LIST_HEAD(cgroup_roots);
@@ -1351,8 +1318,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
	INIT_LIST_HEAD(&cgrp->pidlists);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	mutex_init(&cgrp->pidlist_mutex);
	cgrp->dummy_css.cgroup = cgrp;
	cgrp->dummy_css.cgroup = cgrp;
	INIT_LIST_HEAD(&cgrp->event_list);
	spin_lock_init(&cgrp->event_list_lock);
	simple_xattrs_init(&cgrp->xattrs);
	simple_xattrs_init(&cgrp->xattrs);
}
}


@@ -2626,16 +2591,6 @@ static const struct inode_operations cgroup_dir_inode_operations = {
	.removexattr = cgroup_removexattr,
	.removexattr = cgroup_removexattr,
};
};


/*
 * Check if a file is a control file
 */
static inline struct cftype *__file_cft(struct file *file)
{
	if (file_inode(file)->i_fop != &cgroup_file_operations)
		return ERR_PTR(-EINVAL);
	return __d_cft(file->f_dentry);
}

static int cgroup_create_file(struct dentry *dentry, umode_t mode,
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
				struct super_block *sb)
				struct super_block *sb)
{
{
@@ -3915,202 +3870,6 @@ static void cgroup_dput(struct cgroup *cgrp)
	deactivate_super(sb);
	deactivate_super(sb);
}
}


/*
 * Unregister event and free resources.
 *
 * Gets called from workqueue.
 */
static void cgroup_event_remove(struct work_struct *work)
{
	struct cgroup_event *event = container_of(work, struct cgroup_event,
			remove);
	struct cgroup_subsys_state *css = event->css;

	remove_wait_queue(event->wqh, &event->wait);

	event->cft->unregister_event(css, event->cft, event->eventfd);

	/* Notify userspace the event is going away. */
	eventfd_signal(event->eventfd, 1);

	eventfd_ctx_put(event->eventfd);
	kfree(event);
	css_put(css);
}

/*
 * Gets called on POLLHUP on eventfd when user closes it.
 *
 * Called with wqh->lock held and interrupts disabled.
 */
static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
		int sync, void *key)
{
	struct cgroup_event *event = container_of(wait,
			struct cgroup_event, wait);
	struct cgroup *cgrp = event->css->cgroup;
	unsigned long flags = (unsigned long)key;

	if (flags & POLLHUP) {
		/*
		 * If the event has been detached at cgroup removal, we
		 * can simply return knowing the other side will cleanup
		 * for us.
		 *
		 * We can't race against event freeing since the other
		 * side will require wqh->lock via remove_wait_queue(),
		 * which we hold.
		 */
		spin_lock(&cgrp->event_list_lock);
		if (!list_empty(&event->list)) {
			list_del_init(&event->list);
			/*
			 * We are in atomic context, but cgroup_event_remove()
			 * may sleep, so we have to call it in workqueue.
			 */
			schedule_work(&event->remove);
		}
		spin_unlock(&cgrp->event_list_lock);
	}

	return 0;
}

static void cgroup_event_ptable_queue_proc(struct file *file,
		wait_queue_head_t *wqh, poll_table *pt)
{
	struct cgroup_event *event = container_of(pt,
			struct cgroup_event, pt);

	event->wqh = wqh;
	add_wait_queue(wqh, &event->wait);
}

/*
 * Parse input and register new cgroup event handler.
 *
 * Input must be in format '<event_fd> <control_fd> <args>'.
 * Interpretation of args is defined by control file implementation.
 */
static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
				      struct cftype *cft, const char *buffer)
{
	struct cgroup *cgrp = dummy_css->cgroup;
	struct cgroup_event *event;
	struct cgroup_subsys_state *cfile_css;
	unsigned int efd, cfd;
	struct fd efile;
	struct fd cfile;
	char *endp;
	int ret;

	efd = simple_strtoul(buffer, &endp, 10);
	if (*endp != ' ')
		return -EINVAL;
	buffer = endp + 1;

	cfd = simple_strtoul(buffer, &endp, 10);
	if ((*endp != ' ') && (*endp != '\0'))
		return -EINVAL;
	buffer = endp + 1;

	event = kzalloc(sizeof(*event), GFP_KERNEL);
	if (!event)
		return -ENOMEM;

	INIT_LIST_HEAD(&event->list);
	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
	INIT_WORK(&event->remove, cgroup_event_remove);

	efile = fdget(efd);
	if (!efile.file) {
		ret = -EBADF;
		goto out_kfree;
	}

	event->eventfd = eventfd_ctx_fileget(efile.file);
	if (IS_ERR(event->eventfd)) {
		ret = PTR_ERR(event->eventfd);
		goto out_put_efile;
	}

	cfile = fdget(cfd);
	if (!cfile.file) {
		ret = -EBADF;
		goto out_put_eventfd;
	}

	/* the process need read permission on control file */
	/* AV: shouldn't we check that it's been opened for read instead? */
	ret = inode_permission(file_inode(cfile.file), MAY_READ);
	if (ret < 0)
		goto out_put_cfile;

	event->cft = __file_cft(cfile.file);
	if (IS_ERR(event->cft)) {
		ret = PTR_ERR(event->cft);
		goto out_put_cfile;
	}

	if (!event->cft->ss) {
		ret = -EBADF;
		goto out_put_cfile;
	}

	/*
	 * Determine the css of @cfile, verify it belongs to the same
	 * cgroup as cgroup.event_control, and associate @event with it.
	 * Remaining events are automatically removed on cgroup destruction
	 * but the removal is asynchronous, so take an extra ref.
	 */
	rcu_read_lock();

	ret = -EINVAL;
	event->css = cgroup_css(cgrp, event->cft->ss);
	cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
	if (event->css && event->css == cfile_css && css_tryget(event->css))
		ret = 0;

	rcu_read_unlock();
	if (ret)
		goto out_put_cfile;

	if (!event->cft->register_event || !event->cft->unregister_event) {
		ret = -EINVAL;
		goto out_put_css;
	}

	ret = event->cft->register_event(event->css, event->cft,
			event->eventfd, buffer);
	if (ret)
		goto out_put_css;

	efile.file->f_op->poll(efile.file, &event->pt);

	spin_lock(&cgrp->event_list_lock);
	list_add(&event->list, &cgrp->event_list);
	spin_unlock(&cgrp->event_list_lock);

	fdput(cfile);
	fdput(efile);

	return 0;

out_put_css:
	css_put(event->css);
out_put_cfile:
	fdput(cfile);
out_put_eventfd:
	eventfd_ctx_put(event->eventfd);
out_put_efile:
	fdput(efile);
out_kfree:
	kfree(event);

	return ret;
}

static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
				      struct cftype *cft)
				      struct cftype *cft)
{
{
@@ -4135,11 +3894,6 @@ static struct cftype cgroup_base_files[] = {
		.release = cgroup_pidlist_release,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
		.mode = S_IRUGO | S_IWUSR,
	},
	},
	{
		.name = "cgroup.event_control",
		.write_string = cgroup_write_event_control,
		.mode = S_IWUGO,
	},
	{
	{
		.name = "cgroup.clone_children",
		.name = "cgroup.clone_children",
		.flags = CFTYPE_INSANE,
		.flags = CFTYPE_INSANE,
@@ -4610,7 +4364,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{
{
	struct dentry *d = cgrp->dentry;
	struct dentry *d = cgrp->dentry;
	struct cgroup_event *event, *tmp;
	struct cgroup_subsys *ss;
	struct cgroup_subsys *ss;
	struct cgroup *child;
	struct cgroup *child;
	bool empty;
	bool empty;
@@ -4685,18 +4438,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
	dget(d);
	dget(d);
	cgroup_d_remove_dir(d);
	cgroup_d_remove_dir(d);


	/*
	 * Unregister events and notify userspace.
	 * Notify userspace about cgroup removing only after rmdir of cgroup
	 * directory to avoid race between userspace and kernelspace.
	 */
	spin_lock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del_init(&event->list);
		schedule_work(&event->remove);
	}
	spin_unlock(&cgrp->event_list_lock);

	return 0;
	return 0;
};
};


Loading