cgroup: Merge branch 'memcg_event' into for-3.14 (edab9510) · Commits · e / devices / android_kernel_xiaomi_nabu

Documentation/cgroups/cgroups.txt

+0 −20

Original line number	Diff line number	Diff line
		@@ -24,7 +24,6 @@ CONTENTS:
		2.1 Basic Usage
		2.2 Attaching processes
		2.3 Mounting hierarchies by name
		2.4 Notification API
		3. Kernel API
		3.1 Overview
		3.2 Synchronization
		@@ -472,25 +471,6 @@ you give a subsystem a name.
		The name of the subsystem appears as part of the hierarchy description
		in /proc/mounts and /proc/<pid>/cgroups.

		2.4 Notification API
		--------------------

		There is a mechanism that allows userspace to get notifications about
		changes in the status of a cgroup.

		To register a new notification handler you need to:
		- create a file descriptor for event notification using eventfd(2);
		- open a control file to be monitored (e.g. memory.usage_in_bytes);
		- write "<event_fd> <control_fd> <args>" to cgroup.event_control.
		Interpretation of args is defined by control file implementation;

		eventfd will be woken up by control file implementation or when the
		cgroup is removed.

		To unregister a notification handler just close eventfd.

		NOTE: Support of notifications should be implemented for the control
		file. See documentation for the subsystem.

		3. Kernel API
		=============

include/linux/cgroup.h

+0 −24

Original line number	Diff line number	Diff line
		@@ -29,7 +29,6 @@ struct cgroup_subsys;
		struct inode;
		struct cgroup;
		struct css_id;
		struct eventfd_ctx;

		extern int cgroup_init_early(void);
		extern int cgroup_init(void);
		@@ -239,10 +238,6 @@ struct cgroup {
		struct rcu_head rcu_head;
		struct work_struct destroy_work;

		/* List of events which userspace want to receive */
		struct list_head event_list;
		spinlock_t event_list_lock;

		/* directory xattrs */
		struct simple_xattrs xattrs;
		};
		@@ -506,25 +501,6 @@ struct cftype {
		int (trigger)(struct cgroup_subsys_state css, unsigned int event);

		int (release)(struct inode inode, struct file *file);

		/*
		* register_event() callback will be used to add new userspace
		* waiter for changes related to the cftype. Implement it if
		* you want to provide this functionality. Use eventfd_signal()
		* on eventfd to send notification to userspace.
		*/
		int (register_event)(struct cgroup_subsys_state css,
		struct cftype cft, struct eventfd_ctx eventfd,
		const char *args);
		/*
		* unregister_event() callback will be called when userspace
		* closes the eventfd or when the cgroup is removed.
		* This callback must be implemented if you want to provide
		* notification functionality.
		*/
		void (unregister_event)(struct cgroup_subsys_state css,
		struct cftype *cft,
		struct eventfd_ctx *eventfd);
		};

		/*

include/linux/vmpressure.h

+3 −5

Original line number	Diff line number	Diff line
		@@ -7,6 +7,7 @@
		#include <linux/gfp.h>
		#include <linux/types.h>
		#include <linux/cgroup.h>
		#include <linux/eventfd.h>

		struct vmpressure {
		unsigned long scanned;
		@@ -33,13 +34,10 @@ extern void vmpressure_init(struct vmpressure *vmpr);
		extern void vmpressure_cleanup(struct vmpressure *vmpr);
		/* Accessors translating between mem_cgroup, css and vmpressure pointers. */
		extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
		extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
		extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
		extern int vmpressure_register_event(struct cgroup_subsys_state *css,
		struct cftype *cft,
		extern int vmpressure_register_event(struct mem_cgroup *memcg,
		struct eventfd_ctx *eventfd,
		const char *args);
		extern void vmpressure_unregister_event(struct cgroup_subsys_state *css,
		struct cftype *cft,
		extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
		struct eventfd_ctx *eventfd);
		#else
		static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,

init/Kconfig

+1 −2

Original line number	Diff line number	Diff line
		@@ -848,7 +848,6 @@ config NUMA_BALANCING

		menuconfig CGROUPS
		boolean "Control Group support"
		depends on EVENTFD
		help
		This option adds support for grouping sets of processes together, for
		use with process control subsystems such as Cpusets, CFS, memory
		@@ -915,6 +914,7 @@ config MEMCG
		bool "Memory Resource Controller for Control Groups"
		depends on RESOURCE_COUNTERS
		select MM_OWNER
		select EVENTFD
		help
		Provides a memory resource controller that manages both anonymous
		memory and page cache. (See Documentation/cgroups/memory.txt)
		@@ -1154,7 +1154,6 @@ config UIDGID_STRICT_TYPE_CHECKS

		config SCHED_AUTOGROUP
		bool "Automatic process group scheduling"
		select EVENTFD
		select CGROUPS
		select CGROUP_SCHED
		select FAIR_GROUP_SCHED

kernel/cgroup.c

+0 −259

Original line number	Diff line number	Diff line
		@@ -56,11 +56,8 @@
		#include <linux/pid_namespace.h>
		#include <linux/idr.h>
		#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
		#include <linux/eventfd.h>
		#include <linux/poll.h>
		#include <linux/flex_array.h> /* used in cgroup_attach_task */
		#include <linux/kthread.h>
		#include <linux/file.h>

		#include <linux/atomic.h>

		@@ -132,36 +129,6 @@ struct cfent {
		struct simple_xattrs xattrs;
		};

		/*
		* A cgroup_event describes one notification that userspace has asked
		* to receive.
		*/
		struct cgroup_event {
		/* css the event belongs to */
		struct cgroup_subsys_state *css;
		/* control file the event is associated with */
		struct cftype *cft;
		/* eventfd used to signal userspace about the event */
		struct eventfd_ctx *eventfd;
		/* link in the list of events kept by the cgroup */
		struct list_head list;
		/*
		* Everything below is needed to unregister the event when
		* userspace closes the eventfd.
		*/
		poll_table pt;
		wait_queue_head_t *wqh;
		wait_queue_t wait;
		struct work_struct remove;
		};

		/* The list of hierarchy roots */

		static LIST_HEAD(cgroup_roots);
		@@ -1351,8 +1318,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
		INIT_LIST_HEAD(&cgrp->pidlists);
		mutex_init(&cgrp->pidlist_mutex);
		cgrp->dummy_css.cgroup = cgrp;
		INIT_LIST_HEAD(&cgrp->event_list);
		spin_lock_init(&cgrp->event_list_lock);
		simple_xattrs_init(&cgrp->xattrs);
		}

		@@ -2626,16 +2591,6 @@ static const struct inode_operations cgroup_dir_inode_operations = {
		.removexattr = cgroup_removexattr,
		};

		/*
		* Check if a file is a control file
		*/
		static inline struct cftype __file_cft(struct file file)
		{
		if (file_inode(file)->i_fop != &cgroup_file_operations)
		return ERR_PTR(-EINVAL);
		return __d_cft(file->f_dentry);
		}

		static int cgroup_create_file(struct dentry *dentry, umode_t mode,
		struct super_block *sb)
		{
		@@ -3915,202 +3870,6 @@ static void cgroup_dput(struct cgroup *cgrp)
		deactivate_super(sb);
		}

		/*
		* Work item handler that tears down one cgroup_event: it detaches the
		* event from the eventfd wait queue, unregisters it from the control
		* file and releases everything the event holds.
		*
		* Runs from a workqueue.
		*/
		static void cgroup_event_remove(struct work_struct *work)
		{
		struct cgroup_event *ev;
		struct cgroup_subsys_state *owner;

		ev = container_of(work, struct cgroup_event, remove);
		owner = ev->css;

		remove_wait_queue(ev->wqh, &ev->wait);
		ev->cft->unregister_event(owner, ev->cft, ev->eventfd);

		/* Signal userspace so it learns the event is going away. */
		eventfd_signal(ev->eventfd, 1);
		eventfd_ctx_put(ev->eventfd);

		/* @owner was cached above, so it stays usable after kfree(). */
		kfree(ev);
		css_put(owner);
		}

		/*
		* Wait-queue callback for the eventfd; it only acts on POLLHUP, which
		* is raised when the user closes the eventfd.
		*
		* Called with wqh->lock held and interrupts disabled.
		*/
		static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
		int sync, void *key)
		{
		struct cgroup_event *ev = container_of(wait, struct cgroup_event, wait);
		struct cgroup *owner = ev->css->cgroup;
		unsigned long poll_flags = (unsigned long)key;

		/* Anything other than a hang-up needs no action. */
		if (!(poll_flags & POLLHUP))
		return 0;

		/*
		* An event that was already detached at cgroup removal is left
		* alone; the other side cleans it up.
		*
		* Event freeing cannot race with us: the other side needs
		* wqh->lock via remove_wait_queue(), and we are holding it.
		*/
		spin_lock(&owner->event_list_lock);
		if (!list_empty(&ev->list)) {
		list_del_init(&ev->list);
		/*
		* This is atomic context and cgroup_event_remove() may sleep,
		* so hand the teardown over to a workqueue.
		*/
		schedule_work(&ev->remove);
		}
		spin_unlock(&owner->event_list_lock);

		return 0;
		}

		/*
		* poll_table queueing callback: remembers the wait queue head in the
		* event that embeds @pt and adds the event's wait entry to that queue.
		*/
		static void cgroup_event_ptable_queue_proc(struct file *file,
		wait_queue_head_t *wqh, poll_table *pt)
		{
		struct cgroup_event *event = container_of(pt,
		struct cgroup_event, pt);

		event->wqh = wqh;
		add_wait_queue(wqh, &event->wait);
		}

		/*
		* Parse input and register new cgroup event handler.
		*
		* Input must be in format '<event_fd> <control_fd> <args>'.
		* Interpretation of args is defined by control file implementation.
		*/
		static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
		struct cftype cft, const char buffer)
		{
		struct cgroup *cgrp = dummy_css->cgroup;
		struct cgroup_event *event;
		struct cgroup_subsys_state *cfile_css;
		unsigned int efd, cfd;
		struct fd efile;
		struct fd cfile;
		char *endp;
		int ret;

		efd = simple_strtoul(buffer, &endp, 10);
		if (*endp != ' ')
		return -EINVAL;
		buffer = endp + 1;

		cfd = simple_strtoul(buffer, &endp, 10);
		if ((endp != ' ') && (endp != '\0'))
		return -EINVAL;
		buffer = endp + 1;

		event = kzalloc(sizeof(*event), GFP_KERNEL);
		if (!event)
		return -ENOMEM;

		INIT_LIST_HEAD(&event->list);
		init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
		init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
		INIT_WORK(&event->remove, cgroup_event_remove);

		efile = fdget(efd);
		if (!efile.file) {
		ret = -EBADF;
		goto out_kfree;
		}

		event->eventfd = eventfd_ctx_fileget(efile.file);
		if (IS_ERR(event->eventfd)) {
		ret = PTR_ERR(event->eventfd);
		goto out_put_efile;
		}

		cfile = fdget(cfd);
		if (!cfile.file) {
		ret = -EBADF;
		goto out_put_eventfd;
		}

		/* the process need read permission on control file */
		/* AV: shouldn't we check that it's been opened for read instead? */
		ret = inode_permission(file_inode(cfile.file), MAY_READ);
		if (ret < 0)
		goto out_put_cfile;

		event->cft = __file_cft(cfile.file);
		if (IS_ERR(event->cft)) {
		ret = PTR_ERR(event->cft);
		goto out_put_cfile;
		}

		if (!event->cft->ss) {
		ret = -EBADF;
		goto out_put_cfile;
		}

		/*
		* Determine the css of @cfile, verify it belongs to the same
		* cgroup as cgroup.event_control, and associate @event with it.
		* Remaining events are automatically removed on cgroup destruction
		* but the removal is asynchronous, so take an extra ref.
		*/
		rcu_read_lock();

		ret = -EINVAL;
		event->css = cgroup_css(cgrp, event->cft->ss);
		cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
		if (event->css && event->css == cfile_css && css_tryget(event->css))
		ret = 0;

		rcu_read_unlock();
		if (ret)
		goto out_put_cfile;

		if (!event->cft->register_event \|\| !event->cft->unregister_event) {
		ret = -EINVAL;
		goto out_put_css;
		}

		ret = event->cft->register_event(event->css, event->cft,
		event->eventfd, buffer);
		if (ret)
		goto out_put_css;

		efile.file->f_op->poll(efile.file, &event->pt);

		spin_lock(&cgrp->event_list_lock);
		list_add(&event->list, &cgrp->event_list);
		spin_unlock(&cgrp->event_list_lock);

		fdput(cfile);
		fdput(efile);

		return 0;

		out_put_css:
		css_put(event->css);
		out_put_cfile:
		fdput(cfile);
		out_put_eventfd:
		eventfd_ctx_put(event->eventfd);
		out_put_efile:
		fdput(efile);
		out_kfree:
		kfree(event);

		return ret;
		}

		static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
		struct cftype *cft)
		{
		@@ -4135,11 +3894,6 @@ static struct cftype cgroup_base_files[] = {
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO \| S_IWUSR,
		},
		{
		.name = "cgroup.event_control",
		.write_string = cgroup_write_event_control,
		.mode = S_IWUGO,
		},
		{
		.name = "cgroup.clone_children",
		.flags = CFTYPE_INSANE,
		@@ -4610,7 +4364,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
		__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
		{
		struct dentry *d = cgrp->dentry;
		struct cgroup_event event, tmp;
		struct cgroup_subsys *ss;
		struct cgroup *child;
		bool empty;
		@@ -4685,18 +4438,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
		dget(d);
		cgroup_d_remove_dir(d);

		/*
		* Unregister events and notify userspace.
		* Notify userspace about cgroup removing only after rmdir of cgroup
		* directory to avoid race between userspace and kernelspace.
		*/
		spin_lock(&cgrp->event_list_lock);
		list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del_init(&event->list);
		schedule_work(&event->remove);
		}
		spin_unlock(&cgrp->event_list_lock);

		return 0;
		};