Merge branch 'cgroup-rmdir-updates' into cgroup/for-3.8 (1db1e31b) · Commits · e / devices / android_kernel_fairphone_FP5

block/blk-cgroup.c

+1 −2

Original line number	Diff line number	Diff line
		@@ -600,7 +600,7 @@ struct cftype blkcg_files[] = {
		*
		* This is the blkcg counterpart of ioc_release_fn().
		*/
		static int blkcg_pre_destroy(struct cgroup *cgroup)
		static void blkcg_pre_destroy(struct cgroup *cgroup)
		{
		struct blkcg *blkcg = cgroup_to_blkcg(cgroup);

		@@ -622,7 +622,6 @@ static int blkcg_pre_destroy(struct cgroup *cgroup)
		}

		spin_unlock_irq(&blkcg->lock);
		return 0;
		}

		static void blkcg_destroy(struct cgroup *cgroup)

include/linux/cgroup.h

+1 −40

Original line number	Diff line number	Diff line
		@@ -81,8 +81,6 @@ struct cgroup_subsys_state {
		/* bits in struct cgroup_subsys_state flags field */
		enum {
		CSS_ROOT, /* This CSS is the root of the subsystem */
		CSS_REMOVED, /* This CSS is dead */
		CSS_CLEAR_CSS_REFS, /* @ss->__DEPRECATED_clear_css_refs */
		};

		/* Caller must verify that the css is not for root cgroup */
		@@ -105,11 +103,6 @@ static inline void css_get(struct cgroup_subsys_state *css)
		__css_get(css, 1);
		}

		/* Report whether @css has been marked dead, i.e. CSS_REMOVED is set in css->flags. */
		static inline bool css_is_removed(struct cgroup_subsys_state *css)
		{
		return test_bit(CSS_REMOVED, &css->flags);
		}

		/*
		* Call css_tryget() to take a reference on a css if your existing
		* (known-valid) reference isn't already ref-counted. Returns false if
		@@ -147,10 +140,6 @@ enum {
		CGRP_RELEASABLE,
		/* Control Group requires release notifications to userspace */
		CGRP_NOTIFY_ON_RELEASE,
		/*
		* A thread in rmdir() is waiting for this cgroup.
		*/
		CGRP_WAIT_ON_RMDIR,
		/*
		* Clone cgroup values when creating a new child cgroup
		*/
		@@ -420,23 +409,6 @@ int cgroup_task_count(const struct cgroup *cgrp);
		/*
		* Return true if cgrp is a descendant of the task's cgroup. Both the
		* cgroup and the task are passed by pointer.
		*/
		int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);

		/*
		* When the subsys has to access css and may add permanent refcnt to css,
		* it should take care of racy conditions with rmdir(). The following set
		* of functions is for stopping/restarting rmdir if necessary.
		* Because these will call css_get/put, "css" should be alive css.
		*
		* cgroup_exclude_rmdir();
		* ...do some jobs which may access arbitrary empty cgroup
		* cgroup_release_and_wakeup_rmdir();
		*
		* When someone removes a cgroup while cgroup_exclude_rmdir() holds it,
		* it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up.
		*/

		void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
		void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);

		/*
		* Control Group taskset, used to pass around set of tasks to cgroup_subsys
		* methods.
		@@ -466,7 +438,7 @@ int cgroup_taskset_size(struct cgroup_taskset *tset);

		struct cgroup_subsys {
		struct cgroup_subsys_state (create)(struct cgroup *cgrp);
		int (pre_destroy)(struct cgroup cgrp);
		void (pre_destroy)(struct cgroup cgrp);
		void (destroy)(struct cgroup cgrp);
		int (can_attach)(struct cgroup cgrp, struct cgroup_taskset *tset);
		void (cancel_attach)(struct cgroup cgrp, struct cgroup_taskset *tset);
		@@ -487,17 +459,6 @@ struct cgroup_subsys {
		*/
		bool use_id;

		/*
		* If %true, cgroup removal will try to clear css refs by retrying
		* ss->pre_destroy() until there's no css ref left. This behavior
		* is strictly for backward compatibility and will be removed as
		* soon as the current user (memcg) is updated.
		*
		* If %false, ss->pre_destroy() can't fail and cgroup removal won't
		* wait for css refs to drop to zero before proceeding.
		*/
		bool __DEPRECATED_clear_css_refs;

		/*
		* If %false, this subsystem is properly hierarchical -
		* configuration, resource accounting and restriction on a parent

kernel/cgroup.c

+61 −195

Original line number	Diff line number	Diff line
		@@ -171,8 +171,8 @@ struct css_id {
		* The css to which this ID points. This pointer is set to valid value
		* after cgroup is populated. If cgroup is removed, this will be NULL.
		* This pointer is expected to be RCU-safe because destroy()
		* is called after synchronize_rcu(). But for safe use, css_is_removed()
		* css_tryget() should be used for avoiding race.
		* is called after synchronize_rcu(). But for safe use, css_tryget()
		* should be used for avoiding race.
		*/
		struct cgroup_subsys_state __rcu *css;
		/*
		@@ -854,30 +854,6 @@ static struct inode cgroup_new_inode(umode_t mode, struct super_block sb)
		return inode;
		}

		/*
		* Call subsys's pre_destroy handler.
		* This is called before css refcnt check.
		*
		* Walks the subsystems of @cgrp->root and invokes each ->pre_destroy()
		* that is set, stopping at the first one that returns nonzero.
		*
		* Returns 0 if no handler reported failure, otherwise the nonzero value
		* returned by the handler that failed.
		*/
		static int cgroup_call_pre_destroy(struct cgroup *cgrp)
		{
		struct cgroup_subsys *ss;
		int ret = 0;

		for_each_subsys(cgrp->root, ss) {
		/* subsystems without a ->pre_destroy() handler are skipped */
		if (!ss->pre_destroy)
		continue;

		ret = ss->pre_destroy(cgrp);
		if (ret) {
		/* ->pre_destroy() failure is being deprecated */
		/* warn once if the failing subsystem has not set __DEPRECATED_clear_css_refs */
		WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs);
		break;
		}
		}

		return ret;
		}

		static void cgroup_diput(struct dentry dentry, struct inode inode)
		{
		/* is dentry a directory ? if so, kfree() associated cgroup */
		@@ -1014,33 +990,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
		remove_dir(dentry);
		}

		/*
		* A queue for waiters to do rmdir() cgroup. A task will sleep when
		* cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
		* reference to css->refcnt. In general, this refcnt is expected to go down
		* to zero soon.
		*
		* CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
		*/
		static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);

		/*
		* If CGRP_WAIT_ON_RMDIR is set in @cgrp->flags, clear it and wake up
		* everything sleeping on cgroup_rmdir_waitq. Does nothing when the flag
		* is not set.
		*/
		static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
		{
		if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
		wake_up_all(&cgroup_rmdir_waitq);
		}

		/* Take a reference on @css with css_get(). */
		void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
		{
		css_get(css);
		}

		/*
		* Call cgroup_wakeup_rmdir_waiter() on @css->cgroup, then drop a
		* reference on @css with css_put().
		*/
		void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
		{
		/* css->cgroup is read before css_put() drops the reference */
		cgroup_wakeup_rmdir_waiter(css->cgroup);
		css_put(css);
		}

		/*
		* Call with cgroup_mutex held. Drops reference counts on modules, including
		* any duplicate ones that parse_cgroupfs_options took. If this function
		@@ -2026,12 +1975,6 @@ int cgroup_attach_task(struct cgroup cgrp, struct task_struct tsk)
		}

		synchronize_rcu();

		/*
		* wake up rmdir() waiter. the rmdir should fail since the cgroup
		* is no longer empty.
		*/
		cgroup_wakeup_rmdir_waiter(cgrp);
		out:
		if (retval) {
		for_each_subsys(root, ss) {
		@@ -2201,7 +2144,6 @@ static int cgroup_attach_proc(struct cgroup cgrp, struct task_struct leader)
		* step 5: success! and cleanup
		*/
		synchronize_rcu();
		cgroup_wakeup_rmdir_waiter(cgrp);
		retval = 0;
		out_put_css_set_refs:
		if (retval) {
		@@ -4023,14 +3965,12 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
		cgrp->subsys[ss->subsys_id] = css;

		/*
		* If !clear_css_refs, css holds an extra ref to @cgrp->dentry
		* which is put on the last css_put(). dput() requires process
		* context, which css_put() may be called without. @css->dput_work
		* will be used to invoke dput() asynchronously from css_put().
		* css holds an extra ref to @cgrp->dentry which is put on the last
		* css_put(). dput() requires process context, which css_put() may
		* be called without. @css->dput_work will be used to invoke
		* dput() asynchronously from css_put().
		*/
		INIT_WORK(&css->dput_work, css_dput_fn);
		if (ss->__DEPRECATED_clear_css_refs)
		set_bit(CSS_CLEAR_CSS_REFS, &css->flags);
		}

		/*
		@@ -4054,6 +3994,18 @@ static long cgroup_create(struct cgroup parent, struct dentry dentry,
		if (!cgrp)
		return -ENOMEM;

		/*
		* Only live parents can have children. Note that the liveliness
		* check isn't strictly necessary because cgroup_mkdir() and
		* cgroup_rmdir() are fully synchronized by i_mutex; however, do it
		* anyway so that locking is contained inside cgroup proper and we
		* don't get nasty surprises if we ever grow another caller.
		*/
		if (!cgroup_lock_live_group(parent)) {
		err = -ENODEV;
		goto err_free;
		}

		/* Grab a reference on the superblock so the hierarchy doesn't
		* get deleted on unmount if there are child cgroups. This
		* can be done outside cgroup_mutex, since the sb can't
		@@ -4061,8 +4013,6 @@ static long cgroup_create(struct cgroup parent, struct dentry dentry,
		* fs */
		atomic_inc(&sb->s_active);

		mutex_lock(&cgroup_mutex);

		init_cgroup_housekeeping(cgrp);

		cgrp->parent = parent;
		@@ -4110,9 +4060,8 @@ static long cgroup_create(struct cgroup parent, struct dentry dentry,
		if (err < 0)
		goto err_remove;

		/* If !clear_css_refs, each css holds a ref to the cgroup's dentry */
		/* each css holds a ref to the cgroup's dentry */
		for_each_subsys(root, ss)
		if (!ss->__DEPRECATED_clear_css_refs)
		dget(dentry);

		/* The cgroup directory was pre-locked for us */
		@@ -4144,7 +4093,7 @@ static long cgroup_create(struct cgroup parent, struct dentry dentry,

		/* Release the reference count that we took on the superblock */
		deactivate_super(sb);

		err_free:
		kfree(cgrp);
		return err;
		}
		@@ -4198,71 +4147,6 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
		return 0;
		}

		/*
		 * Atomically mark all (or else none) of the cgroup's CSS objects as
		 * CSS_REMOVED. Return true on success, or false if the cgroup has
		 * busy subsystems. Call with cgroup_mutex held.
		 *
		 * Depending on whether a subsys has __DEPRECATED_clear_css_refs set or
		 * not, cgroup removal behaves differently.
		 *
		 * If clear is set, css refcnt for the subsystem should be zero before
		 * cgroup removal can be committed. This is implemented by
		 * CGRP_WAIT_ON_RMDIR and retry logic around ->pre_destroy(), which may be
		 * called multiple times until all css refcnts reach zero and is allowed to
		 * veto removal on any invocation. This behavior is deprecated and will be
		 * removed as soon as the existing user (memcg) is updated.
		 *
		 * If clear is not set, each css holds an extra reference to the cgroup's
		 * dentry and cgroup removal proceeds regardless of css refs.
		 * ->pre_destroy() will be called at least once and is not allowed to fail.
		 * On the last put of each css, whenever that may be, the extra dentry ref
		 * is put so that dentry destruction happens only after all css's are
		 * released.
		 */
		static int cgroup_clear_css_refs(struct cgroup *cgrp)
		{
			struct cgroup_subsys *ss;
			unsigned long flags;
			bool failed = false;

			/* Both passes below run with local interrupts disabled. */
			local_irq_save(flags);

			/*
			 * Block new css_tryget() by deactivating refcnt. If all refcnts
			 * for subsystems w/ clear_css_refs set were 1 at the moment of
			 * deactivation, we succeeded.
			 */
			for_each_subsys(cgrp->root, ss) {
				struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

				WARN_ON(atomic_read(&css->refcnt) < 0);
				atomic_add(CSS_DEACT_BIAS, &css->refcnt);

				/* Accumulate failure; the loop still deactivates every css. */
				if (ss->__DEPRECATED_clear_css_refs)
					failed |= css_refcnt(css) != 1;
			}

			/*
			 * If succeeded, set REMOVED and put all the base refs; otherwise,
			 * restore refcnts to positive values. Either way, all in-progress
			 * css_tryget() will be released.
			 */
			for_each_subsys(cgrp->root, ss) {
				struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

				if (!failed) {
					set_bit(CSS_REMOVED, &css->flags);
					css_put(css);
				} else {
					/* Undo the bias added in the first pass. */
					atomic_sub(CSS_DEACT_BIAS, &css->refcnt);
				}
			}

			local_irq_restore(flags);
			return !failed;
		}

		static int cgroup_rmdir(struct inode unused_dir, struct dentry dentry)
		{
		struct cgroup *cgrp = dentry->d_fsdata;
		@@ -4270,70 +4154,52 @@ static int cgroup_rmdir(struct inode unused_dir, struct dentry dentry)
		struct cgroup *parent;
		DEFINE_WAIT(wait);
		struct cgroup_event event, tmp;
		int ret;
		struct cgroup_subsys *ss;

		/* the vfs holds both inode->i_mutex already */
		again:
		mutex_lock(&cgroup_mutex);
		if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
		}
		if (!list_empty(&cgrp->children)) {
		parent = cgrp->parent;
		if (atomic_read(&cgrp->count) \|\| !list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
		}
		mutex_unlock(&cgroup_mutex);

		/*
		* In general, subsystem has no css->refcnt after pre_destroy(). But
		* in racy cases, subsystem may have to get css->refcnt after
		* pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
		* make rmdir return -EBUSY too often. To avoid that, we use waitqueue
		* for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
		* and subsystem's reference count handling. Please see css_get/put
		* and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
		* Block new css_tryget() by deactivating refcnt and mark @cgrp
		* removed. This makes future css_tryget() and child creation
		* attempts fail thus maintaining the removal conditions verified
		* above.
		*/
		set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

		/*
		* Call pre_destroy handlers of subsys. Notify subsystems
		* that rmdir() request comes.
		*/
		ret = cgroup_call_pre_destroy(cgrp);
		if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
		WARN_ON(atomic_read(&css->refcnt) < 0);
		atomic_add(CSS_DEACT_BIAS, &css->refcnt);
		}
		set_bit(CGRP_REMOVED, &cgrp->flags);

		mutex_lock(&cgroup_mutex);
		parent = cgrp->parent;
		if (atomic_read(&cgrp->count) \|\| !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
		}
		prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
		if (!cgroup_clear_css_refs(cgrp)) {
		/*
		* Tell subsystems to initiate destruction. pre_destroy() should be
		* called with cgroup_mutex unlocked. See 3fa59dfbc3 ("cgroup: fix
		* potential deadlock in pre_destroy") for details.
		*/
		mutex_unlock(&cgroup_mutex);
		for_each_subsys(cgrp->root, ss)
		if (ss->pre_destroy)
		ss->pre_destroy(cgrp);
		mutex_lock(&cgroup_mutex);

		/*
		* Because someone may call cgroup_wakeup_rmdir_waiter() before
		* prepare_to_wait(), we need to check this flag.
		* Put all the base refs. Each css holds an extra reference to the
		* cgroup's dentry and cgroup removal proceeds regardless of css
		* refs. On the last put of each css, whenever that may be, the
		* extra dentry ref is put so that dentry destruction happens only
		* after all css's are released.
		*/
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
		schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
		return -EINTR;
		goto again;
		}
		/* No css_tryget() can succeed after here. */
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		for_each_subsys(cgrp->root, ss)
		css_put(cgrp->subsys[ss->subsys_id]);

		raw_spin_lock(&release_list_lock);
		set_bit(CGRP_REMOVED, &cgrp->flags);
		if (!list_empty(&cgrp->release_list))
		list_del_init(&cgrp->release_list);
		raw_spin_unlock(&release_list_lock);
		@@ -5041,15 +4907,17 @@ static void check_for_release(struct cgroup *cgrp)
		/* Caller must verify that the css is not for root cgroup */
		bool __css_tryget(struct cgroup_subsys_state *css)
		{
		do {
		int v = css_refcnt(css);
		while (true) {
		int t, v;

		if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v)
		v = css_refcnt(css);
		t = atomic_cmpxchg(&css->refcnt, v, v + 1);
		if (likely(t == v))
		return true;
		cpu_relax();
		} while (!test_bit(CSS_REMOVED, &css->flags));

		else if (t < 0)
		return false;
		cpu_relax();
		}
		}
		EXPORT_SYMBOL_GPL(__css_tryget);

		@@ -5068,10 +4936,8 @@ void __css_put(struct cgroup_subsys_state *css)
		set_bit(CGRP_RELEASABLE, &cgrp->flags);
		check_for_release(cgrp);
		}
		cgroup_wakeup_rmdir_waiter(cgrp);
		break;
		case 0:
		if (!test_bit(CSS_CLEAR_CSS_REFS, &css->flags))
		schedule_work(&css->dput_work);
		break;
		}

mm/hugetlb_cgroup.c

+2 −9

Original line number	Diff line number	Diff line
		@@ -155,18 +155,13 @@ static void hugetlb_cgroup_move_parent(int idx, struct cgroup *cgroup,
		* Force the hugetlb cgroup to empty the hugetlb resources by moving them to
		* the parent cgroup.
		*/
		static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
		static void hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
		{
		struct hstate *h;
		struct page *page;
		int ret = 0, idx = 0;
		int idx = 0;

		do {
		if (cgroup_task_count(cgroup) \|\|
		!list_empty(&cgroup->children)) {
		ret = -EBUSY;
		goto out;
		}
		for_each_hstate(h) {
		spin_lock(&hugetlb_lock);
		list_for_each_entry(page, &h->hugepage_activelist, lru)
		@@ -177,8 +172,6 @@ static int hugetlb_cgroup_pre_destroy(struct cgroup *cgroup)
		}
		cond_resched();
		} while (hugetlb_cgroup_have_usage(cgroup));
		out:
		return ret;
		}

		int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,

mm/memcontrol.c

+91 −90

Original line number	Diff line number	Diff line
		@@ -2337,7 +2337,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
		again:
		if (ptr) { /* css should be a valid one */
		memcg = *ptr;
		VM_BUG_ON(css_is_removed(&memcg->css));
		if (mem_cgroup_is_root(memcg))
		goto done;
		if (nr_pages == 1 && consume_stock(memcg))
		@@ -2477,9 +2476,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,

		/*
		* A helper function to get mem_cgroup from ID. must be called under
		* rcu_read_lock(). The caller must check css_is_removed() or some if
		* it's concern. (dropping refcnt from swap can be called against removed
		* memcg.)
		* rcu_read_lock(). The caller is responsible for calling css_tryget if
		* the mem_cgroup is used for charging. (dropping refcnt from swap can be
		* called against removed memcg.)
		*/
		static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
		{
		@@ -2676,13 +2675,6 @@ static int mem_cgroup_move_account(struct page *page,
		/* caller should have done css_get */
		pc->mem_cgroup = to;
		mem_cgroup_charge_statistics(to, anon, nr_pages);
		/*
		* We charge against "to" which may not have any tasks. Then, "to"
		* can be under rmdir(). But in current implementation, caller of
		* this function is just force_empty() and move charge, so it's
		* guaranteed that "to" is never removed. So, we don't check rmdir
		* status here.
		*/
		move_unlock_mem_cgroup(from, &flags);
		ret = 0;
		unlock:
		@@ -2696,10 +2688,27 @@ static int mem_cgroup_move_account(struct page *page,
		return ret;
		}

		/*
		* move charges to its parent.
		/**
		* mem_cgroup_move_parent - moves page to the parent group
		* @page: the page to move
		* @pc: page_cgroup of the page
		* @child: page's cgroup
		*
		* move charges to its parent or the root cgroup if the group has no
		* parent (aka use_hierarchy==0).
		* Although this might fail (get_page_unless_zero, isolate_lru_page or
		* mem_cgroup_move_account fails) the failure is always temporary and
		* it signals a race with a page removal/uncharge or migration. In the
		* first case the page is on the way out and it will vanish from the LRU
		* on the next attempt and the call should be retried later.
		* Isolation from the LRU fails only if page has been isolated from
		* the LRU since we looked at it and that usually means either global
		* reclaim or migration going on. The page will either get back to the
		* LRU or vanish.
		* Finally, mem_cgroup_move_account fails only if the page got uncharged
		* (!PageCgroupUsed) or moved to a different group. The page will
		* disappear in the next attempt.
		*/

		static int mem_cgroup_move_parent(struct page *page,
		struct page_cgroup *pc,
		struct mem_cgroup *child)
		@@ -2709,9 +2718,7 @@ static int mem_cgroup_move_parent(struct page *page,
		unsigned long uninitialized_var(flags);
		int ret;

		/* Is ROOT ? */
		if (mem_cgroup_is_root(child))
		return -EINVAL;
		VM_BUG_ON(mem_cgroup_is_root(child));

		ret = -EBUSY;
		if (!get_page_unless_zero(page))
		@@ -2728,8 +2735,10 @@ static int mem_cgroup_move_parent(struct page *page,
		if (!parent)
		parent = root_mem_cgroup;

		if (nr_pages > 1)
		if (nr_pages > 1) {
		VM_BUG_ON(!PageTransHuge(page));
		flags = compound_lock_irqsave(page);
		}

		ret = mem_cgroup_move_account(page, nr_pages,
		pc, child, parent);
		@@ -2871,7 +2880,6 @@ __mem_cgroup_commit_charge_swapin(struct page page, struct mem_cgroup memcg,
		return;
		if (!memcg)
		return;
		cgroup_exclude_rmdir(&memcg->css);

		__mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
		/*
		@@ -2885,12 +2893,6 @@ __mem_cgroup_commit_charge_swapin(struct page page, struct mem_cgroup memcg,
		swp_entry_t ent = {.val = page_private(page)};
		mem_cgroup_uncharge_swap(ent);
		}
		/*
		* At swapin, we may charge account against cgroup which has no tasks.
		* So, rmdir()->pre_destroy() can be called while we do this charge.
		* In that case, we need to call pre_destroy() again. check it here.
		*/
		cgroup_release_and_wakeup_rmdir(&memcg->css);
		}

		void mem_cgroup_commit_charge_swapin(struct page *page,
		@@ -3338,8 +3340,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,

		if (!memcg)
		return;
		/* blocks rmdir() */
		cgroup_exclude_rmdir(&memcg->css);

		if (!migration_ok) {
		used = oldpage;
		unused = newpage;
		@@ -3373,13 +3374,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
		*/
		if (anon)
		mem_cgroup_uncharge_page(used);
		/*
		* At migration, we may charge account against cgroup which has no
		* tasks.
		* So, rmdir()->pre_destroy() can be called while we do this charge.
		* In that case, we need to call pre_destroy() again. check it here.
		*/
		cgroup_release_and_wakeup_rmdir(&memcg->css);
		}

		/*
		@@ -3679,17 +3673,22 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
		return nr_reclaimed;
		}

		/*
		/**
		* mem_cgroup_force_empty_list - clears LRU of a group
		* @memcg: group to clear
		* @node: NUMA node
		* @zid: zone id
		* @lru: lru to clear
		*
		* Traverse a specified page_cgroup list and try to drop them all. This doesn't
		* reclaim the pages themselves - it just removes the page_cgroups.
		* Returns true if some page_cgroups were not freed, indicating that the caller
		* must retry this operation.
		* reclaim the pages themselves - pages are moved to the parent (or root)
		* group.
		*/
		static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
		static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
		int node, int zid, enum lru_list lru)
		{
		struct mem_cgroup_per_zone *mz;
		unsigned long flags, loop;
		unsigned long flags;
		struct list_head *list;
		struct page *busy;
		struct zone *zone;
		@@ -3698,11 +3697,8 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
		mz = mem_cgroup_zoneinfo(memcg, node, zid);
		list = &mz->lruvec.lists[lru];

		loop = mz->lru_size[lru];
		/* give some margin against EBUSY etc...*/
		loop += 256;
		busy = NULL;
		while (loop--) {
		do {
		struct page_cgroup *pc;
		struct page *page;

		@@ -3728,76 +3724,72 @@ static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
		cond_resched();
		} else
		busy = NULL;
		}
		return !list_empty(list);
		} while (!list_empty(list));
		}

		/*
		* make mem_cgroup's charge to be 0 if there is no task.
		* make mem_cgroup's charge to be 0 if there is no task by moving
		* all the charges and pages to the parent.
		* This enables deleting this mem_cgroup.
		*
		* Caller is responsible for holding css reference on the memcg.
		*/
		static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all)
		static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
		{
		int ret;
		int node, zid, shrink;
		int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
		struct cgroup *cgrp = memcg->css.cgroup;

		css_get(&memcg->css);
		int node, zid;

		shrink = 0;
		/* should free all ? */
		if (free_all)
		goto try_to_free;
		move_account:
		do {
		ret = -EBUSY;
		if (cgroup_task_count(cgrp) \|\| !list_empty(&cgrp->children))
		goto out;
		/* This is for making all used pages to be on LRU. */
		lru_add_drain_all();
		drain_all_stock_sync(memcg);
		ret = 0;
		mem_cgroup_start_move(memcg);
		for_each_node_state(node, N_HIGH_MEMORY) {
		for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
		for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		enum lru_list lru;
		for_each_lru(lru) {
		ret = mem_cgroup_force_empty_list(memcg,
		mem_cgroup_force_empty_list(memcg,
		node, zid, lru);
		if (ret)
		break;
		}
		}
		if (ret)
		break;
		}
		mem_cgroup_end_move(memcg);
		memcg_oom_recover(memcg);
		cond_resched();
		/* "ret" should also be checked to ensure all lists are empty. */
		} while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 \|\| ret);
		out:
		css_put(&memcg->css);
		return ret;

		try_to_free:
		/* returns EBUSY if there is a task or if we come here twice. */
		if (cgroup_task_count(cgrp) \|\| !list_empty(&cgrp->children) \|\| shrink) {
		ret = -EBUSY;
		goto out;
		/*
		* This is a safety check because mem_cgroup_force_empty_list
		* could have raced with mem_cgroup_replace_page_cache callers
		* so the lru seemed empty but the page could have been added
		* right after the check. RES_USAGE should be safe as we always
		* charge before adding to the LRU.
		*/
		} while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0);
		}

		/*
		* Reclaims as many pages from the given memcg as possible and moves
		* the rest to the parent.
		*
		* Caller is responsible for holding css reference for memcg.
		*/
		static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
		{
		int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
		struct cgroup *cgrp = memcg->css.cgroup;

		/* returns EBUSY if there is a task or if we come here twice. */
		if (cgroup_task_count(cgrp) \|\| !list_empty(&cgrp->children))
		return -EBUSY;

		/* we call try-to-free pages to make this cgroup empty */
		lru_add_drain_all();
		/* try to free all pages in this cgroup */
		shrink = 1;
		while (nr_retries && res_counter_read_u64(&memcg->res, RES_USAGE) > 0) {
		int progress;

		if (signal_pending(current)) {
		ret = -EINTR;
		goto out;
		}
		if (signal_pending(current))
		return -EINTR;

		progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
		false);
		if (!progress) {
		@@ -3808,13 +3800,23 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all)

		}
		lru_add_drain();
		/* try move_account...there may be some locked pages. */
		goto move_account;
		mem_cgroup_reparent_charges(memcg);

		return 0;
		}

		static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
		{
		return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
		struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
		int ret;

		if (mem_cgroup_is_root(memcg))
		return -EINVAL;
		css_get(&memcg->css);
		ret = mem_cgroup_force_empty(memcg);
		css_put(&memcg->css);

		return ret;
		}


		@@ -5001,11 +5003,11 @@ mem_cgroup_create(struct cgroup *cont)
		return ERR_PTR(error);
		}

		static int mem_cgroup_pre_destroy(struct cgroup *cont)
		static void mem_cgroup_pre_destroy(struct cgroup *cont)
		{
		struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);

		return mem_cgroup_force_empty(memcg, false);
		mem_cgroup_reparent_charges(memcg);
		}

		static void mem_cgroup_destroy(struct cgroup *cont)
		@@ -5607,7 +5609,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
		.base_cftypes = mem_cgroup_files,
		.early_init = 0,
		.use_id = 1,
		.__DEPRECATED_clear_css_refs = true,
		};

		#ifdef CONFIG_MEMCG_SWAP