Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 54595fe2 authored by KAMEZAWA Hiroyuki's avatar KAMEZAWA Hiroyuki Committed by Linus Torvalds
Browse files

memcg: use css_tryget in memcg



From:KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

css_tryget() newly is added and we can know css is alive or not and get
refcnt of css in very safe way.  ("alive" here means "rmdir/destroy" is
not called.)

This patch replaces css_get() to css_tryget(), where I cannot explain
why css_get() is safe. And removes memcg->obsolete flag.

Reviewed-by: default avatarDaisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: default avatarKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent a7ba0eef
Loading
Loading
Loading
Loading
+62 −36
Original line number Diff line number Diff line
@@ -162,7 +162,6 @@ struct mem_cgroup {
	 */
	bool use_hierarchy;
	unsigned long	last_oom_jiffies;
	int		obsolete;
	atomic_t	refcnt;

	unsigned int	swappiness;
@@ -283,6 +282,31 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
				struct mem_cgroup, css);
}

static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
{
	struct mem_cgroup *mem = NULL;
	/*
	 * Because we have no locks, mm->owner's may be being moved to other
	 * cgroup. We use css_tryget() here even if this looks
	 * pessimistic (rather than adding locks here).
	 */
	rcu_read_lock();
	do {
		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
		if (unlikely(!mem))
			break;
	} while (!css_tryget(&mem->css));
	rcu_read_unlock();
	return mem;
}

static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem)
{
	if (!mem)
		return true;
	return css_is_removed(&mem->css);
}

/*
 * Following LRU functions are allowed to be used without PCG_LOCK.
 * Operations are called by routine of global LRU independently from memcg.
@@ -622,8 +646,9 @@ mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
{
	struct cgroup *cgroup;
	struct mem_cgroup *ret;
	bool obsolete = (root_mem->last_scanned_child &&
				root_mem->last_scanned_child->obsolete);
	bool obsolete;

	obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);

	/*
	 * Scan all children under the mem_cgroup mem
@@ -636,7 +661,7 @@ mem_cgroup_get_first_node(struct mem_cgroup *root_mem)

	if (!root_mem->last_scanned_child || obsolete) {

		if (obsolete)
		if (obsolete && root_mem->last_scanned_child)
			mem_cgroup_put(root_mem->last_scanned_child);

		cgroup = list_first_entry(&root_mem->css.cgroup->children,
@@ -711,7 +736,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
	next_mem = mem_cgroup_get_first_node(root_mem);

	while (next_mem != root_mem) {
		if (next_mem->obsolete) {
		if (mem_cgroup_is_obsolete(next_mem)) {
			mem_cgroup_put(next_mem);
			cgroup_lock();
			next_mem = mem_cgroup_get_first_node(root_mem);
@@ -769,23 +794,17 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
	 * thread group leader migrates. It's possible that mm is not
	 * set, if so charge the init_mm (happens for pagecache usage).
	 */
	if (likely(!*memcg)) {
		rcu_read_lock();
		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
		if (unlikely(!mem)) {
			rcu_read_unlock();
			return 0;
		}
		/*
		 * For every charge from the cgroup, increment reference count
		 */
		css_get(&mem->css);
	mem = *memcg;
	if (likely(!mem)) {
		mem = try_get_mem_cgroup_from_mm(mm);
		*memcg = mem;
		rcu_read_unlock();
	} else {
		mem = *memcg;
		css_get(&mem->css);
	}
	if (unlikely(!mem))
		return 0;

	VM_BUG_ON(mem_cgroup_is_obsolete(mem));

	while (1) {
		int ret;
@@ -1072,12 +1091,19 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
				MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
}

/*
 * While swap-in, try_charge -> commit or cancel, the page is locked.
 * And when try_charge() successfully returns, one refcnt to memcg without
 * struct page_cgroup is aquired. This refcnt will be cumsumed by
 * "commit()" or removed by "cancel()"
 */
int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
				 struct page *page,
				 gfp_t mask, struct mem_cgroup **ptr)
{
	struct mem_cgroup *mem;
	swp_entry_t     ent;
	int ret;

	if (mem_cgroup_disabled())
		return 0;
@@ -1096,10 +1122,15 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
	ent.val = page_private(page);

	mem = lookup_swap_cgroup(ent);
	if (!mem || mem->obsolete)
	if (!mem)
		goto charge_cur_mm;
	if (!css_tryget(&mem->css))
		goto charge_cur_mm;
	*ptr = mem;
	return __mem_cgroup_try_charge(NULL, mask, ptr, true);
	ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
	/* drop extra refcnt from tryget */
	css_put(&mem->css);
	return ret;
charge_cur_mm:
	if (unlikely(!mm))
		mm = &init_mm;
@@ -1130,13 +1161,18 @@ int mem_cgroup_cache_charge_swapin(struct page *page,
		ent.val = page_private(page);
		if (do_swap_account) {
			mem = lookup_swap_cgroup(ent);
			if (mem && mem->obsolete)
				mem = NULL;
			if (mem)
				mm = NULL;
			if (mem) {
				if (css_tryget(&mem->css))
					mm = NULL; /* charge to recorded */
				else
					mem = NULL; /* charge to current */
			}
		}
		ret = mem_cgroup_charge_common(page, mm, mask,
				MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
		/* drop extra refcnt from tryget */
		if (mem)
			css_put(&mem->css);

		if (!ret && do_swap_account) {
			/* avoid double counting */
@@ -1178,7 +1214,6 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
		struct mem_cgroup *memcg;
		memcg = swap_cgroup_record(ent, NULL);
		if (memcg) {
			/* If memcg is obsolete, memcg can be != ptr */
			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
			mem_cgroup_put(memcg);
		}
@@ -1421,14 +1456,9 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
	if (!mm)
		return 0;

	rcu_read_lock();
	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
	if (unlikely(!mem)) {
		rcu_read_unlock();
	mem = try_get_mem_cgroup_from_mm(mm);
	if (unlikely(!mem))
		return 0;
	}
	css_get(&mem->css);
	rcu_read_unlock();

	do {
		progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true);
@@ -2086,9 +2116,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 * the number of reference from swap_cgroup and free mem_cgroup when
 * it goes down to 0.
 *
 * When mem_cgroup is destroyed, mem->obsolete will be set to 0 and
 * entry which points to this memcg will be ignore at swapin.
 *
 * Removal of cgroup itself succeeds regardless of refs from swap.
 */

@@ -2174,7 +2201,6 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
					struct cgroup *cont)
{
	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
	mem->obsolete = 1;
	mem_cgroup_force_empty(mem, false);
}