
Commit 6d1fdc48 authored by Johannes Weiner, committed by Linus Torvalds

memcg: sanitize __mem_cgroup_try_charge() call protocol



Some callsites pass a memcg directly, some callsites pass an mm that
then has to be translated to a memcg.  This makes for a terrible
function interface.

Just push the mm-to-memcg translation into the respective callsites and
always pass a memcg to mem_cgroup_try_charge().
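
In concrete terms, the change to the calling convention looks like this (an illustrative sketch condensed from the hunks below, not a compilable excerpt; all identifiers are the kernel-internal ones touched by the patch):

	/* Old protocol: pass the mm, get the charged memcg back through *ptr. */
	struct mem_cgroup *memcg = NULL;
	ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);

	/* New protocol: the callsite resolves the memcg and passes it in... */
	memcg = get_mem_cgroup_from_mm(mm);
	ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
	css_put(&memcg->css);

	/* ...or it uses the new helper, which wraps exactly that sequence: */
	memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
	if (!memcg)
		return -ENOMEM;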

[mhocko@suse.cz: add charge mm helper]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent b6b6cc72
102 additions, 105 deletions
@@ -2575,7 +2575,7 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 }
 
 
-/* See __mem_cgroup_try_charge() for details */
+/* See mem_cgroup_try_charge() for details */
 enum {
 	CHARGE_OK,		/* success */
 	CHARGE_RETRY,		/* need to retry but retry is not bad */
@@ -2648,45 +2648,34 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	return CHARGE_NOMEM;
 }
 
-/*
- * __mem_cgroup_try_charge() does
- * 1. detect memcg to be charged against from passed *mm and *ptr,
- * 2. update res_counter
- * 3. call memory reclaim if necessary.
- *
- * In some special case, if the task is fatal, fatal_signal_pending() or
- * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup
- * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
- * as possible without any hazards. 2: all pages should have a valid
- * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
- * pointer, that is treated as a charge to root_mem_cgroup.
- *
- * So __mem_cgroup_try_charge() will return
- *  0       ...  on success, filling *ptr with a valid memcg pointer.
- *  -ENOMEM ...  charge failure because of resource limits.
- *  -EINTR  ...  if thread is fatal. *ptr is filled with root_mem_cgroup.
+/**
+ * mem_cgroup_try_charge - try charging a memcg
+ * @memcg: memcg to charge
+ * @nr_pages: number of pages to charge
+ * @oom: trigger OOM if reclaim fails
  *
- * Unlike the exported interface, an "oom" parameter is added. if oom==true,
- * the oom-killer can be invoked.
+ * Returns 0 if @memcg was charged successfully, -EINTR if the charge
+ * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
  */
-static int __mem_cgroup_try_charge(struct mm_struct *mm,
+static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
 				 gfp_t gfp_mask,
 				 unsigned int nr_pages,
-				   struct mem_cgroup **ptr,
 				 bool oom)
 {
 	unsigned int batch = max(CHARGE_BATCH, nr_pages);
 	int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
-	struct mem_cgroup *memcg = NULL;
 	int ret;
 
+	if (mem_cgroup_is_root(memcg))
+		goto done;
 	/*
-	 * Unlike gloval-vm's OOM-kill, we're not in memory shortage
-	 * in system level. So, allow to go ahead dying process in addition to
-	 * MEMDIE process.
+	 * Unlike in global OOM situations, memcg is not in a physical
+	 * memory shortage.  Allow dying and OOM-killed tasks to
+	 * bypass the last charges so that they can exit quickly and
+	 * free their memory.
 	 */
-	if (unlikely(test_thread_flag(TIF_MEMDIE)
-		     || fatal_signal_pending(current)))
+	if (unlikely(test_thread_flag(TIF_MEMDIE) ||
+		     fatal_signal_pending(current)))
 		goto bypass;
 
 	if (unlikely(task_in_memcg_oom(current)))
@@ -2695,14 +2684,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	if (gfp_mask & __GFP_NOFAIL)
 		oom = false;
 again:
-	if (*ptr) { /* css should be a valid one */
-		memcg = *ptr;
-		css_get(&memcg->css);
-	} else {
-		memcg = get_mem_cgroup_from_mm(mm);
-	}
-	if (mem_cgroup_is_root(memcg))
-		goto done;
 	if (consume_stock(memcg, nr_pages))
 		goto done;
 
@@ -2710,10 +2691,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		bool invoke_oom = oom && !nr_oom_retries;
 
 		/* If killed, bypass charge */
-		if (fatal_signal_pending(current)) {
-			css_put(&memcg->css);
+		if (fatal_signal_pending(current))
 			goto bypass;
-		}
 
 		ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
 					   nr_pages, invoke_oom);
@@ -2722,17 +2701,12 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 			break;
 		case CHARGE_RETRY: /* not in OOM situation but retry */
 			batch = nr_pages;
-			css_put(&memcg->css);
-			memcg = NULL;
 			goto again;
 		case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
-			css_put(&memcg->css);
 			goto nomem;
 		case CHARGE_NOMEM: /* OOM routine works */
-			if (!oom || invoke_oom) {
-				css_put(&memcg->css);
+			if (!oom || invoke_oom)
 				goto nomem;
-			}
 			nr_oom_retries--;
 			break;
 		}
@@ -2741,19 +2715,43 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	if (batch > nr_pages)
 		refill_stock(memcg, batch - nr_pages);
 done:
-	css_put(&memcg->css);
-	*ptr = memcg;
 	return 0;
 nomem:
-	if (!(gfp_mask & __GFP_NOFAIL)) {
-		*ptr = NULL;
+	if (!(gfp_mask & __GFP_NOFAIL))
 		return -ENOMEM;
-	}
 bypass:
-	*ptr = root_mem_cgroup;
 	return -EINTR;
 }
 
+/**
+ * mem_cgroup_try_charge_mm - try charging a mm
+ * @mm: mm_struct to charge
+ * @nr_pages: number of pages to charge
+ * @oom: trigger OOM if reclaim fails
+ *
+ * Returns the charged mem_cgroup associated with the given mm_struct or
+ * NULL the charge failed.
+ */
+static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
+				 gfp_t gfp_mask,
+				 unsigned int nr_pages,
+				 bool oom)
+
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	memcg = get_mem_cgroup_from_mm(mm);
+	ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
+	css_put(&memcg->css);
+	if (ret == -EINTR)
+		memcg = root_mem_cgroup;
+	else if (ret)
+		memcg = NULL;
+
+	return memcg;
+}
+
 /*
  * Somemtimes we have to undo a charge we got by try_charge().
  * This function is for that and do uncharge, put css's refcnt.
@@ -2949,20 +2947,17 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
 static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
 	struct res_counter *fail_res;
-	struct mem_cgroup *_memcg;
 	int ret = 0;
 
 	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
 	if (ret)
 		return ret;
 
-	_memcg = memcg;
-	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
-				      &_memcg, oom_gfp_allowed(gfp));
-
+	ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
+				    oom_gfp_allowed(gfp));
 	if (ret == -EINTR)  {
 		/*
-		 * __mem_cgroup_try_charge() chosed to bypass to root due to
+		 * mem_cgroup_try_charge() chosed to bypass to root due to
 		 * OOM kill or fatal signal.  Since our only options are to
 		 * either fail the allocation or charge it to this cgroup, do
 		 * it as a temporary condition. But we can't fail. From a
@@ -2972,7 +2967,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 		 *
 		 * This condition will only trigger if the task entered
 		 * memcg_charge_kmem in a sane state, but was OOM-killed during
-		 * __mem_cgroup_try_charge() above. Tasks that were already
+		 * mem_cgroup_try_charge() above. Tasks that were already
 		 * dying when the allocation triggers should have been already
 		 * directed to the root cgroup in memcontrol.h
 		 */
@@ -3826,10 +3821,9 @@ static int mem_cgroup_move_parent(struct page *page,
 int mem_cgroup_newpage_charge(struct page *page,
 			      struct mm_struct *mm, gfp_t gfp_mask)
 {
-	struct mem_cgroup *memcg = NULL;
 	unsigned int nr_pages = 1;
+	struct mem_cgroup *memcg;
 	bool oom = true;
-	int ret;
 
 	if (mem_cgroup_disabled())
 		return 0;
@@ -3848,9 +3842,9 @@ int mem_cgroup_newpage_charge(struct page *page,
 		oom = false;
 	}
 
-	ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
-	if (ret == -ENOMEM)
-		return ret;
+	memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
+	if (!memcg)
+		return -ENOMEM;
 	__mem_cgroup_commit_charge(memcg, page, nr_pages,
 				   MEM_CGROUP_CHARGE_TYPE_ANON, false);
 	return 0;
@@ -3867,7 +3861,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 					  gfp_t mask,
 					  struct mem_cgroup **memcgp)
 {
-	struct mem_cgroup *memcg;
+	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
 	int ret;
 
@@ -3880,31 +3874,29 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	 * in turn serializes uncharging.
 	 */
 	if (PageCgroupUsed(pc))
-		return 0;
-	if (!do_swap_account)
-		goto charge_cur_mm;
+		goto out;
+	if (do_swap_account)
 		memcg = try_get_mem_cgroup_from_page(page);
 	if (!memcg)
-		goto charge_cur_mm;
-	*memcgp = memcg;
-	ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
+		memcg = get_mem_cgroup_from_mm(mm);
+	ret = mem_cgroup_try_charge(memcg, mask, 1, true);
+	css_put(&memcg->css);
 	if (ret == -EINTR)
-		ret = 0;
-	return ret;
-charge_cur_mm:
-	ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
-	if (ret == -EINTR)
-		ret = 0;
+		memcg = root_mem_cgroup;
+	else if (ret)
 		return ret;
+out:
+	*memcgp = memcg;
+	return 0;
 }
 
 int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
 				 gfp_t gfp_mask, struct mem_cgroup **memcgp)
 {
-	if (mem_cgroup_disabled()) {
-		*memcgp = NULL;
+	if (mem_cgroup_disabled())
 		return 0;
-	}
 	/*
 	 * A racing thread's fault, or swapoff, may have already
 	 * updated the pte, and even removed page from swap cache: in
@@ -3912,12 +3904,13 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
 	 * there's also a KSM case which does need to charge the page.
 	 */
 	if (!PageSwapCache(page)) {
-		int ret;
+		struct mem_cgroup *memcg;
 
-		ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true);
-		if (ret == -EINTR)
-			ret = 0;
-		return ret;
+		memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+		if (!memcg)
+			return -ENOMEM;
+		*memcgp = memcg;
+		return 0;
 	}
 	return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
 }
@@ -3964,8 +3957,8 @@ void mem_cgroup_commit_charge_swapin(struct page *page,
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
-	struct mem_cgroup *memcg = NULL;
 	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	struct mem_cgroup *memcg;
 	int ret;
 
 	if (mem_cgroup_disabled())
@@ -3973,23 +3966,28 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	if (PageCompound(page))
 		return 0;
 
-	if (!PageSwapCache(page)) {
-		/*
-		 * Page cache insertions can happen without an actual
-		 * task context, e.g. during disk probing on boot.
-		 */
-		if (!mm)
-			memcg = root_mem_cgroup;
-		ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true);
-		if (ret != -ENOMEM)
-			__mem_cgroup_commit_charge(memcg, page, 1, type, false);
-	} else { /* page is swapcache/shmem */
+	if (PageSwapCache(page)) { /* shmem */
 		ret = __mem_cgroup_try_charge_swapin(mm, page,
 						     gfp_mask, &memcg);
-		if (!ret)
+		if (ret)
+			return ret;
 		__mem_cgroup_commit_charge_swapin(page, memcg, type);
+		return 0;
 	}
-	return ret;
+
+	/*
+	 * Page cache insertions can happen without an actual mm
+	 * context, e.g. during disk probing on boot.
+	 */
+	if (unlikely(!mm))
+		memcg = root_mem_cgroup;
+	else {
+		memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+		if (!memcg)
+			return -ENOMEM;
+	}
+	__mem_cgroup_commit_charge(memcg, page, 1, type, false);
+	return 0;
 }
 
 static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
@@ -6601,8 +6599,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
 			batch_count = PRECHARGE_COUNT_AT_ONCE;
 			cond_resched();
 		}
-		ret = __mem_cgroup_try_charge(NULL,
-					GFP_KERNEL, 1, &memcg, false);
+		ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
 		if (ret)
 			/* mem_cgroup_clear_mc() will do uncharge later */
 			return ret;