
Commit 5660048c authored by Johannes Weiner, committed by Linus Torvalds

mm: move memcg hierarchy reclaim to generic reclaim code



Memory cgroup limit reclaim and traditional global pressure reclaim will
soon share the same code to reclaim from a hierarchical tree of memory
cgroups.

In preparation of this, move the two right next to each other in
shrink_zone().

The mem_cgroup_hierarchical_reclaim() polymath is split into a soft
limit reclaim function, which still does hierarchy walking on its own,
and a limit (shrinking) reclaim function, which relies on generic
reclaim code to walk the hierarchy.
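
A hedged sketch of the resulting division of labor, using the names this patch introduces (the wrapper function and its locals are illustrative only, not part of the patch):

/*
 * Illustration only: the two entry points after the split.
 * The wrapper and its arguments stand in for real call sites.
 */
static void reclaim_call_shapes(struct mem_cgroup *memcg, struct zone *zone)
{
	unsigned long nr_scanned = 0;

	/* Limit (shrinking) reclaim: no zone argument; generic
	 * reclaim code walks the hierarchy via mem_cgroup_iter(). */
	mem_cgroup_reclaim(memcg, GFP_KERNEL, MEM_CGROUP_RECLAIM_SHRINK);

	/* Soft limit reclaim: per-zone; still walks the hierarchy
	 * itself and reports scan activity back to the caller. */
	mem_cgroup_soft_reclaim(memcg, zone, GFP_KERNEL, &nr_scanned);
}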

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 527a5ec9
include/linux/memcontrol.h +24 −0
@@ -40,6 +40,12 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
					struct mem_cgroup *mem_cont,
					int active, int file);

+struct mem_cgroup_reclaim_cookie {
+	struct zone *zone;
+	int priority;
+	unsigned int generation;
+};

#ifdef CONFIG_CGROUP_MEM_RES_CTLR
/*
 * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -106,6 +112,11 @@ mem_cgroup_prepare_migration(struct page *page,
extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
	struct page *oldpage, struct page *newpage, bool migration_ok);

+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+				   struct mem_cgroup *,
+				   struct mem_cgroup_reclaim_cookie *);
+void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);

/*
 * For memory reclaim.
 */
@@ -281,6 +292,19 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
{
}

+static inline struct mem_cgroup *
+mem_cgroup_iter(struct mem_cgroup *root,
+		struct mem_cgroup *prev,
+		struct mem_cgroup_reclaim_cookie *reclaim)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
+					 struct mem_cgroup *prev)
+{
+}

static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
{
	return 0;
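
The !CONFIG_CGROUP_MEM_RES_CTLR stubs above keep the generic walk compilable: mem_cgroup_iter() returns NULL, so a walk simply visits nothing. The iterator also takes a reference on each memcg it returns and drops the one on @prev, so a caller that bails out early must hand its last position to mem_cgroup_iter_break(). A hedged sketch of that contract (should_stop() is a hypothetical predicate, not a kernel function):

/* Illustration only: partial hierarchy walk; should_stop() is made up. */
static void walk_until(struct mem_cgroup *root)
{
	struct mem_cgroup *memcg;

	for (memcg = mem_cgroup_iter(root, NULL, NULL); memcg;
	     memcg = mem_cgroup_iter(root, memcg, NULL)) {
		if (should_stop(memcg)) {
			/* drop the reference still held on memcg */
			mem_cgroup_iter_break(root, memcg);
			break;
		}
	}
}
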
mm/memcontrol.c +85 −84
@@ -370,8 +370,6 @@ enum charge_type {
#define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
#define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
#define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-#define MEM_CGROUP_RECLAIM_SOFT_BIT	0x2
-#define MEM_CGROUP_RECLAIM_SOFT		(1 << MEM_CGROUP_RECLAIM_SOFT_BIT)

static void mem_cgroup_get(struct mem_cgroup *memcg);
static void mem_cgroup_put(struct mem_cgroup *memcg);
@@ -857,20 +855,33 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
	return memcg;
}

-struct mem_cgroup_reclaim_cookie {
-	struct zone *zone;
-	int priority;
-	unsigned int generation;
-};
-
-static struct mem_cgroup *
-mem_cgroup_iter(struct mem_cgroup *root,
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ *
+ * Returns references to children of the hierarchy below @root, or
+ * @root itself, or %NULL after a full round-trip.
+ *
+ * Caller must pass the return value in @prev on subsequent
+ * invocations for reference counting, or use mem_cgroup_iter_break()
+ * to cancel a hierarchy walk before the round-trip is complete.
+ *
+ * Reclaimers can specify a zone and a priority level in @reclaim to
+ * divide up the memcgs in the hierarchy among all concurrent
+ * reclaimers operating on the same zone and priority.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
{
	struct mem_cgroup *memcg = NULL;
	int id = 0;

+	if (mem_cgroup_disabled())
+		return NULL;

	if (!root)
		root = root_mem_cgroup;

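The @reclaim cookie is what makes this walk shareable: callers passing the same zone and priority consult shared per-zone, per-priority iterator state, so concurrent reclaimers are handed different memcgs instead of all starting at the same child. A hedged sketch of such a walk (reclaim_one_memcg() is a hypothetical helper):

/* Illustration only: a reclaim walk keyed by (zone, priority). */
static void reclaim_walk(struct mem_cgroup *root, struct zone *zone,
			 int priority)
{
	struct mem_cgroup_reclaim_cookie cookie = {
		.zone = zone,
		.priority = priority,
	};
	struct mem_cgroup *memcg;

	for (memcg = mem_cgroup_iter(root, NULL, &cookie); memcg;
	     memcg = mem_cgroup_iter(root, memcg, &cookie))
		reclaim_one_memcg(memcg, zone);	/* hypothetical helper */
}
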
@@ -926,7 +937,12 @@ mem_cgroup_iter(struct mem_cgroup *root,
	return memcg;
}

-static void mem_cgroup_iter_break(struct mem_cgroup *root,
+/**
+ * mem_cgroup_iter_break - abort a hierarchy walk prematurely
+ * @root: hierarchy root
+ * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
+ */
+void mem_cgroup_iter_break(struct mem_cgroup *root,
+			   struct mem_cgroup *prev)
{
	if (!root)
@@ -1555,6 +1571,42 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
	return min(limit, memsw);
}

+static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
+					gfp_t gfp_mask,
+					unsigned long flags)
+{
+	unsigned long total = 0;
+	bool noswap = false;
+	int loop;
+
+	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
+		noswap = true;
+	if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
+		noswap = true;
+
+	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
+		if (loop)
+			drain_all_stock_async(memcg);
+		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
+		/*
+		 * Allow limit shrinkers, which are triggered directly
+		 * by userspace, to catch signals and stop reclaim
+		 * after minimal progress, regardless of the margin.
+		 */
+		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
+			break;
+		if (mem_cgroup_margin(memcg))
+			break;
+		/*
+		 * If nothing was reclaimed after two attempts, there
+		 * may be no reclaimable pages in this hierarchy.
+		 */
+		if (loop && !total)
+			break;
+	}
+	return total;
+}
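
mem_cgroup_reclaim() above is the limit-reclaim half of the split: it retries try_to_free_mem_cgroup_pages() against the whole hierarchy (the per-memcg walk now happens inside generic reclaim), draining per-CPU charge caches between rounds and stopping on margin, on no progress, or, for userspace-triggered shrinking, on minimal progress. A hedged sketch of the caller pattern, modeled on the mem_cgroup_resize_limit() hunk further down (shrink_usage_to() is illustrative, not part of the patch):

/* Illustration only: retry loop modeled on mem_cgroup_resize_limit();
 * locking, signal handling and retry limits omitted. */
static int shrink_usage_to(struct mem_cgroup *memcg, u64 limit)
{
	while (res_counter_read_u64(&memcg->res, RES_USAGE) > limit) {
		if (!mem_cgroup_reclaim(memcg, GFP_KERNEL,
					MEM_CGROUP_RECLAIM_SHRINK))
			return -EBUSY;	/* no forward progress */
	}
	return 0;
}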

/**
 * test_mem_cgroup_node_reclaimable
 * @mem: the target memcg
@@ -1692,30 +1744,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
}
#endif

-/*
- * Scan the hierarchy if needed to reclaim memory. We remember the last child
- * we reclaimed from, so that we don't end up penalizing one child extensively
- * based on its position in the children list.
- *
- * root_memcg is the original ancestor that we've been reclaim from.
- *
- * We give up and return to the caller when we visit root_memcg twice.
- * (other groups can be removed while we're walking....)
- *
- * If shrink==true, for avoiding to free too much, this returns immedieately.
- */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
-						struct zone *zone,
-						gfp_t gfp_mask,
-						unsigned long reclaim_options,
-						unsigned long *total_scanned)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
 {
 	struct mem_cgroup *victim = NULL;
-	int ret, total = 0;
+	int total = 0;
 	int loop = 0;
-	bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
-	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
-	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
	unsigned long excess;
	unsigned long nr_scanned;
	struct mem_cgroup_reclaim_cookie reclaim = {
@@ -1725,29 +1761,17 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,

	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;

-	/* If memsw_is_minimum==1, swap-out is of-no-use. */
-	if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
-		noswap = true;

	while (1) {
		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
		if (!victim) {
			loop++;
-			/*
-			 * We are not draining per cpu cached charges during
-			 * soft limit reclaim  because global reclaim doesn't
-			 * care about charges. It tries to free some memory and
-			 * charges will not give any.
-			 */
-			if (!check_soft && loop >= 1)
-				drain_all_stock_async(root_memcg);
			if (loop >= 2) {
				/*
				 * If we have not been able to reclaim
				 * anything, it might because there are
				 * no reclaimable pages under this hierarchy
				 */
-				if (!check_soft || !total)
+				if (!total)
					break;
				/*
				 * We want to do more targeted reclaim.
@@ -1761,31 +1785,13 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
			}
			continue;
		}
-		if (!mem_cgroup_reclaimable(victim, noswap)) {
-			/* this cgroup's local usage == 0 */
+		if (!mem_cgroup_reclaimable(victim, false))
 			continue;
-		}
-		/* we use swappiness of local cgroup */
-		if (check_soft) {
-			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, zone, &nr_scanned);
-			*total_scanned += nr_scanned;
-		} else
-			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-						noswap);
-		total += ret;
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
-		/*
-		 * At shrinking usage, we can't check we should stop here or
-		 * reclaim more. It's depends on callers. last_scanned_child
-		 * will work enough for keeping fairness under tree.
-		 */
-		if (shrink)
-			break;
-		if (check_soft) {
-			if (!res_counter_soft_limit_excess(&root_memcg->res))
-				break;
-		} else if (mem_cgroup_margin(root_memcg))
-			break;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
+			break;
	}
	mem_cgroup_iter_break(root_memcg, victim);
	return total;
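
Soft limit reclaim keeps its private hierarchy walk because its termination condition is global to the subtree, the root's soft-limit excess, and because it targets a single zone. A hedged reduction of the function above (the real loop also survives round-trip ends and only gives up after two fruitless passes):

/* Illustration only: reduced shape of mem_cgroup_soft_reclaim(). */
static int soft_reclaim_sketch(struct mem_cgroup *root_memcg,
			       struct zone *zone, gfp_t gfp_mask,
			       unsigned long *total_scanned)
{
	struct mem_cgroup_reclaim_cookie reclaim = {
		.zone = zone,
		.priority = 0,
	};
	struct mem_cgroup *victim = NULL;
	unsigned long nr_scanned;
	int total = 0;

	while ((victim = mem_cgroup_iter(root_memcg, victim, &reclaim))) {
		if (!mem_cgroup_reclaimable(victim, false))
			continue;
		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
						     zone, &nr_scanned);
		*total_scanned += nr_scanned;
		if (!res_counter_soft_limit_excess(&root_memcg->res))
			break;
	}
	mem_cgroup_iter_break(root_memcg, victim);
	return total;
}
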
@@ -2281,8 +2287,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
	if (!(gfp_mask & __GFP_WAIT))
		return CHARGE_WOULDBLOCK;

-	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags, NULL);
+	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
		return CHARGE_RETRY;
	/*
@@ -3559,9 +3564,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
		if (!ret)
			break;

-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_SHRINK);
		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
		/* Usage is reduced ? */
		if (curusage >= oldusage)
@@ -3619,10 +3623,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
		if (!ret)
			break;

-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_NOSWAP |
+				   MEM_CGROUP_RECLAIM_SHRINK);
		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
		/* Usage is reduced ? */
		if (curusage >= oldusage)
@@ -3665,10 +3668,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
			break;

		nr_scanned = 0;
-		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
-						gfp_mask,
-						MEM_CGROUP_RECLAIM_SOFT,
-						&nr_scanned);
+		reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+						    gfp_mask, &nr_scanned);
		nr_reclaimed += reclaimed;
		*total_scanned += nr_scanned;
		spin_lock(&mctz->lock);
mm/vmscan.c +39 −4
@@ -2104,12 +2104,43 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
static void shrink_zone(int priority, struct zone *zone,
			struct scan_control *sc)
{
+	struct mem_cgroup *root = sc->target_mem_cgroup;
+	struct mem_cgroup_reclaim_cookie reclaim = {
+		.zone = zone,
+		.priority = priority,
+	};
+	struct mem_cgroup *memcg;

+	if (global_reclaim(sc)) {
 		struct mem_cgroup_zone mz = {
-		.mem_cgroup = sc->target_mem_cgroup,
+			.mem_cgroup = NULL,
 			.zone = zone,
 		};

 		shrink_mem_cgroup_zone(priority, &mz, sc);
+		return;
+	}

+	memcg = mem_cgroup_iter(root, NULL, &reclaim);
+	do {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = memcg,
+			.zone = zone,
+		};
+
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		/*
+		 * Limit reclaim has historically picked one memcg and
+		 * scanned it with decreasing priority levels until
+		 * nr_to_reclaim had been reclaimed.  This priority
+		 * cycle is thus over after a single memcg.
+		 */
+		if (!global_reclaim(sc)) {
+			mem_cgroup_iter_break(root, memcg);
+			break;
+		}
+		memcg = mem_cgroup_iter(root, memcg, &reclaim);
+	} while (memcg);
}
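
The comment in this hunk records the asymmetry being preserved: limit reclaim completes its priority cycle after a single memcg, while global reclaim must cover the whole zone at each priority. A hedged, simplified view of the priority loop that drives shrink_zone() (bookkeeping and writeback throttling omitted):

/* Illustration only: simplified shape of the do_try_to_free_pages()
 * priority loop that ends up in shrink_zone(). */
static void priority_cycle_sketch(struct zonelist *zonelist,
				  struct scan_control *sc)
{
	int priority;

	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
		shrink_zones(priority, zonelist, sc);
		if (sc->nr_reclaimed >= sc->nr_to_reclaim)
			break;	/* enough progress at this priority */
	}
}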

/*
@@ -2374,6 +2405,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
		.order = 0,
		.target_mem_cgroup = mem,
	};
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = mem,
+		.zone = zone,
+	};

	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2389,7 +2424,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
	 * will pick up pages from other mem cgroup's as well. We hack
	 * the priority and make it zero.
	 */
-	shrink_zone(0, zone, &sc);
+	shrink_mem_cgroup_zone(0, &mz, &sc);

	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);