Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6536de82 authored by Johannes Weiner, committed by Greg Kroah-Hartman
Browse files

mm: fix inactive list balancing between NUMA nodes and cgroups

[ Upstream commit 3b991208b897f52507168374033771a984b947b1 ]

During !CONFIG_CGROUP reclaim, we expand the inactive list size if it's
thrashing on the node that is about to be reclaimed.  But when cgroups
are enabled, we suddenly ignore the node scope and use the cgroup scope
only.  The result is that pressure bleeds between NUMA nodes depending
on whether cgroups are merely compiled into Linux.  This behavioral
difference is unexpected and undesirable.

When the refault adaptivity of the inactive list was first introduced,
there were no statistics at the lruvec level - the intersection of node
and memcg - so it was better than nothing.

But now that we have that infrastructure, use lruvec_page_state() to
make the list balancing decision always NUMA aware.

[hannes@cmpxchg.org: fix bisection hole]
  Link: http://lkml.kernel.org/r/20190417155241.GB23013@cmpxchg.org
Link: http://lkml.kernel.org/r/20190412144438.2645-1-hannes@cmpxchg.org


Fixes: 2a2e4885 ("mm: vmscan: fix IO/refault regression in cache workingset transition")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
parent 11347368
Loading
Loading
Loading
Loading
+9 −20
Original line number Original line Diff line number Diff line
@@ -2190,7 +2190,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
 *   10TB     320        32GB
 *   10TB     320        32GB
 */
 */
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
				 struct mem_cgroup *memcg,
				 struct scan_control *sc, bool actual_reclaim)
				 struct scan_control *sc, bool actual_reclaim)
{
{
	enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
	enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
@@ -2211,16 +2210,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
	inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
	inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
	active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
	active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);


	if (memcg)
		refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
	else
		refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);

	/*
	/*
	 * When refaults are being observed, it means a new workingset
	 * When refaults are being observed, it means a new workingset
	 * is being established. Disable active list protection to get
	 * is being established. Disable active list protection to get
	 * rid of the stale workingset quickly.
	 * rid of the stale workingset quickly.
	 */
	 */
	refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
	if (file && actual_reclaim && lruvec->refaults != refaults) {
	if (file && actual_reclaim && lruvec->refaults != refaults) {
		inactive_ratio = 0;
		inactive_ratio = 0;
	} else {
	} else {
@@ -2241,12 +2236,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
}
}


static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
				 struct lruvec *lruvec, struct mem_cgroup *memcg,
				 struct lruvec *lruvec, struct scan_control *sc)
				 struct scan_control *sc)
{
{
	if (is_active_lru(lru)) {
	if (is_active_lru(lru)) {
		if (inactive_list_is_low(lruvec, is_file_lru(lru),
		if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
					 memcg, sc, true))
			shrink_active_list(nr_to_scan, lruvec, sc, lru);
			shrink_active_list(nr_to_scan, lruvec, sc, lru);
		return 0;
		return 0;
	}
	}
@@ -2346,7 +2339,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
			 * anonymous pages on the LRU in eligible zones.
			 * anonymous pages on the LRU in eligible zones.
			 * Otherwise, the small LRU gets thrashed.
			 * Otherwise, the small LRU gets thrashed.
			 */
			 */
			if (!inactive_list_is_low(lruvec, false, memcg, sc, false) &&
			if (!inactive_list_is_low(lruvec, false, sc, false) &&
			    lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
			    lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
					>> sc->priority) {
					>> sc->priority) {
				scan_balance = SCAN_ANON;
				scan_balance = SCAN_ANON;
@@ -2364,7 +2357,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
	 * lruvec even if it has plenty of old anonymous pages unless the
	 * lruvec even if it has plenty of old anonymous pages unless the
	 * system is under heavy pressure.
	 * system is under heavy pressure.
	 */
	 */
	if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
	if (!inactive_list_is_low(lruvec, true, sc, false) &&
	    lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
	    lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
		scan_balance = SCAN_FILE;
		scan_balance = SCAN_FILE;
		goto out;
		goto out;
@@ -2517,7 +2510,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
				nr[lru] -= nr_to_scan;
				nr[lru] -= nr_to_scan;


				nr_reclaimed += shrink_list(lru, nr_to_scan,
				nr_reclaimed += shrink_list(lru, nr_to_scan,
							    lruvec, memcg, sc);
							    lruvec, sc);
			}
			}
		}
		}


@@ -2584,7 +2577,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
	 * Even if we did not try to evict anon pages at all, we want to
	 * Even if we did not try to evict anon pages at all, we want to
	 * rebalance the anon lru active/inactive ratio.
	 * rebalance the anon lru active/inactive ratio.
	 */
	 */
	if (inactive_list_is_low(lruvec, false, memcg, sc, true))
	if (inactive_list_is_low(lruvec, false, sc, true))
		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
				   sc, LRU_ACTIVE_ANON);
				   sc, LRU_ACTIVE_ANON);
}
}
@@ -2982,12 +2975,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
		unsigned long refaults;
		unsigned long refaults;
		struct lruvec *lruvec;
		struct lruvec *lruvec;


		if (memcg)
			refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
		else
			refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);

		lruvec = mem_cgroup_lruvec(pgdat, memcg);
		lruvec = mem_cgroup_lruvec(pgdat, memcg);
		refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
		lruvec->refaults = refaults;
		lruvec->refaults = refaults;
	} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
	} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
}
}
@@ -3344,7 +3333,7 @@ static void age_active_anon(struct pglist_data *pgdat,
	do {
	do {
		struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
		struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);


		if (inactive_list_is_low(lruvec, false, memcg, sc, true))
		if (inactive_list_is_low(lruvec, false, sc, true))
			shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
			shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
					   sc, LRU_ACTIVE_ANON);
					   sc, LRU_ACTIVE_ANON);