Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f1f59ece authored by Vinayak Menon, committed by Charan Teja Reddy
Browse files

mm: vmscan: fix the page state calculation in too_many_isolated



It is observed that sometimes multiple tasks get blocked in
the congestion_wait loop below, in shrink_inactive_list.

(__schedule) from [<c0a03328>]
(schedule_timeout) from [<c0a04940>]
(io_schedule_timeout) from [<c01d585c>]
(congestion_wait) from [<c01cc9d8>]
(shrink_inactive_list) from [<c01cd034>]
(shrink_zone) from [<c01cdd08>]
(try_to_free_pages) from [<c01c442c>]
(__alloc_pages_nodemask) from [<c01f1884>]
(new_slab) from [<c09fcf60>]
(__slab_alloc) from [<c01f1a6c>]

In one such instance, zone_page_state(zone, NR_ISOLATED_FILE)
had returned 14, zone_page_state(zone, NR_INACTIVE_FILE)
returned 92, and the gfp_flag was GFP_KERNEL, which caused
too_many_isolated to return true. But one of the CPU pageset
vmstat diffs had NR_ISOLATED_FILE as -14. As there were no further
updates to the per-CPU pageset, the threshold wasn't met, and the
tasks were blocked in the congestion wait.

This patch uses zone_page_state_snapshot instead, but restricts
its usage to avoid performance penalty.

Change-Id: Iec767a548e524729c7ed79a92fe4718cdd08ce69
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
parent 64766974
Loading
Loading
Loading
Loading
+46 −19
Original line number Diff line number Diff line
@@ -1637,31 +1637,32 @@ int isolate_lru_page(struct page *page)
	return ret;
}

/*
 * A direct reclaimer may isolate SWAP_CLUSTER_MAX pages from the LRU list and
 * then get rescheduled. When a massive number of tasks are doing page
 * allocation, such sleeping direct reclaimers may keep piling up on each CPU;
 * the LRU list will shrink and be scanned faster than necessary, leading to
 * unnecessary swapping, thrashing and OOM.
 */
static int too_many_isolated(struct pglist_data *pgdat, int file,
		struct scan_control *sc)
static int __too_many_isolated(struct pglist_data *pgdat, int file,
	struct scan_control *sc, bool stalled)
{
	unsigned long inactive, isolated;

	if (current_is_kswapd())
		return 0;

	if (!sane_reclaim(sc))
		return 0;

	if (file) {
		if (stalled) {
			inactive = node_page_state_snapshot(pgdat,
					NR_INACTIVE_FILE);
			isolated = node_page_state_snapshot(pgdat,
					NR_ISOLATED_FILE);
		} else {
			inactive = node_page_state(pgdat, NR_INACTIVE_FILE);
			isolated = node_page_state(pgdat, NR_ISOLATED_FILE);
		}
	} else {
		if (stalled) {
			inactive = node_page_state_snapshot(pgdat,
					NR_INACTIVE_ANON);
			isolated = node_page_state_snapshot(pgdat,
					NR_ISOLATED_ANON);
		} else {
			inactive = node_page_state(pgdat, NR_INACTIVE_ANON);
			isolated = node_page_state(pgdat, NR_ISOLATED_ANON);
		}
	}

	/*
	 * GFP_NOIO/GFP_NOFS callers are allowed to isolate more pages, so they
@@ -1674,6 +1675,32 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
	return isolated > inactive;
}

/*
 * Throttle check for direct reclaim.
 *
 * A direct reclaimer may isolate SWAP_CLUSTER_MAX pages from the LRU list and
 * then get rescheduled. With a large number of tasks allocating pages at
 * once, such sleeping reclaimers can pile up on every CPU; the LRU list then
 * shrinks and gets scanned faster than necessary, which leads to unnecessary
 * swapping, thrashing and OOM.
 *
 * @pgdat:   node whose counters are examined
 * @file:    forwarded unchanged to __too_many_isolated()
 * @sc:      scan control of the current reclaim pass
 * @stalled: true when the caller wants a positive result double-checked
 *
 * Returns 0 for kswapd, for a reclaim context that sane_reclaim() rejects,
 * or when the first check reports no excess. Otherwise returns 1 if the
 * caller is not stalled, or the verdict of the second, stalled check.
 */
static int too_many_isolated(struct pglist_data *pgdat, int file,
		struct scan_control *sc, bool stalled)
{
	if (current_is_kswapd() || !sane_reclaim(sc))
		return 0;

	/* First pass: ask the helper with stalled == false. */
	if (!unlikely(__too_many_isolated(pgdat, file, sc, false)))
		return 0;

	/* Not stalled: the first positive answer is taken as-is. */
	if (!stalled)
		return 1;

	/*
	 * Stalled caller: re-evaluate with stalled set, which makes the helper
	 * read the *_snapshot counters, and report that verdict instead.
	 */
	return __too_many_isolated(pgdat, file, sc, stalled);
}

static noinline_for_stack void
putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
{
@@ -1761,7 +1788,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
	bool stalled = false;

	while (unlikely(too_many_isolated(pgdat, file, sc))) {
	while (unlikely(too_many_isolated(pgdat, file, sc, stalled))) {
		if (stalled)
			return 0;