Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dcb29524 authored by Linux Build Service Account, committed by Gerrit - the friendly Code Review server
Browse files

Merge "mm: vmscan: block kswapd if it is encountering pages under writeback"

parents faf56be4 488cb10a
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -527,6 +527,9 @@ typedef enum {
					 * many dirty file pages at the tail
					 * of the LRU.
					 */
	ZONE_WRITEBACK,			/* reclaim scanning has recently found
					 * many pages under writeback
					 */
} zone_flags_t;

static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -554,6 +557,11 @@ static inline int zone_is_reclaim_dirty(const struct zone *zone)
	return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags);
}

/*
 * Test whether reclaim has recently found many pages under writeback
 * in this zone (ZONE_WRITEBACK is set by shrink_inactive_list() and
 * checked by kswapd in shrink_page_list() to decide whether to block).
 *
 * Returns non-zero if the flag is set, 0 otherwise.
 */
static inline int zone_is_reclaim_writeback(const struct zone *zone)
{
	const unsigned long *flags = &zone->flags;

	return test_bit(ZONE_WRITEBACK, flags);
}

static inline int zone_is_reclaim_locked(const struct zone *zone)
{
	return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
+60 −22
Original line number Diff line number Diff line
@@ -812,25 +812,55 @@ static unsigned long shrink_page_list(struct list_head *page_list,
		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));

		if (PageWriteback(page)) {
		/*
			 * memcg doesn't have any dirty pages throttling so we
			 * could easily OOM just because too many pages are in
			 * writeback and there is nothing else to reclaim.
			 *
			 * Check __GFP_IO, certainly because a loop driver
			 * thread might enter reclaim, and deadlock if it waits
			 * on a page for which it is needed to do the write
			 * (loop masks off __GFP_IO|__GFP_FS for this reason);
			 * but more thought would probably show more reasons.
			 *
			 * Don't require __GFP_FS, since we're not going into
			 * the FS, just waiting on its writeback completion.
			 * Worryingly, ext4 gfs2 and xfs allocate pages with
			 * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so
			 * testing may_enter_fs here is liable to OOM on them.
		 * If a page at the tail of the LRU is under writeback, there
		 * are three cases to consider.
		 *
		 * 1) If reclaim is encountering an excessive number of pages
		 *    under writeback and this page is both under writeback and
		 *    PageReclaim then it indicates that pages are being queued
		 *    for IO but are being recycled through the LRU before the
		 *    IO can complete. Waiting on the page itself risks an
		 *    indefinite stall if it is impossible to writeback the
		 *    page due to IO error or disconnected storage so instead
		 *    block for HZ/10 or until some IO completes then clear the
		 *    ZONE_WRITEBACK flag to recheck if the condition exists.
		 *
		 * 2) Global reclaim encounters a page, memcg encounters a
		 *    page that is not marked for immediate reclaim or
		 *    the caller does not have __GFP_IO. In this case mark
		 *    the page for immediate reclaim and continue scanning.
		 *
		 *    __GFP_IO is checked  because a loop driver thread might
		 *    enter reclaim, and deadlock if it waits on a page for
		 *    which it is needed to do the write (loop masks off
		 *    __GFP_IO|__GFP_FS for this reason); but more thought
		 *    would probably show more reasons.
		 *
		 *    Don't require __GFP_FS, since we're not going into the
		 *    FS, just waiting on its writeback completion. Worryingly,
		 *    ext4 gfs2 and xfs allocate pages with
		 *    grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing
		 *    may_enter_fs here is liable to OOM on them.
		 *
		 * 3) memcg encounters a page that is not already marked
		 *    PageReclaim. memcg does not have any dirty pages
		 *    throttling so we could easily OOM just because too many
		 *    pages are in writeback and there is nothing else to
		 *    reclaim. Wait for the writeback to complete.
		 */
			if (global_reclaim(sc) ||
		if (PageWriteback(page)) {
			/* Case 1 above */
			if (current_is_kswapd() &&
			    PageReclaim(page) &&
			    zone_is_reclaim_writeback(zone)) {
				unlock_page(page);
				congestion_wait(BLK_RW_ASYNC, HZ/10);
				zone_clear_flag(zone, ZONE_WRITEBACK);
				goto keep;

			/* Case 2 above */
			} else if (global_reclaim(sc) ||
			    !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) {
				/*
				 * This is slightly racy - end_page_writeback()
@@ -845,10 +875,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
				 */
				SetPageReclaim(page);
				nr_writeback++;

				goto keep_locked;
			}

			/* Case 3 above */
			} else {
				wait_on_page_writeback(page);
			}
		}

		if (!force_reclaim)
			references = page_check_references(page, sc);
@@ -1463,8 +1497,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
	 *                     isolated page is PageWriteback
	 */
	if (nr_writeback && nr_writeback >=
			(nr_taken >> (DEF_PRIORITY - sc->priority)))
			(nr_taken >> (DEF_PRIORITY - sc->priority))) {
		wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
		zone_set_flag(zone, ZONE_WRITEBACK);
	}

	/*
	 * Similarly, if many dirty pages are encountered that are not
@@ -2785,8 +2821,8 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 * the high watermark.
 *
 * Returns true if kswapd scanned at least the requested number of pages to
 * reclaim. This is used to determine if the scanning priority needs to be
 * raised.
 * reclaim or if the lack of progress was due to pages under writeback.
 * This is used to determine if the scanning priority needs to be raised.
 */
static bool kswapd_shrink_zone(struct zone *zone,
			       struct scan_control *sc,
@@ -2810,6 +2846,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
	/* Account for the number of pages attempted to reclaim */
	*nr_attempted += sc->nr_to_reclaim;

	zone_clear_flag(zone, ZONE_WRITEBACK);

	return sc->nr_scanned >= sc->nr_to_reclaim;
}