Merge "mm: vmscan: block kswapd if it is encountering pages under writeback" (dcb29524) · Commits · e / devices / android_kernel_sony_msm8994

include/linux/mmzone.h

+8 −0

Original line number	Diff line number	Diff line
		@@ -527,6 +527,9 @@ typedef enum {
		* many dirty file pages at the tail
		* of the LRU.
		*/
		ZONE_WRITEBACK, /* reclaim scanning has recently found
		* many pages under writeback
		*/
		} zone_flags_t;

		static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
		@@ -554,6 +557,11 @@ static inline int zone_is_reclaim_dirty(const struct zone *zone)
		return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags);
		}

		/*
		 * Report whether the ZONE_WRITEBACK bit is currently set in zone->flags.
		 * The return value is whatever test_bit() yields for that bit.
		 */
		static inline int zone_is_reclaim_writeback(const struct zone *zone)
		{
			const int writeback_flagged = test_bit(ZONE_WRITEBACK, &zone->flags);

			return writeback_flagged;
		}

		static inline int zone_is_reclaim_locked(const struct zone *zone)
		{
		return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);

mm/vmscan.c

+60 −22

Original line number	Diff line number	Diff line
		@@ -812,25 +812,55 @@ static unsigned long shrink_page_list(struct list_head *page_list,
		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
		(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));

		if (PageWriteback(page)) {
		/*
		* memcg doesn't have any dirty pages throttling so we
		* could easily OOM just because too many pages are in
		* writeback and there is nothing else to reclaim.
		*
		* Check __GFP_IO, certainly because a loop driver
		* thread might enter reclaim, and deadlock if it waits
		* on a page for which it is needed to do the write
		* (loop masks off __GFP_IO|__GFP_FS for this reason);
		* but more thought would probably show more reasons.
		*
		* Don't require __GFP_FS, since we're not going into
		* the FS, just waiting on its writeback completion.
		* Worryingly, ext4 gfs2 and xfs allocate pages with
		* grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so
		* testing may_enter_fs here is liable to OOM on them.
		* If a page at the tail of the LRU is under writeback, there
		* are three cases to consider.
		*
		* 1) If reclaim is encountering an excessive number of pages
		* under writeback and this page is both under writeback and
		* PageReclaim then it indicates that pages are being queued
		* for IO but are being recycled through the LRU before the
		* IO can complete. Waiting on the page itself risks an
		* indefinite stall if it is impossible to writeback the
		* page due to IO error or disconnected storage so instead
		* block for HZ/10 or until some IO completes then clear the
		* ZONE_WRITEBACK flag to recheck if the condition exists.
		*
		* 2) Global reclaim encounters a page, memcg encounters a
		* page that is not marked for immediate reclaim or
		* the caller does not have __GFP_IO. In this case mark
		* the page for immediate reclaim and continue scanning.
		*
		* __GFP_IO is checked because a loop driver thread might
		* enter reclaim, and deadlock if it waits on a page for
		* which it is needed to do the write (loop masks off
		* __GFP_IO|__GFP_FS for this reason); but more thought
		* would probably show more reasons.
		*
		* Don't require __GFP_FS, since we're not going into the
		* FS, just waiting on its writeback completion. Worryingly,
		* ext4 gfs2 and xfs allocate pages with
		* grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing
		* may_enter_fs here is liable to OOM on them.
		*
		* 3) memcg encounters a page that is already marked
		* PageReclaim. memcg does not have any dirty pages
		* throttling so we could easily OOM just because too many
		* pages are in writeback and there is nothing else to
		* reclaim. Wait for the writeback to complete.
		*/
		if (global_reclaim(sc) ||
		if (PageWriteback(page)) {
		/* Case 1 above */
		if (current_is_kswapd() &&
		PageReclaim(page) &&
		zone_is_reclaim_writeback(zone)) {
		unlock_page(page);
		congestion_wait(BLK_RW_ASYNC, HZ/10);
		zone_clear_flag(zone, ZONE_WRITEBACK);
		goto keep;

		/* Case 2 above */
		} else if (global_reclaim(sc) ||
		!PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) {
		/*
		* This is slightly racy - end_page_writeback()
		@@ -845,10 +875,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
		*/
		SetPageReclaim(page);
		nr_writeback++;

		goto keep_locked;
		}

		/* Case 3 above */
		} else {
		wait_on_page_writeback(page);
		}
		}

		if (!force_reclaim)
		references = page_check_references(page, sc);
		@@ -1463,8 +1497,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
		* isolated page is PageWriteback
		*/
		if (nr_writeback && nr_writeback >=
		(nr_taken >> (DEF_PRIORITY - sc->priority)))
		(nr_taken >> (DEF_PRIORITY - sc->priority))) {
		wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
		zone_set_flag(zone, ZONE_WRITEBACK);
		}

		/*
		* Similarly, if many dirty pages are encountered that are not
		@@ -2785,8 +2821,8 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
		* the high watermark.
		*
		* Returns true if kswapd scanned at least the requested number of pages to
		* reclaim. This is used to determine if the scanning priority needs to be
		* raised.
		* reclaim or if the lack of progress was due to pages under writeback.
		* This is used to determine if the scanning priority needs to be raised.
		*/
		static bool kswapd_shrink_zone(struct zone *zone,
		struct scan_control *sc,
		@@ -2810,6 +2846,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
		/* Account for the number of pages attempted to reclaim */
		*nr_attempted += sc->nr_to_reclaim;

		zone_clear_flag(zone, ZONE_WRITEBACK);

		return sc->nr_scanned >= sc->nr_to_reclaim;
		}