Loading include/linux/mmzone.h +8 −0 Original line number Diff line number Diff line Loading @@ -527,6 +527,9 @@ typedef enum { * many dirty file pages at the tail * of the LRU. */ ZONE_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ } zone_flags_t; static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) Loading Loading @@ -554,6 +557,11 @@ static inline int zone_is_reclaim_dirty(const struct zone *zone) return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags); } static inline int zone_is_reclaim_writeback(const struct zone *zone) { return test_bit(ZONE_WRITEBACK, &zone->flags); } static inline int zone_is_reclaim_locked(const struct zone *zone) { return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); Loading mm/vmscan.c +60 −22 Original line number Diff line number Diff line Loading @@ -812,25 +812,55 @@ static unsigned long shrink_page_list(struct list_head *page_list, may_enter_fs = (sc->gfp_mask & __GFP_FS) || (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); if (PageWriteback(page)) { /* * memcg doesn't have any dirty pages throttling so we * could easily OOM just because too many pages are in * writeback and there is nothing else to reclaim. * * Check __GFP_IO, certainly because a loop driver * thread might enter reclaim, and deadlock if it waits * on a page for which it is needed to do the write * (loop masks off __GFP_IO|__GFP_FS for this reason); * but more thought would probably show more reasons. * * Don't require __GFP_FS, since we're not going into * the FS, just waiting on its writeback completion. * Worryingly, ext4 gfs2 and xfs allocate pages with * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so * testing may_enter_fs here is liable to OOM on them. * If a page at the tail of the LRU is under writeback, there * are three cases to consider. 
* * 1) If reclaim is encountering an excessive number of pages * under writeback and this page is both under writeback and * PageReclaim then it indicates that pages are being queued * for IO but are being recycled through the LRU before the * IO can complete. Waiting on the page itself risks an * indefinite stall if it is impossible to writeback the * page due to IO error or disconnected storage so instead * block for HZ/10 or until some IO completes then clear the * ZONE_WRITEBACK flag to recheck if the condition exists. * * 2) Global reclaim encounters a page, memcg encounters a * page that is not marked for immediate reclaim or * the caller does not have __GFP_IO. In this case mark * the page for immediate reclaim and continue scanning. * * __GFP_IO is checked because a loop driver thread might * enter reclaim, and deadlock if it waits on a page for * which it is needed to do the write (loop masks off * __GFP_IO|__GFP_FS for this reason); but more thought * would probably show more reasons. * * Don't require __GFP_FS, since we're not going into the * FS, just waiting on its writeback completion. Worryingly, * ext4 gfs2 and xfs allocate pages with * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing * may_enter_fs here is liable to OOM on them. * * 3) memcg encounters a page that is already marked * PageReclaim. memcg does not have any dirty pages * throttling so we could easily OOM just because too many * pages are in writeback and there is nothing else to * reclaim. Wait for the writeback to complete. 
*/ if (global_reclaim(sc) || if (PageWriteback(page)) { /* Case 1 above */ if (current_is_kswapd() && PageReclaim(page) && zone_is_reclaim_writeback(zone)) { unlock_page(page); congestion_wait(BLK_RW_ASYNC, HZ/10); zone_clear_flag(zone, ZONE_WRITEBACK); goto keep; /* Case 2 above */ } else if (global_reclaim(sc) || !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) { /* * This is slightly racy - end_page_writeback() Loading @@ -845,10 +875,14 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ SetPageReclaim(page); nr_writeback++; goto keep_locked; } /* Case 3 above */ } else { wait_on_page_writeback(page); } } if (!force_reclaim) references = page_check_references(page, sc); Loading Loading @@ -1463,8 +1497,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * isolated page is PageWriteback */ if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY - sc->priority))) (nr_taken >> (DEF_PRIORITY - sc->priority))) { wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); zone_set_flag(zone, ZONE_WRITEBACK); } /* * Similarly, if many dirty pages are encountered that are not Loading Loading @@ -2785,8 +2821,8 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, * the high watermark. * * Returns true if kswapd scanned at least the requested number of pages to * reclaim. This is used to determine if the scanning priority needs to be * raised. * reclaim or if the lack of progress was due to pages under writeback. * This is used to determine if the scanning priority needs to be raised. */ static bool kswapd_shrink_zone(struct zone *zone, struct scan_control *sc, Loading @@ -2810,6 +2846,8 @@ static bool kswapd_shrink_zone(struct zone *zone, /* Account for the number of pages attempted to reclaim */ *nr_attempted += sc->nr_to_reclaim; zone_clear_flag(zone, ZONE_WRITEBACK); return sc->nr_scanned >= sc->nr_to_reclaim; } Loading Loading
include/linux/mmzone.h +8 −0 Original line number Diff line number Diff line Loading @@ -527,6 +527,9 @@ typedef enum { * many dirty file pages at the tail * of the LRU. */ ZONE_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ } zone_flags_t; static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) Loading Loading @@ -554,6 +557,11 @@ static inline int zone_is_reclaim_dirty(const struct zone *zone) return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags); } static inline int zone_is_reclaim_writeback(const struct zone *zone) { return test_bit(ZONE_WRITEBACK, &zone->flags); } static inline int zone_is_reclaim_locked(const struct zone *zone) { return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); Loading
mm/vmscan.c +60 −22 Original line number Diff line number Diff line Loading @@ -812,25 +812,55 @@ static unsigned long shrink_page_list(struct list_head *page_list, may_enter_fs = (sc->gfp_mask & __GFP_FS) || (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); if (PageWriteback(page)) { /* * memcg doesn't have any dirty pages throttling so we * could easily OOM just because too many pages are in * writeback and there is nothing else to reclaim. * * Check __GFP_IO, certainly because a loop driver * thread might enter reclaim, and deadlock if it waits * on a page for which it is needed to do the write * (loop masks off __GFP_IO|__GFP_FS for this reason); * but more thought would probably show more reasons. * * Don't require __GFP_FS, since we're not going into * the FS, just waiting on its writeback completion. * Worryingly, ext4 gfs2 and xfs allocate pages with * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so * testing may_enter_fs here is liable to OOM on them. * If a page at the tail of the LRU is under writeback, there * are three cases to consider. * * 1) If reclaim is encountering an excessive number of pages * under writeback and this page is both under writeback and * PageReclaim then it indicates that pages are being queued * for IO but are being recycled through the LRU before the * IO can complete. Waiting on the page itself risks an * indefinite stall if it is impossible to writeback the * page due to IO error or disconnected storage so instead * block for HZ/10 or until some IO completes then clear the * ZONE_WRITEBACK flag to recheck if the condition exists. * * 2) Global reclaim encounters a page, memcg encounters a * page that is not marked for immediate reclaim or * the caller does not have __GFP_IO. In this case mark * the page for immediate reclaim and continue scanning. 
* * __GFP_IO is checked because a loop driver thread might * enter reclaim, and deadlock if it waits on a page for * which it is needed to do the write (loop masks off * __GFP_IO|__GFP_FS for this reason); but more thought * would probably show more reasons. * * Don't require __GFP_FS, since we're not going into the * FS, just waiting on its writeback completion. Worryingly, * ext4 gfs2 and xfs allocate pages with * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing * may_enter_fs here is liable to OOM on them. * * 3) memcg encounters a page that is already marked * PageReclaim. memcg does not have any dirty pages * throttling so we could easily OOM just because too many * pages are in writeback and there is nothing else to * reclaim. Wait for the writeback to complete. */ if (global_reclaim(sc) || if (PageWriteback(page)) { /* Case 1 above */ if (current_is_kswapd() && PageReclaim(page) && zone_is_reclaim_writeback(zone)) { unlock_page(page); congestion_wait(BLK_RW_ASYNC, HZ/10); zone_clear_flag(zone, ZONE_WRITEBACK); goto keep; /* Case 2 above */ } else if (global_reclaim(sc) || !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) { /* * This is slightly racy - end_page_writeback() Loading @@ -845,10 +875,14 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ SetPageReclaim(page); nr_writeback++; goto keep_locked; } /* Case 3 above */ } else { wait_on_page_writeback(page); } } if (!force_reclaim) references = page_check_references(page, sc); Loading Loading @@ -1463,8 +1497,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * isolated page is PageWriteback */ if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY - sc->priority))) (nr_taken >> (DEF_PRIORITY - sc->priority))) { wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); zone_set_flag(zone, ZONE_WRITEBACK); } /* * Similarly, if many dirty pages are encountered that are not Loading Loading @@ -2785,8 +2821,8 @@ static bool 
prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, * the high watermark. * * Returns true if kswapd scanned at least the requested number of pages to * reclaim. This is used to determine if the scanning priority needs to be * raised. * reclaim or if the lack of progress was due to pages under writeback. * This is used to determine if the scanning priority needs to be raised. */ static bool kswapd_shrink_zone(struct zone *zone, struct scan_control *sc, Loading @@ -2810,6 +2846,8 @@ static bool kswapd_shrink_zone(struct zone *zone, /* Account for the number of pages attempted to reclaim */ *nr_attempted += sc->nr_to_reclaim; zone_clear_flag(zone, ZONE_WRITEBACK); return sc->nr_scanned >= sc->nr_to_reclaim; } Loading