Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ed7f159d authored by Linux Build Service Account, committed by Gerrit - the friendly Code Review server
Browse files

Merge "mm, vmscan: prevent kswapd sleeping prematurely due to mismatched classzone_idx"

parents 31e0e7fe 7fcc2025
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -1201,7 +1201,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)

		arch_refresh_nodedata(nid, pgdat);
	} else {
		/* Reset the nr_zones, order and classzone_idx before reuse */
		/*
		 * Reset the nr_zones, order and classzone_idx before reuse.
		 * Note that kswapd will init kswapd_classzone_idx properly
		 * when it starts in the near future.
		 */
		pgdat->nr_zones = 0;
		pgdat->kswapd_order = 0;
		pgdat->kswapd_classzone_idx = 0;
+66 −54
Original line number Diff line number Diff line
@@ -3144,14 +3144,36 @@ static void age_active_anon(struct pglist_data *pgdat,
	} while (memcg);
}

static bool zone_balanced(struct zone *zone, int order, int classzone_idx)
/*
 * Returns true if there is an eligible zone balanced for the request order
 * and classzone_idx
 */
static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
{
	unsigned long mark = high_wmark_pages(zone);
	int i;
	unsigned long mark = -1;
	struct zone *zone;

	if (!zone_watermark_ok_safe(zone, order, mark, classzone_idx))
		return false;
	for (i = 0; i <= classzone_idx; i++) {
		zone = pgdat->node_zones + i;

		if (!managed_zone(zone))
			continue;

		mark = high_wmark_pages(zone);
		if (zone_watermark_ok_safe(zone, order, mark, classzone_idx))
			return true;
	}

	/*
	 * If a node has no populated zone within classzone_idx, it does not
	 * need balancing by definition. This can happen if a zone-restricted
	 * allocation tries to wake a remote kswapd.
	 */
	if (mark == -1)
		return true;

	return false;
}

/* Clear pgdat state for congested, dirty or under writeback. */
@@ -3170,8 +3192,6 @@ static void clear_pgdat_congested(pg_data_t *pgdat)
 */
static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
{
	int i;

	/*
	 * The throttled processes are normally woken up in balance_pgdat() as
	 * soon as allow_direct_reclaim() is true. But there is a potential
@@ -3192,17 +3212,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
		return true;

	for (i = 0; i <= classzone_idx; i++) {
		struct zone *zone = pgdat->node_zones + i;

		if (!managed_zone(zone))
			continue;

		if (zone_balanced(zone, order, classzone_idx)) {
	if (pgdat_balanced(pgdat, order, classzone_idx)) {
		clear_pgdat_congested(pgdat);
		return true;
	}
	}

	return false;
}
@@ -3307,23 +3320,12 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
		}

		/*
		 * Only reclaim if there are no eligible zones. Check from
		 * high to low zone as allocations prefer higher zones.
		 * Scanning from low to high zone would allow congestion to be
		 * cleared during a very small window when a small low
		 * zone was balanced even under extreme pressure when the
		 * overall node may be congested. Note that sc.reclaim_idx
		 * is not used as buffer_heads_over_limit may have adjusted
		 * it.
		 * Only reclaim if there are no eligible zones. Note that
		 * sc.reclaim_idx is not used as buffer_heads_over_limit may
		 * have adjusted it.
		 */
		for (i = classzone_idx; i >= 0; i--) {
			zone = pgdat->node_zones + i;
			if (!managed_zone(zone))
				continue;

			if (zone_balanced(zone, sc.order, classzone_idx))
		if (pgdat_balanced(pgdat, sc.order, classzone_idx))
			goto out;
		}

		/*
		 * Do some background aging of the anon list, to give
@@ -3390,6 +3392,22 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
	return sc.order;
}

/*
 * Select the classzone index kswapd should operate on.
 *
 * pgdat->kswapd_classzone_idx records the highest zone index that a recent
 * allocation request woke kswapd for. When kswapd has not been woken
 * recently, the field holds MAX_NR_ZONES, which is not a valid zone index.
 *
 * Returns the caller's classzone_idx unchanged when no wakeup is recorded;
 * otherwise returns the higher of the recorded index and classzone_idx.
 */
static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat,
					   enum zone_type classzone_idx)
{
	enum zone_type result = classzone_idx;

	/* Only fold in the recorded index when it is a real zone index. */
	if (pgdat->kswapd_classzone_idx != MAX_NR_ZONES)
		result = max(pgdat->kswapd_classzone_idx, result);

	return result;
}

static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
				unsigned int classzone_idx)
{
@@ -3431,7 +3449,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
		 * the previous request that slept prematurely.
		 */
		if (remaining) {
			pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, classzone_idx);
			pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
			pgdat->kswapd_order = max(pgdat->kswapd_order, reclaim_order);
		}

@@ -3485,7 +3503,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
 */
static int kswapd(void *p)
{
	unsigned int alloc_order, reclaim_order, classzone_idx;
	unsigned int alloc_order, reclaim_order;
	unsigned int classzone_idx = MAX_NR_ZONES - 1;
	pg_data_t *pgdat = (pg_data_t*)p;
	struct task_struct *tsk = current;

@@ -3515,20 +3534,23 @@ static int kswapd(void *p)
	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
	set_freezable();

	pgdat->kswapd_order = alloc_order = reclaim_order = 0;
	pgdat->kswapd_classzone_idx = classzone_idx = 0;
	pgdat->kswapd_order = 0;
	pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
	for ( ; ; ) {
		bool ret;

		alloc_order = reclaim_order = pgdat->kswapd_order;
		classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);

kswapd_try_sleep:
		kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order,
					classzone_idx);

		/* Read the new order and classzone_idx */
		alloc_order = reclaim_order = pgdat->kswapd_order;
		classzone_idx = pgdat->kswapd_classzone_idx;
		classzone_idx = kswapd_classzone_idx(pgdat, 0);
		pgdat->kswapd_order = 0;
		pgdat->kswapd_classzone_idx = 0;
		pgdat->kswapd_classzone_idx = MAX_NR_ZONES;

		ret = try_to_freeze();
		if (kthread_should_stop())
@@ -3554,9 +3576,6 @@ static int kswapd(void *p)
		reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
		if (reclaim_order < alloc_order)
			goto kswapd_try_sleep;

		alloc_order = reclaim_order = pgdat->kswapd_order;
		classzone_idx = pgdat->kswapd_classzone_idx;
	}

	tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
@@ -3572,7 +3591,6 @@ static int kswapd(void *p)
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
{
	pg_data_t *pgdat;
	int z;

	if (!managed_zone(zone))
		return;
@@ -3580,7 +3598,8 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
	if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
		return;
	pgdat = zone->zone_pgdat;
	pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, classzone_idx);
	pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat,
							   classzone_idx);
	pgdat->kswapd_order = max(pgdat->kswapd_order, order);
	if (!waitqueue_active(&pgdat->kswapd_wait))
		return;
@@ -3589,17 +3608,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
		return;

	/* Only wake kswapd if all zones are unbalanced */
	for (z = 0; z <= classzone_idx; z++) {
		zone = pgdat->node_zones + z;
		if (!managed_zone(zone))
			continue;

		if (zone_balanced(zone, order, classzone_idx))
	if (pgdat_balanced(pgdat, order, classzone_idx))
		return;
	}

	trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
	trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, classzone_idx, order);
	wake_up_interruptible(&pgdat->kswapd_wait);
}