Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dbe53691 authored by Brendan Jackman, committed by Chris Redpath
Browse files

FROMLIST: sched/fair: Update blocked load from newly idle balance



We now have a NOHZ kick to avoid the load of idle CPUs becoming stale. This is
good, but it brings about CPU wakeups, which have an energy cost. As an
alternative to waking CPUs up to decay blocked load, we can sometimes do it
from newly idle balance. If the newly idle balance is on a domain that covers
all the currently nohz-idle CPUs, we push the value of nohz.next_update into the
future. That means that if such newly idle balances happen often enough, we
never need to wake up a CPU just to update load.

Since we're doing this new update inside a for_each_domain, we need to do
something to avoid doing multiple updates on the same CPU in the same
idle_balance. A tick stamp is set on the rq in update_blocked_averages as a
simple way to do this. Using a simple jiffies-based timestamp, as opposed to the
last_update_time of the root cfs_rq's sched_avg, means we can do this without
taking the rq lock.

Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Brendan Jackman <brendan.jackman@arm.com>
Change-Id: I39423091e6bf789c1579cb431930c449a3c8239a
[merge conflicts]
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
parent 3a5ef9da
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -5923,6 +5923,7 @@ void __init sched_init(void)
		rq_attach_root(rq, &def_root_domain);
#ifdef CONFIG_NO_HZ_COMMON
		rq->last_load_update_tick = jiffies;
		rq->last_blocked_load_update_tick = jiffies;
		rq->nohz_flags = 0;
#endif
#ifdef CONFIG_NO_HZ_FULL
+42 −7
Original line number Diff line number Diff line
@@ -7476,6 +7476,9 @@ static void update_blocked_averages(int cpu)
		if (cfs_rq_is_decayed(cfs_rq))
			list_del_leaf_cfs_rq(cfs_rq);
	}
#ifdef CONFIG_NO_HZ_COMMON
	rq->last_blocked_load_update_tick = jiffies;
#endif
	rq_unlock_irqrestore(rq, &rf);
}

@@ -7535,6 +7538,9 @@ static inline void update_blocked_averages(int cpu)
	rq_lock_irqsave(rq, &rf);
	update_rq_clock(rq);
	update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
#ifdef CONFIG_NO_HZ_COMMON
	rq->last_blocked_load_update_tick = jiffies;
#endif
	rq_unlock_irqrestore(rq, &rf);
}

@@ -8071,6 +8077,15 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq)
}
#endif /* CONFIG_NUMA_BALANCING */

#ifdef CONFIG_NO_HZ_COMMON
static struct {
	cpumask_var_t idle_cpus_mask;
	atomic_t nr_cpus;
	unsigned long next_balance;     /* in jiffy units */
	unsigned long next_update;     /* in jiffy units */
} nohz ____cacheline_aligned;
#endif

#define lb_sd_parent(sd) \
	(sd->parent && sd->parent->groups != sd->parent->groups->next)

@@ -8091,6 +8106,30 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
	if (child && child->flags & SD_PREFER_SIBLING)
		prefer_sibling = 1;

#ifdef CONFIG_NO_HZ_COMMON
	if (env->idle == CPU_NEWLY_IDLE) {
		int cpu;

		/* Update the stats of NOHZ idle CPUs in the sd */
		for_each_cpu_and(cpu, sched_domain_span(env->sd),
				 nohz.idle_cpus_mask) {
			struct rq *rq = cpu_rq(cpu);

			/* ... Unless we've already done since the last tick */
			if (time_after(jiffies,
                                       rq->last_blocked_load_update_tick))
				update_blocked_averages(cpu);
		}
	}
	/*
	 * If we've just updated all of the NOHZ idle CPUs, then we can push
	 * back the next nohz.next_update, which will prevent an unnecessary
	 * wakeup for the nohz stats kick
	 */
	if (cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd)))
		nohz.next_update = jiffies + LOAD_AVG_PERIOD;
#endif

	load_idx = get_sd_load_idx(env->sd, env->idle);

	do {
@@ -9183,12 +9222,6 @@ static inline int on_null_domain(struct rq *rq)
 *   needed, they will kick the idle load balancer, which then does idle
 *   load balancing for all the idle CPUs.
 */
static struct {
	cpumask_var_t idle_cpus_mask;
	atomic_t nr_cpus;
	unsigned long next_balance;     /* in jiffy units */
	unsigned long next_update;     /* in jiffy units */
} nohz ____cacheline_aligned;

static inline int find_new_ilb(void)
{
@@ -9640,10 +9673,12 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
	 */
	nohz_idle_balance(this_rq, idle);
	update_blocked_averages(this_rq->cpu);
#ifdef CONFIG_NO_HZ_COMMON
	if (!test_bit(NOHZ_STATS_KICK, nohz_flags(this_rq->cpu)))
		rebalance_domains(this_rq, idle);
#ifdef CONFIG_NO_HZ_COMMON
	clear_bit(NOHZ_STATS_KICK, nohz_flags(this_rq->cpu));
#else
	rebalance_domains(this_rq, idle);
#endif
}

+1 −0
Original line number Diff line number Diff line
@@ -682,6 +682,7 @@ struct rq {
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
	unsigned long last_load_update_tick;
	unsigned long last_blocked_load_update_tick;
#endif /* CONFIG_SMP */
	unsigned long nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */