Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4550487a authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar
Browse files

sched/fair: Restructure nohz_balance_kick()



The current:

	if (nohz_kick_needed())
		nohz_balancer_kick()

is pointless complexity, fold them into a single call and avoid the
various conditions at the call site.

When we introduce multiple different needs to kick the ilb, the above
construct also becomes a problem.

Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent b7031a02
Loading
Loading
Loading
Loading
+111 −107
Original line number Diff line number Diff line
@@ -9065,12 +9065,29 @@ static inline int find_new_ilb(void)
	return nr_cpu_ids;
}

static inline void set_cpu_sd_state_busy(void)
{
	struct sched_domain *sd;
	int cpu = smp_processor_id();

	rcu_read_lock();
	sd = rcu_dereference(per_cpu(sd_llc, cpu));

	if (!sd || !sd->nohz_idle)
		goto unlock;
	sd->nohz_idle = 0;

	atomic_inc(&sd->shared->nr_busy_cpus);
unlock:
	rcu_read_unlock();
}

/*
 * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
 * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
 * CPU (if there is one).
 */
static void nohz_balancer_kick(void)
static void kick_ilb(void)
{
	unsigned int flags;
	int ilb_cpu;
@@ -9085,6 +9102,7 @@ static void nohz_balancer_kick(void)
	flags = atomic_fetch_or(NOHZ_KICK_MASK, nohz_flags(ilb_cpu));
	if (flags & NOHZ_KICK_MASK)
		return;

	/*
	 * Use smp_send_reschedule() instead of resched_cpu().
	 * This way we generate a sched IPI on the target CPU which
@@ -9092,7 +9110,94 @@ static void nohz_balancer_kick(void)
	 * will be run before returning from the IPI.
	 */
	smp_send_reschedule(ilb_cpu);
}

/*
 * Current heuristic for kicking the idle load balancer in the presence
 * of an idle cpu in the system.
 *   - This rq has more than one task.
 *   - This rq has at least one CFS task and the capacity of the CPU is
 *     significantly reduced because of RT tasks or IRQs.
 *   - At parent of LLC scheduler domain level, this cpu's scheduler group has
 *     multiple busy cpu.
 *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
 *     domain span are idle.
 */
static void nohz_balancer_kick(struct rq *rq)
{
	unsigned long now = jiffies;
	struct sched_domain_shared *sds;
	struct sched_domain *sd;
	int nr_busy, i, cpu = rq->cpu;
	bool kick = false;

	if (unlikely(rq->idle_balance))
		return;

	/*
	 * We may be recently in ticked or tickless idle mode. At the first
	 * busy tick after returning from idle, we will update the busy stats.
	 */
	set_cpu_sd_state_busy();
	nohz_balance_exit_idle(cpu);

	/*
	 * None are in tickless mode and hence no need for NOHZ idle load
	 * balancing.
	 */
	if (likely(!atomic_read(&nohz.nr_cpus)))
		return;

	if (time_before(now, nohz.next_balance))
		return;

	if (rq->nr_running >= 2) {
		kick = true;
		goto out;
	}

	rcu_read_lock();
	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
	if (sds) {
		/*
		 * XXX: write a coherent comment on why we do this.
		 * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
		 */
		nr_busy = atomic_read(&sds->nr_busy_cpus);
		if (nr_busy > 1) {
			kick = true;
			goto unlock;
		}

	}

	sd = rcu_dereference(rq->sd);
	if (sd) {
		if ((rq->cfs.h_nr_running >= 1) &&
				check_cpu_capacity(rq, sd)) {
			kick = true;
			goto unlock;
		}
	}

	sd = rcu_dereference(per_cpu(sd_asym, cpu));
	if (sd) {
		for_each_cpu(i, sched_domain_span(sd)) {
			if (i == cpu ||
			    !cpumask_test_cpu(i, nohz.idle_cpus_mask))
				continue;

			if (sched_asym_prefer(i, cpu)) {
				kick = true;
				goto unlock;
			}
		}
	}
unlock:
	rcu_read_unlock();
out:
	if (kick)
		kick_ilb();
}

void nohz_balance_exit_idle(unsigned int cpu)
@@ -9112,23 +9217,6 @@ void nohz_balance_exit_idle(unsigned int cpu)
	}
}

static inline void set_cpu_sd_state_busy(void)
{
	struct sched_domain *sd;
	int cpu = smp_processor_id();

	rcu_read_lock();
	sd = rcu_dereference(per_cpu(sd_llc, cpu));

	if (!sd || !sd->nohz_idle)
		goto unlock;
	sd->nohz_idle = 0;

	atomic_inc(&sd->shared->nr_busy_cpus);
unlock:
	rcu_read_unlock();
}

void set_cpu_sd_state_idle(void)
{
	struct sched_domain *sd;
@@ -9171,6 +9259,8 @@ void nohz_balance_enter_idle(int cpu)
	atomic_inc(&nohz.nr_cpus);
	atomic_or(NOHZ_TICK_STOPPED, nohz_flags(cpu));
}
#else
static inline void nohz_balancer_kick(struct rq *rq) { }
#endif

static DEFINE_SPINLOCK(balancing);
@@ -9369,90 +9459,6 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)

	return true;
}

/*
 * Current heuristic for kicking the idle load balancer in the presence
 * of an idle CPU in the system.
 *   - This rq has more than one task.
 *   - This rq has at least one CFS task and the capacity of the CPU is
 *     significantly reduced because of RT tasks or IRQs.
 *   - At parent of LLC scheduler domain level, this CPU's scheduler group has
 *     multiple busy CPUs.
 *   - For SD_ASYM_PACKING, if the lower numbered CPU's in the scheduler
 *     domain span are idle.
 */
static inline bool nohz_kick_needed(struct rq *rq)
{
	unsigned long now = jiffies;
	struct sched_domain_shared *sds;
	struct sched_domain *sd;
	int nr_busy, i, cpu = rq->cpu;
	bool kick = false;

	if (unlikely(rq->idle_balance))
		return false;

       /*
	* We may be recently in ticked or tickless idle mode. At the first
	* busy tick after returning from idle, we will update the busy stats.
	*/
	set_cpu_sd_state_busy();
	nohz_balance_exit_idle(cpu);

	/*
	 * None are in tickless mode and hence no need for NOHZ idle load
	 * balancing.
	 */
	if (likely(!atomic_read(&nohz.nr_cpus)))
		return false;

	if (time_before(now, nohz.next_balance))
		return false;

	if (rq->nr_running >= 2)
		return true;

	rcu_read_lock();
	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
	if (sds) {
		/*
		 * XXX: write a coherent comment on why we do this.
		 * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
		 */
		nr_busy = atomic_read(&sds->nr_busy_cpus);
		if (nr_busy > 1) {
			kick = true;
			goto unlock;
		}

	}

	sd = rcu_dereference(rq->sd);
	if (sd) {
		if ((rq->cfs.h_nr_running >= 1) &&
				check_cpu_capacity(rq, sd)) {
			kick = true;
			goto unlock;
		}
	}

	sd = rcu_dereference(per_cpu(sd_asym, cpu));
	if (sd) {
		for_each_cpu(i, sched_domain_span(sd)) {
			if (i == cpu ||
			    !cpumask_test_cpu(i, nohz.idle_cpus_mask))
				continue;

			if (sched_asym_prefer(i, cpu)) {
				kick = true;
				goto unlock;
			}
		}
	}
unlock:
	rcu_read_unlock();
	return kick;
}
#else
static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
{
@@ -9497,10 +9503,8 @@ void trigger_load_balance(struct rq *rq)

	if (time_after_eq(jiffies, rq->next_balance))
		raise_softirq(SCHED_SOFTIRQ);
#ifdef CONFIG_NO_HZ_COMMON
	if (nohz_kick_needed(rq))
		nohz_balancer_kick();
#endif

	nohz_balancer_kick(rq);
}

static void rq_online_fair(struct rq *rq)