Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 52a08ef1 authored by Jason Low's avatar Jason Low Committed by Ingo Molnar
Browse files

sched: Fix the rq->next_balance logic in rebalance_domains() and idle_balance()



Currently, in idle_balance(), we update rq->next_balance when we pull_tasks.
However, it is also important to update this in the !pulled_tasks case too.

When the CPU is "busy" (the CPU isn't idle), rq->next_balance gets computed
using sd->busy_factor (so we increase the balance interval when the CPU is
busy). However, when the CPU goes idle, rq->next_balance could still be set
to a large value that was computed with the sd->busy_factor.

Thus, we need to also update rq->next_balance in idle_balance() in the cases
where !pulled_tasks too, so that rq->next_balance gets updated without taking
the busy_factor into account when the CPU is about to go idle.

This patch makes rq->next_balance get updated independently of whether or
not we pulled_task. Also, we add logic to ensure that we always traverse
at least 1 of the sched domains to get a proper next_balance value for
updating rq->next_balance.

Additionally, since load_balance() modifies the sd->balance_interval, we
need to re-obtain the sched domain's interval after the call to
load_balance() in rebalance_domains() before we update rq->next_balance.

This patch adds and uses 2 new helper functions, update_next_balance() and
get_sd_balance_interval() to update next_balance and obtain the sched
domain's balance_interval.

Signed-off-by: default avatarJason Low <jason.low2@hp.com>
Reviewed-by: default avatarPreeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: default avatarPeter Zijlstra <peterz@infradead.org>
Cc: daniel.lezcano@linaro.org
Cc: alex.shi@linaro.org
Cc: efault@gmx.de
Cc: vincent.guittot@linaro.org
Cc: morten.rasmussen@arm.com
Cc: aswin@hp.com
Link: http://lkml.kernel.org/r/1399596562.2200.7.camel@j-VirtualBox


Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent a9467fa3
Loading
Loading
Loading
Loading
+46 −23
Original line number Diff line number Diff line
@@ -6672,17 +6672,44 @@ static int load_balance(int this_cpu, struct rq *this_rq,
	return ld_moved;
}

static inline unsigned long
get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
{
	unsigned long interval = sd->balance_interval;

	if (cpu_busy)
		interval *= sd->busy_factor;

	/* scale ms to jiffies */
	interval = msecs_to_jiffies(interval);
	interval = clamp(interval, 1UL, max_load_balance_interval);

	return interval;
}

static inline void
update_next_balance(struct sched_domain *sd, int cpu_busy, unsigned long *next_balance)
{
	unsigned long interval, next;

	interval = get_sd_balance_interval(sd, cpu_busy);
	next = sd->last_balance + interval;

	if (time_after(*next_balance, next))
		*next_balance = next;
}

/*
 * idle_balance is called by schedule() if this_cpu is about to become
 * idle. Attempts to pull tasks from other CPUs.
 */
static int idle_balance(struct rq *this_rq)
{
	unsigned long next_balance = jiffies + HZ;
	int this_cpu = this_rq->cpu;
	struct sched_domain *sd;
	int pulled_task = 0;
	unsigned long next_balance = jiffies + HZ;
	u64 curr_cost = 0;
	int this_cpu = this_rq->cpu;

	idle_enter_fair(this_rq);

@@ -6692,8 +6719,15 @@ static int idle_balance(struct rq *this_rq)
	 */
	this_rq->idle_stamp = rq_clock(this_rq);

	if (this_rq->avg_idle < sysctl_sched_migration_cost)
	if (this_rq->avg_idle < sysctl_sched_migration_cost) {
		rcu_read_lock();
		sd = rcu_dereference_check_sched_domain(this_rq->sd);
		if (sd)
			update_next_balance(sd, 0, &next_balance);
		rcu_read_unlock();

		goto out;
	}

	/*
	 * Drop the rq->lock, but keep IRQ/preempt disabled.
@@ -6703,15 +6737,16 @@ static int idle_balance(struct rq *this_rq)
	update_blocked_averages(this_cpu);
	rcu_read_lock();
	for_each_domain(this_cpu, sd) {
		unsigned long interval;
		int continue_balancing = 1;
		u64 t0, domain_cost;

		if (!(sd->flags & SD_LOAD_BALANCE))
			continue;

		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
			update_next_balance(sd, 0, &next_balance);
			break;
		}

		if (sd->flags & SD_BALANCE_NEWIDLE) {
			t0 = sched_clock_cpu(this_cpu);
@@ -6727,9 +6762,7 @@ static int idle_balance(struct rq *this_rq)
			curr_cost += domain_cost;
		}

		interval = msecs_to_jiffies(sd->balance_interval);
		if (time_after(next_balance, sd->last_balance + interval))
			next_balance = sd->last_balance + interval;
		update_next_balance(sd, 0, &next_balance);

		/*
		 * Stop searching for tasks to pull if there are
@@ -6753,15 +6786,11 @@ static int idle_balance(struct rq *this_rq)
	if (this_rq->cfs.h_nr_running && !pulled_task)
		pulled_task = 1;

	if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
		/*
		 * We are going idle. next_balance may be set based on
		 * a busy processor. So reset next_balance.
		 */
out:
	/* Move the next balance forward */
	if (time_after(this_rq->next_balance, next_balance))
		this_rq->next_balance = next_balance;
	}

out:
	/* Is there a task of a high priority class? */
	if (this_rq->nr_running != this_rq->cfs.h_nr_running)
		pulled_task = -1;
@@ -7044,16 +7073,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
			break;
		}

		interval = sd->balance_interval;
		if (idle != CPU_IDLE)
			interval *= sd->busy_factor;

		/* scale ms to jiffies */
		interval = msecs_to_jiffies(interval);
		interval = clamp(interval, 1UL, max_load_balance_interval);
		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);

		need_serialize = sd->flags & SD_SERIALIZE;

		if (need_serialize) {
			if (!spin_trylock(&balancing))
				goto out;
@@ -7069,6 +7091,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
				idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
			}
			sd->last_balance = jiffies;
			interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
		}
		if (need_serialize)
			spin_unlock(&balancing);