Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 39507451, authored by Nick Piggin, committed by Linus Torvalds
Browse files

[PATCH] sched: fix SMT scheduling problems



SMT balancing has a couple of problems.  Firstly, active_load_balance is too
complex - basically it should be a dumb helper for when the periodic balancer
has determined there is an imbalance, but gets stuck because the task is
running.

So rip out all its "smarts", and just make it move one task to the target CPU.

Secondly, the busy CPU's sched-domain tree was being used for active balancing.
This means that active balancing may not see that nr_balance_failed has reached
a critical level.  So use the target CPU's sched-domain tree for this.  We can
do this because we hold its runqueue lock.

Lastly, reset nr_balance_failed to a point where we allow cache hot migration.
This will help ensure active load balancing is successful.

Thanks to Suresh Siddha for pointing out these issues.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 16cfb1c0
Loading
Loading
Loading
Loading
+31 −45
Original line number Diff line number Diff line
@@ -1995,7 +1995,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
			 * We've kicked active balancing, reset the failure
			 * counter.
			 */
			sd->nr_balance_failed = sd->cache_nice_tries;
			sd->nr_balance_failed = sd->cache_nice_tries+1;
		}
	} else
		sd->nr_balance_failed = 0;
@@ -2106,36 +2106,15 @@ static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
{
	struct sched_domain *sd;
	struct sched_group *cpu_group;
	runqueue_t *target_rq;
	cpumask_t visited_cpus;
	int cpu;
	int target_cpu = busiest_rq->push_cpu;

	/*
	 * Search for suitable CPUs to push tasks to in successively higher
	 * domains with SD_LOAD_BALANCE set.
	 */
	visited_cpus = CPU_MASK_NONE;
	for_each_domain(busiest_cpu, sd) {
		if (!(sd->flags & SD_LOAD_BALANCE))
			/* no more domains to search */
			break;

		schedstat_inc(sd, alb_cnt);

		cpu_group = sd->groups;
		do {
			for_each_cpu_mask(cpu, cpu_group->cpumask) {
	if (busiest_rq->nr_running <= 1)
					/* no more tasks left to move */
		/* no task to move */
		return;
				if (cpu_isset(cpu, visited_cpus))
					continue;
				cpu_set(cpu, visited_cpus);
				if (!cpu_and_siblings_are_idle(cpu) || cpu == busiest_cpu)
					continue;

				target_rq = cpu_rq(cpu);
	target_rq = cpu_rq(target_cpu);

	/*
	 * This condition is "impossible", if it occurs
	 * we need to fix it.  Originally reported by
@@ -2145,18 +2124,25 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)

	/* move a task from busiest_rq to target_rq */
	double_lock_balance(busiest_rq, target_rq);
				if (move_tasks(target_rq, cpu, busiest_rq,
						1, sd, SCHED_IDLE, NULL)) {

	/* Search for an sd spanning us and the target CPU. */
	for_each_domain(target_cpu, sd)
		if ((sd->flags & SD_LOAD_BALANCE) &&
			cpu_isset(busiest_cpu, sd->span))
				break;

	if (unlikely(sd == NULL))
		goto out;

	schedstat_inc(sd, alb_cnt);

	if (move_tasks(target_rq, target_cpu, busiest_rq, 1, sd, SCHED_IDLE, NULL))
		schedstat_inc(sd, alb_pushed);
				} else {
	else
		schedstat_inc(sd, alb_failed);
				}
out:
	spin_unlock(&target_rq->lock);
}
			cpu_group = cpu_group->next;
		} while (cpu_group != sd->groups);
	}
}

/*
 * rebalance_tick will get called every timer tick, on every CPU.