
Commit 408ed066 authored by Peter Zijlstra, committed by Ingo Molnar

sched: hierarchical load vs find_busiest_group



find_busiest_group() assumes task weights fall in the NICE_0_LOAD range.
Hierarchical task groups break this assumption - fix this by replacing the
fixed threshold with the average task weight, which adapts to the actual
weights in use.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent bb3469ac
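
The gist of the change, as a standalone C sketch (illustrative only, not
kernel code: the struct, the helper, and the 128-per-task weight below are
made up for this example). With group scheduling, a task's effective weight
can be a small fraction of NICE_0_LOAD, so a fixed SCHED_LOAD_SCALE-sized
threshold can fail to flag a group that is clearly unbalanced in task terms,
while a threshold of two average task weights adapts:

    /*
     * Standalone illustration (not kernel code) of why a fixed
     * NICE_0_LOAD-sized threshold breaks under hierarchical groups,
     * and how an average task weight adapts.
     */
    #include <stdio.h>

    #define SCHED_LOAD_SCALE 1024UL  /* NICE_0_LOAD: weight of one nice-0 task */

    struct cpu {
            unsigned long load;        /* sum of task weights on this CPU */
            unsigned long nr_running;  /* number of runnable tasks */
    };

    /* Average weight of one task across a group of CPUs. */
    static unsigned long avg_load_per_task(const struct cpu *cpus, int n)
    {
            unsigned long load = 0, tasks = 0;

            for (int i = 0; i < n; i++) {
                    load += cpus[i].load;
                    tasks += cpus[i].nr_running;
            }
            return tasks ? load / tasks : 0;
    }

    int main(void)
    {
            /*
             * Two CPUs in one sched group. Inside a cgroup, each task's
             * effective weight may be only a fraction of NICE_0_LOAD,
             * e.g. 128 here (8 tasks sharing a group weight of 1024).
             */
            struct cpu cpus[2] = {
                    { .load = 6 * 128, .nr_running = 6 },  /* overloaded */
                    { .load = 2 * 128, .nr_running = 2 },  /* underloaded */
            };
            unsigned long imb = cpus[0].load - cpus[1].load;  /* 512 */
            unsigned long avg = avg_load_per_task(cpus, 2);   /* 128 */

            /* Old check: fixed threshold, blind to the real task weight. */
            printf("old: %lu > %lu ? %s\n", imb, SCHED_LOAD_SCALE,
                   imb > SCHED_LOAD_SCALE ? "yes" : "no");

            /* New check: threshold of two average task weights. */
            printf("new: %lu > %lu ? %s\n", imb, 2 * avg,
                   imb > 2 * avg ? "yes" : "no");
            return 0;
    }

Compiled with gcc, the old check reports the 6-vs-2 group as balanced
(512 <= 1024), while the new one flags it (512 > 256).
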
kernel/sched.c: +23 −3
@@ -3050,6 +3050,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	max_load = this_load = total_load = total_pwr = 0;
 	busiest_load_per_task = busiest_nr_running = 0;
 	this_load_per_task = this_nr_running = 0;
+
 	if (idle == CPU_NOT_IDLE)
 		load_idx = sd->busy_idx;
 	else if (idle == CPU_NEWLY_IDLE)
@@ -3064,6 +3065,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		int __group_imb = 0;
 		unsigned int balance_cpu = -1, first_idle_cpu = 0;
 		unsigned long sum_nr_running, sum_weighted_load;
+		unsigned long sum_avg_load_per_task;
+		unsigned long avg_load_per_task;
 
 		local_group = cpu_isset(this_cpu, group->cpumask);
 
@@ -3072,6 +3075,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 		/* Tally up the load of all CPUs in the group */
 		sum_weighted_load = sum_nr_running = avg_load = 0;
+		sum_avg_load_per_task = avg_load_per_task = 0;
+
 		max_cpu_load = 0;
 		min_cpu_load = ~0UL;
 
@@ -3105,6 +3110,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 			avg_load += load;
 			sum_nr_running += rq->nr_running;
 			sum_weighted_load += weighted_cpuload(i);
+
+			sum_avg_load_per_task += cpu_avg_load_per_task(i);
 		}
 
 		/*
@@ -3126,7 +3133,20 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		avg_load = sg_div_cpu_power(group,
 				avg_load * SCHED_LOAD_SCALE);
 
-		if ((max_cpu_load - min_cpu_load) > SCHED_LOAD_SCALE)
+
+		/*
+		 * Consider the group unbalanced when the imbalance is larger
+		 * than the average weight of two tasks.
+		 *
+		 * APZ: with cgroup the avg task weight can vary wildly and
+		 *      might not be a suitable number - should we keep a
+		 *      normalized nr_running number somewhere that negates
+		 *      the hierarchy?
+		 */
+		avg_load_per_task = sg_div_cpu_power(group,
+				sum_avg_load_per_task * SCHED_LOAD_SCALE);
+
+		if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
 			__group_imb = 1;
 
 		group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
@@ -3267,9 +3287,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 			if (busiest_load_per_task > this_load_per_task)
 				imbn = 1;
 		} else
-			this_load_per_task = SCHED_LOAD_SCALE;
+			this_load_per_task = cpu_avg_load_per_task(this_cpu);
 
-		if (max_load - this_load + SCHED_LOAD_SCALE_FUZZ >=
+		if (max_load - this_load + 2*busiest_load_per_task >=
 					busiest_load_per_task * imbn) {
 			*imbalance = busiest_load_per_task;
 			return busiest;
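
A sketch of the normalization done in the group-imbalance hunk above: the
summed per-CPU averages are scaled by SCHED_LOAD_SCALE and divided by the
group's total cpu_power, mirroring what sg_div_cpu_power() does for avg_load.
Plain C stand-ins, not the kernel helpers; the two-CPU group and its power
value are assumptions for illustration:

    #include <stdio.h>

    #define SCHED_LOAD_SCALE 1024UL

    int main(void)
    {
            /* Two-CPU group at full power: __cpu_power = 2 * SCHED_LOAD_SCALE. */
            unsigned long group_power = 2 * SCHED_LOAD_SCALE;

            /* cpu_avg_load_per_task() summed over the group's CPUs. */
            unsigned long sum_avg_load_per_task = 128 + 256;

            /* The new per-group average task weight used by the imbalance test,
             * equivalent to sg_div_cpu_power(group, sum * SCHED_LOAD_SCALE). */
            unsigned long avg_load_per_task =
                    sum_avg_load_per_task * SCHED_LOAD_SCALE / group_power;

            printf("avg_load_per_task   = %lu\n", avg_load_per_task);      /* 192 */
            printf("imbalance threshold = %lu\n", 2 * avg_load_per_task);  /* 384 */
            return 0;
    }

The same idea drives the last hunk: the small-imbalance fallback now derives
this_load_per_task from cpu_avg_load_per_task(this_cpu) and sizes its fuzz as
2*busiest_load_per_task, rather than hard-coding SCHED_LOAD_SCALE-based
constants that only fit nice-0 task weights.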