
Commit 0133d85e authored by Joonwoo Park

sched: prevent task migration while governor queries CPUs' load



At present, the governor retrieves each CPU's load sequentially.  This
leaves a window for a race between the governor's CPU load query and
task migration, which can result in reporting less CPU load than is
actually present.

For example:
CPU 0 load = 30%.  CPU 1 load = 50%.
Governor                               Load balancer
- sched_get_busy(cpu 0) = 30%.
                                       - A task 'p' migrated from CPU 1 to
                                         CPU 0.  p->ravg->prev_window = 50.
                                         Now CPU 0's load = 80%,
                                         CPU 1's load = 0%.
- sched_get_busy(cpu 1) = 0%.
  The 50% of load that moved from
  CPU 1 to CPU 0 is never accounted.
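
In code, the racy pattern looks roughly like the sketch below (a
minimal illustration, not part of this patch; the function and the
'policy_cpus' mask are hypothetical).  Each sched_get_busy() call
takes and drops a single runqueue lock, so a migration can slip in
between iterations:

	/* Hypothetical governor-side loop, for illustration only. */
	static u64 governor_total_busy(const struct cpumask *policy_cpus)
	{
		u64 total = 0;
		int cpu;

		for_each_cpu(cpu, policy_cpus) {
			/*
			 * A task can migrate between two iterations;
			 * its load may then be counted on neither CPU.
			 */
			total += sched_get_busy(cpu);
		}

		return total;
	}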

Fix such races by introducing a new API, sched_get_cpus_busy(), which
lets the governor retrieve the load of a set of CPUs in a single call.
The load snapshot is constructed internally while holding all of the
queried CPUs' runqueue locks, blocking the load balancer so that no
migration can occur in the meantime.
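
A minimal sketch of the intended caller side (the function name and
the fixed-size array are assumptions for illustration, not part of
this patch):

	/*
	 * Hypothetical caller: query all CPUs of a policy at once.
	 * Every queried runqueue lock is held inside
	 * sched_get_cpus_busy(), so no task can migrate while the
	 * snapshot is taken.
	 */
	static void governor_sample_load(const struct cpumask *policy_cpus)
	{
		unsigned long busy[NR_CPUS];
		int cpu, i = 0;

		sched_get_cpus_busy(busy, policy_cpus);

		for_each_cpu(cpu, policy_cpus) {
			/* busy[i] is CPU 'cpu's busy time in usecs. */
			pr_debug("cpu%d busy %lu\n", cpu, busy[i]);
			i++;
		}
	}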

Change-Id: I4fa4dd1195eff26aa603829aca2054871521495e
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
parent 637c091f
+2 −0
@@ -1963,6 +1963,8 @@ extern int task_free_unregister(struct notifier_block *n);
 #if defined(CONFIG_SCHED_FREQ_INPUT)
 extern int sched_set_window(u64 window_start, unsigned int window_size);
 extern unsigned long sched_get_busy(int cpu);
+extern void sched_get_cpus_busy(unsigned long *busy,
+				const struct cpumask *query_cpus);
 extern void sched_set_io_is_busy(int val);
 #else
 static inline int sched_set_window(u64 window_start, unsigned int window_size)
+65 −29
@@ -2151,51 +2151,87 @@ scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
 	return div64_u64(load * (u64)src_freq, (u64)dst_freq);
 }
 
-unsigned long sched_get_busy(int cpu)
+void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
 {
 	unsigned long flags;
-	struct rq *rq = cpu_rq(cpu);
-	u64 load;
+	struct rq *rq;
+	const int cpus = cpumask_weight(query_cpus);
+	u64 load[cpus];
+	unsigned int cur_freq[cpus], max_freq[cpus];
+	int notifier_sent[cpus];
+	int cpu, i = 0;
+	unsigned int window_size;
+
+	if (unlikely(cpus == 0))
+		return;
 
 	/*
 	 * This function could be called in timer context, and the
 	 * current task may have been executing for a long time. Ensure
 	 * that the window stats are current by doing an update.
 	 */
-	raw_spin_lock_irqsave(&rq->lock, flags);
-	update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
-	load = rq->old_busy_time = rq->prev_runnable_sum;
+	local_irq_save(flags);
+	for_each_cpu(cpu, query_cpus)
+		raw_spin_lock(&cpu_rq(cpu)->lock);
 
-	/*
-	 * Scale load in reference to rq->max_possible_freq.
-	 *
-	 * Note that scale_load_to_cpu() scales load in reference to
-	 * rq->max_freq
-	 */
-	load = scale_load_to_cpu(load, cpu);
+	window_size = sched_ravg_window;
+
+	for_each_cpu(cpu, query_cpus) {
+		rq = cpu_rq(cpu);
+
+		update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
+		load[i] = rq->old_busy_time = rq->prev_runnable_sum;
+		/*
+		 * Scale load in reference to rq->max_possible_freq.
+		 *
+		 * Note that scale_load_to_cpu() scales load in reference to
+		 * rq->max_freq.
+		 */
+		load[i] = scale_load_to_cpu(load[i], cpu);
 
-	if (!rq->notifier_sent) {
-		u64 load_at_cur_freq;
-
-		load_at_cur_freq = scale_load_to_freq(load, rq->max_freq,
-								 rq->cur_freq);
-		if (load_at_cur_freq > sched_ravg_window)
-			load_at_cur_freq = sched_ravg_window;
-		load = scale_load_to_freq(load_at_cur_freq,
-					 rq->cur_freq, rq->max_possible_freq);
-	} else {
-		load = scale_load_to_freq(load, rq->max_freq,
-					 rq->max_possible_freq);
-		rq->notifier_sent = 0;
-	}
+		notifier_sent[i] = rq->notifier_sent;
+		rq->notifier_sent = 0;
+		cur_freq[i] = rq->cur_freq;
+		max_freq[i] = rq->max_freq;
+		i++;
+	}
+
+	for_each_cpu(cpu, query_cpus)
+		raw_spin_unlock(&(cpu_rq(cpu))->lock);
+	local_irq_restore(flags);
+
+	i = 0;
+	for_each_cpu(cpu, query_cpus) {
+		rq = cpu_rq(cpu);
+
+		if (!notifier_sent[i]) {
+			load[i] = scale_load_to_freq(load[i], max_freq[i],
+						     cur_freq[i]);
+			if (load[i] > window_size)
+				load[i] = window_size;
+			load[i] = scale_load_to_freq(load[i], cur_freq[i],
+						     rq->max_possible_freq);
+		} else {
+			load[i] = scale_load_to_freq(load[i], max_freq[i],
+						     rq->max_possible_freq);
+		}
 
-	load = div64_u64(load, NSEC_PER_USEC);
+		busy[i] = div64_u64(load[i], NSEC_PER_USEC);
 
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
+		trace_sched_get_busy(cpu, busy[i]);
+		i++;
+	}
+}
 
-	trace_sched_get_busy(cpu, load);
+unsigned long sched_get_busy(int cpu)
+{
+	struct cpumask query_cpu = CPU_MASK_NONE;
+	unsigned long busy;
 
-	return load;
+	cpumask_set_cpu(cpu, &query_cpu);
+	sched_get_cpus_busy(&busy, &query_cpu);
+
+	return busy;
 }
 
 void sched_set_io_is_busy(int val)