Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b9ac0094 authored by Morten Rasmussen's avatar Morten Rasmussen Committed by Dmitry Shmidt
Browse files

ANDROID: sched: Add cpu capacity awareness to wakeup balancing



Wakeup balancing is completely unaware of cpu capacity, cpu utilization
and task utilization. The task is preferably placed on a cpu which is
idle in the instant the wakeup happens. New tasks
(SD_BALANCE_{FORK,EXEC}) are placed on an idle cpu in the idlest group if
such can be found, otherwise it goes on the least loaded one. Existing
tasks (SD_BALANCE_WAKE) are placed on the previous cpu or an idle cpu
sharing the same last level cache unless the wakee_flips heuristic in
wake_wide() decides to fallback to considering cpus outside SD_LLC.
Hence existing tasks are not guaranteed to get a chance to migrate to a
different group at wakeup in case the current one has reduced cpu
capacity (due to RT/IRQ pressure or different uarch e.g. ARM big.LITTLE).
They may eventually get pulled by other cpus doing
periodic/idle/nohz_idle balance, but it may take quite a while before it
happens.

This patch adds capacity awareness to find_idlest_{group,queue} (used by
SD_BALANCE_{FORK,EXEC} and SD_BALANCE_WAKE under certain circumstances)
such that groups/cpus that can accommodate the waking task based on task
utilization are preferred. In addition, wakeup of existing tasks
(SD_BALANCE_WAKE) is sent through find_idlest_{group,queue} also if the
task doesn't fit the capacity of the previous cpu to allow it to escape
(override wake_affine) when necessary instead of relying on
periodic/idle/nohz_idle balance to eventually sort it out.

cc: Ingo Molnar <mingo@redhat.com>
cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Andres Oportus <andresoportus@google.com>
parent 25cea247
Loading
Loading
Loading
Loading
+61 −30
Original line number Diff line number Diff line
@@ -5200,6 +5200,41 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
	return 1;
}

/* Current PELT utilization estimate of task @p (se.avg.util_avg). */
static inline int task_util(struct task_struct *p)
{
	int util = p->se.avg.util_avg;

	return util;
}

/*
 * Check whether cpu @cpu can accommodate task @p on top of a base
 * utilization of @util, keeping a capacity_margin worth of headroom.
 * Returns true when capacity * 1024 strictly exceeds the margin-scaled
 * combined utilization.
 */
static inline bool __task_fits(struct task_struct *p, int cpu, int util)
{
	unsigned long capacity = capacity_of(cpu);
	int total = util + task_util(p);

	return (capacity * 1024) > (total * capacity_margin);
}

/*
 * Check whether @p fits on @cpu assuming the cpu itself is unloaded.
 * A cpu whose capacity equals — or is within the capacity_margin of —
 * the root domain maximum can accommodate any task, so answer true
 * without consulting the task's utilization in those cases.
 */
static inline bool task_fits_max(struct task_struct *p, int cpu)
{
	unsigned long capacity = capacity_of(cpu);
	unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity;

	if (capacity == max_capacity ||
	    capacity * capacity_margin > max_capacity * 1024)
		return true;

	return __task_fits(p, cpu, 0);
}

static int cpu_util(int cpu);

/*
 * Check whether @p fits on @cpu on top of the cpu's current
 * utilization, i.e. whether the cpu has spare capacity for it.
 */
static inline bool task_fits_spare(struct task_struct *p, int cpu)
{
	int curr_util = cpu_util(cpu);

	return __task_fits(p, cpu, curr_util);
}

/*
 * find_idlest_group finds and returns the least busy CPU group within the
 * domain.
@@ -5209,7 +5244,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
		  int this_cpu, int sd_flag)
{
	struct sched_group *idlest = NULL, *group = sd->groups;
	struct sched_group *fit_group = NULL;
	unsigned long min_load = ULONG_MAX, this_load = 0;
	unsigned long fit_capacity = ULONG_MAX;
	int load_idx = sd->forkexec_idx;
	int imbalance = 100 + (sd->imbalance_pct-100)/2;

@@ -5240,6 +5277,15 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
				load = target_load(i, load_idx);

			avg_load += load;

			/*
			 * Look for the most energy-efficient group that
			 * can fit the task.
			 */
			if (capacity_of(i) < fit_capacity && task_fits_spare(p, i)) {
				fit_capacity = capacity_of(i);
				fit_group = group;
			}
		}

		/* Adjust by relative CPU capacity of the group */
@@ -5253,6 +5299,9 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
		}
	} while (group = group->next, group != sd->groups);

	if (fit_group)
		return fit_group;

	if (!idlest || 100*this_load < imbalance*min_load)
		return NULL;
	return idlest;
@@ -5277,7 +5326,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)

	/* Traverse only the allowed CPUs */
	for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
		if (idle_cpu(i)) {
		if (task_fits_spare(p, i)) {
			struct rq *rq = cpu_rq(i);
			struct cpuidle_state *idle = idle_get_state(rq);
			if (idle && idle->exit_latency < min_exit_latency) {
@@ -5289,7 +5338,8 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
				min_exit_latency = idle->exit_latency;
				latest_idle_timestamp = rq->idle_stamp;
				shallowest_idle_cpu = i;
			} else if ((!idle || idle->exit_latency == min_exit_latency) &&
			} else if (idle_cpu(i) &&
				   (!idle || idle->exit_latency == min_exit_latency) &&
				   rq->idle_stamp > latest_idle_timestamp) {
				/*
				 * If equal or no active idle state, then
@@ -5298,6 +5348,13 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
				 */
				latest_idle_timestamp = rq->idle_stamp;
				shallowest_idle_cpu = i;
			} else if (shallowest_idle_cpu == -1) {
				/*
				 * If we haven't found an idle CPU yet
				 * pick a non-idle one that can fit the task as
				 * fallback.
				 */
				shallowest_idle_cpu = i;
			}
		} else if (shallowest_idle_cpu == -1) {
			load = weighted_cpuload(i);
@@ -5585,32 +5642,6 @@ static int cpu_util(int cpu)
	return (util >= capacity) ? capacity : util;
}

/*
 * PELT utilization estimate of task @p.
 * NOTE(review): duplicate of the task_util() defined earlier in this
 * diff — this appears to be the pre-patch copy on the removal side of
 * the hunk; only one definition can remain in the final source.
 */
static inline int task_util(struct task_struct *p)
{
	return p->se.avg.util_avg;
}

/*
 * Disable WAKE_AFFINE in the case where task @p doesn't fit in the
 * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
 *
 * In that case WAKE_AFFINE doesn't make sense and we'll let
 * BALANCE_WAKE sort things out.
 */
/*
 * Returns non-zero when the smaller of @cpu's and @prev_cpu's original
 * capacity cannot hold @p's utilization within capacity_margin, in
 * which case the caller disables the wake-affine fast path.
 */
static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
{
	long min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
	long max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
	long cap_gap = max_cap - min_cap;

	/* Minimum capacity is close to max, no need to abort wake_affine */
	if (cap_gap < (max_cap >> 3))
		return 0;

	return min_cap * 1024 < task_util(p) * capacity_margin;
}

/*
 * select_task_rq_fair: Select target runqueue for the waking task in domains
 * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -5634,8 +5665,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f

	if (sd_flag & SD_BALANCE_WAKE) {
		record_wakee(p);
		want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
			      && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
		want_affine = !wake_wide(p) && task_fits_max(p, cpu) &&
			cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
	}

	rcu_read_lock();