Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c662b0e5 authored by Morten Rasmussen's avatar Morten Rasmussen Committed by Chris Redpath
Browse files

FROMLIST: sched/fair: Add group_misfit_task load-balance type



To maximize throughput in systems with asymmetric cpu capacities (e.g.
ARM big.LITTLE), load-balancing has to consider task and cpu utilization
as well as per-cpu compute capacity, in addition to the current average
load based load-balancing policy. Tasks with high
utilization that are scheduled on a lower capacity cpu need to be
identified and migrated to a higher capacity cpu if possible to maximize
throughput.

To implement this additional policy an additional group_type
(load-balance scenario) is added: group_misfit_task. This represents
scenarios where a sched_group has one or more tasks that are not
suitable for its per-cpu capacity. group_misfit_task is only considered
if the system is not overloaded or imbalanced (group_imbalanced or
group_overloaded).

Identifying misfit tasks requires the rq lock to be held. To avoid
taking remote rq locks to examine source sched_groups for misfit tasks,
each cpu is responsible for tracking its own misfit tasks and updating
the rq->misfit_task flag. This means checking task utilization when
tasks are scheduled and on sched_tick.

cc: Ingo Molnar <mingo@redhat.com>
cc: Peter Zijlstra <peterz@infradead.org>

Signed-off-by: default avatarMorten Rasmussen <morten.rasmussen@arm.com>
[From https://lore.kernel.org/lkml/1530699470-29808-3-git-send-email-morten.rasmussen@arm.com/]
[backported because some parts are already present in android]
Signed-off-by: default avatarIoan Budea <ioan.budea@arm.com>
Signed-off-by: default avatarValentin Schneider <valentin.schneider@arm.com>
Signed-off-by: default avatarChris Redpath <chris.redpath@arm.com>
Change-Id: I71bd3a77c7088a102ba183df6ece7943aa7eb0c2
parent 83a9ce05
Loading
Loading
Loading
Loading
+38 −36
Original line number Diff line number Diff line
@@ -729,6 +729,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)

static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
static unsigned long task_h_load(struct task_struct *p);
static unsigned long capacity_of(int cpu);

/* Give new sched_entity start runnable values to heavy its load in infant time */
void init_entity_runnable_average(struct sched_entity *se)
@@ -3598,6 +3599,26 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)

static int idle_balance(struct rq *this_rq, struct rq_flags *rf);

static inline int task_fits_capacity(struct task_struct *p, long capacity);

/*
 * Update rq->misfit_task_load to reflect whether task @p fits the compute
 * capacity of rq's cpu. Called with @p as the task currently chosen to run
 * on @rq (or NULL when the cpu goes idle), so identifying misfit tasks
 * never requires taking a remote rq lock.
 */
static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
{
	/* Misfit tracking is only meaningful on asymmetric-capacity systems. */
	if (!static_branch_unlikely(&sched_asym_cpucapacity))
		return;

	/* Going idle: clear any stale misfit load. */
	if (!p) {
		rq->misfit_task_load = 0;
		return;
	}

	/* Task fits this cpu's capacity: not a misfit. */
	if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) {
		rq->misfit_task_load = 0;
		return;
	}

	/* Record the misfit task's load so load-balance can size the imbalance. */
	rq->misfit_task_load = task_h_load(p);
}

#else /* CONFIG_SMP */

static inline int
@@ -3635,6 +3656,8 @@ static inline int idle_balance(struct rq *rq, struct rq_flags *rf)
	return 0;
}

static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}

#endif /* CONFIG_SMP */

static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -7796,29 +7819,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
		set_last_buddy(se);
}

/*
 * (Removed by this patch, replaced by update_misfit_status().)
 * Set rq->misfit_task when @p does not fit the capacity of rq's cpu.
 */
static inline void update_misfit_task(struct rq *rq, struct task_struct *p)
{
#ifdef CONFIG_SMP
	rq->misfit_task = !task_fits_capacity(p, capacity_of(rq->cpu));
#endif
}

/*
 * (Removed by this patch, replaced by update_misfit_status(NULL, rq).)
 * Clear rq's misfit flag, e.g. when the cpu is about to go idle.
 */
static inline void clear_rq_misfit(struct rq *rq)
{
#ifdef CONFIG_SMP
	rq->misfit_task = 0;
#endif
}

/*
 * (Removed by this patch; callers now read rq->misfit_task_load directly.)
 * Return rq's misfit flag; always 0 on !CONFIG_SMP builds.
 */
static inline unsigned int rq_has_misfit(struct rq *rq)
{
#ifdef CONFIG_SMP
	return rq->misfit_task;
#else
	return 0;
#endif
}

static struct task_struct *
pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
@@ -7909,7 +7909,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
	if (hrtick_enabled(rq))
		hrtick_start_fair(rq, p);

	update_misfit_task(rq, p);
	update_misfit_status(p, rq);

	return p;
simple:
@@ -7928,12 +7928,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
	if (hrtick_enabled(rq))
		hrtick_start_fair(rq, p);

	update_misfit_task(rq, p);
	update_misfit_status(p, rq);

	return p;

idle:
	clear_rq_misfit(rq);
	update_misfit_status(NULL, rq);
	new_tasks = idle_balance(rq, rf);

	/*
@@ -8682,7 +8682,8 @@ struct sg_lb_stats {
	unsigned int group_weight;
	enum group_type group_type;
	int group_no_capacity;
	int group_misfit_task; /* A cpu has a task too big for its capacity */
	/* A cpu has a task too big for its capacity */
	unsigned long group_misfit_task_load;
#ifdef CONFIG_NUMA_BALANCING
	unsigned int nr_numa_running;
	unsigned int nr_preferred_running;
@@ -9001,7 +9002,7 @@ group_type group_classify(struct sched_group *group,
	if (sg_imbalanced(group))
		return group_imbalanced;

	if (sgs->group_misfit_task)
	if (sgs->group_misfit_task_load)
		return group_misfit_task;

	return group_other;
@@ -9056,13 +9057,13 @@ static inline void update_sg_lb_stats(struct lb_env *env,
			sgs->idle_cpus++;

		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
		    !sgs->group_misfit_task && rq_has_misfit(rq))
			sgs->group_misfit_task = capacity_of(i);
		    sgs->group_misfit_task_load < rq->misfit_task_load)
			sgs->group_misfit_task_load = rq->misfit_task_load;

		if (cpu_overutilized(i)) {
			*overutilized = true;

			if (rq_has_misfit(rq))
			if (rq->misfit_task_load)
				*misfit_task = true;
		}
	}
@@ -9540,7 +9541,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
	/* Boost imbalance to allow misfit task to be balanced. */
	if (busiest->group_type == group_misfit_task) {
		env->imbalance = max_t(long, env->imbalance,
				       busiest->group_misfit_task);
				       busiest->group_misfit_task_load);
	}

	/*
@@ -9706,7 +9707,8 @@ static struct rq *find_busiest_queue(struct lb_env *env,
		 * For ASYM_CPUCAPACITY domains with misfit tasks we ignore
		 * load.
		 */
		if (env->src_grp_type == group_misfit_task && rq_has_misfit(rq))
		if (env->src_grp_type == group_misfit_task &&
		    rq->misfit_task_load)
			return rq;

		capacity = capacity_of(i);
@@ -10124,7 +10126,7 @@ get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
	if (energy_aware() && sd_overutilized(sd)) {
		/* we know the root is overutilized, let's check for a misfit task */
		for_each_cpu(cpu, sched_domain_span(sd)) {
			if (rq_has_misfit(cpu_rq(cpu)))
			if (cpu_rq(cpu)->misfit_task_load)
				return 1;
		}
	}
@@ -10748,7 +10750,7 @@ static inline bool nohz_kick_needed(struct rq *rq, bool only_update)

	/* Do idle load balance if there have misfit task */
	if (energy_aware())
		return rq_has_misfit(rq);
		return rq->misfit_task_load;

	rcu_read_lock();
	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
@@ -10875,7 +10877,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
	if (static_branch_unlikely(&sched_numa_balancing))
		task_tick_numa(rq, curr);

	update_misfit_task(rq, curr);
	update_misfit_status(curr, rq);

	update_overutilized_status(rq);
}
+2 −1
Original line number Diff line number Diff line
@@ -755,7 +755,8 @@ struct rq {

	unsigned char idle_balance;

	unsigned int misfit_task;
	unsigned long misfit_task_load;

	/* For active balancing */
	int active_balance;
	int push_cpu;