Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 537f6da1 authored by Pavankumar Kondeti
Browse files

sched/fair: Fix schedtune boosted tasks placement issues



The current code starts the CPU search with max capacity cluster
for schedtune boosted tasks. If any of the CPU in this cluster
can accommodate the task without exceeding its original capacity,
the search is not extended to other clusters. This approach results
in sub-optimal performance on tri-cluster systems with 1 CPU in the
max capacity cluster. The max capacity CPU is packed with several
tasks while the other mid capacity CPUs are idle. Fix this issue by
starting the CPU search with mid capacity cluster as long as the
task fits. The search is also expanded to the max capacity cluster
and the best CPU is selected among the iterated CPUs. The best CPU
is either an idle CPU in the shallowest c-state or CPU with maximum
spare capacity when all the iterated CPUs are busy. The energy
evaluation is also skipped for boosted tasks to maximize the
performance.

While at it, extend the sched_task_util trace point to print if
the task is schedtune boosted or not.

Change-Id: Iefb29e9d32f1c414bd6af31d66306f04c6f57f9c
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
parent a74cb474
Loading
Loading
Loading
Loading
+8 −4
Original line number Diff line number Diff line
@@ -1449,10 +1449,11 @@ TRACE_EVENT(sched_task_util,

	TP_PROTO(struct task_struct *p, int next_cpu, int backup_cpu,
		int target_cpu, bool sync, bool need_idle, int fastpath,
		bool placement_boost, int rtg_cpu, u64 start_t),
		bool placement_boost, int rtg_cpu, u64 start_t,
		bool stune_boosted),

	TP_ARGS(p, next_cpu, backup_cpu, target_cpu, sync, need_idle, fastpath,
		placement_boost, rtg_cpu, start_t),
		placement_boost, rtg_cpu, start_t, stune_boosted),

	TP_STRUCT__entry(
		__field(int, pid			)
@@ -1468,6 +1469,7 @@ TRACE_EVENT(sched_task_util,
		__field(int, placement_boost		)
		__field(int, rtg_cpu			)
		__field(u64, latency			)
		__field(bool, stune_boosted		)
	),

	TP_fast_assign(
@@ -1484,13 +1486,15 @@ TRACE_EVENT(sched_task_util,
		__entry->placement_boost	= placement_boost;
		__entry->rtg_cpu		= rtg_cpu;
		__entry->latency		= (sched_clock() - start_t);
		__entry->stune_boosted		= stune_boosted;
	),

	TP_printk("pid=%d comm=%s util=%lu prev_cpu=%d next_cpu=%d backup_cpu=%d target_cpu=%d sync=%d need_idle=%d fastpath=%d placement_boost=%d rtg_cpu=%d latency=%llu",
	TP_printk("pid=%d comm=%s util=%lu prev_cpu=%d next_cpu=%d backup_cpu=%d target_cpu=%d sync=%d need_idle=%d fastpath=%d placement_boost=%d rtg_cpu=%d latency=%llu stune_boosted=%d",
		__entry->pid, __entry->comm, __entry->util, __entry->prev_cpu,
		__entry->next_cpu, __entry->backup_cpu, __entry->target_cpu,
		__entry->sync, __entry->need_idle, __entry->fastpath,
		__entry->placement_boost, __entry->rtg_cpu, __entry->latency)
		__entry->placement_boost, __entry->rtg_cpu, __entry->latency,
		__entry->stune_boosted)
)

/*
+19 −10
Original line number Diff line number Diff line
@@ -7290,11 +7290,11 @@ struct find_best_target_env {
static bool is_packing_eligible(struct task_struct *p, int target_cpu,
				struct find_best_target_env *fbt_env,
				unsigned int target_cpus_count,
				int best_idle_cstate)
				int best_idle_cstate, bool boosted)
{
	unsigned long tutil, estimated_capacity;

	if (task_placement_boost_enabled(p) || fbt_env->need_idle)
	if (task_placement_boost_enabled(p) || fbt_env->need_idle || boosted)
		return false;

	if (best_idle_cstate == -1)
@@ -7325,8 +7325,12 @@ static int start_cpu(struct task_struct *p, bool boosted,
	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
	int start_cpu = -1;

	if (boosted)
	if (boosted) {
		if (rd->mid_cap_orig_cpu != -1 &&
		    task_fits_max(p, rd->mid_cap_orig_cpu))
			return rd->mid_cap_orig_cpu;
		return rd->max_cap_orig_cpu;
	}

	/* A task always fits on its rtg_target */
	if (rtg_target) {
@@ -7704,7 +7708,7 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
		 * next cluster if they are higher in capacity. If we are
		 * not in any kind of boost, we break.
		 */
		if (!prefer_idle &&
		if (!prefer_idle && !boosted &&
			(target_cpu != -1 || best_idle_cpu != -1) &&
			(fbt_env->placement_boost == SCHED_BOOST_NONE ||
			sched_boost() != FULL_THROTTLE_BOOST ||
@@ -7715,9 +7719,12 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
		/*
		 * if we are in prefer_idle and have found an idle cpu,
		 * break from searching more groups based on the stune.boost and
		 * group cpu capacity.
		 * group cpu capacity. For !prefer_idle && boosted case, don't
		 * iterate lower capacity CPUs unless the task can't be
		 * accommodated in the higher capacity CPUs.
		 */
		if (prefer_idle && best_idle_cpu != -1) {
		if ((prefer_idle && best_idle_cpu != -1) ||
		    (boosted && (best_idle_cpu != -1 || target_cpu != -1))) {
			if (boosted) {
				if (!next_group_higher_cap)
					break;
@@ -7730,7 +7737,8 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
	} while (sg = sg->next, sg != sd->groups);

	if (best_idle_cpu != -1 && !is_packing_eligible(p, target_cpu, fbt_env,
					active_cpus_count, best_idle_cstate)) {
					active_cpus_count, best_idle_cstate,
					boosted)) {
		target_cpu = best_idle_cpu;
		best_idle_cpu = -1;
	}
@@ -8036,6 +8044,7 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
	int placement_boost = task_boost_policy(p);
	u64 start_t = 0;
	int next_cpu = -1, backup_cpu = -1;
	int boosted = (schedtune_task_boost(p) > 0);

	fbt_env.fastpath = 0;

@@ -8090,7 +8099,6 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
				break;
		}
	} else {
		int boosted = (schedtune_task_boost(p) > 0);
		int prefer_idle;

		/*
@@ -8122,7 +8130,7 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
		    p->state == TASK_WAKING)
			delta = task_util(p);
#endif
		if (task_placement_boost_enabled(p) || need_idle ||
		if (task_placement_boost_enabled(p) || need_idle || boosted ||
		    (rtg_target && (!cpumask_test_cpu(prev_cpu, rtg_target) ||
		    cpumask_test_cpu(target_cpu, rtg_target))) ||
		    __cpu_overutilized(prev_cpu, delta) ||
@@ -8159,7 +8167,8 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,

	trace_sched_task_util(p, next_cpu, backup_cpu, target_cpu, sync,
			need_idle, fbt_env.fastpath, placement_boost,
			rtg_target ? cpumask_first(rtg_target) : -1, start_t);
			rtg_target ? cpumask_first(rtg_target) : -1, start_t,
			boosted);
	return target_cpu;
}