Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3bd5f6f4 authored by Joonwoo Park's avatar Joonwoo Park Committed by Syed Rameez Mustafa
Browse files

sched: add preference for waker cluster CPU in wakee task placement



If a sync wakee task's demand is small, it is worthwhile to place the
wakee task on the waker's cluster for better performance: the waker and
wakee are correlated, so the wakee should take advantage of the waker
cluster's frequency, which is voted for by the waker, along with the
cache-locality benefit. While biasing towards the waker's cluster we want
to avoid the waker CPU as much as possible, since placing the wakee on the
waker's CPU can cause the waker to be preempted and then migrated by the
load balancer.

Introduce a new tunable 'sched_small_wakee_task_load' that differentiates
eligible small wakee task and place the small wakee tasks on the waker's
cluster.

CRs-fixed: 971295
Change-Id: I96897d9a72a6f63dca4986d9219c2058cd5a7916
Signed-off-by: default avatarJoonwoo Park <joonwoop@codeaurora.org>
parent 86de6cbe
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -1280,6 +1280,18 @@ power-efficient cpu. We don't let it come back to a power-efficient cpu until
its demand *in reference to the power-efficient cpu* drops less than 60%
(sched_downmigrate).


*** 7.26 sched_small_wakee_task_load

Appears at: /proc/sys/kernel/sched_small_wakee_task_load

Default value: 10

This tunable is a percentage.  It configures the maximum demand of a small
wakee task.  Sync wakee tasks whose demand is less than
sched_small_wakee_task_load are categorized as small wakee tasks.  The
scheduler places small wakee tasks on the waker's cluster.

=========================
8. HMP SCHEDULER TRACE POINTS
=========================
+1 −0
Original line number Diff line number Diff line
@@ -67,6 +67,7 @@ extern unsigned int sysctl_sched_downmigrate_pct;
extern int sysctl_sched_upmigrate_min_nice;
extern unsigned int sysctl_sched_boost;
extern unsigned int sysctl_early_detection_duration;
extern unsigned int sysctl_sched_small_wakee_task_load_pct;

#ifdef CONFIG_SCHED_QHMP
extern unsigned int sysctl_sched_min_runtime;
+56 −12
Original line number Diff line number Diff line
@@ -2469,6 +2469,13 @@ unsigned int __read_mostly sysctl_sched_spill_nr_run = 10;
 */
unsigned int __read_mostly sysctl_sched_enable_power_aware = 0;

/*
 * Place sync wakee tasks that have less than the configured demand on the
 * waker's cluster.
 */
unsigned int __read_mostly sched_small_wakee_task_load;
unsigned int __read_mostly sysctl_sched_small_wakee_task_load_pct = 10;

/*
 * CPUs with load greater than the sched_spill_load_threshold are not
 * eligible for task placement. When all CPUs in a cluster achieve a
@@ -2595,6 +2602,10 @@ void set_hmp_defaults(void)

	sched_short_sleep_task_threshold = sysctl_sched_select_prev_cpu_us *
					   NSEC_PER_USEC;

	sched_small_wakee_task_load =
		div64_u64((u64)sysctl_sched_small_wakee_task_load_pct *
			  (u64)sched_ravg_window, 100);
}

u32 sched_get_init_task_load(struct task_struct *p)
@@ -2871,6 +2882,7 @@ struct cpu_select_env {
	struct related_thread_group *rtg;
	u8 reason;
	u8 need_idle:1;
	u8 need_waker_cluster:1;
	u8 boost:1;
	u8 sync:1;
	u8 ignore_prev_cpu:1;
@@ -3036,15 +3048,29 @@ next_candidate(const unsigned long *list, int start, int end)
	return sched_cluster[cluster_id];
}

static void update_spare_capacity(
struct cluster_cpu_stats *stats, int cpu, int capacity, u64 cpu_load)
static void
update_spare_capacity(struct cluster_cpu_stats *stats,
		      struct cpu_select_env *env, int cpu, int capacity,
		      u64 cpu_load)
{
	s64 spare_capacity = sched_ravg_window - cpu_load;

	if (spare_capacity > 0 &&
	    (spare_capacity > stats->highest_spare_capacity ||
	     (spare_capacity == stats->highest_spare_capacity &&
	     capacity > cpu_capacity(stats->best_capacity_cpu)))) {
	      ((!env->need_waker_cluster &&
		capacity > cpu_capacity(stats->best_capacity_cpu)) ||
	       (env->need_waker_cluster &&
		cpu_rq(cpu)->nr_running <
		cpu_rq(stats->best_capacity_cpu)->nr_running))))) {
		/*
		 * If the sync waker is the only runnable task on its CPU, the
		 * CPU's cr_avg is 0, so there is a high chance of placing the
		 * wakee on the waker's CPU, which is likely to cause
		 * preemption of the waker.  This can lead to migration of the
		 * preempted waker.  Place the wakee on a genuinely idle CPU
		 * when possible, by checking nr_running, to avoid such
		 * preemption.
		 */
		stats->highest_spare_capacity = spare_capacity;
		stats->best_capacity_cpu = cpu;
	}
@@ -3064,7 +3090,7 @@ struct cpu_select_env *env, struct cluster_cpu_stats *stats)
			sched_irqload(i), power_cost(i, task_load(env->p) +
					cpu_cravg_sync(i, env->sync)), 0);

			update_spare_capacity(stats, i, next->capacity,
			update_spare_capacity(stats, env, i, next->capacity,
					  cpu_load_sync(i, env->sync));
		}
	}
@@ -3162,9 +3188,11 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c,
		if (unlikely(!cpu_active(i)) || skip_cpu(i, env))
			continue;

		update_spare_capacity(stats, i, c->capacity, env->cpu_load);
		update_spare_capacity(stats, env, i, c->capacity,
				      env->cpu_load);

		if (env->boost || sched_cpu_high_irqload(i) ||
		if (env->boost || env->need_waker_cluster ||
		    sched_cpu_high_irqload(i) ||
		    spill_threshold_crossed(env, cpu_rq(i)))
			continue;

@@ -3235,7 +3263,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
	env->cpu_load = cpu_load_sync(prev_cpu, env->sync);
	if (sched_cpu_high_irqload(prev_cpu) ||
			spill_threshold_crossed(env, cpu_rq(prev_cpu))) {
		update_spare_capacity(stats, prev_cpu,
		update_spare_capacity(stats, env, prev_cpu,
				cluster->capacity, env->cpu_load);
		env->ignore_prev_cpu = 1;
		return false;
@@ -3244,6 +3272,13 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
	return true;
}

/*
 * wake_to_waker_cluster() - decide whether a sync wakee should be biased
 * towards the waker's cluster.
 *
 * Returns true only for plain sync wakeups — no idle-CPU requirement
 * (need_idle) and no special placement reason — whose demand is below the
 * sched_small_wakee_task_load threshold (derived from the
 * sched_small_wakee_task_load_pct tunable in set_hmp_defaults()).
 */
static inline bool
wake_to_waker_cluster(struct cpu_select_env *env)
{
	return !env->need_idle && !env->reason && env->sync &&
	       task_load(env->p) < sched_small_wakee_task_load;
}

static inline int
cluster_allowed(struct task_struct *p, struct sched_cluster *cluster)
{
@@ -3269,6 +3304,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
		.p			= p,
		.reason			= reason,
		.need_idle		= wake_to_idle(p),
		.need_waker_cluster	= 0,
		.boost			= sched_boost(),
		.sync			= sync,
		.prev_cpu		= target,
@@ -3291,10 +3327,18 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
			clear_bit(pref_cluster->id, env.candidate_list);
		else
			env.rtg = grp;
	} else {
		cluster = cpu_rq(smp_processor_id())->cluster;
		if (wake_to_waker_cluster(&env) &&
		    cluster_allowed(p, cluster)) {
			env.need_waker_cluster = 1;
			bitmap_zero(env.candidate_list, NR_CPUS);
			__set_bit(cluster->id, env.candidate_list);
		} else if (bias_to_prev_cpu(&env, &stats)) {
			fast_path = true;
			goto out;
		}
	}

retry:
	cluster = select_least_power_cluster(&env);
+7 −0
Original line number Diff line number Diff line
@@ -460,6 +460,13 @@ static struct ctl_table kern_table[] = {
		.extra1		= &zero,
		.extra2		= &one,
	},
	{
		.procname	= "sched_small_wakee_task_load",
		.data		= &sysctl_sched_small_wakee_task_load_pct,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler   = sched_hmp_proc_update_handler,
	},
#ifdef CONFIG_SCHED_FREQ_INPUT
	{
		.procname       = "sched_new_task_windows",