Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3bd5f6f4 authored by Joonwoo Park's avatar Joonwoo Park Committed by Syed Rameez Mustafa
Browse files

sched: add preference for waker cluster CPU in wakee task placement



If a sync wakee task's demand is small, it is worthwhile to place the
wakee task on the waker's cluster for better performance: the waker and
wakee are correlated, so the wakee should take advantage of the waker
cluster's frequency, which is voted for by the waker, along with the
cache-locality benefit. While biasing towards the waker's cluster we want
to avoid the waker CPU as much as possible, since placing the wakee on the
waker's CPU can cause the waker to be preempted and then migrated by the
load balancer.

Introduce a new tunable 'sched_small_wakee_task_load' that differentiates
eligible small wakee task and place the small wakee tasks on the waker's
cluster.

CRs-fixed: 971295
Change-Id: I96897d9a72a6f63dca4986d9219c2058cd5a7916
Signed-off-by: default avatarJoonwoo Park <joonwoop@codeaurora.org>
parent 86de6cbe
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -1280,6 +1280,18 @@ power-efficient cpu. We don't let it come back to a power-efficient cpu until
its demand *in reference to the power-efficient cpu* drops less than 60%
(sched_downmigrate).


*** 7.26 sched_small_wakee_task_load

Appears at: /proc/sys/kernel/sched_small_wakee_task_load

Default value: 10

This tunable is a percentage.  It configures the maximum demand of a small
wakee task.  Sync wakee tasks whose demand is less than
sched_small_wakee_task_load are categorized as small wakee tasks.  The
scheduler places small wakee tasks on the waker's cluster.

=========================
8. HMP SCHEDULER TRACE POINTS
=========================
+1 −0
Original line number Diff line number Diff line
@@ -67,6 +67,7 @@ extern unsigned int sysctl_sched_downmigrate_pct;
extern int sysctl_sched_upmigrate_min_nice;
extern unsigned int sysctl_sched_boost;
extern unsigned int sysctl_early_detection_duration;
extern unsigned int sysctl_sched_small_wakee_task_load_pct;

#ifdef CONFIG_SCHED_QHMP
extern unsigned int sysctl_sched_min_runtime;
+56 −12
Original line number Diff line number Diff line
@@ -2469,6 +2469,13 @@ unsigned int __read_mostly sysctl_sched_spill_nr_run = 10;
 */
unsigned int __read_mostly sysctl_sched_enable_power_aware = 0;

/*
 * Place sync wakee tasks that have less than the configured demand on the
 * waker's cluster.
 */
unsigned int __read_mostly sched_small_wakee_task_load;
unsigned int __read_mostly sysctl_sched_small_wakee_task_load_pct = 10;

/*
 * CPUs with load greater than the sched_spill_load_threshold are not
 * eligible for task placement. When all CPUs in a cluster achieve a
@@ -2595,6 +2602,10 @@ void set_hmp_defaults(void)

	sched_short_sleep_task_threshold = sysctl_sched_select_prev_cpu_us *
					   NSEC_PER_USEC;

	sched_small_wakee_task_load =
		div64_u64((u64)sysctl_sched_small_wakee_task_load_pct *
			  (u64)sched_ravg_window, 100);
}

u32 sched_get_init_task_load(struct task_struct *p)
@@ -2871,6 +2882,7 @@ struct cpu_select_env {
	struct related_thread_group *rtg;
	u8 reason;
	u8 need_idle:1;
	u8 need_waker_cluster:1;
	u8 boost:1;
	u8 sync:1;
	u8 ignore_prev_cpu:1;
@@ -3036,15 +3048,29 @@ next_candidate(const unsigned long *list, int start, int end)
	return sched_cluster[cluster_id];
}

static void update_spare_capacity(
struct cluster_cpu_stats *stats, int cpu, int capacity, u64 cpu_load)
static void
update_spare_capacity(struct cluster_cpu_stats *stats,
		      struct cpu_select_env *env, int cpu, int capacity,
		      u64 cpu_load)
{
	s64 spare_capacity = sched_ravg_window - cpu_load;

	if (spare_capacity > 0 &&
	    (spare_capacity > stats->highest_spare_capacity ||
	     (spare_capacity == stats->highest_spare_capacity &&
	     capacity > cpu_capacity(stats->best_capacity_cpu)))) {
	      ((!env->need_waker_cluster &&
		capacity > cpu_capacity(stats->best_capacity_cpu)) ||
	       (env->need_waker_cluster &&
		cpu_rq(cpu)->nr_running <
		cpu_rq(stats->best_capacity_cpu)->nr_running))))) {
		/*
		 * If the sync waker is the only runnable task on its CPU, the
		 * CPU's cr_avg is 0, so there is a high chance of placing the
		 * wakee on the waker's CPU, which is likely to cause
		 * preemption of the waker.  This can lead to migration of the
		 * preempted waker.  Place the wakee on a genuinely idle CPU
		 * when possible, by checking nr_running, to avoid such
		 * preemption.
		 */
		stats->highest_spare_capacity = spare_capacity;
		stats->best_capacity_cpu = cpu;
	}
@@ -3064,7 +3090,7 @@ struct cpu_select_env *env, struct cluster_cpu_stats *stats)
			sched_irqload(i), power_cost(i, task_load(env->p) +
					cpu_cravg_sync(i, env->sync)), 0);

			update_spare_capacity(stats, i, next->capacity,
			update_spare_capacity(stats, env, i, next->capacity,
					  cpu_load_sync(i, env->sync));
		}
	}
@@ -3162,9 +3188,11 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c,
		if (unlikely(!cpu_active(i)) || skip_cpu(i, env))
			continue;

		update_spare_capacity(stats, i, c->capacity, env->cpu_load);
		update_spare_capacity(stats, env, i, c->capacity,
				      env->cpu_load);

		if (env->boost || sched_cpu_high_irqload(i) ||
		if (env->boost || env->need_waker_cluster ||
		    sched_cpu_high_irqload(i) ||
		    spill_threshold_crossed(env, cpu_rq(i)))
			continue;

@@ -3235,7 +3263,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
	env->cpu_load = cpu_load_sync(prev_cpu, env->sync);
	if (sched_cpu_high_irqload(prev_cpu) ||
			spill_threshold_crossed(env, cpu_rq(prev_cpu))) {
		update_spare_capacity(stats, prev_cpu,
		update_spare_capacity(stats, env, prev_cpu,
				cluster->capacity, env->cpu_load);
		env->ignore_prev_cpu = 1;
		return false;
@@ -3244,6 +3272,13 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
	return true;
}

/*
 * wake_to_waker_cluster() - decide whether a sync wakee should be biased
 * towards the waker's cluster.
 *
 * Returns true only for plain sync wakeups — no idle-CPU requirement
 * (need_idle) and no special placement reason — whose demand is below the
 * sched_small_wakee_task_load threshold (derived from the
 * sched_small_wakee_task_load_pct tunable in set_hmp_defaults()).
 */
static inline bool
wake_to_waker_cluster(struct cpu_select_env *env)
{
	return !env->need_idle && !env->reason && env->sync &&
	       task_load(env->p) < sched_small_wakee_task_load;
}

static inline int
cluster_allowed(struct task_struct *p, struct sched_cluster *cluster)
{
@@ -3269,6 +3304,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
		.p			= p,
		.reason			= reason,
		.need_idle		= wake_to_idle(p),
		.need_waker_cluster	= 0,
		.boost			= sched_boost(),
		.sync			= sync,
		.prev_cpu		= target,
@@ -3291,10 +3327,18 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
			clear_bit(pref_cluster->id, env.candidate_list);
		else
			env.rtg = grp;
	} else {
		cluster = cpu_rq(smp_processor_id())->cluster;
		if (wake_to_waker_cluster(&env) &&
		    cluster_allowed(p, cluster)) {
			env.need_waker_cluster = 1;
			bitmap_zero(env.candidate_list, NR_CPUS);
			__set_bit(cluster->id, env.candidate_list);
		} else if (bias_to_prev_cpu(&env, &stats)) {
			fast_path = true;
			goto out;
		}
	}

retry:
	cluster = select_least_power_cluster(&env);
+7 −0
Original line number Diff line number Diff line
@@ -460,6 +460,13 @@ static struct ctl_table kern_table[] = {
		.extra1		= &zero,
		.extra2		= &one,
	},
	{
		.procname	= "sched_small_wakee_task_load",
		.data		= &sysctl_sched_small_wakee_task_load_pct,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler   = sched_hmp_proc_update_handler,
	},
#ifdef CONFIG_SCHED_FREQ_INPUT
	{
		.procname       = "sched_new_task_windows",