Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 39d60ed9 authored by Satya Durga Srinivasu Prabhala's avatar Satya Durga Srinivasu Prabhala
Browse files

sched/fair: Add snapshot of placement changes



This snapshot is taken from msm-4.14 as of commit  871eac76e6be567
(Merge "msm: pcie: provide option to override maximum GEN speed").

Change-Id: I8fc95a4a4650de0dc36bd979d374b9335f6af774
Signed-off-by: default avatarSatya Durga Srinivasu Prabhala <satyap@codeaurora.org>
parent 2febb53a
Loading
Loading
Loading
Loading
+113 −16
Original line number Original line Diff line number Diff line
@@ -1065,25 +1065,119 @@ TRACE_EVENT(sched_util_est_cpu,
		  __entry->util_est_enqueued)
		  __entry->util_est_enqueued)
);
);


/*
 * Tracepoint emitting a per-CPU utilization snapshot.
 *
 * Fired for each CPU examined during task placement (see the
 * trace_sched_cpu_util(i) call in find_best_target()), so the state of
 * every candidate CPU — runqueue length, utilization, capacities, idle
 * state, IRQ load and availability flags — can be reconstructed from
 * the trace when debugging placement decisions.
 */
TRACE_EVENT(sched_cpu_util,

	TP_PROTO(int cpu),

	TP_ARGS(cpu),

	TP_STRUCT__entry(
		__field(unsigned int,	cpu)
		__field(unsigned int,	nr_running)
		__field(long,		cpu_util)
		__field(long,		cpu_util_cum)
		__field(unsigned int,	capacity_curr)
		__field(unsigned int,	capacity)
		__field(unsigned int,	capacity_orig)
		__field(int,		idle_state)
		__field(u64,		irqload)
		__field(int,		online)
		__field(int,		isolated)
		__field(int,		reserved)
		__field(int,		high_irq_load)
	),

	TP_fast_assign(
		__entry->cpu                = cpu;
		__entry->nr_running         = cpu_rq(cpu)->nr_running;
		__entry->cpu_util           = cpu_util(cpu);
		/* cumulative (windowed) utilization, without extra demand */
		__entry->cpu_util_cum       = cpu_util_cum(cpu, 0);
		__entry->capacity_curr      = capacity_curr_of(cpu);
		__entry->capacity           = capacity_of(cpu);
		__entry->capacity_orig      = capacity_orig_of(cpu);
		__entry->idle_state         = idle_get_state_idx(cpu_rq(cpu));
		__entry->irqload            = sched_irqload(cpu);
		__entry->online             = cpu_online(cpu);
		__entry->isolated           = cpu_isolated(cpu);
		/* reserved: CPU is the target of a pending active migration */
		__entry->reserved           = is_reserved(cpu);
		__entry->high_irq_load      = sched_cpu_high_irqload(cpu);
	),

	TP_printk("cpu=%d nr_running=%d cpu_util=%ld cpu_util_cum=%ld capacity_curr=%u capacity=%u capacity_orig=%u idle_state=%d irqload=%llu online=%u, isolated=%u, reserved=%u, high_irq_load=%u",
		__entry->cpu, __entry->nr_running, __entry->cpu_util,
		__entry->cpu_util_cum, __entry->capacity_curr,
		__entry->capacity, __entry->capacity_orig,
		__entry->idle_state, __entry->irqload, __entry->online,
		__entry->isolated, __entry->reserved, __entry->high_irq_load)
);

/*
 * Tracepoint summarizing one energy-aware placement decision.
 *
 * Fired at the end of find_energy_efficient_cpu() with the chosen CPU,
 * the placement inputs (sync, need_idle, boost, rtg CPU), which
 * fastpath (if any) was taken, and the wall time the decision took
 * (latency = sched_clock() - start_t).
 */
TRACE_EVENT(sched_task_util,

	TP_PROTO(struct task_struct *p, int best_energy_cpu,
		bool sync, bool need_idle, int fastpath,
		bool placement_boost, int rtg_cpu, u64 start_t),

	TP_ARGS(p, best_energy_cpu, sync, need_idle, fastpath,
		placement_boost, rtg_cpu, start_t),

	TP_STRUCT__entry(
		__field(int,		pid)
		__array(char,		comm, TASK_COMM_LEN)
		__field(unsigned long,	util)
		__field(int,		prev_cpu)
		__field(int,		best_energy_cpu)
		__field(bool,		sync)
		__field(bool,		need_idle)
		__field(int,		fastpath)
		__field(int,		placement_boost)
		__field(int,		rtg_cpu)
		__field(u64,		latency)
	),

	TP_fast_assign(
		__entry->pid                    = p->pid;
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->util                   = task_util(p);
		__entry->prev_cpu               = task_cpu(p);
		__entry->best_energy_cpu        = best_energy_cpu;
		__entry->sync                   = sync;
		__entry->need_idle              = need_idle;
		__entry->fastpath               = fastpath;
		__entry->placement_boost        = placement_boost;
		__entry->rtg_cpu                = rtg_cpu;
		/* time spent inside the placement path, in sched_clock() units */
		__entry->latency                = (sched_clock() - start_t);
	),

	TP_printk("pid=%d comm=%s util=%lu prev_cpu=%d best_energy_cpu=%d sync=%d need_idle=%d fastpath=%d placement_boost=%d rtg_cpu=%d latency=%llu",
		__entry->pid, __entry->comm, __entry->util, __entry->prev_cpu,
		__entry->best_energy_cpu, __entry->sync, __entry->need_idle,
		__entry->fastpath, __entry->placement_boost, __entry->rtg_cpu,
		__entry->latency)
)
/*
 * NOTE(review): the macro above terminates with ")" rather than the
 * usual ");" — this looks like truncation in the diff rendering;
 * confirm against the original patch before relying on this text.
 */

/*
/*
 * Tracepoint for find_best_target
 * Tracepoint for find_best_target
 */
 */
TRACE_EVENT(sched_find_best_target,
TRACE_EVENT(sched_find_best_target,


	TP_PROTO(struct task_struct *tsk, bool prefer_idle,
	TP_PROTO(struct task_struct *tsk, bool prefer_idle,
		 unsigned long min_util, int best_idle, int best_active,
		 unsigned long min_util, int start_cpu,
		 int best_idle, int best_active, int most_spare_cap,
		 int target, int backup),
		 int target, int backup),


	TP_ARGS(tsk, prefer_idle, min_util, best_idle,
	TP_ARGS(tsk, prefer_idle, min_util, start_cpu,
		best_active, target, backup),
		best_idle, best_active, most_spare_cap,
		target, backup),


	TP_STRUCT__entry(
	TP_STRUCT__entry(
		__array(char,		comm, TASK_COMM_LEN)
		__array(char,		comm, TASK_COMM_LEN)
		__field(pid_t,		pid)
		__field(pid_t,		pid)
		__field(unsigned long,	min_util)
		__field(unsigned long,	min_util)
		__field(bool,		prefer_idle)
		__field(bool,		prefer_idle)
		__field(int,		start_cpu)
		__field(int,		best_idle)
		__field(int,		best_idle)
		__field(int,		best_active)
		__field(int,		best_active)
		__field(int,		most_spare_cap)
		__field(int,		target)
		__field(int,		target)
		__field(int,		backup)
		__field(int,		backup)
		),
		),
@@ -1093,16 +1187,19 @@ TRACE_EVENT(sched_find_best_target,
		__entry->pid            = tsk->pid;
		__entry->pid            = tsk->pid;
		__entry->min_util       = min_util;
		__entry->min_util       = min_util;
		__entry->prefer_idle    = prefer_idle;
		__entry->prefer_idle    = prefer_idle;
		__entry->start_cpu      = start_cpu;
		__entry->best_idle      = best_idle;
		__entry->best_idle      = best_idle;
		__entry->best_active    = best_active;
		__entry->best_active    = best_active;
		__entry->most_spare_cap = most_spare_cap;
		__entry->target         = target;
		__entry->target         = target;
		__entry->backup         = backup;
		__entry->backup         = backup;
		),
		),


	TP_printk("pid=%d comm=%s prefer_idle=%d "
	TP_printk("pid=%d comm=%s prefer_idle=%d start_cpu=%d best_idle=%d best_active=%d most_spare_cap=%d target=%d backup=%d",
		  "best_idle=%d best_active=%d target=%d backup=%d",
		  __entry->pid, __entry->comm, __entry->prefer_idle,
		  __entry->pid, __entry->comm, __entry->prefer_idle,
		  __entry->start_cpu,
		  __entry->best_idle, __entry->best_active,
		  __entry->best_idle, __entry->best_active,
		  __entry->most_spare_cap,
		  __entry->target, __entry->backup)
		  __entry->target, __entry->backup)
);
);


+2 −0
Original line number Original line Diff line number Diff line
@@ -3141,6 +3141,8 @@ unsigned long long task_sched_runtime(struct task_struct *p)
	return ns;
	return ns;
}
}


unsigned int capacity_margin_freq = 1280; /* ~20% margin */

/*
/*
 * This function gets called by the timer code, with HZ frequency.
 * This function gets called by the timer code, with HZ frequency.
 * We call it with interrupts disabled.
 * We call it with interrupts disabled.
+375 −44
Original line number Original line Diff line number Diff line
@@ -3814,7 +3814,18 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
	trace_sched_util_est_task(p, &p->se.avg);
	trace_sched_util_est_task(p, &p->se.avg);
}
}


static inline int task_fits_capacity(struct task_struct *p,
/*
 * bias_to_waker_cpu - may the waking task @p be placed directly on @cpu?
 * @p:          task being woken
 * @cpu:        candidate CPU (the waker's CPU on the sync-wakeup path)
 * @rtg_target: preferred-cluster mask of @p's related thread group, or NULL
 *
 * @cpu qualifies when it is allowed for @p, active, large enough for the
 * task (task_fits_max()), and would not become overutilized by @p's
 * utilization. When @rtg_target is set, @cpu must additionally belong
 * to that mask.
 */
static inline bool
bias_to_waker_cpu(struct task_struct *p, int cpu, struct cpumask *rtg_target)
{
	bool base_test = cpumask_test_cpu(cpu, &p->cpus_allowed) &&
			cpu_active(cpu) && task_fits_max(p, cpu) &&
			!__cpu_overutilized(cpu, task_util(p));
	bool rtg_test = rtg_target && cpumask_test_cpu(cpu, rtg_target);

	return base_test && (!rtg_target || rtg_test);
}

static inline bool task_fits_capacity(struct task_struct *p,
					long capacity,
					long capacity,
					int cpu)
					int cpu)
{
{
@@ -3828,6 +3839,60 @@ static inline int task_fits_capacity(struct task_struct *p,
	return capacity * 1024 > task_util_est(p) * margin;
	return capacity * 1024 > task_util_est(p) * margin;
}
}


/*
 * task_fits_max - does @p fit on @cpu, considering boost policy?
 *
 * A CPU at the system's maximum original capacity fits everything.
 * A task boosted with SCHED_BOOST_ON_BIG never fits on a
 * minimum-capacity CPU, regardless of its utilization. Otherwise the
 * decision falls through to task_fits_capacity() against @cpu's
 * original (unscaled) capacity.
 */
static inline bool task_fits_max(struct task_struct *p, int cpu)
{
	unsigned long capacity = capacity_orig_of(cpu);
	unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val;

	if (capacity == max_capacity)
		return true;

	if (task_boost_policy(p) == SCHED_BOOST_ON_BIG
				&& is_min_capacity_cpu(cpu))
		return false;

	return task_fits_capacity(p, capacity, cpu);
}

/*
 * Per-wakeup placement hints handed from find_energy_efficient_cpu()
 * down into find_best_target().
 */
struct find_best_target_env {
	struct cpumask *rtg_target;	/* preferred cluster of p's group, or NULL */
	int placement_boost;		/* boost policy for this task (SCHED_BOOST_*) */
	bool need_idle;			/* caller requires an idle CPU */
	int fastpath;			/* out: which fastpath fired (enum fastpaths) */
};

/*
 * is_packing_eligible - should @p be packed onto the active @target_cpu
 * rather than waking the best idle CPU?
 *
 * Packing is refused outright when placement boost is enabled for @p,
 * when the caller demanded an idle CPU, or when there is no idle
 * candidate at all (best_idle_cstate == -1). If more than one active
 * candidate CPU was seen, packing is allowed. With exactly one active
 * candidate, pack only if that CPU's cumulative utilization plus @p's
 * demand — excluding @p if its demand is already accounted in the
 * CPU's current window — fits, with margin, within the CPU's current
 * (frequency-scaled) capacity.
 */
static bool is_packing_eligible(struct task_struct *p, int target_cpu,
				struct find_best_target_env *fbt_env,
				unsigned int target_cpus_count,
				int best_idle_cstate)
{
	unsigned long tutil, estimated_capacity;

	if (task_placement_boost_enabled(p) || fbt_env->need_idle)
		return false;

	if (best_idle_cstate == -1)
		return false;

	if (target_cpus_count != 1)
		return true;

	/* Don't double-count the task if it is already in the window. */
	if (task_in_cum_window_demand(cpu_rq(target_cpu), p))
		tutil = 0;
	else
		tutil = task_util(p);

	estimated_capacity = cpu_util_cum(target_cpu, tutil);
	estimated_capacity = add_capacity_margin(estimated_capacity,
							target_cpu);

	/*
	 * If there is only one active CPU and it is already above its current
	 * capacity, avoid placing additional task on the CPU.
	 */
	return (estimated_capacity <= capacity_curr_of(target_cpu));
}

static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
{
{
	if (!static_branch_unlikely(&sched_asym_cpucapacity))
	if (!static_branch_unlikely(&sched_asym_cpucapacity))
@@ -3838,7 +3903,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
		return;
		return;
	}
	}


	if (task_fits_capacity(p, capacity_of(cpu_of(rq)), cpu_of(rq))) {
	if (task_fits_max(p, cpu_of(rq))) {
		rq->misfit_task_load = 0;
		rq->misfit_task_load = 0;
		return;
		return;
	}
	}
@@ -6588,24 +6653,72 @@ unsigned long capacity_curr_of(int cpu)
	return cap_scale(max_cap, scale_freq);
	return cap_scale(max_cap, scale_freq);
}
}


/*
 * get_start_cpu - choose the CPU whose sched domain the placement scan
 * in find_best_target() starts from.
 * @p:          task being placed
 * @boosted:    task has a positive schedtune boost
 * @rtg_target: preferred-cluster mask of @p's group, or NULL
 *
 * Boosted tasks start at the max-capacity CPU. A task with an rtg
 * preferred cluster starts at that cluster's first online CPU (a task
 * always fits on its rtg_target). Otherwise the scan starts at the
 * smallest capacity level the task fits on — min, then mid — falling
 * back to the max-capacity CPU.
 *
 * Returns a CPU id, or -1 (e.g. when rd->max_cap_orig_cpu is still -1).
 */
static int get_start_cpu(struct task_struct *p, bool boosted,
					struct cpumask *rtg_target)
{
	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
	int start_cpu = -1;

	if (boosted)
		return rd->max_cap_orig_cpu;

	/* A task always fits on its rtg_target */
	if (rtg_target) {
		int rtg_target_cpu = cpumask_first_and(rtg_target,
						cpu_online_mask);

		if (rtg_target_cpu < nr_cpu_ids)
			return rtg_target_cpu;
	}

	/* Where the task should land based on its demand */
	if (rd->min_cap_orig_cpu != -1
			&& task_fits_max(p, rd->min_cap_orig_cpu))
		start_cpu = rd->min_cap_orig_cpu;
	else if (rd->mid_cap_orig_cpu != -1
			&& task_fits_max(p, rd->mid_cap_orig_cpu))
		start_cpu = rd->mid_cap_orig_cpu;
	else
		start_cpu = rd->max_cap_orig_cpu;

	return start_cpu;
}

/* Placement fastpaths, recorded in find_best_target_env::fastpath. */
enum fastpaths {
	NONE = 0,		/* full scan was performed */
	SYNC_WAKEUP,		/* sync wakeup placed the task on the waker's CPU */
	PREV_CPU_FASTPATH,	/* previous CPU was idle enough and chosen directly */
};

static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
							struct task_struct *p)
					struct task_struct *p,
					struct find_best_target_env *fbt_env)
{
{
	unsigned long min_util = boosted_task_util(p);
	unsigned long min_util = boosted_task_util(p);
	unsigned long target_capacity = ULONG_MAX;
	unsigned long target_capacity = ULONG_MAX;
	unsigned long min_wake_util = ULONG_MAX;
	unsigned long min_wake_util = ULONG_MAX;
	unsigned long target_max_spare_cap = 0;
	unsigned long target_max_spare_cap = 0;
	unsigned long target_util = ULONG_MAX;
	unsigned long target_util = ULONG_MAX;
	unsigned long best_active_util = ULONG_MAX;
	unsigned long best_active_cuml_util = ULONG_MAX;
	unsigned long best_idle_cuml_util = ULONG_MAX;
	bool prefer_idle = schedtune_prefer_idle(p);
	bool prefer_idle = schedtune_prefer_idle(p);
	bool boosted = schedtune_task_boost(p) > 0;
	bool boosted = schedtune_task_boost(p) > 0;
	/* Initialise with deepest possible cstate (INT_MAX) */
	/* Initialise with deepest possible cstate (INT_MAX) */
	int shallowest_idle_cstate = INT_MAX;
	int shallowest_idle_cstate = INT_MAX;
	struct sched_domain *start_sd;
	struct sched_group *sg;
	struct sched_group *sg;
	int best_active_cpu = -1;
	int best_active_cpu = -1;
	int best_idle_cpu = -1;
	int best_idle_cpu = -1;
	int target_cpu = -1;
	int target_cpu = -1;
	int backup_cpu = -1;
	int backup_cpu = -1;
	int i;
	int i, start_cpu;
	long spare_wake_cap, most_spare_wake_cap = 0;
	int most_spare_cap_cpu = -1;
	unsigned int active_cpus_count = 0;
	int prev_cpu = task_cpu(p);
	bool next_group_higher_cap = false;
	int isolated_candidate = -1;


	/*
	/*
	 * In most cases, target_capacity tracks capacity_orig of the most
	 * In most cases, target_capacity tracks capacity_orig of the most
@@ -6619,17 +6732,59 @@ static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
	if (prefer_idle && boosted)
	if (prefer_idle && boosted)
		target_capacity = 0;
		target_capacity = 0;


	/* Find start CPU based on boost value */
	start_cpu = get_start_cpu(p, boosted, fbt_env->rtg_target);
	if (start_cpu < 0) {
		target_cpu = -1;
		goto target;
	}

	/* Find SD for the start CPU */
	start_sd = rcu_dereference(per_cpu(sd_asym_packing, start_cpu));
	if (!start_sd) {
		target_cpu = -1;
		goto target;
	}

	/* fast path for prev_cpu */
	if ((capacity_orig_of(prev_cpu) == capacity_orig_of(start_cpu)) &&
		!cpu_isolated(prev_cpu) && cpu_online(prev_cpu) &&
		idle_cpu(prev_cpu)) {

		if (idle_get_state_idx(cpu_rq(prev_cpu)) <= 1) {
			target_cpu = prev_cpu;

			fbt_env->fastpath = PREV_CPU_FASTPATH;
			trace_sched_find_best_target(p, prefer_idle, min_util,
					start_cpu, -1, -1, -1, target_cpu, -1);
			goto target;
		}
	}

	/* Scan CPUs in all SDs */
	/* Scan CPUs in all SDs */
	sg = sd->groups;
	sg = start_sd->groups;
	do {
	do {
		for_each_cpu_and(i, &p->cpus_allowed, sched_group_span(sg)) {
		for_each_cpu_and(i, &p->cpus_allowed, sched_group_span(sg)) {
			unsigned long capacity_curr = capacity_curr_of(i);
			unsigned long capacity_curr = capacity_curr_of(i);
			unsigned long capacity_orig = capacity_orig_of(i);
			unsigned long capacity_orig = capacity_orig_of(i);
			unsigned long wake_util, new_util;
			unsigned long wake_util, new_util, new_util_cuml;
			long spare_cap;
			long spare_cap;
			int idle_idx = INT_MAX;
			int idle_idx = INT_MAX;


			if (!cpu_online(i))
			trace_sched_cpu_util(i);

			if (!cpu_online(i) || cpu_isolated(i))
				continue;

			if (isolated_candidate == -1)
				isolated_candidate = i;

			/*
			 * This CPU is the target of an active migration that's
			 * yet to complete. Avoid placing another task on it.
			 * See check_for_migration()
			 */
			if (is_reserved(i))
				continue;
				continue;


			if (sched_cpu_high_irqload(i))
			if (sched_cpu_high_irqload(i))
@@ -6642,6 +6797,23 @@ static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
			 */
			 */
			wake_util = cpu_util_without(i, p);
			wake_util = cpu_util_without(i, p);
			new_util = wake_util + task_util_est(p);
			new_util = wake_util + task_util_est(p);
			spare_wake_cap = capacity_orig_of(i) - wake_util;

			if (spare_wake_cap > most_spare_wake_cap) {
				most_spare_wake_cap = spare_wake_cap;
				most_spare_cap_cpu = i;
			}

			/*
			 * Cumulative demand may already be accounting for the
			 * task. If so, add just the boost-utilization to
			 * the cumulative demand of the cpu.
			 */
			if (task_in_cum_window_demand(cpu_rq(i), p))
				new_util_cuml = cpu_util_cum(i, 0) +
						min_util - task_util(p);
			else
				new_util_cuml = cpu_util_cum(i, 0) + min_util;


			/*
			/*
			 * Ensure minimum capacity to grant the required boost.
			 * Ensure minimum capacity to grant the required boost.
@@ -6748,30 +6920,21 @@ static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
				 */
				 */
				if (wake_util > min_wake_util)
				if (wake_util > min_wake_util)
					continue;
					continue;
				min_wake_util = wake_util;
				best_active_cpu = i;
				continue;
			}


				/*
				/*
			 * Enforce EAS mode
				 * If utilization is the same between CPUs,
			 *
				 * break the ties with WALT's cumulative
			 * For non latency sensitive tasks, skip CPUs that
				 * demand
			 * will be overutilized by moving the task there.
			 *
			 * The goal here is to remain in EAS mode as long as
			 * possible at least for !prefer_idle tasks.
				 */
				 */
			if ((new_util * capacity_margin) >
				if (new_util == best_active_util &&
			    (capacity_orig * SCHED_CAPACITY_SCALE))
				    new_util_cuml > best_active_cuml_util)
					continue;
					continue;

				min_wake_util = wake_util;
			/*
				best_active_util = new_util;
			 * Favor CPUs with smaller capacity for non latency
				best_active_cuml_util = new_util_cuml;
			 * sensitive tasks.
				best_active_cpu = i;
			 */
			if (capacity_orig > target_capacity)
				continue;
				continue;
			}


			/*
			/*
			 * Case B) Non latency sensitive tasks on IDLE CPUs.
			 * Case B) Non latency sensitive tasks on IDLE CPUs.
@@ -6804,17 +6967,30 @@ static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
				 * IOW, prefer a deep IDLE LITTLE CPU vs a
				 * IOW, prefer a deep IDLE LITTLE CPU vs a
				 * shallow idle big CPU.
				 * shallow idle big CPU.
				 */
				 */
				if (capacity_orig == target_capacity &&
				if (capacity_orig >= target_capacity &&
				    sysctl_sched_cstate_aware &&
				    sysctl_sched_cstate_aware &&
				    idle_idx >= shallowest_idle_cstate)
				    idle_idx > shallowest_idle_cstate)
					continue;

				if (shallowest_idle_cstate == idle_idx &&
					(best_idle_cpu == prev_cpu ||
					(i != prev_cpu &&
					new_util_cuml > best_idle_cuml_util)))
					continue;
					continue;


				target_capacity = capacity_orig;
				target_capacity = capacity_orig;
				shallowest_idle_cstate = idle_idx;
				shallowest_idle_cstate = idle_idx;
				best_idle_cuml_util = new_util_cuml;
				best_idle_cpu = i;
				best_idle_cpu = i;
				continue;
				continue;
			}
			}


			/*
			 * Consider only idle CPUs for active migration.
			 */
			if (p->state == TASK_RUNNING)
				continue;

			/*
			/*
			 * Case C) Non latency sensitive tasks on ACTIVE CPUs.
			 * Case C) Non latency sensitive tasks on ACTIVE CPUs.
			 *
			 *
@@ -6835,6 +7011,8 @@ static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
			 * capacity.
			 * capacity.
			 */
			 */


			active_cpus_count++;

			/* Favor CPUs with maximum spare capacity */
			/* Favor CPUs with maximum spare capacity */
			if (capacity_orig == target_capacity &&
			if (capacity_orig == target_capacity &&
			    spare_cap < target_max_spare_cap)
			    spare_cap < target_max_spare_cap)
@@ -6846,7 +7024,45 @@ static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
			target_cpu = i;
			target_cpu = i;
		}
		}


	} while (sg = sg->next, sg != sd->groups);
		next_group_higher_cap = (capacity_orig_of(group_first_cpu(sg)) <
			capacity_orig_of(group_first_cpu(sg->next)));

		/*
		 * If we've found a cpu, but the boost is ON_ALL we continue
		 * visiting other clusters. If the boost is ON_BIG we visit
		 * next cluster if they are higher in capacity. If we are
		 * not in any kind of boost, we break.
		 */
		if (!prefer_idle &&
			(target_cpu != -1 || best_idle_cpu != -1) &&
			(fbt_env->placement_boost == SCHED_BOOST_NONE ||
			sched_boost() != FULL_THROTTLE_BOOST ||
			(fbt_env->placement_boost == SCHED_BOOST_ON_BIG &&
				!next_group_higher_cap)))
			break;

		/*
		 * if we are in prefer_idle and have found an idle cpu,
		 * break from searching more groups based on the stune.boost and
		 * group cpu capacity.
		 */
		if (prefer_idle && best_idle_cpu != -1) {
			if (boosted) {
				if (!next_group_higher_cap)
					break;
			} else {
				if (next_group_higher_cap)
					break;
			}
		}

	} while (sg = sg->next, sg != start_sd->groups);

	if (best_idle_cpu != -1 && !is_packing_eligible(p, target_cpu, fbt_env,
				active_cpus_count, shallowest_idle_cstate)) {
		target_cpu = best_idle_cpu;
		best_idle_cpu = -1;
	}


	/*
	/*
	 * For non latency sensitive tasks, cases B and C in the previous loop,
	 * For non latency sensitive tasks, cases B and C in the previous loop,
@@ -6883,6 +7099,28 @@ static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
		? best_active_cpu
		? best_active_cpu
		: best_idle_cpu;
		: best_idle_cpu;


	if (target_cpu == -1 && most_spare_cap_cpu != -1 &&
		/* ensure we use active cpu for active migration */
		!(p->state == TASK_RUNNING && !idle_cpu(most_spare_cap_cpu)))
		target_cpu = most_spare_cap_cpu;

	/*
	 * The next step of energy evaluation includes
	 * prev_cpu. Drop target or backup if it is
	 * same as prev_cpu
	 */
	if (backup_cpu == prev_cpu)
		backup_cpu = -1;

	if (target_cpu == prev_cpu) {
		target_cpu = backup_cpu;
		backup_cpu = -1;
	}

	if (target_cpu == -1 && isolated_candidate != -1 &&
					cpu_isolated(prev_cpu))
		target_cpu = isolated_candidate;

	if (backup_cpu >= 0)
	if (backup_cpu >= 0)
		cpumask_set_cpu(backup_cpu, cpus);
		cpumask_set_cpu(backup_cpu, cpus);
	if (target_cpu >= 0) {
	if (target_cpu >= 0) {
@@ -6890,8 +7128,10 @@ static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
		cpumask_set_cpu(target_cpu, cpus);
		cpumask_set_cpu(target_cpu, cpus);
	}
	}


	trace_sched_find_best_target(p, prefer_idle, min_util, best_idle_cpu,
	trace_sched_find_best_target(p, prefer_idle, min_util, start_cpu,
			             best_active_cpu, target_cpu, backup_cpu);
				     best_idle_cpu, best_active_cpu,
				     most_spare_cap_cpu,
				     target_cpu, backup_cpu);
}
}


/*
/*
@@ -6918,7 +7158,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
	/* Bring task utilization in sync with prev_cpu */
	/* Bring task utilization in sync with prev_cpu */
	sync_entity_load_avg(&p->se);
	sync_entity_load_avg(&p->se);


	return !task_fits_capacity(p, min_cap, cpu);
	return !task_fits_max(p, cpu);
}
}


/*
/*
@@ -7015,7 +7255,8 @@ static void select_max_spare_cap_cpus(struct sched_domain *sd, cpumask_t *cpus,
			/* Skip CPUs that will be overutilized. */
			/* Skip CPUs that will be overutilized. */
			util = cpu_util_next(cpu, p, cpu);
			util = cpu_util_next(cpu, p, cpu);
			cpu_cap = capacity_of(cpu);
			cpu_cap = capacity_of(cpu);
			if (cpu_cap * 1024 < util * capacity_margin)
			if (cpu_cap * 1024 <
					util * sched_capacity_margin_up[cpu])
				continue;
				continue;


			/*
			/*
@@ -7034,6 +7275,49 @@ static void select_max_spare_cap_cpus(struct sched_domain *sd, cpumask_t *cpus,
	}
	}
}
}


/*
 * wake_to_idle - nonzero when either the waker (current) or the wakee
 * @p has requested placement on an idle CPU via PF_WAKE_UP_IDLE.
 */
static inline int wake_to_idle(struct task_struct *p)
{
	return (current->flags & PF_WAKE_UP_IDLE) ||
			(p->flags & PF_WAKE_UP_IDLE);
}

#ifdef CONFIG_SCHED_WALT
/*
 * is_task_util_above_min_thresh - is @p big enough to honor its group's
 * preferred cluster?
 *
 * Under CONSERVATIVE_BOOST the boost threshold applies; otherwise the
 * colocation threshold does.
 */
static inline bool is_task_util_above_min_thresh(struct task_struct *p)
{
	unsigned int threshold = (sysctl_sched_boost == CONSERVATIVE_BOOST) ?
			sysctl_sched_min_task_util_for_boost :
			sysctl_sched_min_task_util_for_colocation;

	return task_util(p) > threshold;
}

/*
 * find_rtg_target - preferred-cluster cpumask for @p's related thread
 * group, or NULL.
 *
 * Returns the group's preferred cluster only when @p's utilization is
 * above the relevant threshold and @p fits on the first CPU of that
 * cluster; NULL otherwise. The group lookup is RCU-protected.
 *
 * NOTE(review): the returned mask pointer is used by the caller after
 * rcu_read_unlock() — presumably cluster cpumasks have stable
 * lifetime; confirm against the WALT cluster code.
 */
static inline struct cpumask *find_rtg_target(struct task_struct *p)
{
	struct related_thread_group *grp;
	struct cpumask *rtg_target;

	rcu_read_lock();

	grp = task_related_thread_group(p);
	if (grp && grp->preferred_cluster && is_task_util_above_min_thresh(p)) {
		rtg_target = &grp->preferred_cluster->cpus;
		if (!task_fits_max(p, cpumask_first(rtg_target)))
			rtg_target = NULL;
	} else {
		rtg_target = NULL;
	}

	rcu_read_unlock();

	return rtg_target;
}
#else
/* !CONFIG_SCHED_WALT: no related thread groups, never a preferred cluster. */
static inline struct cpumask *find_rtg_target(struct task_struct *p)
{
	return NULL;
}
#endif

static DEFINE_PER_CPU(cpumask_t, energy_cpus);
static DEFINE_PER_CPU(cpumask_t, energy_cpus);


/*
/*
@@ -7080,16 +7364,31 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sy
{
{
	unsigned long prev_energy = ULONG_MAX, best_energy = ULONG_MAX;
	unsigned long prev_energy = ULONG_MAX, best_energy = ULONG_MAX;
	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
	int weight, cpu, best_energy_cpu = prev_cpu;
	int weight, cpu = smp_processor_id(), best_energy_cpu = prev_cpu;
	unsigned long cur_energy;
	unsigned long cur_energy;
	struct perf_domain *pd;
	struct perf_domain *pd;
	struct sched_domain *sd;
	struct sched_domain *sd;
	cpumask_t *candidates;
	cpumask_t *candidates;
	struct cpumask *rtg_target = find_rtg_target(p);
	struct find_best_target_env fbt_env;
	bool need_idle = wake_to_idle(p);
	int placement_boost = task_boost_policy(p);
	u64 start_t = 0;
	int delta = 0;


	if (sysctl_sched_sync_hint_enable && sync) {
	fbt_env.fastpath = 0;
		cpu = smp_processor_id();

		if (cpumask_test_cpu(cpu, &p->cpus_allowed))
	if (trace_sched_task_util_enabled())
			return cpu;
		start_t = sched_clock();

	if (need_idle)
		sync = 0;

	if (sysctl_sched_sync_hint_enable && sync &&
				bias_to_waker_cpu(p, cpu, rtg_target)) {
		best_energy_cpu = cpu;
		fbt_env.fastpath = SYNC_WAKEUP;
		goto unlock;
	}
	}


	rcu_read_lock();
	rcu_read_lock();
@@ -7115,10 +7414,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sy
	candidates = this_cpu_ptr(&energy_cpus);
	candidates = this_cpu_ptr(&energy_cpus);
	cpumask_clear(candidates);
	cpumask_clear(candidates);


	if (sched_feat(FIND_BEST_TARGET))
	if (sched_feat(FIND_BEST_TARGET)) {
		find_best_target(sd, candidates, p);
		fbt_env.rtg_target = rtg_target;
	else
		fbt_env.placement_boost = placement_boost;
		fbt_env.need_idle = need_idle;

		find_best_target(NULL, candidates, p, &fbt_env);
	} else {
		select_max_spare_cap_cpus(sd, candidates, pd, p);
		select_max_spare_cap_cpus(sd, candidates, pd, p);
	}


	/* Bail out if no candidate was found. */
	/* Bail out if no candidate was found. */
	weight = cpumask_weight(candidates);
	weight = cpumask_weight(candidates);
@@ -7133,6 +7437,20 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sy
		goto unlock;
		goto unlock;
	}
	}


#ifdef CONFIG_SCHED_WALT
	if (!walt_disabled && sysctl_sched_use_walt_cpu_util &&
						 p->state == TASK_WAKING)
		delta = task_util(p);
#endif
	if (task_placement_boost_enabled(p) || need_idle ||
	    (rtg_target && (!cpumask_test_cpu(prev_cpu, rtg_target) ||
	    cpumask_test_cpu(cpu, rtg_target))) ||
	    __cpu_overutilized(prev_cpu, delta) ||
	    !task_fits_max(p, prev_cpu) || cpu_isolated(prev_cpu)) {
		best_energy_cpu = cpu;
		goto unlock;
	}

	if (cpumask_test_cpu(prev_cpu, &p->cpus_allowed))
	if (cpumask_test_cpu(prev_cpu, &p->cpus_allowed))
		prev_energy = best_energy = compute_energy(p, prev_cpu, pd);
		prev_energy = best_energy = compute_energy(p, prev_cpu, pd);
	else
	else
@@ -7151,6 +7469,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sy
unlock:
unlock:
	rcu_read_unlock();
	rcu_read_unlock();


	trace_sched_task_util(p, best_energy_cpu, sync,
			need_idle, fbt_env.fastpath, placement_boost,
			rtg_target ? cpumask_first(rtg_target) : -1, start_t);

	/*
	/*
	 * Pick the best CPU if prev_cpu cannot be used, or if it saves at
	 * Pick the best CPU if prev_cpu cannot be used, or if it saves at
	 * least 6% of the energy used by prev_cpu.
	 * least 6% of the energy used by prev_cpu.
@@ -7191,6 +7513,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
	int want_affine = 0;
	int want_affine = 0;
	int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
	int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);


	if (energy_aware()) {
		rcu_read_lock();
		new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync);
		rcu_read_unlock();
		return new_cpu;
	}

	if (sd_flag & SD_BALANCE_WAKE) {
	if (sd_flag & SD_BALANCE_WAKE) {
		record_wakee(p);
		record_wakee(p);


@@ -8794,7 +9123,8 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
static inline bool
static inline bool
group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
{
{
	return sg->sgc->min_capacity * capacity_margin <
	return sg->sgc->min_capacity *
				sched_capacity_margin_up[group_first_cpu(sg)] <
						ref->sgc->min_capacity * 1024;
						ref->sgc->min_capacity * 1024;
}
}


@@ -8805,7 +9135,8 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
static inline bool
static inline bool
group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
{
{
	return sg->sgc->max_capacity * capacity_margin <
	return sg->sgc->max_capacity *
				sched_capacity_margin_up[group_first_cpu(sg)] <
						ref->sgc->max_capacity * 1024;
						ref->sgc->max_capacity * 1024;
}
}


+5 −0
Original line number Original line Diff line number Diff line
@@ -857,6 +857,11 @@ struct root_domain {
	 * CPUs of the rd. Protected by RCU.
	 * CPUs of the rd. Protected by RCU.
	 */
	 */
	struct perf_domain	*pd;
	struct perf_domain	*pd;

	/* First cpu with maximum and minimum original capacity */
	int max_cap_orig_cpu, min_cap_orig_cpu;
	/* First cpu with mid capacity */
	int mid_cap_orig_cpu;
};
};


extern struct root_domain def_root_domain;
extern struct root_domain def_root_domain;
+29 −0
Original line number Original line Diff line number Diff line
@@ -525,6 +525,9 @@ static int init_rootdomain(struct root_domain *rd)
	if (cpupri_init(&rd->cpupri) != 0)
	if (cpupri_init(&rd->cpupri) != 0)
		goto free_cpudl;
		goto free_cpudl;


	rd->max_cap_orig_cpu = rd->min_cap_orig_cpu = -1;
	rd->mid_cap_orig_cpu = -1;

	init_max_cpu_capacity(&rd->max_cpu_capacity);
	init_max_cpu_capacity(&rd->max_cpu_capacity);


	return 0;
	return 0;
@@ -2018,9 +2021,35 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
	/* Attach the domains */
	/* Attach the domains */
	rcu_read_lock();
	rcu_read_lock();
	for_each_cpu(i, cpu_map) {
	for_each_cpu(i, cpu_map) {
		int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu);
		int min_cpu = READ_ONCE(d.rd->min_cap_orig_cpu);

		sd = *per_cpu_ptr(d.sd, i);
		sd = *per_cpu_ptr(d.sd, i);

		if ((max_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig >
				cpu_rq(max_cpu)->cpu_capacity_orig))
			WRITE_ONCE(d.rd->max_cap_orig_cpu, i);

		if ((min_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig <
				cpu_rq(min_cpu)->cpu_capacity_orig))
			WRITE_ONCE(d.rd->min_cap_orig_cpu, i);

		cpu_attach_domain(sd, d.rd, i);
		cpu_attach_domain(sd, d.rd, i);
	}
	}

	/* set the mid capacity cpu (assumes only 3 capacities) */
	for_each_cpu(i, cpu_map) {
		int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu);
		int min_cpu = READ_ONCE(d.rd->min_cap_orig_cpu);

		if ((cpu_rq(i)->cpu_capacity_orig
				!=  cpu_rq(min_cpu)->cpu_capacity_orig) &&
				(cpu_rq(i)->cpu_capacity_orig
				!=  cpu_rq(max_cpu)->cpu_capacity_orig)) {
			WRITE_ONCE(d.rd->mid_cap_orig_cpu, i);
			break;
		}
	}
	rcu_read_unlock();
	rcu_read_unlock();


	if (has_asym)
	if (has_asym)
Loading