ANDROID: Add find_best_target to minimise energy calculation overhead (f240e444) · Commits · e / devices / android_kernel_oneplus_sm8150

include/trace/events/sched.h

+42 −0

Original line number	Diff line number	Diff line
		@@ -879,6 +879,48 @@ TRACE_EVENT_CONDITION(sched_overutilized,
		__entry->overutilized ? 1 : 0, __entry->cpulist)
		);

		/*
		* Tracepoint for find_best_target
		*
		* Records the task (comm, pid), the prefer_idle flag, min_util, the
		* CPU the scan started from, and the best idle, best active and
		* finally selected target CPUs passed in by the caller.
		*/
		TRACE_EVENT(sched_find_best_target,

		TP_PROTO(struct task_struct *tsk, bool prefer_idle,
		unsigned long min_util, int start_cpu,
		int best_idle, int best_active, int target),

		TP_ARGS(tsk, prefer_idle, min_util, start_cpu,
		best_idle, best_active, target),

		TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		__field( pid_t, pid )
		__field( unsigned long, min_util )
		__field( bool, prefer_idle )
		__field( int, start_cpu )
		__field( int, best_idle )
		__field( int, best_active )
		__field( int, target )
		),

		TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
		__entry->min_util = min_util;
		__entry->prefer_idle = prefer_idle;
		__entry->start_cpu = start_cpu;
		__entry->best_idle = best_idle;
		__entry->best_active = best_active;
		__entry->target = target;
		),

		/*
		* NOTE(review): min_util is stored in the entry above but is not
		* part of the printed format below - confirm this is intentional.
		*/
		TP_printk("pid=%d comm=%s prefer_idle=%d start_cpu=%d "
		"best_idle=%d best_active=%d target=%d",
		__entry->pid, __entry->comm,
		__entry->prefer_idle, __entry->start_cpu,
		__entry->best_idle, __entry->best_active,
		__entry->target)
		);

		#endif /* CONFIG_SMP */
		#endif /* _TRACE_SCHED_H */

kernel/sched/fair.c

+351 −19

Original line number	Diff line number	Diff line
		@@ -5869,6 +5869,7 @@ static inline bool cpu_in_sg(struct sched_group *sg, int cpu)
		*/
		static inline int select_energy_cpu_idx(struct energy_env *eenv)
		{
		int last_cpu_idx = eenv->max_cpu_count - 1;
		struct sched_domain *sd;
		struct sched_group *sg;
		int sd_cpu = -1;
		@@ -5920,21 +5921,27 @@ static inline int select_energy_cpu_idx(struct energy_env *eenv)
		* Compare the other CPU candidates to find a CPU which can be
		* more energy efficient than EAS_CPU_PRV
		*/
		for (cpu_idx = EAS_CPU_NXT; cpu_idx < eenv->max_cpu_count; cpu_idx++) {
		/* Skip not valid scheduled candidates */
		if (sched_feat(FBT_STRICT_ORDER))
		last_cpu_idx = EAS_CPU_BKP;

		for(cpu_idx = EAS_CPU_NXT; cpu_idx <= last_cpu_idx; cpu_idx++) {
		if (eenv->cpu[cpu_idx].cpu_id < 0)
		continue;
		/* Compute energy delta wrt EAS_CPU_PRV */
		eenv->cpu[cpu_idx].nrg_delta =
		eenv->cpu[cpu_idx].energy -
		eenv->cpu[EAS_CPU_PRV].energy;

		/* filter energy variations within the dead-zone margin */
		if (abs(eenv->cpu[cpu_idx].nrg_delta) < margin)
		eenv->cpu[cpu_idx].nrg_delta = 0;
		/* update the schedule candidate with min(nrg_delta) */
		if (eenv->cpu[cpu_idx].nrg_delta <
		eenv->cpu[eenv->next_idx].nrg_delta)
		eenv->cpu[eenv->next_idx].nrg_delta) {
		eenv->next_idx = cpu_idx;
		/* break out if we want to stop on first saving candidate */
		if (sched_feat(FBT_STRICT_ORDER))
		break;
		}
		}

		return eenv->next_idx;
		@@ -6704,6 +6711,283 @@ static inline int task_fits_capacity(struct task_struct *p, long capacity)
		return capacity * 1024 > boosted_task_util(p) * capacity_margin;
		}

		/*
		 * Choose the CPU from which the find_best_target() scan starts.
		 *
		 * The root domain is taken from the runqueue of the CPU executing
		 * this code. Boosted tasks start at rd->max_cap_orig_cpu, all other
		 * tasks at rd->min_cap_orig_cpu. The value is returned unchanged, so
		 * callers must cope with a negative result.
		 */
		static int start_cpu(bool boosted)
		{
			struct root_domain *rd;

			rd = cpu_rq(smp_processor_id())->rd;
			if (boosted)
				return rd->max_cap_orig_cpu;

			return rd->min_cap_orig_cpu;
		}

		/*
		 * find_best_target() - heuristically select candidate CPUs for a task.
		 * @p:           task being placed.
		 * @backup_cpu:  output. Reset to -1 on entry; when a target CPU is
		 *               found it receives the second-choice CPU (best active
		 *               CPU for prefer_idle tasks, best idle CPU otherwise),
		 *               or -1 if that would equal the returned target.
		 * @boosted:     selects the CPU the scan starts from, see start_cpu().
		 * @prefer_idle: treat @p as latency sensitive (Case A below).
		 *
		 * Walks every sched_group of the sd_ea domain of the start CPU and
		 * considers each online CPU that @p is allowed to run on and whose
		 * original capacity can hold the task's (boosted) utilization.
		 *
		 * Returns the selected CPU, or -1 when the start CPU or its sd_ea
		 * domain is unavailable or no suitable CPU was found. A prefer_idle
		 * task returns as soon as an idle CPU is found; in that case
		 * *@backup_cpu is left at -1.
		 *
		 * NOTE(review): rcu_dereference() is used on sd_ea, so the caller is
		 * presumably expected to hold rcu_read_lock() - confirm.
		 */
		static inline int find_best_target(struct task_struct *p, int *backup_cpu,
						   bool boosted, bool prefer_idle)
		{
			unsigned long best_idle_min_cap_orig = ULONG_MAX;
			unsigned long min_util = boosted_task_util(p);
			unsigned long target_capacity = ULONG_MAX;
			unsigned long min_wake_util = ULONG_MAX;
			unsigned long target_max_spare_cap = 0;
			unsigned long best_active_util = ULONG_MAX;
			int best_idle_cstate = INT_MAX;
			struct sched_domain *sd;
			struct sched_group *sg;
			int best_active_cpu = -1;
			int best_idle_cpu = -1;
			int target_cpu = -1;
			int cpu, i;

			*backup_cpu = -1;

			/* Find start CPU based on boost value */
			cpu = start_cpu(boosted);
			if (cpu < 0)
				return -1;

			/* Find SD for the start CPU */
			sd = rcu_dereference(per_cpu(sd_ea, cpu));
			if (!sd)
				return -1;

			/* Scan CPUs in all SDs */
			sg = sd->groups;
			do {
				for_each_cpu_and(i, &p->cpus_allowed, sched_group_span(sg)) {
					unsigned long capacity_curr = capacity_curr_of(i);
					unsigned long capacity_orig = capacity_orig_of(i);
					unsigned long wake_util, new_util;

					if (!cpu_online(i))
						continue;

					/*
					 * p's blocked utilization is still accounted for on prev_cpu
					 * so prev_cpu will receive a negative bias due to the double
					 * accounting. However, the blocked utilization may be zero.
					 */
					wake_util = cpu_util_wake(i, p);
					new_util = wake_util + task_util(p);

					/*
					 * Ensure minimum capacity to grant the required boost.
					 * The target CPU can be already at a capacity level higher
					 * than the one required to boost the task.
					 */
					new_util = max(min_util, new_util);
					if (new_util > capacity_orig)
						continue;

					/*
					 * Case A) Latency sensitive tasks
					 *
					 * Unconditionally favoring tasks that prefer idle CPU to
					 * improve latency.
					 *
					 * Looking for:
					 * - an idle CPU, whatever its idle_state is, since
					 *   the first CPUs we explore are more likely to be
					 *   reserved for latency sensitive tasks.
					 * - a non idle CPU where the task fits in its current
					 *   capacity and has the maximum spare capacity.
					 * - a non idle CPU with lower contention from other
					 *   tasks and running at the lowest possible OPP.
					 *
					 * The last two goals try to favor a non idle CPU
					 * where the task can run as if it is "almost alone".
					 * A maximum spare capacity CPU is favoured since
					 * the task already fits into that CPU's capacity
					 * without waiting for an OPP change.
					 *
					 * The following code path is the only one in the CPUs
					 * exploration loop which is always used by
					 * prefer_idle tasks. It exits the loop with either a
					 * best_active_cpu or a target_cpu which should
					 * represent an optimal choice for latency sensitive
					 * tasks.
					 */
					if (prefer_idle) {

						/*
						 * Case A.1: IDLE CPU
						 * Return the first IDLE CPU we find.
						 */
						if (idle_cpu(i)) {
							trace_sched_find_best_target(p,
									prefer_idle, min_util,
									cpu, best_idle_cpu,
									best_active_cpu, i);

							return i;
						}

						/*
						 * Case A.2: Target ACTIVE CPU
						 * Favor CPUs with max spare capacity.
						 */
						if ((capacity_curr > new_util) &&
						    (capacity_orig - new_util > target_max_spare_cap)) {
							target_max_spare_cap = capacity_orig - new_util;
							target_cpu = i;
							continue;
						}
						if (target_cpu != -1)
							continue;


						/*
						 * Case A.3: Backup ACTIVE CPU
						 * Favor CPUs with:
						 * - lower utilization due to other tasks
						 * - lower utilization with the task in
						 */
						if (wake_util > min_wake_util)
							continue;
						if (new_util > best_active_util)
							continue;
						min_wake_util = wake_util;
						best_active_util = new_util;
						best_active_cpu = i;
						continue;
					}

					/*
					 * Enforce EAS mode
					 *
					 * For non latency sensitive tasks, skip CPUs that
					 * will be overutilized by moving the task there.
					 *
					 * The goal here is to remain in EAS mode as long as
					 * possible at least for !prefer_idle tasks.
					 */
					if ((new_util * capacity_margin) >
					    (capacity_orig * SCHED_CAPACITY_SCALE))
						continue;

					/*
					 * Case B) Non latency sensitive tasks on IDLE CPUs.
					 *
					 * Find an optimal backup IDLE CPU for non latency
					 * sensitive tasks.
					 *
					 * Looking for:
					 * - minimizing the capacity_orig,
					 *   i.e. preferring LITTLE CPUs
					 * - favoring shallowest idle states
					 *   i.e. avoid to wakeup deep-idle CPUs
					 *
					 * The following code path is used by non latency
					 * sensitive tasks if IDLE CPUs are available. If at
					 * least one of such CPUs are available it sets the
					 * best_idle_cpu to the most suitable idle CPU to be
					 * selected.
					 *
					 * If idle CPUs are available, favour these CPUs to
					 * improve performances by spreading tasks.
					 * Indeed, the energy_diff() computed by the caller
					 * will take care to ensure the minimization of energy
					 * consumptions without affecting performance.
					 */
					if (idle_cpu(i)) {
						int idle_idx = idle_get_state_idx(cpu_rq(i));

						/* Select idle CPU with lower cap_orig */
						if (capacity_orig > best_idle_min_cap_orig)
							continue;

						/*
						 * Skip CPUs in deeper idle state, but only
						 * if they are also less energy efficient.
						 * IOW, prefer a deep IDLE LITTLE CPU vs a
						 * shallow idle big CPU.
						 */
						if (sysctl_sched_cstate_aware &&
						    best_idle_cstate <= idle_idx)
							continue;

						/* Keep track of best idle CPU */
						best_idle_min_cap_orig = capacity_orig;
						best_idle_cstate = idle_idx;
						best_idle_cpu = i;
						continue;
					}

					/*
					 * Case C) Non latency sensitive tasks on ACTIVE CPUs.
					 *
					 * Pack tasks in the most energy efficient capacities.
					 *
					 * This task packing strategy prefers more energy
					 * efficient CPUs (i.e. pack on smaller maximum
					 * capacity CPUs) while also trying to spread tasks to
					 * run them all at the lower OPP.
					 *
					 * This assumes for example that it's more energy
					 * efficient to run two tasks on two CPUs at a lower
					 * OPP than packing both on a single CPU but running
					 * that CPU at a higher OPP.
					 *
					 * Thus, this case keeps track of the CPU with the
					 * smallest maximum capacity and highest spare maximum
					 * capacity.
					 */

					/* Favor CPUs with smaller capacity */
					if (capacity_orig > target_capacity)
						continue;

					/* Favor CPUs with maximum spare capacity */
					if ((capacity_orig - new_util) < target_max_spare_cap)
						continue;

					target_max_spare_cap = capacity_orig - new_util;
					target_capacity = capacity_orig;
					target_cpu = i;
				}

			} while (sg = sg->next, sg != sd->groups);

			/*
			 * For non latency sensitive tasks, cases B and C in the previous loop,
			 * we pick the best IDLE CPU only if we were not able to find a target
			 * ACTIVE CPU.
			 *
			 * Policies priorities:
			 *
			 * - prefer_idle tasks:
			 *
			 *   a) IDLE CPU available, we return immediately
			 *   b) ACTIVE CPU where task fits and has the bigger maximum spare
			 *      capacity (i.e. target_cpu)
			 *   c) ACTIVE CPU with less contention due to other tasks
			 *      (i.e. best_active_cpu)
			 *
			 * - NON prefer_idle tasks:
			 *
			 *   a) ACTIVE CPU: target_cpu
			 *   b) IDLE CPU: best_idle_cpu
			 */
			if (target_cpu == -1)
				target_cpu = prefer_idle
					? best_active_cpu
					: best_idle_cpu;
			else
				*backup_cpu = prefer_idle
					? best_active_cpu
					: best_idle_cpu;

			trace_sched_find_best_target(p, prefer_idle, min_util, cpu,
						     best_idle_cpu, best_active_cpu,
						     target_cpu);

			/*
			 * It is possible for target and backup to select the
			 * same CPU - if so, drop the backup.
			 */
			if (*backup_cpu == target_cpu)
				*backup_cpu = -1;

			return target_cpu;
		}

		/*
		* Disable WAKE_AFFINE in the case where task @p doesn't fit in the
		* capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
		@@ -6814,6 +7098,7 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
		int cpu, int prev_cpu,
		int sync)
		{
		int use_fbt = sched_feat(FIND_BEST_TARGET);
		int cpu_iter, eas_cpu_idx = EAS_CPU_NXT;
		int energy_cpu = prev_cpu;
		struct energy_env *eenv;
		@@ -6829,6 +7114,14 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
		if (eenv->max_cpu_count < 2)
		return energy_cpu;

		if(!use_fbt) {
		/*
		* using this function outside wakeup balance will not supply
		* an sd ptr. Instead, fetch the highest level with energy data.
		*/
		if (!sd)
		sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));

		for_each_cpu_and(cpu_iter, &p->cpus_allowed, sched_domain_span(sd)) {
		unsigned long spare;

		@@ -6849,6 +7142,42 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
		if (eas_cpu_idx >= eenv->eenv_cpu_count)
		break;
		}
		} else {
		int boosted = (schedtune_task_boost(p) > 0);
		int prefer_idle;

		/*
		* give compiler a hint that if sched_features
		* cannot be changed, it is safe to optimise out
		* all if(prefer_idle) blocks.
		*/
		prefer_idle = sched_feat(EAS_PREFER_IDLE) ?
		(schedtune_prefer_idle(p) > 0) : 0;

		eenv->max_cpu_count = EAS_CPU_BKP+1;

		/* Find a cpu with sufficient capacity */
		eenv->cpu[EAS_CPU_NXT].cpu_id = find_best_target(p,
		&eenv->cpu[EAS_CPU_BKP].cpu_id,
		boosted, prefer_idle);

		/* take note if no backup was found */
		if (eenv->cpu[EAS_CPU_BKP].cpu_id < 0)
		eenv->max_cpu_count = EAS_CPU_BKP;

		/* take note if no target was found */
		if (eenv->cpu[EAS_CPU_NXT].cpu_id < 0)
		eenv->max_cpu_count = EAS_CPU_NXT;
		}

		if (eenv->max_cpu_count == EAS_CPU_NXT) {
		/*
		* we did not find any energy-awareness
		* candidates beyond prev_cpu, so we will
		* fall-back to the regular slow-path.
		*/
		return energy_cpu;
		}

		/* find most energy-efficient CPU */
		eas_cpu_idx = select_energy_cpu_idx(eenv);
		@@ -6894,8 +7223,11 @@ static inline int wake_energy(struct task_struct *p, int prev_cpu,
		* we cannot do energy-aware wakeup placement sensibly
		* for tasks with 0 utilization, so let them be placed
		* according to the normal strategy.
		* However if fbt is in use we may still benefit from
		* the heuristics we use there in selecting candidate
		* CPUs.
		*/
		if (unlikely(!task_util(p)))
		if (unlikely(!sched_feat(FIND_BEST_TARGET) && !task_util(p)))
		return false;

		if(!sched_feat(EAS_PREFER_IDLE)){

kernel/sched/features.h

+9 −0

Original line number	Diff line number	Diff line
		@@ -102,5 +102,14 @@ SCHED_FEAT(ENERGY_AWARE, false)
		* Direct tasks in a schedtune.prefer_idle=1 group through
		* the EAS path for wakeup task placement. Otherwise, put
		* those tasks through the mainline slow path.
		* FIND_BEST_TARGET
		* Limit the number of placement options for which we calculate
		* energy by using heuristics to select 'best idle' and
		* 'best active' cpu options.
		* FBT_STRICT_ORDER
		* ON: If the target CPU saves any energy, use that.
		* OFF: Use whichever of target or backup saves most.
		*/
		SCHED_FEAT(EAS_PREFER_IDLE, true)
		SCHED_FEAT(FIND_BEST_TARGET, true)
		SCHED_FEAT(FBT_STRICT_ORDER, true)

kernel/sched/sched.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -649,6 +649,9 @@ struct root_domain {
		struct cpupri cpupri;

		unsigned long max_cpu_capacity;

		/* First cpu with maximum and minimum original capacity */
		int max_cap_orig_cpu, min_cap_orig_cpu;
		};

		extern struct root_domain def_root_domain;

kernel/sched/topology.c

+14 −0

Original line number	Diff line number	Diff line
		@@ -282,6 +282,9 @@ static int init_rootdomain(struct root_domain *rd)

		if (cpupri_init(&rd->cpupri) != 0)
		goto free_cpudl;

		rd->max_cap_orig_cpu = rd->min_cap_orig_cpu = -1;

		return 0;

		free_cpudl:
		@@ -1806,6 +1809,9 @@ build_sched_domains(const struct cpumask cpu_map, struct sched_domain_attr att
		/* Attach the domains */
		rcu_read_lock();
		for_each_cpu(i, cpu_map) {
		int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu);
		int min_cpu = READ_ONCE(d.rd->min_cap_orig_cpu);

		rq = cpu_rq(i);
		sd = *per_cpu_ptr(d.sd, i);

		@@ -1813,6 +1819,14 @@ build_sched_domains(const struct cpumask cpu_map, struct sched_domain_attr att
		if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
		WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);

		if ((max_cpu < 0) \|\| (cpu_rq(i)->cpu_capacity_orig >
		cpu_rq(max_cpu)->cpu_capacity_orig))
		WRITE_ONCE(d.rd->max_cap_orig_cpu, i);

		if ((min_cpu < 0) \|\| (cpu_rq(i)->cpu_capacity_orig <
		cpu_rq(min_cpu)->cpu_capacity_orig))
		WRITE_ONCE(d.rd->min_cap_orig_cpu, i);

		cpu_attach_domain(sd, d.rd, i);
		}
		rcu_read_unlock();