
Commit 60664914 authored by Chris Redpath

ANDROID: sched/fair: re-factor energy_diff to use a single (extensible) energy_env



The energy_env data structure is used to cache values required by multiple
different functions involved in the energy_diff computation. Some of these
functions require additional parameters which can easily be embedded
into the energy_env itself. The current implementation of energy_diff
hardcodes the use of two different energy_env structures to estimate and
compare the energy consumption of a "before" and an "after" CPU.
Moreover, it performs this energy estimation by walking the SDs/SGs data
structures multiple times.

A better design can be achieved by making better use of the energy_env
structure to support a more efficient and concurrent evaluation of multiple
schedule candidates. To this purpose, this patch provides a complete
re-factoring of the energy_diff implementation to:

1. use a single energy_env structure for the evaluation of all the
   candidate CPUs

2. walk the SDs/SGs just once, thus improving the overall performance
   of computing the energy estimation for each CPU candidate specified
   by the single energy_env

3. simplify the code (at least if you look at the new version and not at
   this re-factoring patch), thus providing cleaner code to maintain
   and extend with additional features

This patch updates all the clients of energy_env to use only the data
provided by this structure and an index for one of its CPU candidates.
Embedding everything within the energy_env will make it simple to add
tracepoints for this new version, which can easily provide a holistic
view of how energy_diff evaluated the proposed CPU candidates.

The new proposed structure, for both "struct energy_env" and the functions
using it, is designed to easily accommodate further extensions
(e.g. SchedTune filtering) without requiring another big re-factoring
of these core functions.

The semantics of energy_diff have now changed: it returns the index of the
most energy efficient CPU candidate, among the ones specified in the
energy_env structure, where index 0 is the task's previous CPU
(i.e. EAS_CPU_PRV). Thus, the function has also been renamed to:
   select_energy_cpu_idx(eenv)

The use of a CPU candidates array, embedded into the energy_env
structure, allows seamless exploration of all possible candidate CPUs,
so as to maintain the brute placement strategy for non-prefer-idle
tasks. This can evaluate the effectiveness of spread/pack strategies
automatically for all possible placements.

Also includes:
ANDROID: sched/fair: Sync task util before EAS wakeup

Before using a task's util_avg signal in EAS, we need to ensure that
it has been synced up to the last_update_time of prev_cpu's root
cfs_rq.

We previously relied on the side effect of wake_cap to do that;
however, that does not happen when the waking CPU has the same
capacity as the prev_cpu. Therefore, just explicitly call
sync_entity_load_avg. This may result in calling that function twice
within the same select_task_rq_fair, but since last_update_time
hasn't changed, the second call will bail out very quickly.

In previous integrations of EAS for Android, we had a patch from
Joonwoo Park <joonwoop@codeaurora.org> which kept energy calculations
at full resolution for part of the calculation.

The original commit log was:
    sched: EAS: fix incorrect energy delta calculation due
    to rounding error

    In order to calculate the energy difference we currently iterate
    CPUs under the same sched domain to accumulate total energy
    cost and compare before and after. Energy is shifted down as it
    is accumulated.

    Doing so can incorrectly calculate and report abs(delta) > 0
    when there is actually no energy delta between before and after,
    because the same total accumulated cpu_util of all the CPUs can
    be distributed differently before and after, causing different
    amounts of rounding error.

    Fix this incorrectness by shifting just once with the accumulated
    total_energy.

    Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>

When reimplementing this for 4.14, I decided we should never bother
to shift the value down except when we print it, since we only ever
compare the values we calculate and the absolute value doesn't matter.

Also, since c32055a4ff68
"UPSTREAM: sched/fair: Sync task util before slow-path wakeup"
we no longer need to update the utilization signals in the energy-aware
wakeup path once it has been refactored to happen at the bottom of
select_task_rq_fair, so this patch does not include the usual
signal sync in the energy-aware functions.

Change-Id: I3ae8e630f5674739d9cf059a82609b113aaffb2f
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
(integrated with brute wakeup and 4.14)
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
parent 19247f6f
+308 −158
@@ -5504,30 +5504,6 @@ static inline bool energy_aware(void)
	return sched_feat(ENERGY_AWARE);
}

struct energy_env {
	struct sched_group	*sg_top;
	struct sched_group	*sg_cap;
	int			cap_idx;
	int			util_delta;
	int			src_cpu;
	int			dst_cpu;
	int			trg_cpu;
	int			energy;
	int			payoff;
	struct task_struct	*task;
	struct {
		int before;
		int after;
		int delta;
		int diff;
	} nrg;
	struct {
		int before;
		int after;
		int delta;
	} cap;
};

static int cpu_util_wake(int cpu, struct task_struct *p);

/*
@@ -5552,22 +5528,88 @@ static unsigned long __cpu_norm_util(unsigned long util, unsigned long capacity)
	return (util << SCHED_CAPACITY_SHIFT)/capacity;
}

static unsigned long group_max_util(struct energy_env *eenv)
/*
 * CPU candidates.
 *
 * These are labels to reference CPU candidates for an energy_diff.
 * Currently we support only two possible candidates: the task's previous CPU
 * and another candidate CPU.
 * More advanced/aggressive EAS selection policies can consider more
 * candidates.
 */
#define EAS_CPU_PRV	0
#define EAS_CPU_NXT	1
#define EAS_CPU_BKP	2

/*
 * energy_diff - supports the computation of the estimated energy impact in
 * moving a "task"'s "util_delta" between different CPU candidates.
 */
struct eenv_cpu {
	/* CPU ID, must be in cpus_mask */
	int     cpu_id;

	/*
	 * Index (into sched_group_energy::cap_states) of the OPP the
	 * CPU needs to run at if the task is placed on it.
	 * This includes both the active and blocked load due to
	 * other tasks on this CPU, as well as the task's own
	 * utilization.
	 */
	int     cap_idx;
	int     cap;

	/* Estimated system energy */
	unsigned long energy;

	/* Estimated energy variation wrt EAS_CPU_PRV */
	long nrg_delta;
};

struct energy_env {
	/* Utilization to move */
	struct task_struct	*p;
	unsigned long		util_delta;
	unsigned long		util_delta_boosted;

	/* Mask of CPUs candidates to evaluate */
	cpumask_t		cpus_mask;

	/* CPU candidates to evaluate */
	struct eenv_cpu *cpu;
	int eenv_cpu_count;

	/*
	 * Index (into energy_env::cpu) of the most energy efficient CPU for
	 * the specified energy_env::task
	 */
	int	next_idx;
	int	max_cpu_count;

	/* Support data */
	struct sched_group	*sg_top;
	struct sched_group	*sg_cap;
	struct sched_group	*sg;
};

static int cpu_util_wake(int cpu, struct task_struct *p);

static unsigned long group_max_util(struct energy_env *eenv, int cpu_idx)
{
	unsigned long max_util = 0;
	unsigned long util;
	int cpu;

	for_each_cpu(cpu, sched_group_span(eenv->sg_cap)) {
		util = cpu_util_wake(cpu, eenv->task);
		util = cpu_util_wake(cpu, eenv->p);

		/*
		 * If we are looking at the target CPU specified by the eenv,
		 * then we should add the (estimated) utilization of the task
		 * assuming we will wake it up on that CPU.
		 */
		if (unlikely(cpu == eenv->trg_cpu))
			util += eenv->util_delta;
		if (unlikely(cpu == eenv->cpu[cpu_idx].cpu_id))
			util += eenv->util_delta_boosted;

		max_util = max(max_util, util);
	}
@@ -5585,21 +5627,21 @@ static unsigned long group_max_util(struct energy_env *eenv)
 * estimate (more busy).
 */
static unsigned
long group_norm_util(struct energy_env *eenv, struct sched_group *sg)
long group_norm_util(struct energy_env *eenv, int cpu_idx)
{
	unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap;
	unsigned long capacity = eenv->cpu[cpu_idx].cap;
	unsigned long util, util_sum = 0;
	int cpu;

	for_each_cpu(cpu, sched_group_span(sg)) {
		util = cpu_util_wake(cpu, eenv->task);
	for_each_cpu(cpu, sched_group_span(eenv->sg)) {
		util = cpu_util_wake(cpu, eenv->p);

		/*
		 * If we are looking at the target CPU specified by the eenv,
		 * then we should add the (estimated) utilization of the task
		 * assuming we will wake it up on that CPU.
		 */
		if (unlikely(cpu == eenv->trg_cpu))
		if (unlikely(cpu == eenv->cpu[cpu_idx].cpu_id))
			util += eenv->util_delta;

		util_sum += __cpu_norm_util(util, capacity);
@@ -5610,26 +5652,30 @@ long group_norm_util(struct energy_env *eenv, struct sched_group *sg)
	return util_sum;
}

static int find_new_capacity(struct energy_env *eenv,
	const struct sched_group_energy const *sge)
static int find_new_capacity(struct energy_env *eenv, int cpu_idx)
{
	int idx;
	unsigned long util = group_max_util(eenv);
	const struct sched_group_energy *sge = eenv->sg_cap->sge;
	unsigned long util = group_max_util(eenv, cpu_idx);
	int idx, cap_idx;

	eenv->cap_idx = sge->nr_cap_states - 1;
	cap_idx = sge->nr_cap_states - 1;

	for (idx = 0; idx < sge->nr_cap_states; idx++) {
		if (sge->cap_states[idx].cap >= util) {
			eenv->cap_idx = idx;
			cap_idx = idx;
			break;
		}
	}
	/* Keep track of SG's capacity */
	eenv->cpu[cpu_idx].cap = sge->cap_states[cap_idx].cap;
	eenv->cpu[cpu_idx].cap_idx = cap_idx;

	return eenv->cap_idx;
	return cap_idx;
}

static int group_idle_state(struct energy_env *eenv, struct sched_group *sg)
static int group_idle_state(struct energy_env *eenv, int cpu_idx)
{
	struct sched_group *sg = eenv->sg;
	int src_in_grp, dst_in_grp;
	int i, state = INT_MAX;
	int max_idle_state_idx;
@@ -5649,8 +5695,10 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg)
	for_each_cpu(i, sched_group_span(sg))
		grp_util += cpu_util(i);

	src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_span(sg));
	dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_span(sg));
	src_in_grp = cpumask_test_cpu(eenv->cpu[EAS_CPU_PRV].cpu_id,
				      sched_group_span(sg));
	dst_in_grp = cpumask_test_cpu(eenv->cpu[cpu_idx].cpu_id,
				      sched_group_span(sg));
	if (src_in_grp == dst_in_grp) {
		/*
		 * both CPUs under consideration are in the same group or not in
@@ -5697,19 +5745,53 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg)
}

/*
 * sched_group_energy(): Computes the absolute energy consumption of cpus
 * belonging to the sched_group including shared resources shared only by
 * members of the group. Iterates over all cpus in the hierarchy below the
 * sched_group starting from the bottom working it's way up before going to
 * the next cpu until all cpus are covered at all levels. The current
 * implementation is likely to gather the same util statistics multiple times.
 * This can probably be done in a faster but more complex way.
 * Note: sched_group_energy() may fail when racing with sched_domain updates.
 * calc_sg_energy: compute energy for the eenv's SG (i.e. eenv->sg).
 *
 * This works in iterations to compute the SG's energy for each CPU
 * candidate defined by the energy_env's cpu array.
 */
static int sched_group_energy(struct energy_env *eenv)
static void calc_sg_energy(struct energy_env *eenv)
{
	struct sched_group *sg = eenv->sg;
	unsigned long busy_energy, idle_energy;
	unsigned int busy_power, idle_power;
	unsigned long total_energy = 0;
	unsigned long sg_util;
	int cap_idx, idle_idx;
	int cpu_idx;

	for (cpu_idx = EAS_CPU_PRV; cpu_idx < eenv->max_cpu_count; ++cpu_idx) {
		if (eenv->cpu[cpu_idx].cpu_id == -1)
			continue;

		/* Compute ACTIVE energy */
		cap_idx = find_new_capacity(eenv, cpu_idx);
		busy_power = sg->sge->cap_states[cap_idx].power;
		sg_util = group_norm_util(eenv, cpu_idx);
		busy_energy   = sg_util * busy_power;

		/* Compute IDLE energy */
		idle_idx = group_idle_state(eenv, cpu_idx);
		idle_power = sg->sge->idle_states[idle_idx].power;
		idle_energy   = SCHED_CAPACITY_SCALE - sg_util;
		idle_energy  *= idle_power;

		total_energy = busy_energy + idle_energy;
		eenv->cpu[cpu_idx].energy += total_energy;
	}
}

/*
 * compute_energy() computes the absolute variation in energy consumption by
 * moving eenv.util_delta from EAS_CPU_PRV to EAS_CPU_NXT.
 *
 * NOTE: compute_energy() may fail when racing with sched_domain updates, in
 *       which case we abort by returning -EINVAL.
 */
static int compute_energy(struct energy_env *eenv)
{
	struct sched_domain *sd;
	int cpu, total_energy = 0;
	int cpu;
	struct cpumask visit_cpus;
	struct sched_group *sg;

@@ -5727,16 +5809,7 @@ static int sched_group_energy(struct energy_env *eenv)
		 * sched_group?
		 */
		sd = rcu_dereference(per_cpu(sd_scs, cpu));

		if (!sd)
			/*
			 * We most probably raced with hotplug; returning a
			 * wrong energy estimation is better than entering an
			 * infinite loop.
			 */
			return -EINVAL;

		if (sd->parent)
		if (sd && sd->parent)
			sg_shared_cap = sd->parent->groups;

		for_each_domain(cpu, sd) {
@@ -5747,26 +5820,18 @@ static int sched_group_energy(struct energy_env *eenv)
				break;

			do {
				unsigned long group_util;
				int sg_busy_energy, sg_idle_energy;
				int cap_idx, idle_idx;

				eenv->sg_cap = sg;
				if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
					eenv->sg_cap = sg_shared_cap;
				else
					eenv->sg_cap = sg;

				cap_idx = find_new_capacity(eenv, sg->sge);
				idle_idx = group_idle_state(eenv, sg);
				group_util = group_norm_util(eenv, sg);
				sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
								>> SCHED_CAPACITY_SHIFT;
				sg_idle_energy = ((SCHED_CAPACITY_SCALE-group_util)
								* sg->sge->idle_states[idle_idx].power)
								>> SCHED_CAPACITY_SHIFT;

				total_energy += sg_busy_energy + sg_idle_energy;
				/*
				 * Compute the energy for all the candidate
				 * CPUs in the current visited SG.
				 */
				eenv->sg = sg;
				calc_sg_energy(eenv);

				/* remove CPUs we have just visited */
				if (!sd->child)
					cpumask_xor(&visit_cpus, &visit_cpus, sched_group_span(sg));

@@ -5779,7 +5844,6 @@ static int sched_group_energy(struct energy_env *eenv)
		continue;
	}

	eenv->energy = total_energy;
	return 0;
}

@@ -5788,72 +5852,92 @@ static inline bool cpu_in_sg(struct sched_group *sg, int cpu)
	return cpu != -1 && cpumask_test_cpu(cpu, sched_group_span(sg));
}

static inline unsigned long task_util(struct task_struct *p);

/*
 * __energy_diff(): Estimate the energy impact of changing the utilization
 * distribution. eenv specifies the change: utilisation amount, source, and
 * destination cpu. Source or destination cpu may be -1 in which case the
 * utilization is removed from or added to the system (e.g. task wake-up). If
 * both are specified, the utilization is migrated.
 * select_energy_cpu_idx(): estimate the energy impact of changing the
 * utilization distribution.
 *
 * The eenv parameter specifies the changes: utilisation amount and a pair of
 * possible CPU candidates (the previous CPU and a different target CPU).
 *
 * This function returns the index of a CPU candidate specified by the
 * energy_env which corresponds to the most energy_efficient CPU.
 * Thus, 0 (EAS_CPU_PRV) means that none of the CPU candidates is more energy
 * efficient than running on prev_cpu. This is also the value returned in case
 * of abort due to error conditions during the computations.
 * A value greater than zero means that the most energy efficient CPU is the
 * one represented by eenv->cpu[eenv->next_idx].cpu_id.
 */
static int energy_diff(struct energy_env *eenv)
static inline int select_energy_cpu_idx(struct energy_env *eenv)
{
	struct sched_domain *sd;
	struct sched_group *sg;
	int sd_cpu = -1, energy_before = 0, energy_after = 0;
	int sd_cpu = -1;
	int cpu_idx;
	int margin;

	struct energy_env eenv_before = {
		.util_delta	= task_util(eenv->task),
		.src_cpu	= eenv->src_cpu,
		.dst_cpu	= eenv->dst_cpu,
		.nrg		= { 0, 0, 0, 0},
		.cap		= { 0, 0, 0 },
		.trg_cpu	= eenv->src_cpu,
		.task		= eenv->task,
	};

	if (eenv->src_cpu == eenv->dst_cpu)
		return 0;

	sd_cpu = (eenv->src_cpu != -1) ? eenv->src_cpu : eenv->dst_cpu;
	sd_cpu = eenv->cpu[EAS_CPU_PRV].cpu_id;
	sd = rcu_dereference(per_cpu(sd_ea, sd_cpu));

	if (!sd)
		return 0; /* Error */
		return EAS_CPU_PRV;

	sg = sd->groups;
	cpumask_clear(&eenv->cpus_mask);
	for (cpu_idx = EAS_CPU_PRV; cpu_idx < eenv->max_cpu_count; ++cpu_idx) {
		int cpu = eenv->cpu[cpu_idx].cpu_id;

	do {
		if (cpu_in_sg(sg, eenv->src_cpu) || cpu_in_sg(sg, eenv->dst_cpu)) {
			eenv_before.sg_top = eenv->sg_top = sg;
		if (cpu < 0)
			continue;
		cpumask_set_cpu(cpu, &eenv->cpus_mask);
	}

			if (sched_group_energy(&eenv_before))
				return 0; /* Invalid result abort */
			energy_before += eenv_before.energy;
	sg = sd->groups;
	do {
		/* Skip SGs which do not contain a candidate CPU */
		if (!cpumask_intersects(&eenv->cpus_mask, sched_group_span(sg)))
			continue;

			if (sched_group_energy(eenv))
				return 0; /* Invalid result abort */
			energy_after += eenv->energy;
		}
		eenv->sg_top = sg;
		if (compute_energy(eenv) == -EINVAL)
			return EAS_CPU_PRV;
	} while (sg = sg->next, sg != sd->groups);

	eenv->nrg.after = energy_after;
	eenv->nrg.before = energy_before;
	/* remember - eenv energy values are unscaled */

	/*
	 * Dead-zone margin preventing too many migrations.
	 * Compute the dead-zone margin used to prevent too many task
	 * migrations with negligible energy savings.
	 * An energy saving is considered meaningful if it reduces the energy
	 * consumption of EAS_CPU_PRV CPU candidate by at least ~1.56%
	 */
	margin = eenv->cpu[EAS_CPU_PRV].energy >> 6;

	margin = energy_before >> 6; /* ~1.56% */

	eenv->nrg.diff = energy_after-energy_before;
	/*
	 * By default the EAS_CPU_PRV CPU is considered the most energy
	 * efficient, with a 0 energy variation.
	 */
	eenv->next_idx = EAS_CPU_PRV;
	eenv->cpu[EAS_CPU_PRV].nrg_delta = 0;

	if (abs(eenv->nrg.diff) < margin)
		return 0;
	/*
	 * Compare the other CPU candidates to find a CPU which can be
	 * more energy efficient than EAS_CPU_PRV
	 */
	for (cpu_idx = EAS_CPU_NXT; cpu_idx < eenv->max_cpu_count; cpu_idx++) {
		/* Skip invalid schedule candidates */
		if (eenv->cpu[cpu_idx].cpu_id < 0)
			continue;
		/* Compute energy delta wrt EAS_CPU_PRV */
		eenv->cpu[cpu_idx].nrg_delta =
			eenv->cpu[cpu_idx].energy -
			eenv->cpu[EAS_CPU_PRV].energy;
		/* filter energy variations within the dead-zone margin */
		if (abs(eenv->cpu[cpu_idx].nrg_delta) < margin)
			eenv->cpu[cpu_idx].nrg_delta = 0;
		/* update the schedule candidate with min(nrg_delta) */
		if (eenv->cpu[cpu_idx].nrg_delta <
		    eenv->cpu[eenv->next_idx].nrg_delta)
			eenv->next_idx = cpu_idx;
	}

	return eenv->nrg.diff;
	return eenv->next_idx;
}

/*
@@ -5970,7 +6054,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
	return affine;
}

static inline int task_util(struct task_struct *p);
static inline unsigned long task_util(struct task_struct *p);

#ifdef CONFIG_SCHED_TUNE
struct reciprocal_value schedtune_spc_rdiv;
@@ -6592,7 +6676,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	return select_idle_sibling_cstate_aware(p, prev, target);
}

static inline int task_util(struct task_struct *p)
static inline unsigned long task_util(struct task_struct *p)
{
	return p->se.avg.util_avg;
}
@@ -6617,7 +6701,7 @@ static int cpu_util_wake(int cpu, struct task_struct *p)

static inline int task_fits_capacity(struct task_struct *p, long capacity)
{
	return capacity * 1024 > task_util(p) * capacity_margin;
	return capacity * 1024 > boosted_task_util(p) * capacity_margin;
}

/*
@@ -6649,6 +6733,77 @@ static bool cpu_overutilized(int cpu)
	return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
}

DEFINE_PER_CPU(struct energy_env, eenv_cache);

/* kernels often have NR_CPUS defined to be much
 * larger than what exists in practice on booted systems.
 * Allocate the cpu array for eenv calculations
 * at boot time to avoid massive overprovisioning.
 */
static inline void alloc_eenv(void)
{
	int cpu;
	int cpu_count = num_possible_cpus();

	for_each_possible_cpu(cpu) {
		struct energy_env *eenv = &per_cpu(eenv_cache, cpu);
		eenv->cpu = kmalloc(sizeof(struct eenv_cpu) * cpu_count, GFP_KERNEL);
		eenv->eenv_cpu_count = cpu_count;
	}
}

static inline void reset_eenv(struct energy_env *eenv)
{
	int cpu_count;
	struct eenv_cpu *cpu;

	cpu_count = eenv->eenv_cpu_count;
	cpu = eenv->cpu;
	memset(eenv, 0, sizeof(struct energy_env));
	eenv->cpu = cpu;
	memset(eenv->cpu, 0, sizeof(struct eenv_cpu)*cpu_count);
	eenv->eenv_cpu_count = cpu_count;

}
/*
 * get_eenv - reset the eenv struct cached for this CPU
 *
 * When the eenv is returned, it is configured to do
 * energy calculations for the maximum number of CPUs
 * the task can be placed on. The prev_cpu entry is
 * filled in here. Callers are responsible for adding
 * other CPU candidates up to eenv->max_cpu_count.
 */
static inline struct energy_env *get_eenv(struct task_struct *p, int prev_cpu)
{
	struct energy_env *eenv;
	cpumask_t cpumask_possible_cpus;
	int cpu = smp_processor_id();
	int i;

	eenv = &(per_cpu(eenv_cache, cpu));
	reset_eenv(eenv);

	/* populate eenv */
	eenv->p = p;
	/* use boosted task util for capacity selection
	 * during energy calculation, but unboosted task
	 * util for group utilization calculations
	 */
	eenv->util_delta = task_util(p);
	eenv->util_delta_boosted = boosted_task_util(p);

	cpumask_and(&cpumask_possible_cpus, &p->cpus_allowed, cpu_online_mask);
	eenv->max_cpu_count = cpumask_weight(&cpumask_possible_cpus);

	for (i=0; i < eenv->max_cpu_count; i++)
		eenv->cpu[i].cpu_id = -1;
	eenv->cpu[EAS_CPU_PRV].cpu_id = prev_cpu;
	eenv->next_idx = EAS_CPU_PRV;

	return eenv;
}

/*
 * Needs to be called inside rcu_read_lock critical section.
 * sd is a pointer to the sched domain we wish to use for an
@@ -6659,10 +6814,9 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
				     int cpu, int prev_cpu,
				     int sync)
{
	int energy_cpu = prev_cpu, spare_cpu = prev_cpu;
	unsigned long max_spare = 0;
	int min_diff = 0;
	int i;
	int cpu_iter, eas_cpu_idx = EAS_CPU_NXT;
	int energy_cpu = prev_cpu;
	struct energy_env *eenv;

	if (sysctl_sched_sync_hint_enable && sync) {
		if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
@@ -6670,43 +6824,37 @@ static int find_energy_efficient_cpu(struct sched_domain *sd,
		}
	}

	for_each_cpu_and(i, &p->cpus_allowed, sched_domain_span(sd)) {
		int diff;
		unsigned long spare;

		struct energy_env eenv = {
			.util_delta	= task_util(p),
			.src_cpu	= prev_cpu,
			.dst_cpu	= i,
			.task		= p,
			.trg_cpu	= i,
		};
	/* take ownership of our per-cpu data structure */
	eenv = get_eenv(p, prev_cpu);
	if (eenv->max_cpu_count < 2)
		return energy_cpu;

		spare = capacity_spare_wake(i, p);
	for_each_cpu_and(cpu_iter, &p->cpus_allowed, sched_domain_span(sd)) {
		unsigned long spare;

		if (i == prev_cpu)
		/* prev_cpu already in list */
		if (cpu_iter == prev_cpu)
			continue;

		if (spare > max_spare) {
			max_spare = spare;
			spare_cpu = i;
		}
		spare = capacity_spare_wake(cpu_iter, p);

		if (spare * 1024 < capacity_margin * task_util(p))
			continue;

		diff = energy_diff(&eenv);
		/* Add CPU candidate */
		eenv->cpu[eas_cpu_idx++].cpu_id = cpu_iter;
		eenv->max_cpu_count = eas_cpu_idx;

		if (diff < min_diff) {
			min_diff = diff;
			energy_cpu = i;
		}
		/* stop adding CPUs if we have no space left */
		if (eas_cpu_idx >= eenv->eenv_cpu_count)
			break;
	}

	if (energy_cpu == prev_cpu && !cpu_overutilized(prev_cpu))
		return prev_cpu;
	/* find most energy-efficient CPU */
	eas_cpu_idx = select_energy_cpu_idx(eenv);
	energy_cpu = eenv->cpu[eas_cpu_idx].cpu_id;

	return energy_cpu != prev_cpu ? energy_cpu : spare_cpu;
	return energy_cpu;
}

static inline bool nohz_kick_needed(struct rq *rq, bool only_update);
@@ -10694,6 +10842,8 @@ __init void init_sched_fair_class(void)
	nohz.next_update = jiffies;
	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
#endif

	alloc_eenv();
#endif /* SMP */

}
+3 −0
@@ -2024,6 +2024,9 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { }


#ifdef CONFIG_SMP

extern void init_energy_aware_data(int cpu);

static inline
void __dl_update(struct dl_bw *dl_b, s64 bw)
{