Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bcc5677f authored by Joonwoo Park
Browse files

sched: precompute required frequency for CPU load



At present, in order to estimate the power cost of CPU load, the HMP
scheduler converts CPU load to the corresponding frequency on the fly,
which can be avoided.

Optimize and reduce the execution time of select_best_cpu() by precomputing
the CPU load to frequency conversion.  This optimization reduces the
execution time of select_best_cpu() by about 20% on average.

Change-Id: I385c57f2ea9a50883b76ba6ca3deb673b827217f
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
parent bad0cb41
Loading
Loading
Loading
Loading
+31 −9
Original line number Diff line number Diff line
@@ -1732,19 +1732,26 @@ u32 __weak get_freq_max_load(int cpu, u32 freq)
}

DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);
static DEFINE_SPINLOCK(freq_max_load_lock);

int sched_update_freq_max_load(const cpumask_t *cpumask)
{
	int i, cpu, ret;
	unsigned int freq, max;
	unsigned int freq;
	struct cpu_pstate_pwr *costs;
	struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
	struct freq_max_load *max_load, *old_max_load;
	struct freq_max_load_entry *entry;
	u64 max_demand_capacity, max_demand;
	unsigned long flags;
	u32 hfreq;
	int hpct;

	if (!per_cpu_info || !sysctl_sched_enable_power_aware)
		return 0;

	mutex_lock(&policy_mutex);
	spin_lock_irqsave(&freq_max_load_lock, flags);
	max_demand_capacity = div64_u64(max_task_load(), max_possible_capacity);
	for_each_cpu(cpu, cpumask) {
		if (!per_cpu_info[cpu].ptable) {
			ret = -EINVAL;
@@ -1755,24 +1762,35 @@ int sched_update_freq_max_load(const cpumask_t *cpumask)

		/*
		 * allocate len + 1 and leave the last power cost as 0 for
		 * power_cost_at_freq() can stop iterating index when
		 * power_cost() can stop iterating index when
		 * per_cpu_info[cpu].len > len of max_load due to race between
		 * cpu power stats update and get_cpu_pwr_stats().
		 */
		max_load = kzalloc(sizeof(struct freq_max_load) +
				   sizeof(u32) * (per_cpu_info[cpu].len + 1),
				   GFP_ATOMIC);
				   sizeof(struct freq_max_load_entry) *
				   (per_cpu_info[cpu].len + 1), GFP_ATOMIC);
		if (unlikely(!max_load)) {
			ret = -ENOMEM;
			goto fail;
		}

		max_load->length = per_cpu_info[cpu].len;

		max_demand = max_demand_capacity *
			     cpu_rq(cpu)->max_possible_capacity;

		i = 0;
		costs = per_cpu_info[cpu].ptable;
		while (costs[i].freq) {
			entry = &max_load->freqs[i];
			freq = costs[i].freq;
			max = get_freq_max_load(cpu, freq);
			max_load->freqs[i] = div64_u64((u64)freq * max, 100);
			hpct = get_freq_max_load(cpu, freq);
			if (hpct <= 0 && hpct > 100)
				hpct = 100;
			hfreq = div64_u64((u64)freq * hpct , 100);
			entry->hdemand =
			    div64_u64(max_demand * hfreq,
				      cpu_rq(cpu)->max_possible_freq);
			i++;
		}

@@ -1781,7 +1799,7 @@ int sched_update_freq_max_load(const cpumask_t *cpumask)
			kfree_rcu(old_max_load, rcu);
	}

	mutex_unlock(&policy_mutex);
	spin_unlock_irqrestore(&freq_max_load_lock, flags);
	return 0;

fail:
@@ -1793,7 +1811,7 @@ fail:
		}
	}

	mutex_unlock(&policy_mutex);
	spin_unlock_irqrestore(&freq_max_load_lock, flags);
	return ret;
}
#else	/* CONFIG_SCHED_FREQ_INPUT */
@@ -2451,6 +2469,8 @@ int sched_set_window(u64 window_start, unsigned int window_size)

	reset_all_window_stats(ws, window_size);

	sched_update_freq_max_load(cpu_possible_mask);

	mutex_unlock(&policy_mutex);

	return 0;
@@ -2757,6 +2777,8 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
	if (update_max) {
		max_possible_capacity = highest_mpc;
		max_load_scale_factor = highest_mplsf;

		sched_update_freq_max_load(cpu_possible_mask);
	}

	__update_min_max_capacity();
+55 −60
Original line number Diff line number Diff line
@@ -2654,24 +2654,7 @@ static inline u64 cpu_load(int cpu)

static inline u64 cpu_load_sync(int cpu, int sync)
{
	struct rq *rq = cpu_rq(cpu);
	u64 load;

	load = rq->hmp_stats.cumulative_runnable_avg;

	/*
	 * If load is being checked in a sync wakeup environment,
	 * we may want to discount the load of the currently running
	 * task.
	 */
	if (sync && cpu == smp_processor_id()) {
		if (load > rq->curr->ravg.demand)
			load -= rq->curr->ravg.demand;
		else
			load = 0;
	}

	return scale_load_to_cpu(load, cpu);
	return scale_load_to_cpu(cpu_cravg_sync(cpu, sync), cpu);
}

static int
@@ -2819,12 +2802,20 @@ int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost)
	return abs(delta) > cost_limit;
}

static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
/*
 * Return the cost of running task p on CPU cpu. This function
 * currently assumes that task p is the only task which will run on
 * the CPU.
 */
unsigned int power_cost(int cpu, u64 demand)
{
	int i = 0;
	int first, mid, last;
	struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
	struct cpu_pstate_pwr *costs;
	struct freq_max_load *max_load;
	int total_static_pwr_cost = 0;
	struct rq *rq = cpu_rq(cpu);
	unsigned int pc;

	if (!per_cpu_info || !per_cpu_info[cpu].ptable ||
	    !sysctl_sched_enable_power_aware)
@@ -2833,49 +2824,52 @@ static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
		 * capacity as a rough stand-in for real CPU power
		 * numbers, assuming bigger CPUs are more power
		 * hungry. */
		return cpu_rq(cpu)->max_possible_capacity;

	costs = per_cpu_info[cpu].ptable;
		return rq->max_possible_capacity;

	rcu_read_lock();
	max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
	while (costs[i].freq != 0) {
		if (costs[i+1].freq == 0 ||
		    (costs[i].freq >= freq &&
		     (!max_load || max_load->freqs[i] >= freq))) {
			rcu_read_unlock();
			return costs[i].power;
		}
		i++;
	if (!max_load) {
		pc = rq->max_possible_capacity;
		goto unlock;
	}
	rcu_read_unlock();
	BUG();

	costs = per_cpu_info[cpu].ptable;

	if (demand <= max_load->freqs[0].hdemand) {
		pc = costs[0].power;
		goto unlock;
	} else if (demand > max_load->freqs[max_load->length - 1].hdemand) {
		pc = costs[max_load->length - 1].power;
		goto unlock;
	}

/* Return the cost of running the total task load total_load on CPU cpu. */
unsigned int power_cost(u64 total_load, int cpu)
{
	unsigned int task_freq;
	struct rq *rq = cpu_rq(cpu);
	u64 demand;
	int total_static_pwr_cost = 0;
	first = 0;
	last = max_load->length - 1;
	mid = (last - first) >> 1;
	while (1) {
		if (demand <= max_load->freqs[mid].hdemand)
			last = mid;
		else
			first = mid;

	if (!sysctl_sched_enable_power_aware)
		return rq->max_possible_capacity;
		if (last - first == 1)
			break;
		mid = first + ((last - first) >> 1);
	}

	/* calculate % of max freq needed */
	demand = total_load * 100;
	demand = div64_u64(demand, max_task_load());
	pc = costs[last].power;

	task_freq = demand * rq->max_possible_freq;
	task_freq /= 100; /* khz needed */
unlock:
	rcu_read_unlock();

	if (idle_cpu(cpu) && rq->cstate) {
		total_static_pwr_cost += rq->static_cpu_pwr_cost;
		if (rq->dstate)
			total_static_pwr_cost += rq->static_cluster_pwr_cost;
	}
	return power_cost_at_freq(cpu, task_freq) + total_static_pwr_cost;

	return pc + total_static_pwr_cost;

}

#define UP_MIGRATION		1
@@ -2908,8 +2902,7 @@ static int skip_freq_domain(struct rq *task_rq, struct rq *rq, int reason)
	return skip;
}

static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu,
		    u64 task_load, int reason)
static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu, int reason)
{
	int skip;

@@ -2970,8 +2963,9 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
		struct rq *rq = cpu_rq(i);

		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), sched_irqload(i),
		 power_cost(scale_load_to_cpu(task_load(p) +
		 cpu_load_sync(i, sync), i), i), cpu_temp(i));
				    power_cost(i, task_load(p) +
					       cpu_cravg_sync(i, sync)),
				    cpu_temp(i));

		if (skip_freq_domain(trq, rq, reason)) {
			cpumask_andnot(&search_cpus, &search_cpus,
@@ -2979,8 +2973,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
			continue;
		}

		tload =  scale_load_to_cpu(task_load(p), i);
		if (skip_cpu(trq, rq, i, tload, reason))
		if (skip_cpu(trq, rq, i, reason))
			continue;

		cpu_load = cpu_load_sync(i, sync);
@@ -2999,6 +2992,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
		if (boost)
			continue;

		tload = scale_load_to_cpu(task_load(p), i);
		if (!eligible_cpu(tload, cpu_load, i, sync) ||
					!task_load_will_fit(p, tload, i))
			continue;
@@ -3008,7 +3002,8 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
		 * under spill.
		 */

		cpu_cost = power_cost(tload + cpu_load, i);
		cpu_cost = power_cost(i, task_load(p) +
					 cpu_cravg_sync(i, sync));

		if (cpu_cost > min_cost)
			continue;
@@ -3646,7 +3641,7 @@ static inline int select_best_cpu(struct task_struct *p, int target,
	return 0;
}

static inline int power_cost(u64 total_load, int cpu)
unsigned int power_cost(int cpu, u64 demand)
{
	return SCHED_CAPACITY_SCALE;
}
@@ -5069,7 +5064,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
	/* Log effect on hmp stats after throttling */
	trace_sched_cpu_load(rq, idle_cpu(cpu_of(rq)),
			     sched_irqload(cpu_of(rq)),
			     power_cost_at_freq(cpu_of(rq), 0),
			     power_cost(cpu_of(rq), 0),
			     cpu_temp(cpu_of(rq)));
}

@@ -5126,7 +5121,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
	/* Log effect on hmp stats after un-throttling */
	trace_sched_cpu_load(rq, idle_cpu(cpu_of(rq)),
			     sched_irqload(cpu_of(rq)),
			     power_cost_at_freq(cpu_of(rq), 0),
			     power_cost(cpu_of(rq), 0),
			     cpu_temp(cpu_of(rq)));
}

@@ -7788,7 +7783,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,

		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i),
				     sched_irqload(i),
				     power_cost_at_freq(i, 0),
				     power_cost(i, 0),
				     cpu_temp(i));

		/* Bias balancing toward cpus of our domain */
+2 −1
Original line number Diff line number Diff line
@@ -1677,15 +1677,16 @@ static int find_lowest_rq_hmp(struct task_struct *task)
	for_each_cpu(i, lowest_mask) {
		cpu_load = scale_load_to_cpu(
			cpu_rq(i)->hmp_stats.cumulative_runnable_avg, i);
		cpu_cost = power_cost(cpu_load, i);

#ifdef CONFIG_SCHED_QHMP
		cpu_cost = power_cost(cpu_load, i);
		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), mostly_idle_cpu(i),
				     sched_irqload(i), cpu_cost, cpu_temp(i));

		if (sched_boost() && capacity(cpu_rq(i)) != max_capacity)
			continue;
#else
		cpu_cost = power_cost(i, cpu_cravg_sync(i, 0));
		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), sched_irqload(i),
						cpu_cost, cpu_temp(i));
#endif
+30 −2
Original line number Diff line number Diff line
@@ -27,9 +27,15 @@ extern __read_mostly int scheduler_running;
extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;

struct freq_max_load_entry {
	/*
	 * Maximum demand this frequency can serve once the governor's
	 * headroom percentage has been accounted for (precomputed in
	 * sched_update_freq_max_load() so power_cost() can binary-search
	 * demand directly instead of converting load to frequency).
	 */
	u64 hdemand;
};

struct freq_max_load {
	struct rcu_head rcu;
	u32 freqs[0];
	int length;
	struct freq_max_load_entry freqs[0];
};

extern DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);
@@ -1146,12 +1152,34 @@ static inline void clear_reserved(int cpu)
	clear_bit(CPU_RESERVED, &rq->hmp_flags);
}

/*
 * Return the cumulative runnable average of @cpu, optionally adjusted
 * for a sync wakeup.
 *
 * In a sync wakeup on the local CPU we may want to discount the demand
 * of the currently running task from the reported load, clamping the
 * result at zero.
 */
static inline u64 cpu_cravg_sync(int cpu, int sync)
{
	struct rq *rq = cpu_rq(cpu);
	u64 avg = rq->hmp_stats.cumulative_runnable_avg;

	if (sync && cpu == smp_processor_id()) {
		u64 curr_demand = rq->curr->ravg.demand;

		/* Discount the running task's demand, never below zero. */
		avg = (avg > curr_demand) ? (avg - curr_demand) : 0;
	}

	return avg;
}

extern void check_for_migration(struct rq *rq, struct task_struct *p);
extern void pre_big_task_count_change(const struct cpumask *cpus);
extern void post_big_task_count_change(const struct cpumask *cpus);
extern void set_hmp_defaults(void);
extern int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost);
extern unsigned int power_cost(u64 total_load, int cpu);
extern unsigned int power_cost(int cpu, u64 demand);
extern void reset_all_window_stats(u64 window_start, unsigned int window_size);
extern void boost_kick(int cpu);
extern int sched_boost(void);