Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bcd02521 authored by Joonwoo Park's avatar Joonwoo Park
Browse files

sched: take into account the governor's frequency max load



At present the HMP scheduler packs tasks onto a busy CPU until that CPU's
load reaches 100%, to avoid waking up an idle CPU as much as possible.
Such aggressive packing leads to unintended CPU frequency raises, since the
governor raises the busy CPU's frequency once its load exceeds the
configured frequency max load, which can be less than 100%.

Fix this by taking the governor's frequency max load into account and
packing tasks only when the CPU's projected load is less than the max
load, so as to avoid unnecessary frequency raises.

Change-Id: I4447e5e0c2fa5214ae7a9128f04fd7585ed0dcac
Signed-off-by: default avatarJoonwoo Park <joonwoop@codeaurora.org>
parent 630fdcc6
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -1966,6 +1966,7 @@ extern unsigned long sched_get_busy(int cpu);
extern void sched_get_cpus_busy(unsigned long *busy,
				const struct cpumask *query_cpus);
extern void sched_set_io_is_busy(int val);
int sched_update_freq_max_load(const cpumask_t *cpumask);
#else
static inline int sched_set_window(u64 window_start, unsigned int window_size)
{
@@ -1978,6 +1979,11 @@ static inline unsigned long sched_get_busy(int cpu)
static inline void sched_get_cpus_busy(unsigned long *busy,
				const struct cpumask *query_cpus) {};
static inline void sched_set_io_is_busy(int val) {};

/* No-op stub when the scheduler frequency-input support is compiled out. */
static inline int sched_update_freq_max_load(const cpumask_t *cpumask)
{
	return 0;
}
#endif

/*
+94 −1
Original line number Diff line number Diff line
@@ -1206,7 +1206,6 @@ __read_mostly int sysctl_sched_freq_inc_notify = 10 * 1024 * 1024; /* + 10GHz */
__read_mostly int sysctl_sched_freq_dec_notify = 10 * 1024 * 1024; /* - 10GHz */

static __read_mostly unsigned int sched_io_is_busy;

#endif	/* CONFIG_SCHED_FREQ_INPUT */

/* 1 -> use PELT based load stats, 0 -> use window-based load stats */
@@ -1631,6 +1630,78 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,

	BUG();
}

/*
 * Weak default for the governor hook: report that every frequency may be
 * loaded up to 100%.  Governors that enforce a lower max load override this.
 */
u32 __weak get_freq_max_load(int cpu, u32 freq)
{
	/* 100% by default */
	return 100;
}

/* Per-CPU table of governor-scaled frequency caps, published via RCU. */
DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);

/*
 * Rebuild the freq_max_load table for every CPU in @cpumask.
 *
 * For each frequency in the CPU's power table, store
 * freq * get_freq_max_load(cpu, freq) / 100 — i.e. the effective load
 * ceiling below which the governor will not raise frequency.
 *
 * Returns 0 on success (also when power stats or power-aware scheduling
 * are unavailable).  On failure (-EINVAL if a CPU lacks a power table,
 * -ENOMEM on allocation failure), the tables of ALL CPUs in @cpumask are
 * dropped before the error is returned.
 */
int sched_update_freq_max_load(const cpumask_t *cpumask)
{
	int i, cpu, ret;
	unsigned int freq, max;
	struct cpu_pstate_pwr *costs;
	struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
	struct freq_max_load *max_load, *old_max_load;

	/* Nothing to cache without power data or power-aware scheduling. */
	if (!per_cpu_info || !sysctl_sched_enable_power_aware)
		return 0;

	/* policy_mutex serializes concurrent table updates. */
	mutex_lock(&policy_mutex);
	for_each_cpu(cpu, cpumask) {
		if (!per_cpu_info[cpu].ptable) {
			ret = -EINVAL;
			goto fail;
		}

		old_max_load = rcu_dereference(per_cpu(freq_max_load, cpu));

		/*
		 * Allocate len + 1 entries and leave the last one zeroed so
		 * that power_cost_at_freq() stops iterating even if
		 * per_cpu_info[cpu].len grows beyond this table's length,
		 * due to a race between a cpu power stats update and
		 * get_cpu_pwr_stats().
		 */
		max_load = kzalloc(sizeof(struct freq_max_load) +
				   sizeof(u32) * (per_cpu_info[cpu].len + 1),
				   GFP_ATOMIC);
		if (unlikely(!max_load)) {
			ret = -ENOMEM;
			goto fail;
		}

		i = 0;
		costs = per_cpu_info[cpu].ptable;
		while (costs[i].freq) {
			freq = costs[i].freq;
			max = get_freq_max_load(cpu, freq);
			/* Cap = frequency scaled by the governor's max-load %. */
			max_load->freqs[i] = div64_u64((u64)freq * max, 100);
			i++;
		}

		/* Publish new table; old one is freed after readers finish. */
		rcu_assign_pointer(per_cpu(freq_max_load, cpu), max_load);
		if (old_max_load)
			kfree_rcu(old_max_load, rcu);
	}

	mutex_unlock(&policy_mutex);
	return 0;

fail:
	/* Drop tables for every CPU in the mask, not just those updated. */
	for_each_cpu(cpu, cpumask) {
		max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
		if (max_load) {
			rcu_assign_pointer(per_cpu(freq_max_load, cpu), NULL);
			kfree_rcu(max_load, rcu);
		}
	}

	mutex_unlock(&policy_mutex);
	return ret;
}
#else	/* CONFIG_SCHED_FREQ_INPUT */

static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
@@ -2601,6 +2672,17 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,
	return 0;
}

/*
 * Called when a CPU's power stats become available: recompute the
 * freq_max_load table for just that CPU.
 */
static int pwr_stats_ready_notifier(struct notifier_block *nb,
				    unsigned long cpu, void *data)
{
	cpumask_t single_cpu = CPU_MASK_NONE;

	cpumask_set_cpu(cpu, &single_cpu);
	sched_update_freq_max_load(&single_cpu);

	return 0;
}

/* cpufreq policy-change notifier descriptor. */
static struct notifier_block notifier_policy_block = {
	.notifier_call = cpufreq_notifier_policy
};
@@ -2609,6 +2691,15 @@ static struct notifier_block notifier_trans_block = {
	.notifier_call = cpufreq_notifier_trans
};

/* CPU power-stats-ready notifier descriptor. */
static struct notifier_block notifier_pwr_stats_ready = {
	.notifier_call = pwr_stats_ready_notifier
};

/*
 * Weak stub for platforms without a power stats driver: registration
 * fails with -EINVAL (the caller treats registration as best-effort).
 */
int __weak register_cpu_pwr_stats_ready_notifier(struct notifier_block *nb)
{
	return -EINVAL;
}

static int register_sched_callback(void)
{
	int ret;
@@ -2623,6 +2714,8 @@ static int register_sched_callback(void)
		ret = cpufreq_register_notifier(&notifier_trans_block,
						CPUFREQ_TRANSITION_NOTIFIER);

	register_cpu_pwr_stats_ready_notifier(&notifier_pwr_stats_ready);

	return 0;
}

+9 −2
Original line number Diff line number Diff line
@@ -2830,6 +2830,7 @@ static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
	int i = 0;
	struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
	struct cpu_pstate_pwr *costs;
	struct freq_max_load *max_load;

	if (!per_cpu_info || !per_cpu_info[cpu].ptable ||
	    !sysctl_sched_enable_power_aware)
@@ -2842,12 +2843,18 @@ static unsigned int power_cost_at_freq(int cpu, unsigned int freq)

	costs = per_cpu_info[cpu].ptable;

	rcu_read_lock();
	max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
	while (costs[i].freq != 0) {
		if (costs[i].freq >= freq ||
		    costs[i+1].freq == 0)
		if (costs[i+1].freq == 0 ||
		    (costs[i].freq >= freq &&
		     (!max_load || max_load->freqs[i] >= freq))) {
			rcu_read_unlock();
			return costs[i].power;
		}
		i++;
	}
	rcu_read_unlock();
	BUG();
}

+7 −0
Original line number Diff line number Diff line
@@ -25,6 +25,13 @@ extern __read_mostly int scheduler_running;
extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;

/*
 * Per-CPU table of governor-scaled frequency caps; published and read
 * via RCU, one freqs[] entry per row of the CPU's power table.
 */
struct freq_max_load {
	struct rcu_head rcu;	/* for deferred free via kfree_rcu() */
	u32 freqs[0];		/* trailing variable-length array */
};

extern DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);

extern long calc_load_fold_active(struct rq *this_rq);
extern void update_cpu_load_active(struct rq *this_rq);