Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bcc5677f authored by Joonwoo Park
Browse files

sched: precompute required frequency for CPU load



At present, in order to estimate the power cost of CPU load, the HMP
scheduler converts CPU load to the corresponding frequency on the fly,
which can be avoided.

Optimize and reduce the execution time of select_best_cpu() by precomputing
the CPU load to frequency conversion.  This optimization reduces the
execution time of select_best_cpu() by about 20% on average.

Change-Id: I385c57f2ea9a50883b76ba6ca3deb673b827217f
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
parent bad0cb41
Loading
Loading
Loading
Loading
+31 −9
Original line number Diff line number Diff line
@@ -1732,19 +1732,26 @@ u32 __weak get_freq_max_load(int cpu, u32 freq)
}

DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);
static DEFINE_SPINLOCK(freq_max_load_lock);

int sched_update_freq_max_load(const cpumask_t *cpumask)
{
	int i, cpu, ret;
	unsigned int freq, max;
	unsigned int freq;
	struct cpu_pstate_pwr *costs;
	struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
	struct freq_max_load *max_load, *old_max_load;
	struct freq_max_load_entry *entry;
	u64 max_demand_capacity, max_demand;
	unsigned long flags;
	u32 hfreq;
	int hpct;

	if (!per_cpu_info || !sysctl_sched_enable_power_aware)
		return 0;

	mutex_lock(&policy_mutex);
	spin_lock_irqsave(&freq_max_load_lock, flags);
	max_demand_capacity = div64_u64(max_task_load(), max_possible_capacity);
	for_each_cpu(cpu, cpumask) {
		if (!per_cpu_info[cpu].ptable) {
			ret = -EINVAL;
@@ -1755,24 +1762,35 @@ int sched_update_freq_max_load(const cpumask_t *cpumask)

		/*
		 * allocate len + 1 and leave the last power cost as 0 for
		 * power_cost_at_freq() can stop iterating index when
		 * power_cost() can stop iterating index when
		 * per_cpu_info[cpu].len > len of max_load due to race between
		 * cpu power stats update and get_cpu_pwr_stats().
		 */
		max_load = kzalloc(sizeof(struct freq_max_load) +
				   sizeof(u32) * (per_cpu_info[cpu].len + 1),
				   GFP_ATOMIC);
				   sizeof(struct freq_max_load_entry) *
				   (per_cpu_info[cpu].len + 1), GFP_ATOMIC);
		if (unlikely(!max_load)) {
			ret = -ENOMEM;
			goto fail;
		}

		max_load->length = per_cpu_info[cpu].len;

		max_demand = max_demand_capacity *
			     cpu_rq(cpu)->max_possible_capacity;

		i = 0;
		costs = per_cpu_info[cpu].ptable;
		while (costs[i].freq) {
			entry = &max_load->freqs[i];
			freq = costs[i].freq;
			max = get_freq_max_load(cpu, freq);
			max_load->freqs[i] = div64_u64((u64)freq * max, 100);
			hpct = get_freq_max_load(cpu, freq);
			if (hpct <= 0 && hpct > 100)
				hpct = 100;
			hfreq = div64_u64((u64)freq * hpct , 100);
			entry->hdemand =
			    div64_u64(max_demand * hfreq,
				      cpu_rq(cpu)->max_possible_freq);
			i++;
		}

@@ -1781,7 +1799,7 @@ int sched_update_freq_max_load(const cpumask_t *cpumask)
			kfree_rcu(old_max_load, rcu);
	}

	mutex_unlock(&policy_mutex);
	spin_unlock_irqrestore(&freq_max_load_lock, flags);
	return 0;

fail:
@@ -1793,7 +1811,7 @@ fail:
		}
	}

	mutex_unlock(&policy_mutex);
	spin_unlock_irqrestore(&freq_max_load_lock, flags);
	return ret;
}
#else	/* CONFIG_SCHED_FREQ_INPUT */
@@ -2451,6 +2469,8 @@ int sched_set_window(u64 window_start, unsigned int window_size)

	reset_all_window_stats(ws, window_size);

	sched_update_freq_max_load(cpu_possible_mask);

	mutex_unlock(&policy_mutex);

	return 0;
@@ -2757,6 +2777,8 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
	if (update_max) {
		max_possible_capacity = highest_mpc;
		max_load_scale_factor = highest_mplsf;

		sched_update_freq_max_load(cpu_possible_mask);
	}

	__update_min_max_capacity();
+55 −60
Original line number Diff line number Diff line
@@ -2654,24 +2654,7 @@ static inline u64 cpu_load(int cpu)

static inline u64 cpu_load_sync(int cpu, int sync)
{
	struct rq *rq = cpu_rq(cpu);
	u64 load;

	load = rq->hmp_stats.cumulative_runnable_avg;

	/*
	 * If load is being checked in a sync wakeup environment,
	 * we may want to discount the load of the currently running
	 * task.
	 */
	if (sync && cpu == smp_processor_id()) {
		if (load > rq->curr->ravg.demand)
			load -= rq->curr->ravg.demand;
		else
			load = 0;
	}

	return scale_load_to_cpu(load, cpu);
	return scale_load_to_cpu(cpu_cravg_sync(cpu, sync), cpu);
}

static int
@@ -2819,12 +2802,20 @@ int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost)
	return abs(delta) > cost_limit;
}

static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
/*
 * Return the cost of running task p on CPU cpu. This function
 * currently assumes that task p is the only task which will run on
 * the CPU.
 */
unsigned int power_cost(int cpu, u64 demand)
{
	int i = 0;
	int first, mid, last;
	struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
	struct cpu_pstate_pwr *costs;
	struct freq_max_load *max_load;
	int total_static_pwr_cost = 0;
	struct rq *rq = cpu_rq(cpu);
	unsigned int pc;

	if (!per_cpu_info || !per_cpu_info[cpu].ptable ||
	    !sysctl_sched_enable_power_aware)
@@ -2833,49 +2824,52 @@ static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
		 * capacity as a rough stand-in for real CPU power
		 * numbers, assuming bigger CPUs are more power
		 * hungry. */
		return cpu_rq(cpu)->max_possible_capacity;

	costs = per_cpu_info[cpu].ptable;
		return rq->max_possible_capacity;

	rcu_read_lock();
	max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
	while (costs[i].freq != 0) {
		if (costs[i+1].freq == 0 ||
		    (costs[i].freq >= freq &&
		     (!max_load || max_load->freqs[i] >= freq))) {
			rcu_read_unlock();
			return costs[i].power;
		}
		i++;
	if (!max_load) {
		pc = rq->max_possible_capacity;
		goto unlock;
	}
	rcu_read_unlock();
	BUG();

	costs = per_cpu_info[cpu].ptable;

	if (demand <= max_load->freqs[0].hdemand) {
		pc = costs[0].power;
		goto unlock;
	} else if (demand > max_load->freqs[max_load->length - 1].hdemand) {
		pc = costs[max_load->length - 1].power;
		goto unlock;
	}

/* Return the cost of running the total task load total_load on CPU cpu. */
unsigned int power_cost(u64 total_load, int cpu)
{
	unsigned int task_freq;
	struct rq *rq = cpu_rq(cpu);
	u64 demand;
	int total_static_pwr_cost = 0;
	first = 0;
	last = max_load->length - 1;
	mid = (last - first) >> 1;
	while (1) {
		if (demand <= max_load->freqs[mid].hdemand)
			last = mid;
		else
			first = mid;

	if (!sysctl_sched_enable_power_aware)
		return rq->max_possible_capacity;
		if (last - first == 1)
			break;
		mid = first + ((last - first) >> 1);
	}

	/* calculate % of max freq needed */
	demand = total_load * 100;
	demand = div64_u64(demand, max_task_load());
	pc = costs[last].power;

	task_freq = demand * rq->max_possible_freq;
	task_freq /= 100; /* khz needed */
unlock:
	rcu_read_unlock();

	if (idle_cpu(cpu) && rq->cstate) {
		total_static_pwr_cost += rq->static_cpu_pwr_cost;
		if (rq->dstate)
			total_static_pwr_cost += rq->static_cluster_pwr_cost;
	}
	return power_cost_at_freq(cpu, task_freq) + total_static_pwr_cost;

	return pc + total_static_pwr_cost;

}

#define UP_MIGRATION		1
@@ -2908,8 +2902,7 @@ static int skip_freq_domain(struct rq *task_rq, struct rq *rq, int reason)
	return skip;
}

static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu,
		    u64 task_load, int reason)
static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu, int reason)
{
	int skip;

@@ -2970,8 +2963,9 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
		struct rq *rq = cpu_rq(i);

		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), sched_irqload(i),
		 power_cost(scale_load_to_cpu(task_load(p) +
		 cpu_load_sync(i, sync), i), i), cpu_temp(i));
				    power_cost(i, task_load(p) +
					       cpu_cravg_sync(i, sync)),
				    cpu_temp(i));

		if (skip_freq_domain(trq, rq, reason)) {
			cpumask_andnot(&search_cpus, &search_cpus,
@@ -2979,8 +2973,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
			continue;
		}

		tload =  scale_load_to_cpu(task_load(p), i);
		if (skip_cpu(trq, rq, i, tload, reason))
		if (skip_cpu(trq, rq, i, reason))
			continue;

		cpu_load = cpu_load_sync(i, sync);
@@ -2999,6 +2992,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
		if (boost)
			continue;

		tload = scale_load_to_cpu(task_load(p), i);
		if (!eligible_cpu(tload, cpu_load, i, sync) ||
					!task_load_will_fit(p, tload, i))
			continue;
@@ -3008,7 +3002,8 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
		 * under spill.
		 */

		cpu_cost = power_cost(tload + cpu_load, i);
		cpu_cost = power_cost(i, task_load(p) +
					 cpu_cravg_sync(i, sync));

		if (cpu_cost > min_cost)
			continue;
@@ -3646,7 +3641,7 @@ static inline int select_best_cpu(struct task_struct *p, int target,
	return 0;
}

static inline int power_cost(u64 total_load, int cpu)
unsigned int power_cost(int cpu, u64 demand)
{
	return SCHED_CAPACITY_SCALE;
}
@@ -5069,7 +5064,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
	/* Log effect on hmp stats after throttling */
	trace_sched_cpu_load(rq, idle_cpu(cpu_of(rq)),
			     sched_irqload(cpu_of(rq)),
			     power_cost_at_freq(cpu_of(rq), 0),
			     power_cost(cpu_of(rq), 0),
			     cpu_temp(cpu_of(rq)));
}

@@ -5126,7 +5121,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
	/* Log effect on hmp stats after un-throttling */
	trace_sched_cpu_load(rq, idle_cpu(cpu_of(rq)),
			     sched_irqload(cpu_of(rq)),
			     power_cost_at_freq(cpu_of(rq), 0),
			     power_cost(cpu_of(rq), 0),
			     cpu_temp(cpu_of(rq)));
}

@@ -7788,7 +7783,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,

		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i),
				     sched_irqload(i),
				     power_cost_at_freq(i, 0),
				     power_cost(i, 0),
				     cpu_temp(i));

		/* Bias balancing toward cpus of our domain */
+2 −1
Original line number Diff line number Diff line
@@ -1677,15 +1677,16 @@ static int find_lowest_rq_hmp(struct task_struct *task)
	for_each_cpu(i, lowest_mask) {
		cpu_load = scale_load_to_cpu(
			cpu_rq(i)->hmp_stats.cumulative_runnable_avg, i);
		cpu_cost = power_cost(cpu_load, i);

#ifdef CONFIG_SCHED_QHMP
		cpu_cost = power_cost(cpu_load, i);
		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), mostly_idle_cpu(i),
				     sched_irqload(i), cpu_cost, cpu_temp(i));

		if (sched_boost() && capacity(cpu_rq(i)) != max_capacity)
			continue;
#else
		cpu_cost = power_cost(i, cpu_cravg_sync(i, 0));
		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), sched_irqload(i),
						cpu_cost, cpu_temp(i));
#endif
+30 −2
Original line number Diff line number Diff line
@@ -27,9 +27,15 @@ extern __read_mostly int scheduler_running;
extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;

struct freq_max_load_entry {
	/*
	 * Maximum demand this frequency can serve once the governor's
	 * headroom percentage has been accounted for (precomputed in
	 * sched_update_freq_max_load() so power_cost() can binary-search
	 * demand directly instead of converting load to frequency).
	 */
	u64 hdemand;
};

struct freq_max_load {
	struct rcu_head rcu;
	u32 freqs[0];
	int length;
	struct freq_max_load_entry freqs[0];
};

extern DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);
@@ -1146,12 +1152,34 @@ static inline void clear_reserved(int cpu)
	clear_bit(CPU_RESERVED, &rq->hmp_flags);
}

/*
 * Return the cumulative runnable average of @cpu, optionally adjusted
 * for a sync wakeup.
 *
 * In a sync wakeup on the local CPU we may want to discount the demand
 * of the currently running task from the reported load, clamping the
 * result at zero.
 */
static inline u64 cpu_cravg_sync(int cpu, int sync)
{
	struct rq *rq = cpu_rq(cpu);
	u64 avg = rq->hmp_stats.cumulative_runnable_avg;

	if (sync && cpu == smp_processor_id()) {
		u64 curr_demand = rq->curr->ravg.demand;

		/* Discount the running task's demand, never below zero. */
		avg = (avg > curr_demand) ? (avg - curr_demand) : 0;
	}

	return avg;
}

extern void check_for_migration(struct rq *rq, struct task_struct *p);
extern void pre_big_task_count_change(const struct cpumask *cpus);
extern void post_big_task_count_change(const struct cpumask *cpus);
extern void set_hmp_defaults(void);
extern int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost);
extern unsigned int power_cost(u64 total_load, int cpu);
extern unsigned int power_cost(int cpu, u64 demand);
extern void reset_all_window_stats(u64 window_start, unsigned int window_size);
extern void boost_kick(int cpu);
extern int sched_boost(void);