Merge "Merge remote-tracking branch '318/dev/msm-3.18-sched' into msm318" (60a36113) · Commits · e / devices / android_kernel_xiaomi_markw

include/linux/sched.h

+12 −0

Original line number	Diff line number	Diff line
		@@ -1330,6 +1330,7 @@ struct task_struct {
		#endif
		struct related_thread_group *grp;
		struct list_head grp_list;
		u64 cpu_cycles;
		#endif
		#ifdef CONFIG_CGROUP_SCHED
		struct task_group *sched_task_group;
		@@ -2216,6 +2217,8 @@ extern void sched_set_cpu_cstate(int cpu, int cstate,
		int wakeup_energy, int wakeup_latency);
		extern void sched_set_cluster_dstate(const cpumask_t *cluster_cpus, int dstate,
		int wakeup_energy, int wakeup_latency);
		extern void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32
		fmax);
		#ifdef CONFIG_SCHED_QHMP
		extern int sched_set_cpu_prefer_idle(int cpu, int prefer_idle);
		extern int sched_get_cpu_prefer_idle(int cpu);
		@@ -2242,6 +2245,9 @@ static inline void sched_set_cluster_dstate(const cpumask_t *cluster_cpus,
		int dstate, int wakeup_energy, int wakeup_latency)
		{
		}

		/*
		 * No-op variant of sched_update_cpu_freq_min_max(): accepts the same
		 * arguments as the extern declaration and does nothing.
		 * NOTE(review): the preprocessor condition selecting this stub is outside
		 * the visible hunk - confirm which configuration it covers.
		 */
		static inline void
		sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax) { }
		#endif

		#ifdef CONFIG_NO_HZ_COMMON
		@@ -3222,4 +3228,10 @@ static inline unsigned long rlimit_max(unsigned int limit)
		return task_rlimit_max(current, limit);
		}

		/*
		 * Callbacks a platform driver hands to the scheduler through
		 * register_cpu_cycle_counter_cb(). Both pointers must be set; the
		 * registration function rejects a structure with either one missing.
		 */
		struct cpu_cycle_counter_cb {
		/* Read the cycle counter of @cpu; the scheduler diffs successive reads. */
		u64 (*get_cpu_cycle_counter)(int cpu);
		/*
		 * Used as the divisor when the scheduler derives a cluster's
		 * cpu_cycle_max_scale_factor; presumably the peak cycles per
		 * microsecond of @cpu - confirm against the provider.
		 */
		u32 (*get_cpu_cycles_max_per_us)(int cpu);
		};
		/* Install @cb as the scheduler's cycle counter source. */
		int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb);

		#endif

include/trace/events/sched.h

+37 −7

Original line number	Diff line number	Diff line
		@@ -206,7 +206,6 @@ DECLARE_EVENT_CLASS(sched_cpu_load,
		__field(unsigned int, capacity )
		__field( u64, cumulative_runnable_avg )
		__field( u64, irqload )
		__field(unsigned int, cur_freq )
		__field(unsigned int, max_freq )
		__field(unsigned int, power_cost )
		__field( int, cstate )
		@@ -223,7 +222,6 @@ DECLARE_EVENT_CLASS(sched_cpu_load,
		__entry->capacity = cpu_capacity(rq->cpu);
		__entry->cumulative_runnable_avg = rq->hmp_stats.cumulative_runnable_avg;
		__entry->irqload = irqload;
		__entry->cur_freq = cpu_cur_freq(rq->cpu);
		__entry->max_freq = cpu_max_freq(rq->cpu);
		__entry->power_cost = power_cost;
		__entry->cstate = rq->cstate;
		@@ -231,10 +229,10 @@ DECLARE_EVENT_CLASS(sched_cpu_load,
		__entry->temp = temp;
		),

		TP_printk("cpu %u idle %d nr_run %u nr_big %u lsf %u capacity %u cr_avg %llu irqload %llu fcur %u fmax %u power_cost %u cstate %d dstate %d temp %d",
		TP_printk("cpu %u idle %d nr_run %u nr_big %u lsf %u capacity %u cr_avg %llu irqload %llu fmax %u power_cost %u cstate %d dstate %d temp %d",
		__entry->cpu, __entry->idle, __entry->nr_running, __entry->nr_big_tasks,
		__entry->load_scale_factor, __entry->capacity,
		__entry->cumulative_runnable_avg, __entry->irqload, __entry->cur_freq,
		__entry->cumulative_runnable_avg, __entry->irqload,
		__entry->max_freq, __entry->power_cost, __entry->cstate,
		__entry->dstate, __entry->temp)
		);
		@@ -274,9 +272,9 @@ TRACE_EVENT(sched_set_boost,
		TRACE_EVENT(sched_update_task_ravg,

		TP_PROTO(struct task_struct p, struct rq rq, enum task_event evt,
		u64 wallclock, u64 irqtime),
		u64 wallclock, u64 irqtime, u32 cycles, u32 exec_time),

		TP_ARGS(p, rq, evt, wallclock, irqtime),
		TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time),

		TP_STRUCT__entry(
		__array( char, comm, TASK_COMM_LEN )
		@@ -312,7 +310,8 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->evt = evt;
		__entry->cpu = rq->cpu;
		__entry->cur_pid = rq->curr->pid;
		__entry->cur_freq = cpu_cur_freq(rq->cpu);
		__entry->cur_freq = cpu_cycles_to_freq(rq->cpu, cycles,
		exec_time);
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid = p->pid;
		__entry->mark_start = p->ravg.mark_start;
		@@ -351,6 +350,36 @@ TRACE_EVENT(sched_update_task_ravg,
		)
		);

		/*
		 * sched_get_task_cpu_cycles - records one cycle-counter sample.
		 *
		 * Logs the cpu, the triggering event, the cycle count and the execution
		 * time, plus two frequency values: "freq" computed by
		 * cpu_cycles_to_freq(cpu, cycles, exec_time) and "legacy_freq" read from
		 * cpu_cur_freq(cpu), so the two can be compared in a trace.
		 *
		 * NOTE(review): exec_time is u32 in TP_PROTO but is stored in a u64
		 * field and printed with %llu; a wider value passed by a caller is
		 * truncated at the call boundary - confirm the intended width.
		 */
		TRACE_EVENT(sched_get_task_cpu_cycles,

		TP_PROTO(int cpu, int event, u64 cycles, u32 exec_time),

		TP_ARGS(cpu, event, cycles, exec_time),

		TP_STRUCT__entry(
		__field(int, cpu )
		__field(int, event )
		__field(u64, cycles )
		__field(u64, exec_time )
		__field(u32, freq )
		__field(u32, legacy_freq )
		),

		TP_fast_assign(
		__entry->cpu = cpu;
		__entry->event = event;
		__entry->cycles = cycles;
		__entry->exec_time = exec_time;
		__entry->freq = cpu_cycles_to_freq(cpu, cycles,
		exec_time);
		__entry->legacy_freq = cpu_cur_freq(cpu);
		),

		TP_printk("cpu=%d event=%d cycles=%llu exec_time=%llu freq=%u legacy_freq=%u",
		__entry->cpu, __entry->event, __entry->cycles,
		__entry->exec_time, __entry->freq, __entry->legacy_freq)
		);

		TRACE_EVENT(sched_update_history,

		TP_PROTO(struct rq rq, struct task_struct p, u32 runtime, int samples,
		@@ -1194,6 +1223,7 @@ TRACE_EVENT(sched_get_nr_running_avg,
		TP_printk("avg=%d big_avg=%d iowait_avg=%d",
		__entry->avg, __entry->big_avg, __entry->iowait_avg)
		);

		#endif /* _TRACE_SCHED_H */

		/* This part must be outside protection */

init/Kconfig

+8 −0

Original line number	Diff line number	Diff line
		@@ -1164,6 +1164,14 @@ config SCHED_HMP
		in their instructions per-cycle capability or the maximum
		frequency they can attain.

		config SCHED_HMP_CSTATE_AWARE
		bool "CPU C-state aware scheduler"
		depends on SCHED_HMP
		help
		This feature lets the HMP scheduler take CPU C-states into account
		when placing tasks. If this is enabled, the scheduler places tasks
		onto the CPU in the shallowest C-state among the most power-efficient
		CPUs.

		config SCHED_QHMP
		bool "QHMP scheduler extensions"
		depends on SCHED_HMP

kernel/sched/core.c

+209 −62

Original line number	Diff line number	Diff line
		@@ -835,6 +835,9 @@ void sched_set_cluster_dstate(const cpumask_t *cluster_cpus, int dstate,
		static ktime_t ktime_last;
		static bool sched_ktime_suspended;

		static bool use_cycle_counter;
		static struct cpu_cycle_counter_cb cpu_cycle_counter_cb;

		u64 sched_ktime_clock(void)
		{
		if (unlikely(sched_ktime_suspended))
		@@ -1228,7 +1231,7 @@ capacity_scale_cpu_efficiency(struct sched_cluster *cluster)
		*/
		static unsigned long capacity_scale_cpu_freq(struct sched_cluster *cluster)
		{
		return (1024 * cluster->max_freq) / min_max_freq;
		return (1024 * cluster_max_freq(cluster)) / min_max_freq;
		}

		/*
		@@ -1249,7 +1252,8 @@ load_scale_cpu_efficiency(struct sched_cluster *cluster)
		*/
		static inline unsigned long load_scale_cpu_freq(struct sched_cluster *cluster)
		{
		return DIV_ROUND_UP(1024 * max_possible_freq, cluster->max_freq);
		return DIV_ROUND_UP(1024 * max_possible_freq,
		cluster_max_freq(cluster));
		}

		static int compute_capacity(struct sched_cluster *cluster)
		@@ -1315,8 +1319,10 @@ static struct sched_cluster init_cluster = {
		.load_scale_factor = 1024,
		.cur_freq = 1,
		.max_freq = 1,
		.max_mitigated_freq = UINT_MAX,
		.min_freq = 1,
		.max_possible_freq = 1,
		.cpu_cycle_max_scale_factor = 1,
		.dstate = 0,
		.dstate_wakeup_energy = 0,
		.dstate_wakeup_latency = 0,
		@@ -1463,8 +1469,10 @@ static struct sched_cluster alloc_new_cluster(const struct cpumask cpus)
		cluster->load_scale_factor = 1024;
		cluster->cur_freq = 1;
		cluster->max_freq = 1;
		cluster->max_mitigated_freq = UINT_MAX;
		cluster->min_freq = 1;
		cluster->max_possible_freq = 1;
		cluster->cpu_cycle_max_scale_factor = 1;
		cluster->dstate = 0;
		cluster->dstate_wakeup_energy = 0;
		cluster->dstate_wakeup_latency = 0;
		@@ -1529,6 +1537,44 @@ static void init_clusters(void)
		INIT_LIST_HEAD(&cluster_head);
		}

		static inline void
		__update_cpu_cycle_max_possible_freq(struct sched_cluster *cluster)
		{
		int cpu = cluster_first_cpu(cluster);

		cluster->cpu_cycle_max_scale_factor =
		div64_u64(cluster->max_possible_freq * NSEC_PER_USEC,
		cpu_cycle_counter_cb.get_cpu_cycles_max_per_us(cpu));
		}

		/*
		 * Refresh @cluster's cycle scale factor, but only once a cycle counter
		 * has been registered (use_cycle_counter is set); otherwise do nothing.
		 */
		static inline void
		update_cpu_cycle_max_possible_freq(struct sched_cluster *cluster)
		{
			if (use_cycle_counter)
				__update_cpu_cycle_max_possible_freq(cluster);
		}

		/*
		 * register_cpu_cycle_counter_cb() - install CPU cycle counter callbacks.
		 * @cb: callbacks to copy; both function pointers must be set.
		 *
		 * Copies *cb into the file-local cpu_cycle_counter_cb, recomputes
		 * cpu_cycle_max_scale_factor for every cluster and then sets
		 * use_cycle_counter, all while holding cluster_lock.
		 *
		 * Return: 0 on success, -EINVAL if @cb is NULL or either callback is
		 * missing.
		 */
		int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
		{
			struct sched_cluster *cluster = NULL;

			/*
			 * Validate the caller's argument before taking cluster_lock: the
			 * check touches only @cb, so there is no reason to hold the lock
			 * (or to need an unlock on the error path).
			 */
			if (!cb || !cb->get_cpu_cycle_counter ||
			    !cb->get_cpu_cycles_max_per_us)
				return -EINVAL;

			mutex_lock(&cluster_lock);
			cpu_cycle_counter_cb = *cb;
			for_each_sched_cluster(cluster)
				__update_cpu_cycle_max_possible_freq(cluster);
			/* Flip the switch only after callbacks and scale factors are set. */
			use_cycle_counter = true;
			mutex_unlock(&cluster_lock);

			return 0;
		}

		static int __init set_sched_enable_hmp(char *str)
		{
		int enable_hmp = 0;
		@@ -1637,8 +1683,19 @@ static inline void clear_hmp_request(int cpu) { }

		static inline void update_cluster_topology(void) {}

		/*
		 * Stub used when CONFIG_SCHED_HMP is not set: ignores @cb and always
		 * returns 0.
		 */
		int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
		{
		return 0;
		}
		#endif /* CONFIG_SCHED_HMP */

		#define SCHED_MIN_FREQ 1

		/*
		 * A (cycles, time) sample used to scale execution time.
		 * With a registered cycle counter, get_task_cpu_cycles() fills it with the
		 * counter delta and the elapsed wallclock since the task's mark_start;
		 * without one it holds cpu_cur_freq() in @cycles and 1 in @time.
		 */
		struct cpu_cycle {
		u64 cycles;
		u64 time;
		};

		#if defined(CONFIG_SCHED_HMP)

		/*
		@@ -1786,19 +1843,17 @@ update_window_start(struct rq *rq, u64 wallclock)
		rq->window_start += (u64)nr_windows * (u64)sched_ravg_window;
		}

		static inline u64 scale_exec_time(u64 delta, struct rq *rq)
		#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y)

		static inline u64 scale_exec_time(u64 delta, struct rq *rq,
		const struct cpu_cycle *cc)
		{
		int cpu = cpu_of(rq);
		unsigned int cur_freq = cpu_cur_freq(cpu);
		int sf;

		if (unlikely(cur_freq > max_possible_freq))
		cur_freq = max_possible_freq;

		/* round up div64 */
		delta = div64_u64(delta * cur_freq + max_possible_freq - 1,
		max_possible_freq);

		delta = DIV64_U64_ROUNDUP(delta * cc->cycles *
		cpu_cycle_max_scale_factor(cpu),
		max_possible_freq * cc->time);
		sf = DIV_ROUND_UP(cpu_efficiency(cpu) * 1024, max_possible_efficiency);

		delta *= sf;
		@@ -1860,7 +1915,7 @@ static int send_notification(struct rq *rq, int check_pred)
		u64 prev = rq->old_busy_time;
		u64 predicted = rq->hmp_stats.pred_demands_sum;

		if (rq->cluster->cur_freq == rq->cluster->max_freq)
		if (rq->cluster->cur_freq == cpu_max_freq(cpu_of(rq)))
		return 0;

		prev = max(prev, rq->old_estimated_time);
		@@ -2172,7 +2227,8 @@ void update_task_pred_demand(struct rq rq, struct task_struct p, int event)
		* Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
		*/
		static void update_cpu_busy_time(struct task_struct p, struct rq rq,
		int event, u64 wallclock, u64 irqtime)
		int event, u64 wallclock, u64 irqtime,
		const struct cpu_cycle *cc)
		{
		int new_window, nr_full_windows = 0;
		int p_is_curr_task = (p == rq->curr);
		@@ -2262,7 +2318,7 @@ static void update_cpu_busy_time(struct task_struct p, struct rq rq,
		delta = wallclock - mark_start;
		else
		delta = irqtime;
		delta = scale_exec_time(delta, rq);
		delta = scale_exec_time(delta, rq, cc);
		rq->curr_runnable_sum += delta;
		if (new_task)
		rq->nt_curr_runnable_sum += delta;
		@@ -2287,14 +2343,15 @@ static void update_cpu_busy_time(struct task_struct p, struct rq rq,
		if (!nr_full_windows) {
		/* A full window hasn't elapsed, account partial
		* contribution to previous completed window. */
		delta = scale_exec_time(window_start - mark_start, rq);
		delta = scale_exec_time(window_start - mark_start, rq,
		cc);
		if (!exiting_task(p))
		p->ravg.prev_window += delta;
		} else {
		/* Since at least one full window has elapsed,
		* the contribution to the previous window is the
		* full window (window_size). */
		delta = scale_exec_time(window_size, rq);
		delta = scale_exec_time(window_size, rq, cc);
		if (!exiting_task(p))
		p->ravg.prev_window = delta;
		}
		@@ -2303,7 +2360,7 @@ static void update_cpu_busy_time(struct task_struct p, struct rq rq,
		rq->nt_prev_runnable_sum += delta;

		/* Account piece of busy time in the current window. */
		delta = scale_exec_time(wallclock - window_start, rq);
		delta = scale_exec_time(wallclock - window_start, rq, cc);
		rq->curr_runnable_sum += delta;
		if (new_task)
		rq->nt_curr_runnable_sum += delta;
		@@ -2329,7 +2386,8 @@ static void update_cpu_busy_time(struct task_struct p, struct rq rq,
		if (!nr_full_windows) {
		/* A full window hasn't elapsed, account partial
		* contribution to previous completed window. */
		delta = scale_exec_time(window_start - mark_start, rq);
		delta = scale_exec_time(window_start - mark_start, rq,
		cc);
		if (!is_idle_task(p) && !exiting_task(p))
		p->ravg.prev_window += delta;

		@@ -2342,7 +2400,7 @@ static void update_cpu_busy_time(struct task_struct p, struct rq rq,
		/* Since at least one full window has elapsed,
		* the contribution to the previous window is the
		* full window (window_size). */
		delta = scale_exec_time(window_size, rq);
		delta = scale_exec_time(window_size, rq, cc);
		if (!is_idle_task(p) && !exiting_task(p))
		p->ravg.prev_window = delta;

		@@ -2360,7 +2418,7 @@ static void update_cpu_busy_time(struct task_struct p, struct rq rq,
		rq->prev_runnable_sum = delta;

		/* Account piece of busy time in the current window. */
		delta = scale_exec_time(wallclock - window_start, rq);
		delta = scale_exec_time(wallclock - window_start, rq, cc);
		rq->curr_runnable_sum = delta;
		if (new_task)
		rq->nt_curr_runnable_sum = delta;
		@@ -2392,7 +2450,8 @@ static void update_cpu_busy_time(struct task_struct p, struct rq rq,
		rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum;
		rq->nt_curr_runnable_sum = 0;
		if (mark_start > window_start) {
		rq->curr_runnable_sum = scale_exec_time(irqtime, rq);
		rq->curr_runnable_sum = scale_exec_time(irqtime, rq,
		cc);
		return;
		}

		@@ -2401,12 +2460,12 @@ static void update_cpu_busy_time(struct task_struct p, struct rq rq,
		delta = window_start - mark_start;
		if (delta > window_size)
		delta = window_size;
		delta = scale_exec_time(delta, rq);
		delta = scale_exec_time(delta, rq, cc);
		rq->prev_runnable_sum += delta;

		/* Process the remaining IRQ busy time in the current window. */
		delta = wallclock - window_start;
		rq->curr_runnable_sum = scale_exec_time(delta, rq);
		rq->curr_runnable_sum = scale_exec_time(delta, rq, cc);

		return;
		}
		@@ -2526,7 +2585,7 @@ update_task_pred_demand(struct rq rq, struct task_struct p, int event)
		}

		static inline void update_cpu_busy_time(struct task_struct p, struct rq rq,
		int event, u64 wallclock, u64 irqtime)
		int event, u64 wallclock, u64 irqtime, const struct cpu_cycle *cc)
		{
		}

		@@ -2539,6 +2598,41 @@ static inline u32 predict_and_update_buckets(struct rq *rq,

		#endif /* CONFIG_SCHED_FREQ_INPUT */

		/*
		 * Snapshot @cpu's cycle counter into p->cpu_cycles. Does nothing until a
		 * cycle counter callback has been registered.
		 */
		static void update_task_cpu_cycles(struct task_struct *p, int cpu)
		{
			if (!use_cycle_counter)
				return;

			p->cpu_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
		}

		/*
		 * get_task_cpu_cycles() - cycles and time consumed since @p's last sample.
		 * @p:         task being accounted; p->cpu_cycles is updated to the new
		 *             counter value when a cycle counter is in use
		 * @rq:        runqueue whose CPU's counter is read
		 * @event:     task event, forwarded to the tracepoint only
		 * @wallclock: current time; must not be earlier than p->ravg.mark_start
		 *
		 * Without a registered cycle counter the result is
		 * { cpu_cur_freq(cpu), 1 }. Otherwise it is the counter delta since
		 * p->cpu_cycles and the time elapsed since p->ravg.mark_start.
		 *
		 * Return: the sample by value.
		 */
		static struct cpu_cycle
		get_task_cpu_cycles(struct task_struct *p, struct rq *rq, int event,
				    u64 wallclock)
		{
			u64 cur_cycles;
			struct cpu_cycle cc;
			int cpu = cpu_of(rq);

			if (!use_cycle_counter) {
				cc.cycles = cpu_cur_freq(cpu);
				cc.time = 1;
				return cc;
			}

			cur_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
			/*
			 * u64 subtraction is modulo 2^64, so it yields the exact delta
			 * even when the counter has wrapped past U64_MAX. The previous
			 * explicit wrap branch, cur + (U64_MAX - prev), came out one
			 * cycle short.
			 */
			cc.cycles = cur_cycles - p->cpu_cycles;
			cc.time = wallclock - p->ravg.mark_start;
			BUG_ON((s64)cc.time < 0);
			/*
			 * NOTE(review): cc.time is 0 when wallclock == mark_start, and
			 * scale_exec_time() divides by max_possible_freq * cc->time -
			 * confirm callers cannot reach that case.
			 */

			p->cpu_cycles = cur_cycles;

			trace_sched_get_task_cpu_cycles(cpu, event, cc.cycles, cc.time);

			return cc;
		}

		static int account_busy_for_task_demand(struct task_struct *p, int event)
		{
		/* No need to bother updating task demand for exiting tasks
		@@ -2625,9 +2719,9 @@ done:
		}

		static void add_to_task_demand(struct rq rq, struct task_struct p,
		u64 delta)
		u64 delta, const struct cpu_cycle *cc)
		{
		delta = scale_exec_time(delta, rq);
		delta = scale_exec_time(delta, rq, cc);
		p->ravg.sum += delta;
		if (unlikely(p->ravg.sum > sched_ravg_window))
		p->ravg.sum = sched_ravg_window;
		@@ -2684,7 +2778,8 @@ static void add_to_task_demand(struct rq rq, struct task_struct p,
		* depends on it!
		*/
		static void update_task_demand(struct task_struct p, struct rq rq,
		int event, u64 wallclock)
		int event, u64 wallclock,
		const struct cpu_cycle *cc)
		{
		u64 mark_start = p->ravg.mark_start;
		u64 delta, window_start = rq->window_start;
		@@ -2707,7 +2802,7 @@ static void update_task_demand(struct task_struct p, struct rq rq,
		if (!new_window) {
		/* The simple case - busy time contained within the existing
		* window. */
		add_to_task_demand(rq, p, wallclock - mark_start);
		add_to_task_demand(rq, p, wallclock - mark_start, cc);
		return;
		}

		@@ -2718,12 +2813,12 @@ static void update_task_demand(struct task_struct p, struct rq rq,
		window_start -= (u64)nr_full_windows * (u64)window_size;

		/* Process (window_start - mark_start) first */
		add_to_task_demand(rq, p, window_start - mark_start);
		add_to_task_demand(rq, p, window_start - mark_start, cc);

		/* Push new sample(s) into task's demand history */
		update_history(rq, p, p->ravg.sum, 1, event);
		if (nr_full_windows)
		update_history(rq, p, scale_exec_time(window_size, rq),
		update_history(rq, p, scale_exec_time(window_size, rq, cc),
		nr_full_windows, event);

		/* Roll window_start back to current to process any remainder
		@@ -2732,30 +2827,39 @@ static void update_task_demand(struct task_struct p, struct rq rq,

		/* Process (wallclock - window_start) next */
		mark_start = window_start;
		add_to_task_demand(rq, p, wallclock - mark_start);
		add_to_task_demand(rq, p, wallclock - mark_start, cc);
		}

		/* Reflect task activity on its demand and cpu's busy time statistics */
		static void update_task_ravg(struct task_struct p, struct rq rq,
		int event, u64 wallclock, u64 irqtime)
		static struct cpu_cycle
		update_task_ravg(struct task_struct p, struct rq rq, int event,
		u64 wallclock, u64 irqtime)
		{
		struct cpu_cycle cc = { .cycles = SCHED_MIN_FREQ, .time = 1 };

		if (sched_use_pelt \|\| !rq->window_start \|\| sched_disable_window_stats)
		return;
		return cc;

		lockdep_assert_held(&rq->lock);

		update_window_start(rq, wallclock);

		if (!p->ravg.mark_start)
		if (!p->ravg.mark_start) {
		update_task_cpu_cycles(p, cpu_of(rq));
		goto done;
		}

		update_task_demand(p, rq, event, wallclock);
		update_cpu_busy_time(p, rq, event, wallclock, irqtime);
		cc = get_task_cpu_cycles(p, rq, event, wallclock);
		update_task_demand(p, rq, event, wallclock, &cc);
		update_cpu_busy_time(p, rq, event, wallclock, irqtime, &cc);
		update_task_pred_demand(rq, p, event);
		done:
		trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime);
		trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime,
		cc.cycles, cc.time);

		p->ravg.mark_start = wallclock;

		return cc;
		}

		void sched_account_irqtime(int cpu, struct task_struct *curr,
		@@ -2824,6 +2928,7 @@ static inline void mark_task_starting(struct task_struct *p)
		p->ravg.mark_start = p->last_wake_ts = wallclock;
		p->last_cpu_selected_ts = wallclock;
		p->last_switch_out_ts = 0;
		update_task_cpu_cycles(p, cpu_of(rq));
		}

		static inline void set_window_start(struct rq *rq)
		@@ -3041,6 +3146,7 @@ void sched_get_cpus_busy(struct sched_load *busy,
		int early_detection[cpus];
		int cpu, i = 0;
		unsigned int window_size;
		struct cpu_cycle cc;

		if (unlikely(cpus == 0))
		return;
		@@ -3059,8 +3165,10 @@ void sched_get_cpus_busy(struct sched_load *busy,
		for_each_cpu(cpu, query_cpus) {
		rq = cpu_rq(cpu);

		update_task_ravg(rq->curr, rq, TASK_UPDATE,
		cc = update_task_ravg(rq->curr, rq, TASK_UPDATE,
		sched_ktime_clock(), 0);
		cur_freq[i] = cpu_cycles_to_freq(i, cc.cycles, cc.time);

		load[i] = rq->old_busy_time = rq->prev_runnable_sum;
		nload[i] = rq->nt_prev_runnable_sum;
		pload[i] = rq->hmp_stats.pred_demands_sum;
		@@ -3078,7 +3186,6 @@ void sched_get_cpus_busy(struct sched_load *busy,
		notifier_sent[i] = rq->notifier_sent;
		early_detection[i] = (rq->ed_task != NULL);
		rq->notifier_sent = 0;
		cur_freq[i] = cpu_cur_freq(cpu);
		max_freq[i] = cpu_max_freq(cpu);
		i++;
		}
		@@ -3224,6 +3331,8 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
		update_task_ravg(p, task_rq(p), TASK_MIGRATE,
		wallclock, 0);

		update_task_cpu_cycles(p, new_cpu);

		new_task = is_new_task(p);

		if (p->ravg.curr_window) {
		@@ -3532,6 +3641,53 @@ unsigned int sched_get_group_id(struct task_struct *p)
		return group_id;
		}

		static void update_cpu_cluster_capacity(const cpumask_t *cpus)
		{
		int i;
		struct sched_cluster *cluster;
		struct cpumask cpumask;

		cpumask_copy(&cpumask, cpus);
		pre_big_task_count_change(cpu_possible_mask);

		for_each_cpu(i, &cpumask) {
		cluster = cpu_rq(i)->cluster;
		cpumask_andnot(&cpumask, &cpumask, &cluster->cpus);

		cluster->capacity = compute_capacity(cluster);
		cluster->load_scale_factor = compute_load_scale_factor(cluster);

		/* 'cpus' can contain cpumask more than one cluster */
		check_for_up_down_migrate_update(&cluster->cpus);
		}

		__update_min_max_capacity();

		post_big_task_count_change(cpu_possible_mask);
		}

		/*
		 * sched_update_cpu_freq_min_max() - apply a mitigated max frequency.
		 * @cpus: CPUs whose clusters are affected
		 * @fmin: accepted but not used by this implementation
		 * @fmax: value stored in each affected cluster's max_mitigated_freq
		 *
		 * If cpu_max_freq() changes for any affected cluster as a result, the
		 * capacities of the clusters covering @cpus are recomputed.
		 */
		void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax)
		{
			struct cpumask pending;
			struct sched_cluster *cl;
			unsigned int prev_max_freq;
			int cpu;
			int changed = 0;

			cpumask_copy(&pending, cpus);
			for_each_cpu(cpu, &pending) {
				cl = cpu_rq(cpu)->cluster;
				/* Visit each cluster once, via its first CPU in the mask. */
				cpumask_andnot(&pending, &pending, &cl->cpus);

				/* Sample before and after the write to detect a change. */
				prev_max_freq = cpu_max_freq(cpu);
				cl->max_mitigated_freq = fmax;

				if (prev_max_freq != cpu_max_freq(cpu))
					changed++;
			}

			if (changed)
				update_cpu_cluster_capacity(cpus);
		}

		static int cpufreq_notifier_policy(struct notifier_block *nb,
		unsigned long val, void *data)
		{
		@@ -3562,7 +3718,7 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
		cpumask_andnot(&policy_cluster, &policy_cluster,
		&cluster->cpus);

		orig_max_freq = cluster->max_freq;
		orig_max_freq = cpu_max_freq(i);
		cluster->min_freq = policy->min;
		cluster->max_freq = policy->max;
		cluster->cur_freq = policy->cur;
		@@ -3579,31 +3735,16 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,

		sort_clusters();
		update_all_clusters_stats();
		update_cpu_cycle_max_possible_freq(cluster);
		mutex_unlock(&cluster_lock);
		continue;
		}

		update_capacity += (orig_max_freq != policy->max);
		}

		if (!update_capacity)
		return 0;

		policy_cluster = *policy->related_cpus;
		pre_big_task_count_change(cpu_possible_mask);

		for_each_cpu(i, &policy_cluster) {
		cluster = cpu_rq(i)->cluster;
		cpumask_andnot(&policy_cluster, &policy_cluster,
		&cluster->cpus);
		cluster->capacity = compute_capacity(cluster);
		cluster->load_scale_factor = compute_load_scale_factor(cluster);
		update_capacity += (orig_max_freq != cpu_max_freq(i));
		}

		__update_min_max_capacity();

		check_for_up_down_migrate_update(policy->related_cpus);
		post_big_task_count_change(cpu_possible_mask);
		if (update_capacity)
		update_cpu_cluster_capacity(policy->related_cpus);

		return 0;
		}
		@@ -3733,10 +3874,16 @@ heavy_task_wakeup(struct task_struct p, struct rq rq, int event)
		return 0;
		}

		static inline void
		static struct cpu_cycle
		update_task_ravg(struct task_struct p, struct rq rq,
		int event, u64 wallclock, u64 irqtime)
		{
		static const struct cpu_cycle cc = {
		.cycles = SCHED_MIN_FREQ,
		.time = 1
		};

		return cc;
		}

		static inline void mark_task_starting(struct task_struct *p) {}

kernel/sched/fair.c

+91 −13

Original line number	Diff line number	Diff line
		@@ -2911,9 +2911,11 @@ struct cpu_select_env {
		};

		struct cluster_cpu_stats {
		int best_idle_cpu, best_capacity_cpu, best_cpu, best_sibling_cpu;
		int best_idle_cpu, least_loaded_cpu;
		int best_capacity_cpu, best_cpu, best_sibling_cpu;
		int min_cost, best_sibling_cpu_cost;
		u64 min_load, best_sibling_cpu_load;
		int best_cpu_cstate;
		u64 min_load, best_load, best_sibling_cpu_load;
		s64 highest_spare_capacity;
		};

		@@ -3146,22 +3148,79 @@ next_best_cluster(struct sched_cluster cluster, struct cpu_select_env env,
		return next;
		}

		static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
		struct cpu_select_env *env)
		#ifdef CONFIG_SCHED_HMP_CSTATE_AWARE
		/*
		 * C-state aware bookkeeping of the best candidate CPU seen so far.
		 *
		 * @cpu:      candidate CPU
		 * @stats:    running best-candidate state, updated in place
		 * @env:      selection environment; prev_cpu, need_idle and cpu_load are read
		 * @cpu_cost: cost of @cpu as computed by the caller
		 *
		 * With env->need_idle set, tracks the idle CPU with the lowest cstate in
		 * best_idle_cpu and the non-idle CPU with the lowest load in
		 * least_loaded_cpu. Otherwise tracks best_cpu, preferring lower cost,
		 * then lower cstate, then prev_cpu, then a load-based tie-break.
		 */
		static void __update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
		struct cpu_select_env *env, int cpu_cost)
		{
		int cpu_cstate;
		int prev_cpu = env->prev_cpu;

		cpu_cstate = cpu_rq(cpu)->cstate;

		if (env->need_idle) {
		/* min_cost is overwritten by every candidate that reaches here. */
		stats->min_cost = cpu_cost;
		if (idle_cpu(cpu)) {
		/* Idle candidate: lower cstate wins; on a tie prev_cpu wins. */
		if (cpu_cstate < stats->best_cpu_cstate \|\|
		(cpu_cstate == stats->best_cpu_cstate &&
		cpu == prev_cpu)) {
		stats->best_idle_cpu = cpu;
		stats->best_cpu_cstate = cpu_cstate;
		}
		} else {
		/* Busy candidate: lower load wins; on a tie prev_cpu wins. */
		if (env->cpu_load < stats->min_load \|\|
		(env->cpu_load == stats->min_load &&
		cpu == prev_cpu)) {
		stats->least_loaded_cpu = cpu;
		stats->min_load = env->cpu_load;
		}
		}

		return;
		}

		/* Strictly cheaper than anything seen so far: take it outright. */
		if (cpu_cost < stats->min_cost) {
		stats->min_cost = cpu_cost;
		stats->best_cpu_cstate = cpu_cstate;
		stats->best_load = env->cpu_load;
		stats->best_cpu = cpu;
		return;
		}

		/* CPU cost is the same. Start breaking the tie by C-state */

		if (cpu_cstate > stats->best_cpu_cstate)
		return;

		if (cpu_cstate < stats->best_cpu_cstate) {
		stats->best_cpu_cstate = cpu_cstate;
		stats->best_load = env->cpu_load;
		stats->best_cpu = cpu;
		return;
		}

		/* C-state is the same. Use prev CPU to break the tie */
		if (cpu == prev_cpu) {
		/* Note: best_load is left unchanged on this path. */
		stats->best_cpu = cpu;
		return;
		}

		/*
		 * Neither this CPU nor the current best is prev_cpu: with cstate 0 the
		 * lower-loaded CPU replaces the best, with cstate > 0 the higher-loaded
		 * one does.
		 */
		if (stats->best_cpu != prev_cpu &&
		((cpu_cstate == 0 && env->cpu_load < stats->best_load) \|\|
		(cpu_cstate > 0 && env->cpu_load > stats->best_load))) {
		stats->best_load = env->cpu_load;
		stats->best_cpu = cpu;
		}
		}
		#else /* CONFIG_SCHED_HMP_CSTATE_AWARE */
		static void __update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
		struct cpu_select_env *env, int cpu_cost)
		{
		int prev_cpu = env->prev_cpu;

		if (cpu != prev_cpu && cpus_share_cache(prev_cpu, cpu)) {
		if (stats->best_sibling_cpu_cost > cpu_cost \|\|
		(stats->best_sibling_cpu_cost == cpu_cost &&
		stats->best_sibling_cpu_load > env->cpu_load)) {

		stats->best_sibling_cpu_cost = cpu_cost;
		stats->best_sibling_cpu_load = env->cpu_load;
		stats->best_sibling_cpu = cpu;
		@@ -3169,8 +3228,8 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
		}

		if ((cpu_cost < stats->min_cost) \|\|
		((stats->best_cpu != prev_cpu && stats->min_load > env->cpu_load) \|\|
		cpu == prev_cpu)) {
		((stats->best_cpu != prev_cpu &&
		stats->min_load > env->cpu_load) \|\| cpu == prev_cpu)) {
		if (env->need_idle) {
		if (idle_cpu(cpu)) {
		stats->min_cost = cpu_cost;
		@@ -3183,6 +3242,18 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
		}
		}
		}
		#endif

		/*
		 * Compute the power cost of running env->p on @cpu and, unless it exceeds
		 * the best cost recorded in @stats, hand the candidate to
		 * __update_cluster_stats() for detailed comparison.
		 */
		static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
						 struct cpu_select_env *env)
		{
			int cost = power_cost(cpu, task_load(env->p) +
						   cpu_cravg_sync(cpu, env->sync));

			/* More expensive than the current best: not a candidate. */
			if (cost > stats->min_cost)
				return;

			__update_cluster_stats(cpu, stats, env, cost);
		}

		static void find_best_cpu_in_cluster(struct sched_cluster *c,
		struct cpu_select_env env, struct cluster_cpu_stats stats)
		@@ -3224,6 +3295,9 @@ static inline void init_cluster_cpu_stats(struct cluster_cpu_stats *stats)
		stats->min_cost = stats->best_sibling_cpu_cost = INT_MAX;
		stats->min_load = stats->best_sibling_cpu_load = ULLONG_MAX;
		stats->highest_spare_capacity = 0;
		stats->least_loaded_cpu = -1;
		stats->best_cpu_cstate = INT_MAX;
		/* No need to initialize stats->best_load */
		}

		/*
		@@ -3390,8 +3464,11 @@ retry:

		} while ((cluster = next_best_cluster(cluster, &env, &stats)));

		if (stats.best_idle_cpu >= 0) {
		if (env.need_idle) {
		if (stats.best_idle_cpu >= 0)
		target = stats.best_idle_cpu;
		else if (stats.least_loaded_cpu >= 0)
		target = stats.least_loaded_cpu;
		} else if (stats.best_cpu >= 0) {
		if (stats.best_cpu != task_cpu(p) &&
		stats.min_cost == stats.best_sibling_cpu_cost)
		@@ -4076,6 +4153,7 @@ void init_new_task_load(struct task_struct *p)

		p->init_load_pct = 0;
		memset(&p->ravg, 0, sizeof(struct ravg));
		p->cpu_cycles = 0;
		p->se.avg.decay_count = 0;
		rcu_assign_pointer(p->grp, NULL);
		INIT_LIST_HEAD(&p->grp_list);