Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6e35cb2f authored by Pavankumar Kondeti's avatar Pavankumar Kondeti Committed by Junjie Wu
Browse files

sched: Add separate load tracking histogram to predict loads



Current window based load tracking only saves history for five
windows. A historically heavy task's heavy load will be completely
forgotten after five windows of light load. Even before the five
windows expire, a heavy task that wakes up on the same CPU it used to
run on won't trigger any frequency change until the end of the window,
so it would starve for the entire window. It also adds one "small"
load window to history because it is accumulating load at a low
frequency, further reducing the tracked load for this heavy task.

Ideally, scheduler should be able to identify such tasks and notify
governor to increase frequency immediately after it wakes up.

Add a histogram for each task to track a much longer load history. A
prediction will be made based on the runtime of the previous or current
window, histogram data and load tracked in recent windows. Predictions
of all tasks that are currently running or runnable on a CPU are
aggregated and reported to the CPUFreq governor in sched_get_cpus_busy().

sched_get_cpus_busy() now returns predicted busy time in addition
to previous window busy time and new task busy time, scaled to
the CPU maximum possible frequency.

Tunables:

- /proc/sys/kernel/sched_pred_alert_freq (KHz)

This tunable can be used to further filter the notifications.
A frequency alert notification is sent only when the predicted
load exceeds the previous window load by sched_pred_alert_freq
converted to load.

Change-Id: If29098cd2c5499163ceaff18668639db76ee8504
Suggested-by: default avatarSaravana Kannan <skannan@codeaurora.org>
Signed-off-by: default avatarPavankumar Kondeti <pkondeti@codeaurora.org>
Signed-off-by: default avatarJoonwoo Park <joonwoop@codeaurora.org>
Signed-off-by: default avatarJunjie Wu <junjiew@codeaurora.org>
parent ee098af9
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -1137,6 +1137,7 @@ struct sched_statistics {
#endif

#define RAVG_HIST_SIZE_MAX  5
#define NUM_BUSY_BUCKETS 10

/* ravg represents frequency scaled cpu-demand of tasks */
struct ravg {
@@ -1161,6 +1162,11 @@ struct ravg {
	 *
	 * 'prev_window' represents task's contribution to cpu busy time
	 * statistics (rq->prev_runnable_sum) in previous window
	 *
	 * 'pred_demand' represents task's current predicted cpu busy time
	 *
	 * 'busy_buckets' groups historical busy time into different buckets
	 * used for prediction
	 */
	u64 mark_start;
	u32 sum, demand;
@@ -1168,6 +1174,8 @@ struct ravg {
#ifdef CONFIG_SCHED_FREQ_INPUT
	u32 curr_window, prev_window;
	u16 active_windows;
	u32 pred_demand;
	u8 busy_buckets[NUM_BUSY_BUCKETS];
#endif
};

@@ -1972,6 +1980,7 @@ extern int task_free_unregister(struct notifier_block *n);
struct sched_load {
	unsigned long prev_load;
	unsigned long new_task_load;
	unsigned long predicted_load;
};

#if defined(CONFIG_SCHED_FREQ_INPUT)
+1 −0
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@ extern unsigned int sysctl_sched_enable_colocation;
extern unsigned int sysctl_sched_restrict_cluster_spill;
#if defined(CONFIG_SCHED_FREQ_INPUT)
extern unsigned int sysctl_sched_new_task_windows;
extern unsigned int sysctl_sched_pred_alert_freq;
#endif
#endif

+92 −14
Original line number Diff line number Diff line
@@ -77,6 +77,9 @@ TRACE_EVENT(sched_enq_deq_task,
		__field(unsigned int,	cpus_allowed		)
#ifdef CONFIG_SCHED_HMP
		__field(unsigned int,	demand			)
#ifdef CONFIG_SCHED_FREQ_INPUT
		__field(unsigned int,	pred_demand		)
#endif
#endif
	),

@@ -92,12 +95,18 @@ TRACE_EVENT(sched_enq_deq_task,
		__entry->cpus_allowed	= cpus_allowed;
#ifdef CONFIG_SCHED_HMP
		__entry->demand		= p->ravg.demand;
#ifdef CONFIG_SCHED_FREQ_INPUT
		__entry->pred_demand	= p->ravg.pred_demand;
#endif
#endif
	),

	TP_printk("cpu=%d %s comm=%s pid=%d prio=%d nr_running=%u cpu_load=%lu rt_nr_running=%u affine=%x"
#ifdef CONFIG_SCHED_HMP
		 " demand=%u"
#ifdef CONFIG_SCHED_FREQ_INPUT
		 " pred_demand=%u"
#endif
#endif
			, __entry->cpu,
			__entry->enqueue ? "enqueue" : "dequeue",
@@ -106,6 +115,9 @@ TRACE_EVENT(sched_enq_deq_task,
			__entry->cpu_load, __entry->rt_nr_running, __entry->cpus_allowed
#ifdef CONFIG_SCHED_HMP
			, __entry->demand
#ifdef CONFIG_SCHED_FREQ_INPUT
			, __entry->pred_demand
#endif
#endif
			)
);
@@ -282,6 +294,7 @@ TRACE_EVENT(sched_update_task_ravg,
		__field(unsigned int,	sum			)
		__field(	 int,	cpu			)
#ifdef CONFIG_SCHED_FREQ_INPUT
		__field(unsigned int,	pred_demand		)
		__field(	u64,	cs			)
		__field(	u64,	ps			)
		__field(	u32,	curr_window		)
@@ -308,6 +321,7 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->sum            = p->ravg.sum;
		__entry->irqtime        = irqtime;
#ifdef CONFIG_SCHED_FREQ_INPUT
		__entry->pred_demand     = p->ravg.pred_demand;
		__entry->cs             = rq->curr_runnable_sum;
		__entry->ps             = rq->prev_runnable_sum;
		__entry->curr_window	= p->ravg.curr_window;
@@ -320,7 +334,7 @@ TRACE_EVENT(sched_update_task_ravg,

	TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
#ifdef CONFIG_SCHED_FREQ_INPUT
		" cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
		" pred_demand %u cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
#endif
		, __entry->wallclock, __entry->win_start, __entry->delta,
		task_event_names[__entry->evt], __entry->cpu,
@@ -329,8 +343,8 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->delta_m, __entry->demand,
		__entry->sum, __entry->irqtime
#ifdef CONFIG_SCHED_FREQ_INPUT
		, __entry->cs, __entry->ps, __entry->curr_window,
		  __entry->prev_window,
		, __entry->pred_demand, __entry->cs, __entry->ps,
		__entry->curr_window, __entry->prev_window,
		  __entry->nt_cs, __entry->nt_ps,
		  __entry->active_windows
#endif
@@ -351,6 +365,9 @@ TRACE_EVENT(sched_update_history,
		__field(	 int,	samples			)
		__field(enum task_event,	evt		)
		__field(unsigned int,	demand			)
#ifdef CONFIG_SCHED_FREQ_INPUT
		__field(unsigned int,	pred_demand		)
#endif
		__array(	 u32,	hist, RAVG_HIST_SIZE_MAX)
		__field(unsigned int,	nr_big_tasks		)
		__field(	 int,	cpu			)
@@ -363,18 +380,29 @@ TRACE_EVENT(sched_update_history,
		__entry->samples        = samples;
		__entry->evt            = evt;
		__entry->demand         = p->ravg.demand;
#ifdef CONFIG_SCHED_FREQ_INPUT
		__entry->pred_demand     = p->ravg.pred_demand;
#endif
		memcpy(__entry->hist, p->ravg.sum_history,
					RAVG_HIST_SIZE_MAX * sizeof(u32));
		__entry->nr_big_tasks   = rq->hmp_stats.nr_big_tasks;
		__entry->cpu            = rq->cpu;
	),

	TP_printk("%d (%s): runtime %u samples %d event %s demand %u (hist: %u %u %u %u %u) cpu %d nr_big %u",
	TP_printk("%d (%s): runtime %u samples %d event %s demand %u"
#ifdef CONFIG_SCHED_FREQ_INPUT
		" pred_demand %u"
#endif
		" (hist: %u %u %u %u %u) cpu %d nr_big %u",
		__entry->pid, __entry->comm,
		__entry->runtime, __entry->samples,
		task_event_names[__entry->evt],
		__entry->demand, __entry->hist[0],
		__entry->hist[1], __entry->hist[2], __entry->hist[3],
		__entry->demand,
#ifdef CONFIG_SCHED_FREQ_INPUT
		__entry->pred_demand,
#endif
		__entry->hist[0], __entry->hist[1],
		__entry->hist[2], __entry->hist[3],
		__entry->hist[4], __entry->cpu, __entry->nr_big_tasks)
);

@@ -413,6 +441,43 @@ TRACE_EVENT(sched_reset_all_window_stats,

#ifdef CONFIG_SCHED_FREQ_INPUT

/*
 * Tracepoint for predicted-demand updates: records the sampled runtime,
 * the percentage of the window it represents, the resulting prediction
 * and a snapshot of the task's busy-time histogram buckets.
 */
TRACE_EVENT(sched_update_pred_demand,

	TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int pct,
		 unsigned int pred_demand),

	TP_ARGS(rq, p, runtime, pct, pred_demand),

	TP_STRUCT__entry(
		__array(	char,	comm,   TASK_COMM_LEN	)
		__field(       pid_t,	pid			)
		__field(unsigned int,	runtime			)
		__field(	 int,	pct			)
		__field(unsigned int,	pred_demand		)
		__array(	  u8,	bucket, NUM_BUSY_BUCKETS)
		__field(	 int,	cpu			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid            = p->pid;
		__entry->runtime        = runtime;
		__entry->pct            = pct;
		__entry->pred_demand    = pred_demand;
		memcpy(__entry->bucket, p->ravg.busy_buckets,
					NUM_BUSY_BUCKETS * sizeof(u8));
		__entry->cpu            = rq->cpu;
	),

	TP_printk("%d (%s): runtime %u pct %d cpu %d pred_demand %u (buckets: %u %u %u %u %u %u %u %u %u %u)",
		__entry->pid, __entry->comm,
		__entry->runtime, __entry->pct, __entry->cpu,
		__entry->pred_demand, __entry->bucket[0], __entry->bucket[1],
		__entry->bucket[2], __entry->bucket[3], __entry->bucket[4],
		__entry->bucket[5], __entry->bucket[6], __entry->bucket[7],
		__entry->bucket[8], __entry->bucket[9])
);

TRACE_EVENT(sched_migration_update_sum,

	TP_PROTO(struct rq *rq, struct task_struct *p),
@@ -444,14 +509,15 @@ TRACE_EVENT(sched_migration_update_sum,

TRACE_EVENT(sched_get_busy,

	TP_PROTO(int cpu, u64 load, u64 nload, int early),
	TP_PROTO(int cpu, u64 load, u64 nload, u64 pload, int early),

	TP_ARGS(cpu, load, nload, early),
	TP_ARGS(cpu, load, nload, pload, early),

	TP_STRUCT__entry(
		__field(	int,	cpu			)
		__field(	u64,	load			)
		__field(	u64,	nload			)
		__field(	u64,	pload			)
		__field(	int,	early			)
	),

@@ -459,33 +525,45 @@ TRACE_EVENT(sched_get_busy,
		__entry->cpu		= cpu;
		__entry->load		= load;
		__entry->nload		= nload;
		__entry->pload		= pload;
		__entry->early		= early;
	),

	TP_printk("cpu %d load %lld new_task_load %lld early %d",
		__entry->cpu, __entry->load, __entry->nload, __entry->early)
	TP_printk("cpu %d load %lld new_task_load %lld predicted_load %lld early %d",
		__entry->cpu, __entry->load, __entry->nload,
		__entry->pload, __entry->early)
);

TRACE_EVENT(sched_freq_alert,

	TP_PROTO(int cpu, u64 old_load, u64 new_load),
	TP_PROTO(int cpu, int pd_notif, u64 old_load, u64 new_load,
		u64 old_pred, u64 new_pred),

	TP_ARGS(cpu, old_load, new_load),
	TP_ARGS(cpu, pd_notif, old_load, new_load, old_pred, new_pred),

	TP_STRUCT__entry(
		__field(	int,	cpu			)
		__field(	int,	pd_notif		)
		__field(	u64,	old_load		)
		__field(	u64,	new_load		)
		__field(	u64,	old_pred		)
		__field(	u64,	new_pred		)
	),

	TP_fast_assign(
		__entry->cpu		= cpu;
		__entry->pd_notif	= pd_notif;
		__entry->old_load	= old_load;
		__entry->new_load	= new_load;
		__entry->old_pred	= old_pred;
		__entry->new_pred	= new_pred;
	),

	TP_printk("cpu %d old_load=%llu new_load=%llu",
		__entry->cpu, __entry->old_load, __entry->new_load)
	TP_printk("cpu %d pd_notif=%d old_load=%llu new_load=%llu "
		"old_pred=%llu new_pred=%llu",
		__entry->cpu, __entry->pd_notif, __entry->old_load,
		__entry->new_load, __entry->old_pred,
		 __entry->new_pred)
);

#endif	/* CONFIG_SCHED_FREQ_INPUT */
+313 −25
Original line number Diff line number Diff line
@@ -1656,8 +1656,6 @@ static __read_mostly unsigned int sched_window_stats_policy =
__read_mostly unsigned int sysctl_sched_window_stats_policy =
	WINDOW_STATS_MAX_RECENT_AVG;

__read_mostly unsigned int sysctl_sched_new_task_windows = 5;

static __read_mostly unsigned int sched_account_wait_time = 1;
__read_mostly unsigned int sysctl_sched_account_wait_time = 1;

@@ -1667,6 +1665,8 @@ unsigned int __read_mostly sysctl_sched_enable_colocation = 1;

#ifdef CONFIG_SCHED_FREQ_INPUT

__read_mostly unsigned int sysctl_sched_new_task_windows = 5;

static __read_mostly unsigned int sched_migration_fixup = 1;
__read_mostly unsigned int sysctl_sched_migration_fixup = 1;

@@ -1686,6 +1686,9 @@ __read_mostly int sysctl_sched_freq_inc_notify = 10 * 1024 * 1024; /* + 10GHz */
__read_mostly int sysctl_sched_freq_dec_notify = 10 * 1024 * 1024; /* - 10GHz */

static __read_mostly unsigned int sched_io_is_busy;

__read_mostly unsigned int sysctl_sched_pred_alert_freq = 10 * 1024 * 1024;

#endif	/* CONFIG_SCHED_FREQ_INPUT */

/* 1 -> use PELT based load stats, 0 -> use window-based load stats */
@@ -1723,6 +1726,15 @@ __read_mostly unsigned int sched_ravg_window = 10000000;
/* Temporarily disable window-stats activity on all cpus */
unsigned int __read_mostly sched_disable_window_stats;

/*
 * Major task runtime. If a task runs for more than sched_major_task_runtime
 * in a window, it's considered to be generating majority of workload
 * for this window. Prediction could be adjusted for such tasks.
 */
#ifdef CONFIG_SCHED_FREQ_INPUT
__read_mostly unsigned int sched_major_task_runtime = 10000000;
#endif

static unsigned int sync_cpu;

#define EXITING_TASK_MARKER	0xdeaddead
@@ -1820,7 +1832,7 @@ static inline unsigned int load_to_freq(struct rq *rq, u64 load)
}

/* Should scheduler alert governor for changing frequency? */
static int send_notification(struct rq *rq)
static int send_notification(struct rq *rq, int check_pred)
{
	unsigned int cur_freq, freq_required;
	unsigned long flags;
@@ -1829,11 +1841,29 @@ static int send_notification(struct rq *rq)
	if (!sched_enable_hmp)
		return 0;

	if (check_pred) {
		u64 prev = rq->old_busy_time;
		u64 predicted = rq->hmp_stats.pred_demands_sum;

		if (rq->cluster->cur_freq == rq->cluster->max_freq)
			return 0;

		prev = max(prev, rq->old_estimated_time);
		if (prev > predicted)
			return 0;

		cur_freq = load_to_freq(rq, prev);
		freq_required = load_to_freq(rq, predicted);

		if (freq_required < cur_freq + sysctl_sched_pred_alert_freq)
			return 0;
	} else {
		cur_freq = load_to_freq(rq, rq->old_busy_time);
		freq_required = load_to_freq(rq, rq->prev_runnable_sum);

		if (nearly_same_freq(cur_freq, freq_required))
			return 0;
	}

	raw_spin_lock_irqsave(&rq->lock, flags);
	if (!rq->notifier_sent) {
@@ -1846,14 +1876,16 @@ static int send_notification(struct rq *rq)
}

/* Alert governor if there is a need to change frequency */
void check_for_freq_change(struct rq *rq)
void check_for_freq_change(struct rq *rq, bool check_pred)
{
	int cpu = cpu_of(rq);

	if (!send_notification(rq))
	if (!send_notification(rq, check_pred))
		return;

	trace_sched_freq_alert(cpu, rq->old_busy_time, rq->prev_runnable_sum);
	trace_sched_freq_alert(cpu, check_pred, rq->old_busy_time,
			rq->prev_runnable_sum, rq->old_estimated_time,
			rq->hmp_stats.pred_demands_sum);

	atomic_notifier_call_chain(
		&load_alert_notifier_head, 0,
@@ -1904,6 +1936,223 @@ static inline bool is_new_task(struct task_struct *p)
	return p->ravg.active_windows < sysctl_sched_new_task_windows;
}

#define INC_STEP 8		/* default increment for the hit bucket */
#define DEC_STEP 2		/* decay applied to every other bucket */
#define CONSISTENT_THRES 16	/* count above which a bucket is "consistent" */
#define INC_STEP_BIG 16		/* faster increment for consistent buckets */
/*
 * bucket_increase - update the count of all buckets
 *
 * @buckets: array of buckets tracking busy time of a task
 * @idx: the index of bucket to be incremented
 *
 * Each time a complete window finishes, count of bucket that runtime
 * falls in (@idx) is incremented. Counts of all other buckets are
 * decayed. The rate of increase and decay could be different based
 * on current count in the bucket.
 */
static inline void bucket_increase(u8 *buckets, int idx)
{
	int i, step;

	for (i = 0; i < NUM_BUSY_BUCKETS; i++) {
		if (idx != i) {
			/* decay buckets the runtime did not fall into */
			if (buckets[i] > DEC_STEP)
				buckets[i] -= DEC_STEP;
			else
				buckets[i] = 0;
		} else {
			/*
			 * A consistently-hit bucket grows faster; saturate
			 * at U8_MAX rather than wrapping the u8 counter.
			 */
			step = buckets[i] >= CONSISTENT_THRES ?
						INC_STEP_BIG : INC_STEP;
			if (buckets[i] > U8_MAX - step)
				buckets[i] = U8_MAX;
			else
				buckets[i] += step;
		}
	}
}

/*
 * busy_to_bucket - map a normalized busy time onto a histogram bucket.
 * Buckets partition [0, max_task_load()) into NUM_BUSY_BUCKETS slots;
 * the result is clamped to the valid index range.
 */
static inline int busy_to_bucket(u32 normalized_rt)
{
	int bidx = mult_frac(normalized_rt, NUM_BUSY_BUCKETS, max_task_load());

	if (bidx >= NUM_BUSY_BUCKETS)
		bidx = NUM_BUSY_BUCKETS - 1;

	/*
	 * The lowest two buckets are combined: the minimum CPU frequency
	 * already lands tasks in the 2nd bucket, so predicting the lowest
	 * bucket carries no useful information.
	 */
	if (bidx == 0)
		bidx = 1;

	return bidx;
}

/*
 * scale_load_to_freq - rescale @load measured at @src_freq to the
 * equivalent value at @dst_freq.
 */
static inline u64
scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
{
	u64 scaled = load * (u64)src_freq;

	return div64_u64(scaled, (u64)dst_freq);
}

#define HEAVY_TASK_SKIP 2
#define HEAVY_TASK_SKIP_LIMIT 4
/*
 * get_pred_busy - calculate predicted demand for a task on runqueue
 *
 * @rq: runqueue of task p
 * @p: task whose prediction is being updated
 * @start: starting bucket. returned prediction should not be lower than
 *         this bucket.
 * @runtime: runtime of the task. returned prediction should not be lower
 *           than this runtime.
 * Note: @start can be derived from @runtime. It's passed in only to
 * avoid duplicated calculation in some cases.
 *
 * A new predicted busy time is returned for task @p based on @runtime
 * passed in. The function searches through buckets that represent busy
 * time equal to or bigger than @runtime and attempts to find the bucket
 * to use for prediction. Once found, it searches through historical busy
 * time and returns the latest that falls into the bucket. If no such busy
 * time exists, it returns the midpoint of that bucket.
 */
static u32 get_pred_busy(struct rq *rq, struct task_struct *p,
				int start, u32 runtime)
{
	int i;
	u8 *buckets = p->ravg.busy_buckets;
	u32 *hist = p->ravg.sum_history;
	u32 dmin, dmax;
	u64 cur_freq_runtime = 0;
	int first = NUM_BUSY_BUCKETS, final, skip_to;
	u32 ret = runtime;

	/* skip prediction for new tasks due to lack of history */
	if (unlikely(is_new_task(p)))
		goto out;

	/* find minimal bucket index to pick */
	for (i = start; i < NUM_BUSY_BUCKETS; i++) {
		if (buckets[i]) {
			first = i;
			break;
		}
	}
	/* if no higher buckets are filled, predict runtime */
	if (first >= NUM_BUSY_BUCKETS)
		goto out;

	/* compute the bucket for prediction */
	final = first;
	if (first < HEAVY_TASK_SKIP_LIMIT) {
		/* compute runtime at current CPU frequency */
		cur_freq_runtime = mult_frac(runtime, max_possible_efficiency,
					     rq->cluster->efficiency);
		cur_freq_runtime = scale_load_to_freq(cur_freq_runtime,
				max_possible_freq, rq->cluster->cur_freq);
		/*
		 * if the task runs for majority of the window, try to
		 * pick higher buckets.
		 */
		if (cur_freq_runtime >= sched_major_task_runtime) {
			int next = NUM_BUSY_BUCKETS;
			/*
			 * if there is a higher bucket that's consistently
			 * hit, don't jump beyond that.
			 */
			for (i = start + 1; i <= HEAVY_TASK_SKIP_LIMIT &&
			     i < NUM_BUSY_BUCKETS; i++) {
				if (buckets[i] > CONSISTENT_THRES) {
					next = i;
					break;
				}
			}
			skip_to = min(next, start + HEAVY_TASK_SKIP);
			/* don't jump beyond HEAVY_TASK_SKIP_LIMIT */
			skip_to = min(HEAVY_TASK_SKIP_LIMIT, skip_to);
			/* don't go below first non-empty bucket, if any */
			final = max(first, skip_to);
		}
	}

	/* determine demand range for the predicted bucket */
	if (final < 2) {
		/* lowest two buckets are combined */
		dmin = 0;
		final = 1;
	} else {
		dmin = mult_frac(final, max_task_load(), NUM_BUSY_BUCKETS);
	}
	dmax = mult_frac(final + 1, max_task_load(), NUM_BUSY_BUCKETS);

	/*
	 * search through runtime history and return first runtime that falls
	 * into the range of predicted bucket.
	 */
	for (i = 0; i < sched_ravg_hist_size; i++) {
		if (hist[i] >= dmin && hist[i] < dmax) {
			ret = hist[i];
			break;
		}
	}
	/* no historical runtime within bucket found, use midpoint of the bin */
	if (ret < dmin)
		ret = (dmin + dmax) / 2;
	/*
	 * when updating in middle of a window, runtime could be higher
	 * than all recorded history. Always predict at least runtime.
	 */
	ret = max(runtime, ret);
out:
	/* pct below: cur_freq_runtime as a percentage of the window size */
	trace_sched_update_pred_demand(rq, p, runtime,
		mult_frac((unsigned int)cur_freq_runtime, 100,
			  sched_ravg_window), ret);
	return ret;
}

/*
 * calc_pred_demand - return the task's predicted demand, refreshed if the
 * busy time of the current window has already exceeded the prediction.
 */
static inline u32 calc_pred_demand(struct rq *rq, struct task_struct *p)
{
	u32 cur = p->ravg.curr_window;

	if (cur > p->ravg.pred_demand)
		return get_pred_busy(rq, p, busy_to_bucket(cur), cur);

	return p->ravg.pred_demand;
}

/*
 * predictive demand of a task is calculated at the window roll-over.
 * if the task current window busy time exceeds the predicted
 * demand, update it here to reflect the task needs.
 */
void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
{
	u32 new, old;

	/* idle and exiting tasks carry no meaningful demand history */
	if (is_idle_task(p) || exiting_task(p))
		return;

	/*
	 * Only act on window roll-over events (PUT_PREV_TASK/TASK_UPDATE),
	 * or additionally on TASK_MIGRATE/PICK_NEXT_TASK when wait time is
	 * accounted as busy time for frequency purposes.
	 */
	if (event != PUT_PREV_TASK && event != TASK_UPDATE &&
			(!sched_freq_account_wait_time ||
			 (event != TASK_MIGRATE &&
			 event != PICK_NEXT_TASK)))
		return;

	new = calc_pred_demand(rq, p);
	old = p->ravg.pred_demand;

	/* prediction only grows here; it is reset at window roll-over */
	if (old >= new)
		return;

	/*
	 * Skip throttled deadline tasks: they are dequeued without the
	 * runqueue stats being told.
	 * NOTE(review): fixup_hmp_sched_stats appears to derive its delta
	 * from the still-old p->ravg.pred_demand, so it must be called
	 * before the assignment below — confirm against PRED_DEMAND_DELTA.
	 */
	if (task_on_rq_queued(p) && (!task_has_dl_policy(p) ||
				!p->dl.dl_throttled))
		p->sched_class->fixup_hmp_sched_stats(rq, p,
				p->ravg.demand,
				new);

	p->ravg.pred_demand = new;
}

/*
 * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
 */
@@ -2239,13 +2488,40 @@ fail:
	spin_unlock_irqrestore(&freq_max_load_lock, flags);
	return ret;
}

/*
 * predict_and_update_buckets - predict demand for @runtime and fold
 * @runtime into @p's busy-time histogram.
 *
 * Returns the predicted demand based on the bucket @runtime falls in,
 * then increments that bucket (decaying all the others).
 */
static inline u32 predict_and_update_buckets(struct rq *rq,
			struct task_struct *p, u32 runtime)
{
	int bidx;
	u32 pred_demand;

	bidx = busy_to_bucket(runtime);
	pred_demand = get_pred_busy(rq, p, bidx, runtime);
	bucket_increase(p->ravg.busy_buckets, bidx);

	return pred_demand;
}
/* NOTE: relies on a task_struct pointer named 'p' in the caller's scope */
#define assign_ravg_pred_demand(x) (p->ravg.pred_demand = x)

#else	/* CONFIG_SCHED_FREQ_INPUT */

/* No-op stub: prediction support compiles out without CONFIG_SCHED_FREQ_INPUT */
static inline void
update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
{
}

/* No-op stub: per-CPU busy-time accounting is compiled out */
static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
	     int event, u64 wallclock, u64 irqtime)
{
}

/* Stub: no prediction available, report zero predicted demand */
static inline u32 predict_and_update_buckets(struct rq *rq,
			struct task_struct *p, u32 runtime)
{
	return 0;
}
/* No-op counterpart of the CONFIG_SCHED_FREQ_INPUT assignment macro */
#define assign_ravg_pred_demand(x)

#endif	/* CONFIG_SCHED_FREQ_INPUT */

static int account_busy_for_task_demand(struct task_struct *p, int event)
@@ -2277,7 +2553,7 @@ static void update_history(struct rq *rq, struct task_struct *p,
{
	u32 *hist = &p->ravg.sum_history[0];
	int ridx, widx;
	u32 max = 0, avg, demand;
	u32 max = 0, avg, demand, pred_demand;
	u64 sum = 0;

	/* Ignore windows where task had no activity */
@@ -2314,6 +2590,7 @@ static void update_history(struct rq *rq, struct task_struct *p,
		else
			demand = max(avg, runtime);
	}
	pred_demand = predict_and_update_buckets(rq, p, runtime);

	/*
	 * A throttled deadline sched class task gets dequeued without
@@ -2322,9 +2599,11 @@ static void update_history(struct rq *rq, struct task_struct *p,
	 */
	if (task_on_rq_queued(p) && (!task_has_dl_policy(p) ||
						!p->dl.dl_throttled))
		p->sched_class->fixup_hmp_sched_stats(rq, p, demand);
		p->sched_class->fixup_hmp_sched_stats(rq, p, demand,
						      pred_demand);

	p->ravg.demand = demand;
	assign_ravg_pred_demand(pred_demand);

done:
	trace_sched_update_history(rq, p, runtime, samples, event);
@@ -2457,7 +2736,7 @@ static void update_task_ravg(struct task_struct *p, struct rq *rq,

	update_task_demand(p, rq, event, wallclock);
	update_cpu_busy_time(p, rq, event, wallclock, irqtime);

	update_task_pred_demand(rq, p, event);
done:
	trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime);

@@ -2733,12 +3012,6 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)

#ifdef CONFIG_SCHED_FREQ_INPUT

static inline u64
scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
{
	return div64_u64(load * (u64)src_freq, (u64)dst_freq);
}

void sched_get_cpus_busy(struct sched_load *busy,
			 const struct cpumask *query_cpus)
{
@@ -2746,6 +3019,7 @@ void sched_get_cpus_busy(struct sched_load *busy,
	struct rq *rq;
	const int cpus = cpumask_weight(query_cpus);
	u64 load[cpus], nload[cpus];
	u64 pload[cpus];
	unsigned int cur_freq[cpus], max_freq[cpus];
	int notifier_sent[cpus];
	int early_detection[cpus];
@@ -2773,6 +3047,8 @@ void sched_get_cpus_busy(struct sched_load *busy,
				 sched_ktime_clock(), 0);
		load[i] = rq->old_busy_time = rq->prev_runnable_sum;
		nload[i] = rq->nt_prev_runnable_sum;
		pload[i] = rq->hmp_stats.pred_demands_sum;
		rq->old_estimated_time = pload[i];
		/*
		 * Scale load in reference to cluster max_possible_freq.
		 *
@@ -2781,6 +3057,7 @@ void sched_get_cpus_busy(struct sched_load *busy,
		 */
		load[i] = scale_load_to_cpu(load[i], cpu);
		nload[i] = scale_load_to_cpu(nload[i], cpu);
		pload[i] = scale_load_to_cpu(pload[i], cpu);

		notifier_sent[i] = rq->notifier_sent;
		early_detection[i] = (rq->ed_task != NULL);
@@ -2825,13 +3102,18 @@ void sched_get_cpus_busy(struct sched_load *busy,
			nload[i] = scale_load_to_freq(nload[i], max_freq[i],
						    cpu_max_possible_freq(cpu));
		}
		pload[i] = scale_load_to_freq(pload[i], max_freq[i],
					     rq->cluster->max_possible_freq);

		busy[i].prev_load = div64_u64(load[i], NSEC_PER_USEC);
		busy[i].new_task_load = div64_u64(nload[i], NSEC_PER_USEC);
		busy[i].predicted_load = div64_u64(pload[i], NSEC_PER_USEC);

exit_early:
		trace_sched_get_busy(cpu, busy[i].prev_load,
				     busy[i].new_task_load, early_detection[i]);
				     busy[i].new_task_load,
				     busy[i].predicted_load,
				     early_detection[i]);
		i++;
	}
}
@@ -4190,10 +4472,12 @@ out:

	if (freq_notif_allowed) {
		if (!same_freq_domain(src_cpu, cpu)) {
			check_for_freq_change(cpu_rq(cpu));
			check_for_freq_change(cpu_rq(src_cpu));
			check_for_freq_change(cpu_rq(cpu), false);
			check_for_freq_change(cpu_rq(src_cpu), false);
		} else if (heavy_task) {
			check_for_freq_change(cpu_rq(cpu));
			check_for_freq_change(cpu_rq(cpu), false);
		} else if (success) {
			check_for_freq_change(cpu_rq(cpu), true);
		}
	}

@@ -7383,8 +7667,10 @@ fail:
	raw_spin_unlock(&rq->lock);
	raw_spin_unlock(&p->pi_lock);
	if (moved && !same_freq_domain(src_cpu, dest_cpu)) {
		check_for_freq_change(cpu_rq(src_cpu));
		check_for_freq_change(cpu_rq(dest_cpu));
		check_for_freq_change(cpu_rq(src_cpu), false);
		check_for_freq_change(cpu_rq(dest_cpu), false);
	} else if (moved) {
		check_for_freq_change(cpu_rq(dest_cpu), true);
	}
	if (moved && task_notify_on_migrate(p)) {
		struct migration_notify_data mnd;
@@ -9766,7 +10052,9 @@ void __init sched_init(void)
		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
		rq->old_busy_time = 0;
		rq->old_estimated_time = 0;
		rq->notifier_sent = 0;
		rq->hmp_stats.pred_demands_sum = 0;
#endif
#endif
		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
+4 −2
Original line number Diff line number Diff line
@@ -749,11 +749,13 @@ fixup_hmp_sched_stats_dl(struct rq *rq, struct task_struct *p,
#else
static void
fixup_hmp_sched_stats_dl(struct rq *rq, struct task_struct *p,
			 u32 new_task_load)
			 u32 new_task_load, u32 new_pred_demand)
{
	s64 task_load_delta = (s64)new_task_load - task_load(p);
	s64 pred_demand_delta = PRED_DEMAND_DELTA;

	fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta);
	fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta,
				      pred_demand_delta);
}
#endif

Loading