Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1302ef8b, authored by Linux Build Service Account and committed by Gerrit — the friendly Code Review server
Browse files

Merge "sched: Add separate load tracking histogram to predict loads"

parents e6112e1c 6e35cb2f
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -1137,6 +1137,7 @@ struct sched_statistics {
#endif

#define RAVG_HIST_SIZE_MAX  5
#define NUM_BUSY_BUCKETS 10

/* ravg represents frequency scaled cpu-demand of tasks */
struct ravg {
@@ -1161,6 +1162,11 @@ struct ravg {
	 *
	 * 'prev_window' represents task's contribution to cpu busy time
	 * statistics (rq->prev_runnable_sum) in previous window
	 *
	 * 'pred_demand' represents task's current predicted cpu busy time
	 *
	 * 'busy_buckets' groups historical busy time into different buckets
	 * used for prediction
	 */
	u64 mark_start;
	u32 sum, demand;
@@ -1168,6 +1174,8 @@ struct ravg {
#ifdef CONFIG_SCHED_FREQ_INPUT
	u32 curr_window, prev_window;
	u16 active_windows;
	u32 pred_demand;
	u8 busy_buckets[NUM_BUSY_BUCKETS];
#endif
};

@@ -1972,6 +1980,7 @@ extern int task_free_unregister(struct notifier_block *n);
struct sched_load {
	unsigned long prev_load;
	unsigned long new_task_load;
	unsigned long predicted_load;
};

#if defined(CONFIG_SCHED_FREQ_INPUT)
+1 −0
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@ extern unsigned int sysctl_sched_enable_colocation;
extern unsigned int sysctl_sched_restrict_cluster_spill;
#if defined(CONFIG_SCHED_FREQ_INPUT)
extern unsigned int sysctl_sched_new_task_windows;
extern unsigned int sysctl_sched_pred_alert_freq;
#endif
#endif

+92 −14
Original line number Diff line number Diff line
@@ -77,6 +77,9 @@ TRACE_EVENT(sched_enq_deq_task,
		__field(unsigned int,	cpus_allowed		)
#ifdef CONFIG_SCHED_HMP
		__field(unsigned int,	demand			)
#ifdef CONFIG_SCHED_FREQ_INPUT
		__field(unsigned int,	pred_demand		)
#endif
#endif
	),

@@ -92,12 +95,18 @@ TRACE_EVENT(sched_enq_deq_task,
		__entry->cpus_allowed	= cpus_allowed;
#ifdef CONFIG_SCHED_HMP
		__entry->demand		= p->ravg.demand;
#ifdef CONFIG_SCHED_FREQ_INPUT
		__entry->pred_demand	= p->ravg.pred_demand;
#endif
#endif
	),

	TP_printk("cpu=%d %s comm=%s pid=%d prio=%d nr_running=%u cpu_load=%lu rt_nr_running=%u affine=%x"
#ifdef CONFIG_SCHED_HMP
		 " demand=%u"
#ifdef CONFIG_SCHED_FREQ_INPUT
		 " pred_demand=%u"
#endif
#endif
			, __entry->cpu,
			__entry->enqueue ? "enqueue" : "dequeue",
@@ -106,6 +115,9 @@ TRACE_EVENT(sched_enq_deq_task,
			__entry->cpu_load, __entry->rt_nr_running, __entry->cpus_allowed
#ifdef CONFIG_SCHED_HMP
			, __entry->demand
#ifdef CONFIG_SCHED_FREQ_INPUT
			, __entry->pred_demand
#endif
#endif
			)
);
@@ -282,6 +294,7 @@ TRACE_EVENT(sched_update_task_ravg,
		__field(unsigned int,	sum			)
		__field(	 int,	cpu			)
#ifdef CONFIG_SCHED_FREQ_INPUT
		__field(unsigned int,	pred_demand		)
		__field(	u64,	cs			)
		__field(	u64,	ps			)
		__field(	u32,	curr_window		)
@@ -308,6 +321,7 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->sum            = p->ravg.sum;
		__entry->irqtime        = irqtime;
#ifdef CONFIG_SCHED_FREQ_INPUT
		__entry->pred_demand     = p->ravg.pred_demand;
		__entry->cs             = rq->curr_runnable_sum;
		__entry->ps             = rq->prev_runnable_sum;
		__entry->curr_window	= p->ravg.curr_window;
@@ -320,7 +334,7 @@ TRACE_EVENT(sched_update_task_ravg,

	TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
#ifdef CONFIG_SCHED_FREQ_INPUT
		" cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
		" pred_demand %u cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
#endif
		, __entry->wallclock, __entry->win_start, __entry->delta,
		task_event_names[__entry->evt], __entry->cpu,
@@ -329,8 +343,8 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->delta_m, __entry->demand,
		__entry->sum, __entry->irqtime
#ifdef CONFIG_SCHED_FREQ_INPUT
		, __entry->cs, __entry->ps, __entry->curr_window,
		  __entry->prev_window,
		, __entry->pred_demand, __entry->cs, __entry->ps,
		__entry->curr_window, __entry->prev_window,
		  __entry->nt_cs, __entry->nt_ps,
		  __entry->active_windows
#endif
@@ -351,6 +365,9 @@ TRACE_EVENT(sched_update_history,
		__field(	 int,	samples			)
		__field(enum task_event,	evt		)
		__field(unsigned int,	demand			)
#ifdef CONFIG_SCHED_FREQ_INPUT
		__field(unsigned int,	pred_demand		)
#endif
		__array(	 u32,	hist, RAVG_HIST_SIZE_MAX)
		__field(unsigned int,	nr_big_tasks		)
		__field(	 int,	cpu			)
@@ -363,18 +380,29 @@ TRACE_EVENT(sched_update_history,
		__entry->samples        = samples;
		__entry->evt            = evt;
		__entry->demand         = p->ravg.demand;
#ifdef CONFIG_SCHED_FREQ_INPUT
		__entry->pred_demand     = p->ravg.pred_demand;
#endif
		memcpy(__entry->hist, p->ravg.sum_history,
					RAVG_HIST_SIZE_MAX * sizeof(u32));
		__entry->nr_big_tasks   = rq->hmp_stats.nr_big_tasks;
		__entry->cpu            = rq->cpu;
	),

	TP_printk("%d (%s): runtime %u samples %d event %s demand %u (hist: %u %u %u %u %u) cpu %d nr_big %u",
	TP_printk("%d (%s): runtime %u samples %d event %s demand %u"
#ifdef CONFIG_SCHED_FREQ_INPUT
		" pred_demand %u"
#endif
		" (hist: %u %u %u %u %u) cpu %d nr_big %u",
		__entry->pid, __entry->comm,
		__entry->runtime, __entry->samples,
		task_event_names[__entry->evt],
		__entry->demand, __entry->hist[0],
		__entry->hist[1], __entry->hist[2], __entry->hist[3],
		__entry->demand,
#ifdef CONFIG_SCHED_FREQ_INPUT
		__entry->pred_demand,
#endif
		__entry->hist[0], __entry->hist[1],
		__entry->hist[2], __entry->hist[3],
		__entry->hist[4], __entry->cpu, __entry->nr_big_tasks)
);

@@ -413,6 +441,43 @@ TRACE_EVENT(sched_reset_all_window_stats,

#ifdef CONFIG_SCHED_FREQ_INPUT

/*
 * Tracepoint fired when a task's predicted demand is (re)computed.
 * Captures the runtime sample that triggered the update, pct (the
 * runtime expressed as a percentage of the scheduler window), the
 * resulting prediction, and a snapshot of the task's ten busy-time
 * histogram buckets.
 */
TRACE_EVENT(sched_update_pred_demand,

	TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int pct,
		 unsigned int pred_demand),

	TP_ARGS(rq, p, runtime, pct, pred_demand),

	TP_STRUCT__entry(
		__array(	char,	comm,   TASK_COMM_LEN	)
		__field(       pid_t,	pid			)
		__field(unsigned int,	runtime			)
		__field(	 int,	pct			)
		__field(unsigned int,	pred_demand		)
		__array(	  u8,	bucket, NUM_BUSY_BUCKETS)
		__field(	 int,	cpu			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid            = p->pid;
		__entry->runtime        = runtime;
		__entry->pct            = pct;
		__entry->pred_demand     = pred_demand;
		/* copy the whole histogram so the trace shows bucket decay */
		memcpy(__entry->bucket, p->ravg.busy_buckets,
					NUM_BUSY_BUCKETS * sizeof(u8));
		__entry->cpu            = rq->cpu;
	),

	TP_printk("%d (%s): runtime %u pct %d cpu %d pred_demand %u (buckets: %u %u %u %u %u %u %u %u %u %u)",
		__entry->pid, __entry->comm,
		__entry->runtime, __entry->pct, __entry->cpu,
		__entry->pred_demand, __entry->bucket[0], __entry->bucket[1],
		__entry->bucket[2], __entry->bucket[3],__entry->bucket[4],
		__entry->bucket[5], __entry->bucket[6], __entry->bucket[7],
		__entry->bucket[8], __entry->bucket[9])
);

TRACE_EVENT(sched_migration_update_sum,

	TP_PROTO(struct rq *rq, struct task_struct *p),
@@ -444,14 +509,15 @@ TRACE_EVENT(sched_migration_update_sum,

TRACE_EVENT(sched_get_busy,

	TP_PROTO(int cpu, u64 load, u64 nload, int early),
	TP_PROTO(int cpu, u64 load, u64 nload, u64 pload, int early),

	TP_ARGS(cpu, load, nload, early),
	TP_ARGS(cpu, load, nload, pload, early),

	TP_STRUCT__entry(
		__field(	int,	cpu			)
		__field(	u64,	load			)
		__field(	u64,	nload			)
		__field(	u64,	pload			)
		__field(	int,	early			)
	),

@@ -459,33 +525,45 @@ TRACE_EVENT(sched_get_busy,
		__entry->cpu		= cpu;
		__entry->load		= load;
		__entry->nload		= nload;
		__entry->pload		= pload;
		__entry->early		= early;
	),

	TP_printk("cpu %d load %lld new_task_load %lld early %d",
		__entry->cpu, __entry->load, __entry->nload, __entry->early)
	TP_printk("cpu %d load %lld new_task_load %lld predicted_load %lld early %d",
		__entry->cpu, __entry->load, __entry->nload,
		__entry->pload, __entry->early)
);

TRACE_EVENT(sched_freq_alert,

	TP_PROTO(int cpu, u64 old_load, u64 new_load),
	TP_PROTO(int cpu, int pd_notif, u64 old_load, u64 new_load,
		u64 old_pred, u64 new_pred),

	TP_ARGS(cpu, old_load, new_load),
	TP_ARGS(cpu, pd_notif, old_load, new_load, old_pred, new_pred),

	TP_STRUCT__entry(
		__field(	int,	cpu			)
		__field(	int,	pd_notif		)
		__field(	u64,	old_load		)
		__field(	u64,	new_load		)
		__field(	u64,	old_pred		)
		__field(	u64,	new_pred		)
	),

	TP_fast_assign(
		__entry->cpu		= cpu;
		__entry->pd_notif	= pd_notif;
		__entry->old_load	= old_load;
		__entry->new_load	= new_load;
		__entry->old_pred	= old_pred;
		__entry->new_pred	= new_pred;
	),

	TP_printk("cpu %d old_load=%llu new_load=%llu",
		__entry->cpu, __entry->old_load, __entry->new_load)
	TP_printk("cpu %d pd_notif=%d old_load=%llu new_load=%llu "
		"old_pred=%llu new_pred=%llu",
		__entry->cpu, __entry->pd_notif, __entry->old_load,
		__entry->new_load, __entry->old_pred,
		 __entry->new_pred)
);

#endif	/* CONFIG_SCHED_FREQ_INPUT */
+313 −25
Original line number Diff line number Diff line
@@ -1656,8 +1656,6 @@ static __read_mostly unsigned int sched_window_stats_policy =
__read_mostly unsigned int sysctl_sched_window_stats_policy =
	WINDOW_STATS_MAX_RECENT_AVG;

__read_mostly unsigned int sysctl_sched_new_task_windows = 5;

static __read_mostly unsigned int sched_account_wait_time = 1;
__read_mostly unsigned int sysctl_sched_account_wait_time = 1;

@@ -1667,6 +1665,8 @@ unsigned int __read_mostly sysctl_sched_enable_colocation = 1;

#ifdef CONFIG_SCHED_FREQ_INPUT

__read_mostly unsigned int sysctl_sched_new_task_windows = 5;

static __read_mostly unsigned int sched_migration_fixup = 1;
__read_mostly unsigned int sysctl_sched_migration_fixup = 1;

@@ -1686,6 +1686,9 @@ __read_mostly int sysctl_sched_freq_inc_notify = 10 * 1024 * 1024; /* + 10GHz */
__read_mostly int sysctl_sched_freq_dec_notify = 10 * 1024 * 1024; /* - 10GHz */

static __read_mostly unsigned int sched_io_is_busy;

__read_mostly unsigned int sysctl_sched_pred_alert_freq = 10 * 1024 * 1024;

#endif	/* CONFIG_SCHED_FREQ_INPUT */

/* 1 -> use PELT based load stats, 0 -> use window-based load stats */
@@ -1723,6 +1726,15 @@ __read_mostly unsigned int sched_ravg_window = 10000000;
/* Temporarily disable window-stats activity on all cpus */
unsigned int __read_mostly sched_disable_window_stats;

/*
 * Major task runtime. If a task runs for more than sched_major_task_runtime
 * in a window, it's considered to be generating majority of workload
 * for this window. Prediction could be adjusted for such tasks.
 */
#ifdef CONFIG_SCHED_FREQ_INPUT
__read_mostly unsigned int sched_major_task_runtime = 10000000;
#endif

static unsigned int sync_cpu;

#define EXITING_TASK_MARKER	0xdeaddead
@@ -1820,7 +1832,7 @@ static inline unsigned int load_to_freq(struct rq *rq, u64 load)
}

/* Should scheduler alert governor for changing frequency? */
static int send_notification(struct rq *rq)
static int send_notification(struct rq *rq, int check_pred)
{
	unsigned int cur_freq, freq_required;
	unsigned long flags;
@@ -1829,11 +1841,29 @@ static int send_notification(struct rq *rq)
	if (!sched_enable_hmp)
		return 0;

	if (check_pred) {
		u64 prev = rq->old_busy_time;
		u64 predicted = rq->hmp_stats.pred_demands_sum;

		if (rq->cluster->cur_freq == rq->cluster->max_freq)
			return 0;

		prev = max(prev, rq->old_estimated_time);
		if (prev > predicted)
			return 0;

		cur_freq = load_to_freq(rq, prev);
		freq_required = load_to_freq(rq, predicted);

		if (freq_required < cur_freq + sysctl_sched_pred_alert_freq)
			return 0;
	} else {
		cur_freq = load_to_freq(rq, rq->old_busy_time);
		freq_required = load_to_freq(rq, rq->prev_runnable_sum);

		if (nearly_same_freq(cur_freq, freq_required))
			return 0;
	}

	raw_spin_lock_irqsave(&rq->lock, flags);
	if (!rq->notifier_sent) {
@@ -1846,14 +1876,16 @@ static int send_notification(struct rq *rq)
}

/* Alert governor if there is a need to change frequency */
void check_for_freq_change(struct rq *rq)
void check_for_freq_change(struct rq *rq, bool check_pred)
{
	int cpu = cpu_of(rq);

	if (!send_notification(rq))
	if (!send_notification(rq, check_pred))
		return;

	trace_sched_freq_alert(cpu, rq->old_busy_time, rq->prev_runnable_sum);
	trace_sched_freq_alert(cpu, check_pred, rq->old_busy_time,
			rq->prev_runnable_sum, rq->old_estimated_time,
			rq->hmp_stats.pred_demands_sum);

	atomic_notifier_call_chain(
		&load_alert_notifier_head, 0,
@@ -1904,6 +1936,223 @@ static inline bool is_new_task(struct task_struct *p)
	return p->ravg.active_windows < sysctl_sched_new_task_windows;
}

#define INC_STEP 8
#define DEC_STEP 2
#define CONSISTENT_THRES 16
#define INC_STEP_BIG 16
/*
 * bucket_increase - update the count of all buckets
 *
 * @buckets: array of buckets tracking busy time of a task
 * @idx: the index of bucket to be incremented
 *
 * At every completed window the bucket that the window's runtime maps
 * to (@idx) grows, while every other bucket decays towards zero. A
 * bucket that has already proven consistent (count >= CONSISTENT_THRES)
 * grows in larger steps. Counts saturate at U8_MAX and clamp at zero.
 */
static inline void bucket_increase(u8 *buckets, int idx)
{
	int i;

	for (i = 0; i < NUM_BUSY_BUCKETS; i++) {
		u8 cnt = buckets[i];

		if (i == idx) {
			int step = (cnt >= CONSISTENT_THRES) ?
						INC_STEP_BIG : INC_STEP;

			/* saturate instead of wrapping the u8 counter */
			buckets[i] = (cnt > U8_MAX - step) ?
						U8_MAX : cnt + step;
		} else {
			/* decay all other buckets, clamping at zero */
			buckets[i] = (cnt > DEC_STEP) ? cnt - DEC_STEP : 0;
		}
	}
}

/*
 * Map a normalized runtime onto a busy-bucket index in
 * [1, NUM_BUSY_BUCKETS - 1]. Bucket 0 is never returned: the lowest two
 * buckets are combined, because the lowest frequency already falls into
 * the second bucket and keeping a prediction in the lowest bucket is
 * not useful.
 */
static inline int busy_to_bucket(u32 normalized_rt)
{
	int bidx = mult_frac(normalized_rt, NUM_BUSY_BUCKETS,
			     max_task_load());

	if (bidx >= NUM_BUSY_BUCKETS)
		bidx = NUM_BUSY_BUCKETS - 1;

	return bidx ? bidx : 1;
}

/* Return @load scaled by the ratio @src_freq / @dst_freq. */
static inline u64
scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
{
	u64 scaled = load * (u64)src_freq;

	return div64_u64(scaled, (u64)dst_freq);
}

#define HEAVY_TASK_SKIP 2
#define HEAVY_TASK_SKIP_LIMIT 4
/*
 * get_pred_busy - calculate predicted demand for a task on runqueue
 *
 * @rq: runqueue of task p
 * @p: task whose prediction is being updated
 * @start: starting bucket. returned prediction should not be lower than
 *         this bucket.
 * @runtime: runtime of the task. returned prediction should not be lower
 *           than this runtime.
 * Note: @start can be derived from @runtime. It's passed in only to
 * avoid duplicated calculation in some cases.
 *
 * A new predicted busy time is returned for task @p based on @runtime
 * passed in. The function searches through buckets that represent busy
 * time equal to or bigger than @runtime and attempts to find the bucket
 * to use for prediction. Once found, it searches through historical busy
 * time and returns the latest that falls into the bucket. If no such busy
 * time exists, it returns the middle of that bucket.
 */
static u32 get_pred_busy(struct rq *rq, struct task_struct *p,
				int start, u32 runtime)
{
	int i;
	u8 *buckets = p->ravg.busy_buckets;
	u32 *hist = p->ravg.sum_history;
	u32 dmin, dmax;
	u64 cur_freq_runtime = 0;
	int first = NUM_BUSY_BUCKETS, final, skip_to;
	u32 ret = runtime;

	/* skip prediction for new tasks due to lack of history */
	if (unlikely(is_new_task(p)))
		goto out;

	/* find minimal bucket index to pick */
	for (i = start; i < NUM_BUSY_BUCKETS; i++) {
		if (buckets[i]) {
			first = i;
			break;
		}
	}
	/* if no higher buckets are filled, predict runtime */
	if (first >= NUM_BUSY_BUCKETS)
		goto out;

	/* compute the bucket for prediction */
	final = first;
	if (first < HEAVY_TASK_SKIP_LIMIT) {
		/*
		 * compute runtime at current CPU frequency, normalized for
		 * the cluster's efficiency relative to the most efficient one
		 */
		cur_freq_runtime = mult_frac(runtime, max_possible_efficiency,
					     rq->cluster->efficiency);
		cur_freq_runtime = scale_load_to_freq(cur_freq_runtime,
				max_possible_freq, rq->cluster->cur_freq);
		/*
		 * if the task runs for majority of the window, try to
		 * pick higher buckets.
		 */
		if (cur_freq_runtime >= sched_major_task_runtime) {
			int next = NUM_BUSY_BUCKETS;
			/*
			 * if there is a higher bucket that's consistently
			 * hit, don't jump beyond that.
			 */
			for (i = start + 1; i <= HEAVY_TASK_SKIP_LIMIT &&
			     i < NUM_BUSY_BUCKETS; i++) {
				if (buckets[i] > CONSISTENT_THRES) {
					next = i;
					break;
				}
			}
			/* jump at most HEAVY_TASK_SKIP buckets above @start */
			skip_to = min(next, start + HEAVY_TASK_SKIP);
			/* don't jump beyond HEAVY_TASK_SKIP_LIMIT */
			skip_to = min(HEAVY_TASK_SKIP_LIMIT, skip_to);
			/* don't go below first non-empty bucket, if any */
			final = max(first, skip_to);
		}
	}

	/* determine demand range for the predicted bucket */
	if (final < 2) {
		/* lowest two buckets are combined */
		dmin = 0;
		final = 1;
	} else {
		dmin = mult_frac(final, max_task_load(), NUM_BUSY_BUCKETS);
	}
	dmax = mult_frac(final + 1, max_task_load(), NUM_BUSY_BUCKETS);

	/*
	 * search through runtime history and return first runtime that falls
	 * into the range of predicted bucket.
	 */
	for (i = 0; i < sched_ravg_hist_size; i++) {
		if (hist[i] >= dmin && hist[i] < dmax) {
			ret = hist[i];
			break;
		}
	}
	/* no historical runtime within bucket found, use middle of the bucket */
	if (ret < dmin)
		ret = (dmin + dmax) / 2;
	/*
	 * when updating in middle of a window, runtime could be higher
	 * than all recorded history. Always predict at least runtime.
	 */
	ret = max(runtime, ret);
out:
	/* pct = runtime as a percentage of the scheduler window */
	trace_sched_update_pred_demand(rq, p, runtime,
		mult_frac((unsigned int)cur_freq_runtime, 100,
			  sched_ravg_window), ret);
	return ret;
}

/*
 * Return the task's predicted demand. The stored prediction is reused
 * as long as it still covers the busy time accumulated in the current
 * window; otherwise a fresh prediction is computed from that busy time.
 */
static inline u32 calc_pred_demand(struct rq *rq, struct task_struct *p)
{
	u32 cur_busy = p->ravg.curr_window;

	if (p->ravg.pred_demand >= cur_busy)
		return p->ravg.pred_demand;

	return get_pred_busy(rq, p, busy_to_bucket(cur_busy), cur_busy);
}

/*
 * predictive demand of a task is calculated at the window roll-over.
 * if the task current window busy time exceeds the predicted
 * demand, update it here to reflect the task needs.
 */
void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
{
	u32 new, old;

	if (is_idle_task(p) || exiting_task(p))
		return;

	if (event != PUT_PREV_TASK && event != TASK_UPDATE &&
			(!sched_freq_account_wait_time ||
			 (event != TASK_MIGRATE &&
			 event != PICK_NEXT_TASK)))
		return;

	new = calc_pred_demand(rq, p);
	old = p->ravg.pred_demand;

	if (old >= new)
		return;

	if (task_on_rq_queued(p) && (!task_has_dl_policy(p) ||
				!p->dl.dl_throttled))
		p->sched_class->fixup_hmp_sched_stats(rq, p,
				p->ravg.demand,
				new);

	p->ravg.pred_demand = new;
}

/*
 * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
 */
@@ -2239,13 +2488,40 @@ fail:
	spin_unlock_irqrestore(&freq_max_load_lock, flags);
	return ret;
}

/*
 * predict_and_update_buckets - compute a task's predicted demand and
 * record this window's runtime in its busy-time histogram.
 *
 * @rq: runqueue of @p
 * @p: task being updated
 * @runtime: busy time of the window being recorded
 *
 * Returns the new predicted demand. bucket_increase() is called only
 * after the prediction is taken from the existing histogram state.
 *
 * Fixes vs. previous revision: opening brace moved off the signature
 * line and stray blank line dropped (kernel style); the macro below now
 * parenthesizes its argument so expressions expand safely.
 */
static inline u32 predict_and_update_buckets(struct rq *rq,
			struct task_struct *p, u32 runtime)
{
	int bidx;
	u32 pred_demand;

	bidx = busy_to_bucket(runtime);
	pred_demand = get_pred_busy(rq, p, bidx, runtime);
	bucket_increase(p->ravg.busy_buckets, bidx);

	return pred_demand;
}
/* NOTE: relies on a task_struct pointer named 'p' in the caller's scope. */
#define assign_ravg_pred_demand(x) (p->ravg.pred_demand = (x))

#else	/* CONFIG_SCHED_FREQ_INPUT */

/*
 * !CONFIG_SCHED_FREQ_INPUT: load prediction and per-cpu busy-time
 * accounting are compiled out; these no-op stubs let callers stay
 * unconditional.
 */
static inline void
update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
{
}

static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
	     int event, u64 wallclock, u64 irqtime)
{
}

/* No histogram to update; report a zero prediction. */
static inline u32 predict_and_update_buckets(struct rq *rq,
			struct task_struct *p, u32 runtime)
{
	return 0;
}
/* ravg has no pred_demand field in this configuration; expand to nothing. */
#define assign_ravg_pred_demand(x)

#endif	/* CONFIG_SCHED_FREQ_INPUT */

static int account_busy_for_task_demand(struct task_struct *p, int event)
@@ -2277,7 +2553,7 @@ static void update_history(struct rq *rq, struct task_struct *p,
{
	u32 *hist = &p->ravg.sum_history[0];
	int ridx, widx;
	u32 max = 0, avg, demand;
	u32 max = 0, avg, demand, pred_demand;
	u64 sum = 0;

	/* Ignore windows where task had no activity */
@@ -2314,6 +2590,7 @@ static void update_history(struct rq *rq, struct task_struct *p,
		else
			demand = max(avg, runtime);
	}
	pred_demand = predict_and_update_buckets(rq, p, runtime);

	/*
	 * A throttled deadline sched class task gets dequeued without
@@ -2322,9 +2599,11 @@ static void update_history(struct rq *rq, struct task_struct *p,
	 */
	if (task_on_rq_queued(p) && (!task_has_dl_policy(p) ||
						!p->dl.dl_throttled))
		p->sched_class->fixup_hmp_sched_stats(rq, p, demand);
		p->sched_class->fixup_hmp_sched_stats(rq, p, demand,
						      pred_demand);

	p->ravg.demand = demand;
	assign_ravg_pred_demand(pred_demand);

done:
	trace_sched_update_history(rq, p, runtime, samples, event);
@@ -2457,7 +2736,7 @@ static void update_task_ravg(struct task_struct *p, struct rq *rq,

	update_task_demand(p, rq, event, wallclock);
	update_cpu_busy_time(p, rq, event, wallclock, irqtime);

	update_task_pred_demand(rq, p, event);
done:
	trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime);

@@ -2733,12 +3012,6 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)

#ifdef CONFIG_SCHED_FREQ_INPUT

static inline u64
scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
{
	return div64_u64(load * (u64)src_freq, (u64)dst_freq);
}

void sched_get_cpus_busy(struct sched_load *busy,
			 const struct cpumask *query_cpus)
{
@@ -2746,6 +3019,7 @@ void sched_get_cpus_busy(struct sched_load *busy,
	struct rq *rq;
	const int cpus = cpumask_weight(query_cpus);
	u64 load[cpus], nload[cpus];
	u64 pload[cpus];
	unsigned int cur_freq[cpus], max_freq[cpus];
	int notifier_sent[cpus];
	int early_detection[cpus];
@@ -2773,6 +3047,8 @@ void sched_get_cpus_busy(struct sched_load *busy,
				 sched_ktime_clock(), 0);
		load[i] = rq->old_busy_time = rq->prev_runnable_sum;
		nload[i] = rq->nt_prev_runnable_sum;
		pload[i] = rq->hmp_stats.pred_demands_sum;
		rq->old_estimated_time = pload[i];
		/*
		 * Scale load in reference to cluster max_possible_freq.
		 *
@@ -2781,6 +3057,7 @@ void sched_get_cpus_busy(struct sched_load *busy,
		 */
		load[i] = scale_load_to_cpu(load[i], cpu);
		nload[i] = scale_load_to_cpu(nload[i], cpu);
		pload[i] = scale_load_to_cpu(pload[i], cpu);

		notifier_sent[i] = rq->notifier_sent;
		early_detection[i] = (rq->ed_task != NULL);
@@ -2825,13 +3102,18 @@ void sched_get_cpus_busy(struct sched_load *busy,
			nload[i] = scale_load_to_freq(nload[i], max_freq[i],
						    cpu_max_possible_freq(cpu));
		}
		pload[i] = scale_load_to_freq(pload[i], max_freq[i],
					     rq->cluster->max_possible_freq);

		busy[i].prev_load = div64_u64(load[i], NSEC_PER_USEC);
		busy[i].new_task_load = div64_u64(nload[i], NSEC_PER_USEC);
		busy[i].predicted_load = div64_u64(pload[i], NSEC_PER_USEC);

exit_early:
		trace_sched_get_busy(cpu, busy[i].prev_load,
				     busy[i].new_task_load, early_detection[i]);
				     busy[i].new_task_load,
				     busy[i].predicted_load,
				     early_detection[i]);
		i++;
	}
}
@@ -4190,10 +4472,12 @@ out:

	if (freq_notif_allowed) {
		if (!same_freq_domain(src_cpu, cpu)) {
			check_for_freq_change(cpu_rq(cpu));
			check_for_freq_change(cpu_rq(src_cpu));
			check_for_freq_change(cpu_rq(cpu), false);
			check_for_freq_change(cpu_rq(src_cpu), false);
		} else if (heavy_task) {
			check_for_freq_change(cpu_rq(cpu));
			check_for_freq_change(cpu_rq(cpu), false);
		} else if (success) {
			check_for_freq_change(cpu_rq(cpu), true);
		}
	}

@@ -7383,8 +7667,10 @@ fail:
	raw_spin_unlock(&rq->lock);
	raw_spin_unlock(&p->pi_lock);
	if (moved && !same_freq_domain(src_cpu, dest_cpu)) {
		check_for_freq_change(cpu_rq(src_cpu));
		check_for_freq_change(cpu_rq(dest_cpu));
		check_for_freq_change(cpu_rq(src_cpu), false);
		check_for_freq_change(cpu_rq(dest_cpu), false);
	} else if (moved) {
		check_for_freq_change(cpu_rq(dest_cpu), true);
	}
	if (moved && task_notify_on_migrate(p)) {
		struct migration_notify_data mnd;
@@ -9766,7 +10052,9 @@ void __init sched_init(void)
		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
		rq->old_busy_time = 0;
		rq->old_estimated_time = 0;
		rq->notifier_sent = 0;
		rq->hmp_stats.pred_demands_sum = 0;
#endif
#endif
		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
+4 −2
Original line number Diff line number Diff line
@@ -749,11 +749,13 @@ fixup_hmp_sched_stats_dl(struct rq *rq, struct task_struct *p,
#else
static void
fixup_hmp_sched_stats_dl(struct rq *rq, struct task_struct *p,
			 u32 new_task_load)
			 u32 new_task_load, u32 new_pred_demand)
{
	s64 task_load_delta = (s64)new_task_load - task_load(p);
	s64 pred_demand_delta = PRED_DEMAND_DELTA;

	fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta);
	fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta,
				      pred_demand_delta);
}
#endif

Loading