Loading include/linux/sched.h +9 −0 Original line number Diff line number Diff line Loading @@ -1137,6 +1137,7 @@ struct sched_statistics { #endif #define RAVG_HIST_SIZE_MAX 5 #define NUM_BUSY_BUCKETS 10 /* ravg represents frequency scaled cpu-demand of tasks */ struct ravg { Loading @@ -1161,6 +1162,11 @@ struct ravg { * * 'prev_window' represents task's contribution to cpu busy time * statistics (rq->prev_runnable_sum) in previous window * * 'pred_demand' represents task's current predicted cpu busy time * * 'busy_buckets' groups historical busy time into different buckets * used for prediction */ u64 mark_start; u32 sum, demand; Loading @@ -1168,6 +1174,8 @@ struct ravg { #ifdef CONFIG_SCHED_FREQ_INPUT u32 curr_window, prev_window; u16 active_windows; u32 pred_demand; u8 busy_buckets[NUM_BUSY_BUCKETS]; #endif }; Loading Loading @@ -1972,6 +1980,7 @@ extern int task_free_unregister(struct notifier_block *n); struct sched_load { unsigned long prev_load; unsigned long new_task_load; unsigned long predicted_load; }; #if defined(CONFIG_SCHED_FREQ_INPUT) Loading include/linux/sched/sysctl.h +1 −0 Original line number Diff line number Diff line Loading @@ -77,6 +77,7 @@ extern unsigned int sysctl_sched_enable_colocation; extern unsigned int sysctl_sched_restrict_cluster_spill; #if defined(CONFIG_SCHED_FREQ_INPUT) extern unsigned int sysctl_sched_new_task_windows; extern unsigned int sysctl_sched_pred_alert_freq; #endif #endif Loading include/trace/events/sched.h +92 −14 Original line number Diff line number Diff line Loading @@ -77,6 +77,9 @@ TRACE_EVENT(sched_enq_deq_task, __field(unsigned int, cpus_allowed ) #ifdef CONFIG_SCHED_HMP __field(unsigned int, demand ) #ifdef CONFIG_SCHED_FREQ_INPUT __field(unsigned int, pred_demand ) #endif #endif ), Loading @@ -92,12 +95,18 @@ TRACE_EVENT(sched_enq_deq_task, __entry->cpus_allowed = cpus_allowed; #ifdef CONFIG_SCHED_HMP __entry->demand = p->ravg.demand; #ifdef CONFIG_SCHED_FREQ_INPUT __entry->pred_demand = p->ravg.pred_demand; #endif #endif ), TP_printk("cpu=%d %s comm=%s pid=%d prio=%d nr_running=%u cpu_load=%lu rt_nr_running=%u affine=%x" #ifdef CONFIG_SCHED_HMP " demand=%u" #ifdef CONFIG_SCHED_FREQ_INPUT " pred_demand=%u" #endif #endif , __entry->cpu, __entry->enqueue ? "enqueue" : "dequeue", Loading @@ -106,6 +115,9 @@ TRACE_EVENT(sched_enq_deq_task, __entry->cpu_load, __entry->rt_nr_running, __entry->cpus_allowed #ifdef CONFIG_SCHED_HMP , __entry->demand #ifdef CONFIG_SCHED_FREQ_INPUT , __entry->pred_demand #endif #endif ) ); Loading Loading @@ -282,6 +294,7 @@ TRACE_EVENT(sched_update_task_ravg, __field(unsigned int, sum ) __field( int, cpu ) #ifdef CONFIG_SCHED_FREQ_INPUT __field(unsigned int, pred_demand ) __field( u64, cs ) __field( u64, ps ) __field( u32, curr_window ) Loading @@ -308,6 +321,7 @@ TRACE_EVENT(sched_update_task_ravg, __entry->sum = p->ravg.sum; __entry->irqtime = irqtime; #ifdef CONFIG_SCHED_FREQ_INPUT __entry->pred_demand = p->ravg.pred_demand; __entry->cs = rq->curr_runnable_sum; __entry->ps = rq->prev_runnable_sum; __entry->curr_window = p->ravg.curr_window; Loading @@ -320,7 +334,7 @@ TRACE_EVENT(sched_update_task_ravg, TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu" #ifdef CONFIG_SCHED_FREQ_INPUT " cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u" " pred_demand %u cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u" #endif , __entry->wallclock, __entry->win_start, __entry->delta, task_event_names[__entry->evt], __entry->cpu, Loading @@ -329,8 +343,8 @@ TRACE_EVENT(sched_update_task_ravg, __entry->delta_m, __entry->demand, __entry->sum, __entry->irqtime #ifdef CONFIG_SCHED_FREQ_INPUT , __entry->cs, __entry->ps, __entry->curr_window, __entry->prev_window, , __entry->pred_demand, __entry->cs, __entry->ps, __entry->curr_window, __entry->prev_window, __entry->nt_cs, __entry->nt_ps, __entry->active_windows #endif Loading @@ -351,6 +365,9 @@ TRACE_EVENT(sched_update_history, __field( int, samples ) __field(enum task_event, evt ) __field(unsigned int, demand ) #ifdef CONFIG_SCHED_FREQ_INPUT __field(unsigned int, pred_demand ) #endif __array( u32, hist, RAVG_HIST_SIZE_MAX) __field(unsigned int, nr_big_tasks ) __field( int, cpu ) Loading @@ -363,18 +380,29 @@ TRACE_EVENT(sched_update_history, __entry->samples = samples; __entry->evt = evt; __entry->demand = p->ravg.demand; #ifdef CONFIG_SCHED_FREQ_INPUT __entry->pred_demand = p->ravg.pred_demand; #endif memcpy(__entry->hist, p->ravg.sum_history, RAVG_HIST_SIZE_MAX * sizeof(u32)); __entry->nr_big_tasks = rq->hmp_stats.nr_big_tasks; __entry->cpu = rq->cpu; ), TP_printk("%d (%s): runtime %u samples %d event %s demand %u (hist: %u %u %u %u %u) cpu %d nr_big %u", TP_printk("%d (%s): runtime %u samples %d event %s demand %u" #ifdef CONFIG_SCHED_FREQ_INPUT " pred_demand %u" #endif " (hist: %u %u %u %u %u) cpu %d nr_big %u", __entry->pid, __entry->comm, __entry->runtime, __entry->samples, task_event_names[__entry->evt], __entry->demand, __entry->hist[0], __entry->hist[1], __entry->hist[2], __entry->hist[3], __entry->demand, #ifdef CONFIG_SCHED_FREQ_INPUT __entry->pred_demand, #endif __entry->hist[0], __entry->hist[1], __entry->hist[2], __entry->hist[3], __entry->hist[4], __entry->cpu, __entry->nr_big_tasks) ); Loading Loading @@ -413,6 +441,43 @@ TRACE_EVENT(sched_reset_all_window_stats, #ifdef CONFIG_SCHED_FREQ_INPUT TRACE_EVENT(sched_update_pred_demand, TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int pct, unsigned int pred_demand), TP_ARGS(rq, p, runtime, pct, pred_demand), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field(unsigned int, runtime ) __field( int, pct ) __field(unsigned int, pred_demand ) __array( u8, bucket, NUM_BUSY_BUCKETS) __field( int, cpu ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->runtime = runtime; __entry->pct = pct; __entry->pred_demand = pred_demand; memcpy(__entry->bucket, p->ravg.busy_buckets, NUM_BUSY_BUCKETS * sizeof(u8)); __entry->cpu = rq->cpu; ), TP_printk("%d (%s): runtime %u pct %d cpu %d pred_demand %u (buckets: %u %u %u %u %u %u %u %u %u %u)", __entry->pid, __entry->comm, __entry->runtime, __entry->pct, __entry->cpu, __entry->pred_demand, __entry->bucket[0], __entry->bucket[1], __entry->bucket[2], __entry->bucket[3],__entry->bucket[4], __entry->bucket[5], __entry->bucket[6], __entry->bucket[7], __entry->bucket[8], __entry->bucket[9]) ); TRACE_EVENT(sched_migration_update_sum, TP_PROTO(struct rq *rq, struct task_struct *p), Loading Loading @@ -444,14 +509,15 @@ TRACE_EVENT(sched_migration_update_sum, TRACE_EVENT(sched_get_busy, TP_PROTO(int cpu, u64 load, u64 nload, int early), TP_PROTO(int cpu, u64 load, u64 nload, u64 pload, int early), TP_ARGS(cpu, load, nload, early), TP_ARGS(cpu, load, nload, pload, early), TP_STRUCT__entry( __field( int, cpu ) __field( u64, load ) __field( u64, nload ) __field( u64, pload ) __field( int, early ) ), Loading @@ -459,33 +525,45 @@ TRACE_EVENT(sched_get_busy, __entry->cpu = cpu; __entry->load = load; __entry->nload = nload; __entry->pload = pload; __entry->early = early; ), TP_printk("cpu %d load %lld new_task_load %lld early %d", __entry->cpu, __entry->load, __entry->nload, __entry->early) TP_printk("cpu %d load %lld new_task_load %lld predicted_load %lld early %d", __entry->cpu, __entry->load, __entry->nload, __entry->pload, __entry->early) ); TRACE_EVENT(sched_freq_alert, TP_PROTO(int cpu, u64 old_load, u64 new_load), TP_PROTO(int cpu, int pd_notif, u64 old_load, u64 new_load, u64 old_pred, u64 new_pred), TP_ARGS(cpu, old_load, new_load), TP_ARGS(cpu, pd_notif, old_load, new_load, old_pred, new_pred), TP_STRUCT__entry( __field( int, cpu ) __field( int, pd_notif ) __field( u64, old_load ) __field( u64, new_load ) __field( u64, old_pred ) __field( u64, new_pred ) ), TP_fast_assign( __entry->cpu = cpu; __entry->pd_notif = pd_notif; __entry->old_load = old_load; __entry->new_load = new_load; __entry->old_pred = old_pred; __entry->new_pred = new_pred; ), TP_printk("cpu %d old_load=%llu new_load=%llu", __entry->cpu, __entry->old_load, __entry->new_load) TP_printk("cpu %d pd_notif=%d old_load=%llu new_load=%llu " "old_pred=%llu new_pred=%llu", __entry->cpu, __entry->pd_notif, __entry->old_load, __entry->new_load, __entry->old_pred, __entry->new_pred) ); #endif /* CONFIG_SCHED_FREQ_INPUT */ Loading kernel/sched/core.c +313 −25 Original line number Diff line number Diff line Loading @@ -1656,8 +1656,6 @@ static __read_mostly unsigned int sched_window_stats_policy = __read_mostly unsigned int sysctl_sched_window_stats_policy = WINDOW_STATS_MAX_RECENT_AVG; __read_mostly unsigned int sysctl_sched_new_task_windows = 5; static __read_mostly unsigned int sched_account_wait_time = 1; __read_mostly unsigned int sysctl_sched_account_wait_time = 1; Loading @@ -1667,6 +1665,8 @@ unsigned int __read_mostly sysctl_sched_enable_colocation = 1; #ifdef CONFIG_SCHED_FREQ_INPUT __read_mostly unsigned int sysctl_sched_new_task_windows = 5; static __read_mostly unsigned int sched_migration_fixup = 1; __read_mostly unsigned int sysctl_sched_migration_fixup = 1; Loading @@ -1686,6 +1686,9 @@ __read_mostly int sysctl_sched_freq_inc_notify = 10 * 1024 * 1024; /* + 10GHz */ __read_mostly int sysctl_sched_freq_dec_notify = 10 * 1024 * 1024; /* - 10GHz */ static __read_mostly unsigned int sched_io_is_busy; __read_mostly unsigned int sysctl_sched_pred_alert_freq = 10 * 1024 * 1024; #endif /* CONFIG_SCHED_FREQ_INPUT */ /* 1 -> use PELT based load stats, 0 -> use window-based load stats */ Loading Loading @@ -1723,6 +1726,15 @@ __read_mostly unsigned int sched_ravg_window = 10000000; /* Temporarily disable window-stats activity on all cpus */ unsigned int __read_mostly sched_disable_window_stats; /* * Major task runtime. If a task runs for more than sched_major_task_runtime * in a window, it's considered to be generating majority of workload * for this window. Prediction could be adjusted for such tasks. */ #ifdef CONFIG_SCHED_FREQ_INPUT __read_mostly unsigned int sched_major_task_runtime = 10000000; #endif static unsigned int sync_cpu; #define EXITING_TASK_MARKER 0xdeaddead Loading Loading @@ -1820,7 +1832,7 @@ static inline unsigned int load_to_freq(struct rq *rq, u64 load) } /* Should scheduler alert governor for changing frequency? */ static int send_notification(struct rq *rq) static int send_notification(struct rq *rq, int check_pred) { unsigned int cur_freq, freq_required; unsigned long flags; Loading @@ -1829,11 +1841,29 @@ static int send_notification(struct rq *rq) if (!sched_enable_hmp) return 0; if (check_pred) { u64 prev = rq->old_busy_time; u64 predicted = rq->hmp_stats.pred_demands_sum; if (rq->cluster->cur_freq == rq->cluster->max_freq) return 0; prev = max(prev, rq->old_estimated_time); if (prev > predicted) return 0; cur_freq = load_to_freq(rq, prev); freq_required = load_to_freq(rq, predicted); if (freq_required < cur_freq + sysctl_sched_pred_alert_freq) return 0; } else { cur_freq = load_to_freq(rq, rq->old_busy_time); freq_required = load_to_freq(rq, rq->prev_runnable_sum); if (nearly_same_freq(cur_freq, freq_required)) return 0; } raw_spin_lock_irqsave(&rq->lock, flags); if (!rq->notifier_sent) { Loading @@ -1846,14 +1876,16 @@ static int send_notification(struct rq *rq) } /* Alert governor if there is a need to change frequency */ void check_for_freq_change(struct rq *rq) void check_for_freq_change(struct rq *rq, bool check_pred) { int cpu = cpu_of(rq); if (!send_notification(rq)) if (!send_notification(rq, check_pred)) return; trace_sched_freq_alert(cpu, rq->old_busy_time, rq->prev_runnable_sum); trace_sched_freq_alert(cpu, check_pred, rq->old_busy_time, rq->prev_runnable_sum, rq->old_estimated_time, rq->hmp_stats.pred_demands_sum); atomic_notifier_call_chain( &load_alert_notifier_head, 0, Loading Loading @@ -1904,6 +1936,223 @@ static inline bool is_new_task(struct task_struct *p) return p->ravg.active_windows < sysctl_sched_new_task_windows; } #define INC_STEP 8 #define DEC_STEP 2 #define CONSISTENT_THRES 16 #define INC_STEP_BIG 16 /* * bucket_increase - update the count of all buckets * * @buckets: array of buckets tracking busy time of a task * @idx: the index of bucket to be incremented * * Each time a complete window finishes, count of bucket that runtime * falls in (@idx) is incremented. Counts of all other buckets are * decayed. The rate of increase and decay could be different based * on current count in the bucket. */ static inline void bucket_increase(u8 *buckets, int idx) { int i, step; for (i = 0; i < NUM_BUSY_BUCKETS; i++) { if (idx != i) { if (buckets[i] > DEC_STEP) buckets[i] -= DEC_STEP; else buckets[i] = 0; } else { step = buckets[i] >= CONSISTENT_THRES ? INC_STEP_BIG : INC_STEP; if (buckets[i] > U8_MAX - step) buckets[i] = U8_MAX; else buckets[i] += step; } } } static inline int busy_to_bucket(u32 normalized_rt) { int bidx; bidx = mult_frac(normalized_rt, NUM_BUSY_BUCKETS, max_task_load()); bidx = min(bidx, NUM_BUSY_BUCKETS - 1); /* * Combine lowest two buckets. The lowest frequency falls into * 2nd bucket and thus keep predicting lowest bucket is not * useful. */ if (!bidx) bidx++; return bidx; } static inline u64 scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq) { return div64_u64(load * (u64)src_freq, (u64)dst_freq); } #define HEAVY_TASK_SKIP 2 #define HEAVY_TASK_SKIP_LIMIT 4 /* * get_pred_busy - calculate predicted demand for a task on runqueue * * @rq: runqueue of task p * @p: task whose prediction is being updated * @start: starting bucket. returned prediction should not be lower than * this bucket. * @runtime: runtime of the task. returned prediction should not be lower * than this runtime. * Note: @start can be derived from @runtime. It's passed in only to * avoid duplicated calculation in some cases. * * A new predicted busy time is returned for task @p based on @runtime * passed in. The function searches through buckets that represent busy * time equal to or bigger than @runtime and attempts to find the bucket to * to use for prediction. Once found, it searches through historical busy * time and returns the latest that falls into the bucket. If no such busy * time exists, it returns the medium of that bucket. */ static u32 get_pred_busy(struct rq *rq, struct task_struct *p, int start, u32 runtime) { int i; u8 *buckets = p->ravg.busy_buckets; u32 *hist = p->ravg.sum_history; u32 dmin, dmax; u64 cur_freq_runtime = 0; int first = NUM_BUSY_BUCKETS, final, skip_to; u32 ret = runtime; /* skip prediction for new tasks due to lack of history */ if (unlikely(is_new_task(p))) goto out; /* find minimal bucket index to pick */ for (i = start; i < NUM_BUSY_BUCKETS; i++) { if (buckets[i]) { first = i; break; } } /* if no higher buckets are filled, predict runtime */ if (first >= NUM_BUSY_BUCKETS) goto out; /* compute the bucket for prediction */ final = first; if (first < HEAVY_TASK_SKIP_LIMIT) { /* compute runtime at current CPU frequency */ cur_freq_runtime = mult_frac(runtime, max_possible_efficiency, rq->cluster->efficiency); cur_freq_runtime = scale_load_to_freq(cur_freq_runtime, max_possible_freq, rq->cluster->cur_freq); /* * if the task runs for majority of the window, try to * pick higher buckets. */ if (cur_freq_runtime >= sched_major_task_runtime) { int next = NUM_BUSY_BUCKETS; /* * if there is a higher bucket that's consistently * hit, don't jump beyond that. */ for (i = start + 1; i <= HEAVY_TASK_SKIP_LIMIT && i < NUM_BUSY_BUCKETS; i++) { if (buckets[i] > CONSISTENT_THRES) { next = i; break; } } skip_to = min(next, start + HEAVY_TASK_SKIP); /* don't jump beyond HEAVY_TASK_SKIP_LIMIT */ skip_to = min(HEAVY_TASK_SKIP_LIMIT, skip_to); /* don't go below first non-empty bucket, if any */ final = max(first, skip_to); } } /* determine demand range for the predicted bucket */ if (final < 2) { /* lowest two buckets are combined */ dmin = 0; final = 1; } else { dmin = mult_frac(final, max_task_load(), NUM_BUSY_BUCKETS); } dmax = mult_frac(final + 1, max_task_load(), NUM_BUSY_BUCKETS); /* * search through runtime history and return first runtime that falls * into the range of predicted bucket. */ for (i = 0; i < sched_ravg_hist_size; i++) { if (hist[i] >= dmin && hist[i] < dmax) { ret = hist[i]; break; } } /* no historical runtime within bucket found, use average of the bin */ if (ret < dmin) ret = (dmin + dmax) / 2; /* * when updating in middle of a window, runtime could be higher * than all recorded history. Always predict at least runtime. */ ret = max(runtime, ret); out: trace_sched_update_pred_demand(rq, p, runtime, mult_frac((unsigned int)cur_freq_runtime, 100, sched_ravg_window), ret); return ret; } static inline u32 calc_pred_demand(struct rq *rq, struct task_struct *p) { if (p->ravg.pred_demand >= p->ravg.curr_window) return p->ravg.pred_demand; return get_pred_busy(rq, p, busy_to_bucket(p->ravg.curr_window), p->ravg.curr_window); } /* * predictive demand of a task is calculated at the window roll-over. * if the task current window busy time exceeds the predicted * demand, update it here to reflect the task needs. */ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) { u32 new, old; if (is_idle_task(p) || exiting_task(p)) return; if (event != PUT_PREV_TASK && event != TASK_UPDATE && (!sched_freq_account_wait_time || (event != TASK_MIGRATE && event != PICK_NEXT_TASK))) return; new = calc_pred_demand(rq, p); old = p->ravg.pred_demand; if (old >= new) return; if (task_on_rq_queued(p) && (!task_has_dl_policy(p) || !p->dl.dl_throttled)) p->sched_class->fixup_hmp_sched_stats(rq, p, p->ravg.demand, new); p->ravg.pred_demand = new; } /* * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum) */ Loading Loading @@ -2239,13 +2488,40 @@ fail: spin_unlock_irqrestore(&freq_max_load_lock, flags); return ret; } static inline u32 predict_and_update_buckets(struct rq *rq, struct task_struct *p, u32 runtime) { int bidx; u32 pred_demand; bidx = busy_to_bucket(runtime); pred_demand = get_pred_busy(rq, p, bidx, runtime); bucket_increase(p->ravg.busy_buckets, bidx); return pred_demand; } #define assign_ravg_pred_demand(x) (p->ravg.pred_demand = x) #else /* CONFIG_SCHED_FREQ_INPUT */ static inline void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) { } static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) { } static inline u32 predict_and_update_buckets(struct rq *rq, struct task_struct *p, u32 runtime) { return 0; } #define assign_ravg_pred_demand(x) #endif /* CONFIG_SCHED_FREQ_INPUT */ static int account_busy_for_task_demand(struct task_struct *p, int event) Loading Loading @@ -2277,7 +2553,7 @@ static void update_history(struct rq *rq, struct task_struct *p, { u32 *hist = &p->ravg.sum_history[0]; int ridx, widx; u32 max = 0, avg, demand; u32 max = 0, avg, demand, pred_demand; u64 sum = 0; /* Ignore windows where task had no activity */ Loading Loading @@ -2314,6 +2590,7 @@ static void update_history(struct rq *rq, struct task_struct *p, else demand = max(avg, runtime); } pred_demand = predict_and_update_buckets(rq, p, runtime); /* * A throttled deadline sched class task gets dequeued without Loading @@ -2322,9 +2599,11 @@ static void update_history(struct rq *rq, struct task_struct *p, */ if (task_on_rq_queued(p) && (!task_has_dl_policy(p) || !p->dl.dl_throttled)) p->sched_class->fixup_hmp_sched_stats(rq, p, demand); p->sched_class->fixup_hmp_sched_stats(rq, p, demand, pred_demand); p->ravg.demand = demand; assign_ravg_pred_demand(pred_demand); done: trace_sched_update_history(rq, p, runtime, samples, event); Loading Loading @@ -2457,7 +2736,7 @@ static void update_task_ravg(struct task_struct *p, struct rq *rq, update_task_demand(p, rq, event, wallclock); update_cpu_busy_time(p, rq, event, wallclock, irqtime); update_task_pred_demand(rq, p, event); done: trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime); Loading Loading @@ -2733,12 +3012,6 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size) #ifdef CONFIG_SCHED_FREQ_INPUT static inline u64 scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq) { return div64_u64(load * (u64)src_freq, (u64)dst_freq); } void sched_get_cpus_busy(struct sched_load *busy, const struct cpumask *query_cpus) { Loading @@ -2746,6 +3019,7 @@ void sched_get_cpus_busy(struct sched_load *busy, struct rq *rq; const int cpus = cpumask_weight(query_cpus); u64 load[cpus], nload[cpus]; u64 pload[cpus]; unsigned int cur_freq[cpus], max_freq[cpus]; int notifier_sent[cpus]; int early_detection[cpus]; Loading Loading @@ -2773,6 +3047,8 @@ void sched_get_cpus_busy(struct sched_load *busy, sched_ktime_clock(), 0); load[i] = rq->old_busy_time = rq->prev_runnable_sum; nload[i] = rq->nt_prev_runnable_sum; pload[i] = rq->hmp_stats.pred_demands_sum; rq->old_estimated_time = pload[i]; /* * Scale load in reference to cluster max_possible_freq. * Loading @@ -2781,6 +3057,7 @@ void sched_get_cpus_busy(struct sched_load *busy, */ load[i] = scale_load_to_cpu(load[i], cpu); nload[i] = scale_load_to_cpu(nload[i], cpu); pload[i] = scale_load_to_cpu(pload[i], cpu); notifier_sent[i] = rq->notifier_sent; early_detection[i] = (rq->ed_task != NULL); Loading Loading @@ -2825,13 +3102,18 @@ void sched_get_cpus_busy(struct sched_load *busy, nload[i] = scale_load_to_freq(nload[i], max_freq[i], cpu_max_possible_freq(cpu)); } pload[i] = scale_load_to_freq(pload[i], max_freq[i], rq->cluster->max_possible_freq); busy[i].prev_load = div64_u64(load[i], NSEC_PER_USEC); busy[i].new_task_load = div64_u64(nload[i], NSEC_PER_USEC); busy[i].predicted_load = div64_u64(pload[i], NSEC_PER_USEC); exit_early: trace_sched_get_busy(cpu, busy[i].prev_load, busy[i].new_task_load, early_detection[i]); busy[i].new_task_load, busy[i].predicted_load, early_detection[i]); i++; } } Loading Loading @@ -4190,10 +4472,12 @@ out: if (freq_notif_allowed) { if (!same_freq_domain(src_cpu, cpu)) { check_for_freq_change(cpu_rq(cpu)); check_for_freq_change(cpu_rq(src_cpu)); check_for_freq_change(cpu_rq(cpu), false); check_for_freq_change(cpu_rq(src_cpu), false); } else if (heavy_task) { check_for_freq_change(cpu_rq(cpu)); check_for_freq_change(cpu_rq(cpu), false); } else if (success) { check_for_freq_change(cpu_rq(cpu), true); } } Loading Loading @@ -7383,8 +7667,10 @@ fail: raw_spin_unlock(&rq->lock); raw_spin_unlock(&p->pi_lock); if (moved && !same_freq_domain(src_cpu, dest_cpu)) { check_for_freq_change(cpu_rq(src_cpu)); check_for_freq_change(cpu_rq(dest_cpu)); check_for_freq_change(cpu_rq(src_cpu), false); check_for_freq_change(cpu_rq(dest_cpu), false); } else if (moved) { check_for_freq_change(cpu_rq(dest_cpu), true); } if (moved && task_notify_on_migrate(p)) { struct migration_notify_data mnd; Loading Loading @@ -9766,7 +10052,9 @@ void __init sched_init(void) rq->curr_runnable_sum = rq->prev_runnable_sum = 0; rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0; rq->old_busy_time = 0; rq->old_estimated_time = 0; rq->notifier_sent = 0; rq->hmp_stats.pred_demands_sum = 0; #endif #endif rq->max_idle_balance_cost = sysctl_sched_migration_cost; Loading kernel/sched/deadline.c +4 −2 Original line number Diff line number Diff line Loading @@ -749,11 +749,13 @@ fixup_hmp_sched_stats_dl(struct rq *rq, struct task_struct *p, #else static void fixup_hmp_sched_stats_dl(struct rq *rq, struct task_struct *p, u32 new_task_load) u32 new_task_load, u32 new_pred_demand) { s64 task_load_delta = (s64)new_task_load - task_load(p); s64 pred_demand_delta = PRED_DEMAND_DELTA; fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta); fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta, pred_demand_delta); } #endif Loading Loading
include/linux/sched.h +9 −0 Original line number Diff line number Diff line Loading @@ -1137,6 +1137,7 @@ struct sched_statistics { #endif #define RAVG_HIST_SIZE_MAX 5 #define NUM_BUSY_BUCKETS 10 /* ravg represents frequency scaled cpu-demand of tasks */ struct ravg { Loading @@ -1161,6 +1162,11 @@ struct ravg { * * 'prev_window' represents task's contribution to cpu busy time * statistics (rq->prev_runnable_sum) in previous window * * 'pred_demand' represents task's current predicted cpu busy time * * 'busy_buckets' groups historical busy time into different buckets * used for prediction */ u64 mark_start; u32 sum, demand; Loading @@ -1168,6 +1174,8 @@ struct ravg { #ifdef CONFIG_SCHED_FREQ_INPUT u32 curr_window, prev_window; u16 active_windows; u32 pred_demand; u8 busy_buckets[NUM_BUSY_BUCKETS]; #endif }; Loading Loading @@ -1972,6 +1980,7 @@ extern int task_free_unregister(struct notifier_block *n); struct sched_load { unsigned long prev_load; unsigned long new_task_load; unsigned long predicted_load; }; #if defined(CONFIG_SCHED_FREQ_INPUT) Loading
include/linux/sched/sysctl.h +1 −0 Original line number Diff line number Diff line Loading @@ -77,6 +77,7 @@ extern unsigned int sysctl_sched_enable_colocation; extern unsigned int sysctl_sched_restrict_cluster_spill; #if defined(CONFIG_SCHED_FREQ_INPUT) extern unsigned int sysctl_sched_new_task_windows; extern unsigned int sysctl_sched_pred_alert_freq; #endif #endif Loading
include/trace/events/sched.h +92 −14 Original line number Diff line number Diff line Loading @@ -77,6 +77,9 @@ TRACE_EVENT(sched_enq_deq_task, __field(unsigned int, cpus_allowed ) #ifdef CONFIG_SCHED_HMP __field(unsigned int, demand ) #ifdef CONFIG_SCHED_FREQ_INPUT __field(unsigned int, pred_demand ) #endif #endif ), Loading @@ -92,12 +95,18 @@ TRACE_EVENT(sched_enq_deq_task, __entry->cpus_allowed = cpus_allowed; #ifdef CONFIG_SCHED_HMP __entry->demand = p->ravg.demand; #ifdef CONFIG_SCHED_FREQ_INPUT __entry->pred_demand = p->ravg.pred_demand; #endif #endif ), TP_printk("cpu=%d %s comm=%s pid=%d prio=%d nr_running=%u cpu_load=%lu rt_nr_running=%u affine=%x" #ifdef CONFIG_SCHED_HMP " demand=%u" #ifdef CONFIG_SCHED_FREQ_INPUT " pred_demand=%u" #endif #endif , __entry->cpu, __entry->enqueue ? "enqueue" : "dequeue", Loading @@ -106,6 +115,9 @@ TRACE_EVENT(sched_enq_deq_task, __entry->cpu_load, __entry->rt_nr_running, __entry->cpus_allowed #ifdef CONFIG_SCHED_HMP , __entry->demand #ifdef CONFIG_SCHED_FREQ_INPUT , __entry->pred_demand #endif #endif ) ); Loading Loading @@ -282,6 +294,7 @@ TRACE_EVENT(sched_update_task_ravg, __field(unsigned int, sum ) __field( int, cpu ) #ifdef CONFIG_SCHED_FREQ_INPUT __field(unsigned int, pred_demand ) __field( u64, cs ) __field( u64, ps ) __field( u32, curr_window ) Loading @@ -308,6 +321,7 @@ TRACE_EVENT(sched_update_task_ravg, __entry->sum = p->ravg.sum; __entry->irqtime = irqtime; #ifdef CONFIG_SCHED_FREQ_INPUT __entry->pred_demand = p->ravg.pred_demand; __entry->cs = rq->curr_runnable_sum; __entry->ps = rq->prev_runnable_sum; __entry->curr_window = p->ravg.curr_window; Loading @@ -320,7 +334,7 @@ TRACE_EVENT(sched_update_task_ravg, TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu" #ifdef CONFIG_SCHED_FREQ_INPUT " cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u" " pred_demand %u cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u" #endif , __entry->wallclock, __entry->win_start, __entry->delta, task_event_names[__entry->evt], __entry->cpu, Loading @@ -329,8 +343,8 @@ TRACE_EVENT(sched_update_task_ravg, __entry->delta_m, __entry->demand, __entry->sum, __entry->irqtime #ifdef CONFIG_SCHED_FREQ_INPUT , __entry->cs, __entry->ps, __entry->curr_window, __entry->prev_window, , __entry->pred_demand, __entry->cs, __entry->ps, __entry->curr_window, __entry->prev_window, __entry->nt_cs, __entry->nt_ps, __entry->active_windows #endif Loading @@ -351,6 +365,9 @@ TRACE_EVENT(sched_update_history, __field( int, samples ) __field(enum task_event, evt ) __field(unsigned int, demand ) #ifdef CONFIG_SCHED_FREQ_INPUT __field(unsigned int, pred_demand ) #endif __array( u32, hist, RAVG_HIST_SIZE_MAX) __field(unsigned int, nr_big_tasks ) __field( int, cpu ) Loading @@ -363,18 +380,29 @@ TRACE_EVENT(sched_update_history, __entry->samples = samples; __entry->evt = evt; __entry->demand = p->ravg.demand; #ifdef CONFIG_SCHED_FREQ_INPUT __entry->pred_demand = p->ravg.pred_demand; #endif memcpy(__entry->hist, p->ravg.sum_history, RAVG_HIST_SIZE_MAX * sizeof(u32)); __entry->nr_big_tasks = rq->hmp_stats.nr_big_tasks; __entry->cpu = rq->cpu; ), TP_printk("%d (%s): runtime %u samples %d event %s demand %u (hist: %u %u %u %u %u) cpu %d nr_big %u", TP_printk("%d (%s): runtime %u samples %d event %s demand %u" #ifdef CONFIG_SCHED_FREQ_INPUT " pred_demand %u" #endif " (hist: %u %u %u %u %u) cpu %d nr_big %u", __entry->pid, __entry->comm, __entry->runtime, __entry->samples, task_event_names[__entry->evt], __entry->demand, __entry->hist[0], __entry->hist[1], __entry->hist[2], __entry->hist[3], __entry->demand, #ifdef CONFIG_SCHED_FREQ_INPUT __entry->pred_demand, #endif __entry->hist[0], __entry->hist[1], __entry->hist[2], __entry->hist[3], __entry->hist[4], __entry->cpu, __entry->nr_big_tasks) ); Loading Loading @@ -413,6 +441,43 @@ TRACE_EVENT(sched_reset_all_window_stats, #ifdef CONFIG_SCHED_FREQ_INPUT TRACE_EVENT(sched_update_pred_demand, TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int pct, unsigned int pred_demand), TP_ARGS(rq, p, runtime, pct, pred_demand), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field(unsigned int, runtime ) __field( int, pct ) __field(unsigned int, pred_demand ) __array( u8, bucket, NUM_BUSY_BUCKETS) __field( int, cpu ) ), TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->runtime = runtime; __entry->pct = pct; __entry->pred_demand = pred_demand; memcpy(__entry->bucket, p->ravg.busy_buckets, NUM_BUSY_BUCKETS * sizeof(u8)); __entry->cpu = rq->cpu; ), TP_printk("%d (%s): runtime %u pct %d cpu %d pred_demand %u (buckets: %u %u %u %u %u %u %u %u %u %u)", __entry->pid, __entry->comm, __entry->runtime, __entry->pct, __entry->cpu, __entry->pred_demand, __entry->bucket[0], __entry->bucket[1], __entry->bucket[2], __entry->bucket[3],__entry->bucket[4], __entry->bucket[5], __entry->bucket[6], __entry->bucket[7], __entry->bucket[8], __entry->bucket[9]) ); TRACE_EVENT(sched_migration_update_sum, TP_PROTO(struct rq *rq, struct task_struct *p), Loading Loading @@ -444,14 +509,15 @@ TRACE_EVENT(sched_migration_update_sum, TRACE_EVENT(sched_get_busy, TP_PROTO(int cpu, u64 load, u64 nload, int early), TP_PROTO(int cpu, u64 load, u64 nload, u64 pload, int early), TP_ARGS(cpu, load, nload, early), TP_ARGS(cpu, load, nload, pload, early), TP_STRUCT__entry( __field( int, cpu ) __field( u64, load ) __field( u64, nload ) __field( u64, pload ) __field( int, early ) ), Loading @@ -459,33 +525,45 @@ TRACE_EVENT(sched_get_busy, __entry->cpu = cpu; __entry->load = load; __entry->nload = nload; __entry->pload = pload; __entry->early = early; ), TP_printk("cpu %d load %lld new_task_load %lld early %d", __entry->cpu, __entry->load, __entry->nload, __entry->early) TP_printk("cpu %d load %lld new_task_load %lld predicted_load %lld early %d", __entry->cpu, __entry->load, __entry->nload, __entry->pload, __entry->early) ); TRACE_EVENT(sched_freq_alert, TP_PROTO(int cpu, u64 old_load, u64 new_load), TP_PROTO(int cpu, int pd_notif, u64 old_load, u64 new_load, u64 old_pred, u64 new_pred), TP_ARGS(cpu, old_load, new_load), TP_ARGS(cpu, pd_notif, old_load, new_load, old_pred, new_pred), TP_STRUCT__entry( __field( int, cpu ) __field( int, pd_notif ) __field( u64, old_load ) __field( u64, new_load ) __field( u64, old_pred ) __field( u64, new_pred ) ), TP_fast_assign( __entry->cpu = cpu; __entry->pd_notif = pd_notif; __entry->old_load = old_load; __entry->new_load = new_load; __entry->old_pred = old_pred; __entry->new_pred = new_pred; ), TP_printk("cpu %d old_load=%llu new_load=%llu", __entry->cpu, __entry->old_load, __entry->new_load) TP_printk("cpu %d pd_notif=%d old_load=%llu new_load=%llu " "old_pred=%llu new_pred=%llu", __entry->cpu, __entry->pd_notif, __entry->old_load, __entry->new_load, __entry->old_pred, __entry->new_pred) ); #endif /* CONFIG_SCHED_FREQ_INPUT */ Loading
kernel/sched/core.c +313 −25 Original line number Diff line number Diff line Loading @@ -1656,8 +1656,6 @@ static __read_mostly unsigned int sched_window_stats_policy = __read_mostly unsigned int sysctl_sched_window_stats_policy = WINDOW_STATS_MAX_RECENT_AVG; __read_mostly unsigned int sysctl_sched_new_task_windows = 5; static __read_mostly unsigned int sched_account_wait_time = 1; __read_mostly unsigned int sysctl_sched_account_wait_time = 1; Loading @@ -1667,6 +1665,8 @@ unsigned int __read_mostly sysctl_sched_enable_colocation = 1; #ifdef CONFIG_SCHED_FREQ_INPUT __read_mostly unsigned int sysctl_sched_new_task_windows = 5; static __read_mostly unsigned int sched_migration_fixup = 1; __read_mostly unsigned int sysctl_sched_migration_fixup = 1; Loading @@ -1686,6 +1686,9 @@ __read_mostly int sysctl_sched_freq_inc_notify = 10 * 1024 * 1024; /* + 10GHz */ __read_mostly int sysctl_sched_freq_dec_notify = 10 * 1024 * 1024; /* - 10GHz */ static __read_mostly unsigned int sched_io_is_busy; __read_mostly unsigned int sysctl_sched_pred_alert_freq = 10 * 1024 * 1024; #endif /* CONFIG_SCHED_FREQ_INPUT */ /* 1 -> use PELT based load stats, 0 -> use window-based load stats */ Loading Loading @@ -1723,6 +1726,15 @@ __read_mostly unsigned int sched_ravg_window = 10000000; /* Temporarily disable window-stats activity on all cpus */ unsigned int __read_mostly sched_disable_window_stats; /* * Major task runtime. If a task runs for more than sched_major_task_runtime * in a window, it's considered to be generating majority of workload * for this window. Prediction could be adjusted for such tasks. */ #ifdef CONFIG_SCHED_FREQ_INPUT __read_mostly unsigned int sched_major_task_runtime = 10000000; #endif static unsigned int sync_cpu; #define EXITING_TASK_MARKER 0xdeaddead Loading Loading @@ -1820,7 +1832,7 @@ static inline unsigned int load_to_freq(struct rq *rq, u64 load) } /* Should scheduler alert governor for changing frequency? */ static int send_notification(struct rq *rq) static int send_notification(struct rq *rq, int check_pred) { unsigned int cur_freq, freq_required; unsigned long flags; Loading @@ -1829,11 +1841,29 @@ static int send_notification(struct rq *rq) if (!sched_enable_hmp) return 0; if (check_pred) { u64 prev = rq->old_busy_time; u64 predicted = rq->hmp_stats.pred_demands_sum; if (rq->cluster->cur_freq == rq->cluster->max_freq) return 0; prev = max(prev, rq->old_estimated_time); if (prev > predicted) return 0; cur_freq = load_to_freq(rq, prev); freq_required = load_to_freq(rq, predicted); if (freq_required < cur_freq + sysctl_sched_pred_alert_freq) return 0; } else { cur_freq = load_to_freq(rq, rq->old_busy_time); freq_required = load_to_freq(rq, rq->prev_runnable_sum); if (nearly_same_freq(cur_freq, freq_required)) return 0; } raw_spin_lock_irqsave(&rq->lock, flags); if (!rq->notifier_sent) { Loading @@ -1846,14 +1876,16 @@ static int send_notification(struct rq *rq) } /* Alert governor if there is a need to change frequency */ void check_for_freq_change(struct rq *rq) void check_for_freq_change(struct rq *rq, bool check_pred) { int cpu = cpu_of(rq); if (!send_notification(rq)) if (!send_notification(rq, check_pred)) return; trace_sched_freq_alert(cpu, rq->old_busy_time, rq->prev_runnable_sum); trace_sched_freq_alert(cpu, check_pred, rq->old_busy_time, rq->prev_runnable_sum, rq->old_estimated_time, rq->hmp_stats.pred_demands_sum); atomic_notifier_call_chain( &load_alert_notifier_head, 0, Loading Loading @@ -1904,6 +1936,223 @@ static inline bool is_new_task(struct task_struct *p) return p->ravg.active_windows < sysctl_sched_new_task_windows; } #define INC_STEP 8 #define DEC_STEP 2 #define CONSISTENT_THRES 16 #define INC_STEP_BIG 16 /* * bucket_increase - update the count of all buckets * * @buckets: array of buckets tracking busy time of a task * @idx: the index of bucket to be incremented * * Each time a complete window finishes, count of bucket that runtime * falls in (@idx) is incremented. Counts of all other buckets are * decayed. The rate of increase and decay could be different based * on current count in the bucket. */ static inline void bucket_increase(u8 *buckets, int idx) { int i, step; for (i = 0; i < NUM_BUSY_BUCKETS; i++) { if (idx != i) { if (buckets[i] > DEC_STEP) buckets[i] -= DEC_STEP; else buckets[i] = 0; } else { step = buckets[i] >= CONSISTENT_THRES ? INC_STEP_BIG : INC_STEP; if (buckets[i] > U8_MAX - step) buckets[i] = U8_MAX; else buckets[i] += step; } } } static inline int busy_to_bucket(u32 normalized_rt) { int bidx; bidx = mult_frac(normalized_rt, NUM_BUSY_BUCKETS, max_task_load()); bidx = min(bidx, NUM_BUSY_BUCKETS - 1); /* * Combine lowest two buckets. The lowest frequency falls into * 2nd bucket and thus keep predicting lowest bucket is not * useful. */ if (!bidx) bidx++; return bidx; } static inline u64 scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq) { return div64_u64(load * (u64)src_freq, (u64)dst_freq); } #define HEAVY_TASK_SKIP 2 #define HEAVY_TASK_SKIP_LIMIT 4 /* * get_pred_busy - calculate predicted demand for a task on runqueue * * @rq: runqueue of task p * @p: task whose prediction is being updated * @start: starting bucket. returned prediction should not be lower than * this bucket. * @runtime: runtime of the task. returned prediction should not be lower * than this runtime. * Note: @start can be derived from @runtime. It's passed in only to * avoid duplicated calculation in some cases. * * A new predicted busy time is returned for task @p based on @runtime * passed in. The function searches through buckets that represent busy * time equal to or bigger than @runtime and attempts to find the bucket to * to use for prediction. Once found, it searches through historical busy * time and returns the latest that falls into the bucket. If no such busy * time exists, it returns the medium of that bucket. */ static u32 get_pred_busy(struct rq *rq, struct task_struct *p, int start, u32 runtime) { int i; u8 *buckets = p->ravg.busy_buckets; u32 *hist = p->ravg.sum_history; u32 dmin, dmax; u64 cur_freq_runtime = 0; int first = NUM_BUSY_BUCKETS, final, skip_to; u32 ret = runtime; /* skip prediction for new tasks due to lack of history */ if (unlikely(is_new_task(p))) goto out; /* find minimal bucket index to pick */ for (i = start; i < NUM_BUSY_BUCKETS; i++) { if (buckets[i]) { first = i; break; } } /* if no higher buckets are filled, predict runtime */ if (first >= NUM_BUSY_BUCKETS) goto out; /* compute the bucket for prediction */ final = first; if (first < HEAVY_TASK_SKIP_LIMIT) { /* compute runtime at current CPU frequency */ cur_freq_runtime = mult_frac(runtime, max_possible_efficiency, rq->cluster->efficiency); cur_freq_runtime = scale_load_to_freq(cur_freq_runtime, max_possible_freq, rq->cluster->cur_freq); /* * if the task runs for majority of the window, try to * pick higher buckets. */ if (cur_freq_runtime >= sched_major_task_runtime) { int next = NUM_BUSY_BUCKETS; /* * if there is a higher bucket that's consistently * hit, don't jump beyond that. */ for (i = start + 1; i <= HEAVY_TASK_SKIP_LIMIT && i < NUM_BUSY_BUCKETS; i++) { if (buckets[i] > CONSISTENT_THRES) { next = i; break; } } skip_to = min(next, start + HEAVY_TASK_SKIP); /* don't jump beyond HEAVY_TASK_SKIP_LIMIT */ skip_to = min(HEAVY_TASK_SKIP_LIMIT, skip_to); /* don't go below first non-empty bucket, if any */ final = max(first, skip_to); } } /* determine demand range for the predicted bucket */ if (final < 2) { /* lowest two buckets are combined */ dmin = 0; final = 1; } else { dmin = mult_frac(final, max_task_load(), NUM_BUSY_BUCKETS); } dmax = mult_frac(final + 1, max_task_load(), NUM_BUSY_BUCKETS); /* * search through runtime history and return first runtime that falls * into the range of predicted bucket. */ for (i = 0; i < sched_ravg_hist_size; i++) { if (hist[i] >= dmin && hist[i] < dmax) { ret = hist[i]; break; } } /* no historical runtime within bucket found, use average of the bin */ if (ret < dmin) ret = (dmin + dmax) / 2; /* * when updating in middle of a window, runtime could be higher * than all recorded history. Always predict at least runtime. */ ret = max(runtime, ret); out: trace_sched_update_pred_demand(rq, p, runtime, mult_frac((unsigned int)cur_freq_runtime, 100, sched_ravg_window), ret); return ret; } static inline u32 calc_pred_demand(struct rq *rq, struct task_struct *p) { if (p->ravg.pred_demand >= p->ravg.curr_window) return p->ravg.pred_demand; return get_pred_busy(rq, p, busy_to_bucket(p->ravg.curr_window), p->ravg.curr_window); } /* * predictive demand of a task is calculated at the window roll-over. * if the task current window busy time exceeds the predicted * demand, update it here to reflect the task needs. */ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) { u32 new, old; if (is_idle_task(p) || exiting_task(p)) return; if (event != PUT_PREV_TASK && event != TASK_UPDATE && (!sched_freq_account_wait_time || (event != TASK_MIGRATE && event != PICK_NEXT_TASK))) return; new = calc_pred_demand(rq, p); old = p->ravg.pred_demand; if (old >= new) return; if (task_on_rq_queued(p) && (!task_has_dl_policy(p) || !p->dl.dl_throttled)) p->sched_class->fixup_hmp_sched_stats(rq, p, p->ravg.demand, new); p->ravg.pred_demand = new; } /* * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum) */ Loading Loading @@ -2239,13 +2488,40 @@ fail: spin_unlock_irqrestore(&freq_max_load_lock, flags); return ret; } static inline u32 predict_and_update_buckets(struct rq *rq, struct task_struct *p, u32 runtime) { int bidx; u32 pred_demand; bidx = busy_to_bucket(runtime); pred_demand = get_pred_busy(rq, p, bidx, runtime); bucket_increase(p->ravg.busy_buckets, bidx); return pred_demand; } #define assign_ravg_pred_demand(x) (p->ravg.pred_demand = x) #else /* CONFIG_SCHED_FREQ_INPUT */ static inline void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) { } static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) { } static inline u32 predict_and_update_buckets(struct rq *rq, struct task_struct *p, u32 runtime) { return 0; } #define assign_ravg_pred_demand(x) #endif /* CONFIG_SCHED_FREQ_INPUT */ static int account_busy_for_task_demand(struct task_struct *p, int event) Loading Loading @@ -2277,7 +2553,7 @@ static void update_history(struct rq *rq, struct task_struct *p, { u32 *hist = &p->ravg.sum_history[0]; int ridx, widx; u32 max = 0, avg, demand; u32 max = 0, avg, demand, pred_demand; u64 sum = 0; /* Ignore windows where task had no activity */ Loading Loading @@ -2314,6 +2590,7 @@ static void update_history(struct rq *rq, struct task_struct *p, else demand = max(avg, runtime); } pred_demand = predict_and_update_buckets(rq, p, runtime); /* * A throttled deadline sched class task gets dequeued without Loading @@ -2322,9 +2599,11 @@ static void update_history(struct rq *rq, struct task_struct *p, */ if (task_on_rq_queued(p) && (!task_has_dl_policy(p) || !p->dl.dl_throttled)) p->sched_class->fixup_hmp_sched_stats(rq, p, demand); p->sched_class->fixup_hmp_sched_stats(rq, p, demand, pred_demand); p->ravg.demand = demand; assign_ravg_pred_demand(pred_demand); done: trace_sched_update_history(rq, p, runtime, samples, event); Loading Loading @@ -2457,7 +2736,7 @@ static void update_task_ravg(struct task_struct *p, struct rq *rq, update_task_demand(p, rq, event, wallclock); update_cpu_busy_time(p, rq, event, wallclock, irqtime); update_task_pred_demand(rq, p, event); done: trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime); Loading Loading @@ -2733,12 +3012,6 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size) #ifdef CONFIG_SCHED_FREQ_INPUT static inline u64 scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq) { return div64_u64(load * (u64)src_freq, (u64)dst_freq); } void sched_get_cpus_busy(struct sched_load *busy, const struct cpumask *query_cpus) { Loading @@ -2746,6 +3019,7 @@ void sched_get_cpus_busy(struct sched_load *busy, struct rq *rq; const int cpus = cpumask_weight(query_cpus); u64 load[cpus], nload[cpus]; u64 pload[cpus]; unsigned int cur_freq[cpus], max_freq[cpus]; int notifier_sent[cpus]; int early_detection[cpus]; Loading Loading @@ -2773,6 +3047,8 @@ void sched_get_cpus_busy(struct sched_load *busy, sched_ktime_clock(), 0); load[i] = rq->old_busy_time = rq->prev_runnable_sum; nload[i] = rq->nt_prev_runnable_sum; pload[i] = rq->hmp_stats.pred_demands_sum; rq->old_estimated_time = pload[i]; /* * Scale load in reference to cluster max_possible_freq. * Loading @@ -2781,6 +3057,7 @@ void sched_get_cpus_busy(struct sched_load *busy, */ load[i] = scale_load_to_cpu(load[i], cpu); nload[i] = scale_load_to_cpu(nload[i], cpu); pload[i] = scale_load_to_cpu(pload[i], cpu); notifier_sent[i] = rq->notifier_sent; early_detection[i] = (rq->ed_task != NULL); Loading Loading @@ -2825,13 +3102,18 @@ void sched_get_cpus_busy(struct sched_load *busy, nload[i] = scale_load_to_freq(nload[i], max_freq[i], cpu_max_possible_freq(cpu)); } pload[i] = scale_load_to_freq(pload[i], max_freq[i], rq->cluster->max_possible_freq); busy[i].prev_load = div64_u64(load[i], NSEC_PER_USEC); busy[i].new_task_load = div64_u64(nload[i], NSEC_PER_USEC); busy[i].predicted_load = div64_u64(pload[i], NSEC_PER_USEC); exit_early: trace_sched_get_busy(cpu, busy[i].prev_load, busy[i].new_task_load, early_detection[i]); busy[i].new_task_load, busy[i].predicted_load, early_detection[i]); i++; } } Loading Loading @@ -4190,10 +4472,12 @@ out: if (freq_notif_allowed) { if (!same_freq_domain(src_cpu, cpu)) { check_for_freq_change(cpu_rq(cpu)); check_for_freq_change(cpu_rq(src_cpu)); check_for_freq_change(cpu_rq(cpu), false); check_for_freq_change(cpu_rq(src_cpu), false); } else if (heavy_task) { check_for_freq_change(cpu_rq(cpu)); check_for_freq_change(cpu_rq(cpu), false); } else if (success) { check_for_freq_change(cpu_rq(cpu), true); } } Loading Loading @@ -7383,8 +7667,10 @@ fail: raw_spin_unlock(&rq->lock); raw_spin_unlock(&p->pi_lock); if (moved && !same_freq_domain(src_cpu, dest_cpu)) { check_for_freq_change(cpu_rq(src_cpu)); check_for_freq_change(cpu_rq(dest_cpu)); check_for_freq_change(cpu_rq(src_cpu), false); check_for_freq_change(cpu_rq(dest_cpu), false); } else if (moved) { check_for_freq_change(cpu_rq(dest_cpu), true); } if (moved && task_notify_on_migrate(p)) { struct migration_notify_data mnd; Loading Loading @@ -9766,7 +10052,9 @@ void __init sched_init(void) rq->curr_runnable_sum = rq->prev_runnable_sum = 0; rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0; rq->old_busy_time = 0; rq->old_estimated_time = 0; rq->notifier_sent = 0; rq->hmp_stats.pred_demands_sum = 0; #endif #endif rq->max_idle_balance_cost = sysctl_sched_migration_cost; Loading
kernel/sched/deadline.c +4 −2 Original line number Diff line number Diff line Loading @@ -749,11 +749,13 @@ fixup_hmp_sched_stats_dl(struct rq *rq, struct task_struct *p, #else static void fixup_hmp_sched_stats_dl(struct rq *rq, struct task_struct *p, u32 new_task_load) u32 new_task_load, u32 new_pred_demand) { s64 task_load_delta = (s64)new_task_load - task_load(p); s64 pred_demand_delta = PRED_DEMAND_DELTA; fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta); fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta, pred_demand_delta); } #endif Loading