Loading include/linux/sched.h +12 −0 Original line number Diff line number Diff line Loading @@ -1330,6 +1330,7 @@ struct task_struct { #endif struct related_thread_group *grp; struct list_head grp_list; u64 cpu_cycles; #endif #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; Loading Loading @@ -2216,6 +2217,8 @@ extern void sched_set_cpu_cstate(int cpu, int cstate, int wakeup_energy, int wakeup_latency); extern void sched_set_cluster_dstate(const cpumask_t *cluster_cpus, int dstate, int wakeup_energy, int wakeup_latency); extern void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax); #ifdef CONFIG_SCHED_QHMP extern int sched_set_cpu_prefer_idle(int cpu, int prefer_idle); extern int sched_get_cpu_prefer_idle(int cpu); Loading @@ -2242,6 +2245,9 @@ static inline void sched_set_cluster_dstate(const cpumask_t *cluster_cpus, int dstate, int wakeup_energy, int wakeup_latency) { } static inline void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax) { } #endif #ifdef CONFIG_NO_HZ_COMMON Loading Loading @@ -3222,4 +3228,10 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } struct cpu_cycle_counter_cb { u64 (*get_cpu_cycle_counter)(int cpu); u32 (*get_cpu_cycles_max_per_us)(int cpu); }; int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb); #endif include/trace/events/sched.h +37 −7 Original line number Diff line number Diff line Loading @@ -206,7 +206,6 @@ DECLARE_EVENT_CLASS(sched_cpu_load, __field(unsigned int, capacity ) __field( u64, cumulative_runnable_avg ) __field( u64, irqload ) __field(unsigned int, cur_freq ) __field(unsigned int, max_freq ) __field(unsigned int, power_cost ) __field( int, cstate ) Loading @@ -223,7 +222,6 @@ DECLARE_EVENT_CLASS(sched_cpu_load, __entry->capacity = cpu_capacity(rq->cpu); __entry->cumulative_runnable_avg = rq->hmp_stats.cumulative_runnable_avg; __entry->irqload = irqload; __entry->cur_freq = 
cpu_cur_freq(rq->cpu); __entry->max_freq = cpu_max_freq(rq->cpu); __entry->power_cost = power_cost; __entry->cstate = rq->cstate; Loading @@ -231,10 +229,10 @@ DECLARE_EVENT_CLASS(sched_cpu_load, __entry->temp = temp; ), TP_printk("cpu %u idle %d nr_run %u nr_big %u lsf %u capacity %u cr_avg %llu irqload %llu fcur %u fmax %u power_cost %u cstate %d dstate %d temp %d", TP_printk("cpu %u idle %d nr_run %u nr_big %u lsf %u capacity %u cr_avg %llu irqload %llu fmax %u power_cost %u cstate %d dstate %d temp %d", __entry->cpu, __entry->idle, __entry->nr_running, __entry->nr_big_tasks, __entry->load_scale_factor, __entry->capacity, __entry->cumulative_runnable_avg, __entry->irqload, __entry->cur_freq, __entry->cumulative_runnable_avg, __entry->irqload, __entry->max_freq, __entry->power_cost, __entry->cstate, __entry->dstate, __entry->temp) ); Loading Loading @@ -274,9 +272,9 @@ TRACE_EVENT(sched_set_boost, TRACE_EVENT(sched_update_task_ravg, TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt, u64 wallclock, u64 irqtime), u64 wallclock, u64 irqtime, u32 cycles, u32 exec_time), TP_ARGS(p, rq, evt, wallclock, irqtime), TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) Loading Loading @@ -312,7 +310,8 @@ TRACE_EVENT(sched_update_task_ravg, __entry->evt = evt; __entry->cpu = rq->cpu; __entry->cur_pid = rq->curr->pid; __entry->cur_freq = cpu_cur_freq(rq->cpu); __entry->cur_freq = cpu_cycles_to_freq(rq->cpu, cycles, exec_time); memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->mark_start = p->ravg.mark_start; Loading Loading @@ -351,6 +350,36 @@ TRACE_EVENT(sched_update_task_ravg, ) ); /* Records one cycle-counter sample: the cycles and execution time passed in, the frequency derived from them, and the legacy cpu_cur_freq() value. exec_time is u64 so it matches the u64 entry field and the u64 time the caller passes; a u32 argument truncated it before it was stored. */ TRACE_EVENT(sched_get_task_cpu_cycles, TP_PROTO(int cpu, int event, u64 cycles, u64 exec_time), TP_ARGS(cpu, event, cycles, exec_time), TP_STRUCT__entry( __field(int, cpu ) __field(int, event ) __field(u64, cycles ) __field(u64, exec_time ) __field(u32, freq ) __field(u32, legacy_freq ) ), 
TP_fast_assign( __entry->cpu = cpu; __entry->event = event; __entry->cycles = cycles; __entry->exec_time = exec_time; __entry->freq = cpu_cycles_to_freq(cpu, cycles, exec_time); __entry->legacy_freq = cpu_cur_freq(cpu); ), TP_printk("cpu=%d event=%d cycles=%llu exec_time=%llu freq=%u legacy_freq=%u", __entry->cpu, __entry->event, __entry->cycles, __entry->exec_time, __entry->freq, __entry->legacy_freq) ); TRACE_EVENT(sched_update_history, TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples, Loading Loading @@ -1194,6 +1223,7 @@ TRACE_EVENT(sched_get_nr_running_avg, TP_printk("avg=%d big_avg=%d iowait_avg=%d", __entry->avg, __entry->big_avg, __entry->iowait_avg) ); #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ Loading init/Kconfig +8 −0 Original line number Diff line number Diff line Loading @@ -1164,6 +1164,14 @@ config SCHED_HMP in their instructions per-cycle capability or the maximum frequency they can attain. config SCHED_HMP_CSTATE_AWARE bool "CPU C-state aware scheduler" depends on SCHED_HMP help This feature will let the HMP scheduler optimize task placement with CPUs C-state. If this is enabled, scheduler places tasks onto the shallowest C-state CPU among the most power efficient CPUs. 
config SCHED_QHMP bool "QHMP scheduler extensions" depends on SCHED_HMP Loading kernel/sched/core.c +209 −62 Original line number Diff line number Diff line Loading @@ -835,6 +835,9 @@ void sched_set_cluster_dstate(const cpumask_t *cluster_cpus, int dstate, static ktime_t ktime_last; static bool sched_ktime_suspended; static bool use_cycle_counter; static struct cpu_cycle_counter_cb cpu_cycle_counter_cb; u64 sched_ktime_clock(void) { if (unlikely(sched_ktime_suspended)) Loading Loading @@ -1228,7 +1231,7 @@ capacity_scale_cpu_efficiency(struct sched_cluster *cluster) */ static unsigned long capacity_scale_cpu_freq(struct sched_cluster *cluster) { return (1024 * cluster->max_freq) / min_max_freq; return (1024 * cluster_max_freq(cluster)) / min_max_freq; } /* Loading @@ -1249,7 +1252,8 @@ load_scale_cpu_efficiency(struct sched_cluster *cluster) */ static inline unsigned long load_scale_cpu_freq(struct sched_cluster *cluster) { return DIV_ROUND_UP(1024 * max_possible_freq, cluster->max_freq); return DIV_ROUND_UP(1024 * max_possible_freq, cluster_max_freq(cluster)); } static int compute_capacity(struct sched_cluster *cluster) Loading Loading @@ -1315,8 +1319,10 @@ static struct sched_cluster init_cluster = { .load_scale_factor = 1024, .cur_freq = 1, .max_freq = 1, .max_mitigated_freq = UINT_MAX, .min_freq = 1, .max_possible_freq = 1, .cpu_cycle_max_scale_factor = 1, .dstate = 0, .dstate_wakeup_energy = 0, .dstate_wakeup_latency = 0, Loading Loading @@ -1463,8 +1469,10 @@ static struct sched_cluster *alloc_new_cluster(const struct cpumask *cpus) cluster->load_scale_factor = 1024; cluster->cur_freq = 1; cluster->max_freq = 1; cluster->max_mitigated_freq = UINT_MAX; cluster->min_freq = 1; cluster->max_possible_freq = 1; cluster->cpu_cycle_max_scale_factor = 1; cluster->dstate = 0; cluster->dstate_wakeup_energy = 0; cluster->dstate_wakeup_latency = 0; Loading Loading @@ -1529,6 +1537,44 @@ static void init_clusters(void) INIT_LIST_HEAD(&cluster_head); } static inline void 
__update_cpu_cycle_max_possible_freq(struct sched_cluster *cluster) { int cpu = cluster_first_cpu(cluster); cluster->cpu_cycle_max_scale_factor = div64_u64(cluster->max_possible_freq * NSEC_PER_USEC, cpu_cycle_counter_cb.get_cpu_cycles_max_per_us(cpu)); } static inline void update_cpu_cycle_max_possible_freq(struct sched_cluster *cluster) { if (!use_cycle_counter) return; __update_cpu_cycle_max_possible_freq(cluster); } int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb) { struct sched_cluster *cluster = NULL; mutex_lock(&cluster_lock); if (!cb->get_cpu_cycle_counter || !cb->get_cpu_cycles_max_per_us) { mutex_unlock(&cluster_lock); return -EINVAL; } cpu_cycle_counter_cb = *cb; for_each_sched_cluster(cluster) __update_cpu_cycle_max_possible_freq(cluster); use_cycle_counter = true; mutex_unlock(&cluster_lock); return 0; } static int __init set_sched_enable_hmp(char *str) { int enable_hmp = 0; Loading Loading @@ -1637,8 +1683,19 @@ static inline void clear_hmp_request(int cpu) { } static inline void update_cluster_topology(void) {} int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb) { return 0; } #endif /* CONFIG_SCHED_HMP */ #define SCHED_MIN_FREQ 1 struct cpu_cycle { u64 cycles; u64 time; }; #if defined(CONFIG_SCHED_HMP) /* Loading Loading @@ -1786,19 +1843,17 @@ update_window_start(struct rq *rq, u64 wallclock) rq->window_start += (u64)nr_windows * (u64)sched_ravg_window; } static inline u64 scale_exec_time(u64 delta, struct rq *rq) #define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y) static inline u64 scale_exec_time(u64 delta, struct rq *rq, const struct cpu_cycle *cc) { int cpu = cpu_of(rq); unsigned int cur_freq = cpu_cur_freq(cpu); int sf; if (unlikely(cur_freq > max_possible_freq)) cur_freq = max_possible_freq; /* round up div64 */ delta = div64_u64(delta * cur_freq + max_possible_freq - 1, max_possible_freq); delta = DIV64_U64_ROUNDUP(delta * cc->cycles * cpu_cycle_max_scale_factor(cpu), max_possible_freq * cc->time); sf 
= DIV_ROUND_UP(cpu_efficiency(cpu) * 1024, max_possible_efficiency); delta *= sf; Loading Loading @@ -1860,7 +1915,7 @@ static int send_notification(struct rq *rq, int check_pred) u64 prev = rq->old_busy_time; u64 predicted = rq->hmp_stats.pred_demands_sum; if (rq->cluster->cur_freq == rq->cluster->max_freq) if (rq->cluster->cur_freq == cpu_max_freq(cpu_of(rq))) return 0; prev = max(prev, rq->old_estimated_time); Loading Loading @@ -2172,7 +2227,8 @@ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum) */ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) int event, u64 wallclock, u64 irqtime, const struct cpu_cycle *cc) { int new_window, nr_full_windows = 0; int p_is_curr_task = (p == rq->curr); Loading Loading @@ -2262,7 +2318,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, delta = wallclock - mark_start; else delta = irqtime; delta = scale_exec_time(delta, rq); delta = scale_exec_time(delta, rq, cc); rq->curr_runnable_sum += delta; if (new_task) rq->nt_curr_runnable_sum += delta; Loading @@ -2287,14 +2343,15 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, if (!nr_full_windows) { /* A full window hasn't elapsed, account partial * contribution to previous completed window. */ delta = scale_exec_time(window_start - mark_start, rq); delta = scale_exec_time(window_start - mark_start, rq, cc); if (!exiting_task(p)) p->ravg.prev_window += delta; } else { /* Since at least one full window has elapsed, * the contribution to the previous window is the * full window (window_size). 
*/ delta = scale_exec_time(window_size, rq); delta = scale_exec_time(window_size, rq, cc); if (!exiting_task(p)) p->ravg.prev_window = delta; } Loading @@ -2303,7 +2360,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, rq->nt_prev_runnable_sum += delta; /* Account piece of busy time in the current window. */ delta = scale_exec_time(wallclock - window_start, rq); delta = scale_exec_time(wallclock - window_start, rq, cc); rq->curr_runnable_sum += delta; if (new_task) rq->nt_curr_runnable_sum += delta; Loading @@ -2329,7 +2386,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, if (!nr_full_windows) { /* A full window hasn't elapsed, account partial * contribution to previous completed window. */ delta = scale_exec_time(window_start - mark_start, rq); delta = scale_exec_time(window_start - mark_start, rq, cc); if (!is_idle_task(p) && !exiting_task(p)) p->ravg.prev_window += delta; Loading @@ -2342,7 +2400,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, /* Since at least one full window has elapsed, * the contribution to the previous window is the * full window (window_size). */ delta = scale_exec_time(window_size, rq); delta = scale_exec_time(window_size, rq, cc); if (!is_idle_task(p) && !exiting_task(p)) p->ravg.prev_window = delta; Loading @@ -2360,7 +2418,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, rq->prev_runnable_sum = delta; /* Account piece of busy time in the current window. 
*/ delta = scale_exec_time(wallclock - window_start, rq); delta = scale_exec_time(wallclock - window_start, rq, cc); rq->curr_runnable_sum = delta; if (new_task) rq->nt_curr_runnable_sum = delta; Loading Loading @@ -2392,7 +2450,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum; rq->nt_curr_runnable_sum = 0; if (mark_start > window_start) { rq->curr_runnable_sum = scale_exec_time(irqtime, rq); rq->curr_runnable_sum = scale_exec_time(irqtime, rq, cc); return; } Loading @@ -2401,12 +2460,12 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, delta = window_start - mark_start; if (delta > window_size) delta = window_size; delta = scale_exec_time(delta, rq); delta = scale_exec_time(delta, rq, cc); rq->prev_runnable_sum += delta; /* Process the remaining IRQ busy time in the current window. */ delta = wallclock - window_start; rq->curr_runnable_sum = scale_exec_time(delta, rq); rq->curr_runnable_sum = scale_exec_time(delta, rq, cc); return; } Loading Loading @@ -2526,7 +2585,7 @@ update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) } static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) int event, u64 wallclock, u64 irqtime, const struct cpu_cycle *cc) { } Loading @@ -2539,6 +2598,41 @@ static inline u32 predict_and_update_buckets(struct rq *rq, #endif /* CONFIG_SCHED_FREQ_INPUT */ static void update_task_cpu_cycles(struct task_struct *p, int cpu) { if (use_cycle_counter) p->cpu_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu); } static struct cpu_cycle get_task_cpu_cycles(struct task_struct *p, struct rq *rq, int event, u64 wallclock) { u64 cur_cycles; struct cpu_cycle cc; int cpu = cpu_of(rq); if (!use_cycle_counter) { cc.cycles = cpu_cur_freq(cpu); cc.time = 1; return cc; } cur_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu); if (unlikely(cur_cycles < p->cpu_cycles)) 
cc.cycles = cur_cycles + (U64_MAX - p->cpu_cycles); else cc.cycles = cur_cycles - p->cpu_cycles; cc.time = wallclock - p->ravg.mark_start; BUG_ON((s64)cc.time < 0); p->cpu_cycles = cur_cycles; trace_sched_get_task_cpu_cycles(cpu, event, cc.cycles, cc.time); return cc; } static int account_busy_for_task_demand(struct task_struct *p, int event) { /* No need to bother updating task demand for exiting tasks Loading Loading @@ -2625,9 +2719,9 @@ done: } static void add_to_task_demand(struct rq *rq, struct task_struct *p, u64 delta) u64 delta, const struct cpu_cycle *cc) { delta = scale_exec_time(delta, rq); delta = scale_exec_time(delta, rq, cc); p->ravg.sum += delta; if (unlikely(p->ravg.sum > sched_ravg_window)) p->ravg.sum = sched_ravg_window; Loading Loading @@ -2684,7 +2778,8 @@ static void add_to_task_demand(struct rq *rq, struct task_struct *p, * depends on it! */ static void update_task_demand(struct task_struct *p, struct rq *rq, int event, u64 wallclock) int event, u64 wallclock, const struct cpu_cycle *cc) { u64 mark_start = p->ravg.mark_start; u64 delta, window_start = rq->window_start; Loading @@ -2707,7 +2802,7 @@ static void update_task_demand(struct task_struct *p, struct rq *rq, if (!new_window) { /* The simple case - busy time contained within the existing * window. 
*/ add_to_task_demand(rq, p, wallclock - mark_start); add_to_task_demand(rq, p, wallclock - mark_start, cc); return; } Loading @@ -2718,12 +2813,12 @@ static void update_task_demand(struct task_struct *p, struct rq *rq, window_start -= (u64)nr_full_windows * (u64)window_size; /* Process (window_start - mark_start) first */ add_to_task_demand(rq, p, window_start - mark_start); add_to_task_demand(rq, p, window_start - mark_start, cc); /* Push new sample(s) into task's demand history */ update_history(rq, p, p->ravg.sum, 1, event); if (nr_full_windows) update_history(rq, p, scale_exec_time(window_size, rq), update_history(rq, p, scale_exec_time(window_size, rq, cc), nr_full_windows, event); /* Roll window_start back to current to process any remainder Loading @@ -2732,30 +2827,39 @@ static void update_task_demand(struct task_struct *p, struct rq *rq, /* Process (wallclock - window_start) next */ mark_start = window_start; add_to_task_demand(rq, p, wallclock - mark_start); add_to_task_demand(rq, p, wallclock - mark_start, cc); } /* Reflect task activity on its demand and cpu's busy time statistics */ static void update_task_ravg(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) static struct cpu_cycle update_task_ravg(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) { struct cpu_cycle cc = { .cycles = SCHED_MIN_FREQ, .time = 1 }; if (sched_use_pelt || !rq->window_start || sched_disable_window_stats) return; return cc; lockdep_assert_held(&rq->lock); update_window_start(rq, wallclock); if (!p->ravg.mark_start) if (!p->ravg.mark_start) { update_task_cpu_cycles(p, cpu_of(rq)); goto done; } update_task_demand(p, rq, event, wallclock); update_cpu_busy_time(p, rq, event, wallclock, irqtime); cc = get_task_cpu_cycles(p, rq, event, wallclock); update_task_demand(p, rq, event, wallclock, &cc); update_cpu_busy_time(p, rq, event, wallclock, irqtime, &cc); update_task_pred_demand(rq, p, event); done: 
trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime); trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime, cc.cycles, cc.time); p->ravg.mark_start = wallclock; return cc; } void sched_account_irqtime(int cpu, struct task_struct *curr, Loading Loading @@ -2824,6 +2928,7 @@ static inline void mark_task_starting(struct task_struct *p) p->ravg.mark_start = p->last_wake_ts = wallclock; p->last_cpu_selected_ts = wallclock; p->last_switch_out_ts = 0; update_task_cpu_cycles(p, cpu_of(rq)); } static inline void set_window_start(struct rq *rq) Loading Loading @@ -3041,6 +3146,7 @@ void sched_get_cpus_busy(struct sched_load *busy, int early_detection[cpus]; int cpu, i = 0; unsigned int window_size; struct cpu_cycle cc; if (unlikely(cpus == 0)) return; Loading @@ -3059,8 +3165,10 @@ void sched_get_cpus_busy(struct sched_load *busy, for_each_cpu(cpu, query_cpus) { rq = cpu_rq(cpu); update_task_ravg(rq->curr, rq, TASK_UPDATE, cc = update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_ktime_clock(), 0); /* cpu_cycles_to_freq() takes the CPU id as its first argument; 'i' only indexes the per-query output arrays and differs from 'cpu' whenever query_cpus is not {0, 1, ...} */ cur_freq[i] = cpu_cycles_to_freq(cpu, cc.cycles, cc.time); load[i] = rq->old_busy_time = rq->prev_runnable_sum; nload[i] = rq->nt_prev_runnable_sum; pload[i] = rq->hmp_stats.pred_demands_sum; Loading @@ -3078,7 +3186,6 @@ void sched_get_cpus_busy(struct sched_load *busy, notifier_sent[i] = rq->notifier_sent; early_detection[i] = (rq->ed_task != NULL); rq->notifier_sent = 0; cur_freq[i] = cpu_cur_freq(cpu); max_freq[i] = cpu_max_freq(cpu); i++; } Loading Loading @@ -3224,6 +3331,8 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu) update_task_ravg(p, task_rq(p), TASK_MIGRATE, wallclock, 0); update_task_cpu_cycles(p, new_cpu); new_task = is_new_task(p); if (p->ravg.curr_window) { Loading Loading @@ -3532,6 +3641,53 @@ unsigned int sched_get_group_id(struct task_struct *p) return group_id; } static void update_cpu_cluster_capacity(const cpumask_t *cpus) { int i; struct sched_cluster *cluster; struct cpumask cpumask; cpumask_copy(&cpumask, cpus); 
pre_big_task_count_change(cpu_possible_mask); for_each_cpu(i, &cpumask) { cluster = cpu_rq(i)->cluster; cpumask_andnot(&cpumask, &cpumask, &cluster->cpus); cluster->capacity = compute_capacity(cluster); cluster->load_scale_factor = compute_load_scale_factor(cluster); /* 'cpus' can contain cpumask more than one cluster */ check_for_up_down_migrate_update(&cluster->cpus); } __update_min_max_capacity(); post_big_task_count_change(cpu_possible_mask); } void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax) { struct cpumask cpumask; struct sched_cluster *cluster; unsigned int orig_max_freq; int i, update_capacity = 0; cpumask_copy(&cpumask, cpus); for_each_cpu(i, &cpumask) { cluster = cpu_rq(i)->cluster; cpumask_andnot(&cpumask, &cpumask, &cluster->cpus); orig_max_freq = cpu_max_freq(i); cluster->max_mitigated_freq = fmax; update_capacity += (orig_max_freq != cpu_max_freq(i)); } if (update_capacity) update_cpu_cluster_capacity(cpus); } static int cpufreq_notifier_policy(struct notifier_block *nb, unsigned long val, void *data) { Loading Loading @@ -3562,7 +3718,7 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, cpumask_andnot(&policy_cluster, &policy_cluster, &cluster->cpus); orig_max_freq = cluster->max_freq; orig_max_freq = cpu_max_freq(i); cluster->min_freq = policy->min; cluster->max_freq = policy->max; cluster->cur_freq = policy->cur; Loading @@ -3579,31 +3735,16 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, sort_clusters(); update_all_clusters_stats(); update_cpu_cycle_max_possible_freq(cluster); mutex_unlock(&cluster_lock); continue; } update_capacity += (orig_max_freq != policy->max); } if (!update_capacity) return 0; policy_cluster = *policy->related_cpus; pre_big_task_count_change(cpu_possible_mask); for_each_cpu(i, &policy_cluster) { cluster = cpu_rq(i)->cluster; cpumask_andnot(&policy_cluster, &policy_cluster, &cluster->cpus); cluster->capacity = compute_capacity(cluster); cluster->load_scale_factor = 
compute_load_scale_factor(cluster); update_capacity += (orig_max_freq != cpu_max_freq(i)); } __update_min_max_capacity(); check_for_up_down_migrate_update(policy->related_cpus); post_big_task_count_change(cpu_possible_mask); if (update_capacity) update_cpu_cluster_capacity(policy->related_cpus); return 0; } Loading Loading @@ -3733,10 +3874,16 @@ heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event) return 0; } static inline void static struct cpu_cycle update_task_ravg(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) { static const struct cpu_cycle cc = { .cycles = SCHED_MIN_FREQ, .time = 1 }; return cc; } static inline void mark_task_starting(struct task_struct *p) {} Loading kernel/sched/fair.c +91 −13 Original line number Diff line number Diff line Loading @@ -2911,9 +2911,11 @@ struct cpu_select_env { }; struct cluster_cpu_stats { int best_idle_cpu, best_capacity_cpu, best_cpu, best_sibling_cpu; int best_idle_cpu, least_loaded_cpu; int best_capacity_cpu, best_cpu, best_sibling_cpu; int min_cost, best_sibling_cpu_cost; u64 min_load, best_sibling_cpu_load; int best_cpu_cstate; u64 min_load, best_load, best_sibling_cpu_load; s64 highest_spare_capacity; }; Loading Loading @@ -3146,22 +3148,79 @@ next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env, return next; } static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, struct cpu_select_env *env) #ifdef CONFIG_SCHED_HMP_CSTATE_AWARE static void __update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, struct cpu_select_env *env, int cpu_cost) { int cpu_cost; int cpu_cstate; int prev_cpu = env->prev_cpu; cpu_cost = power_cost(cpu, task_load(env->p) + cpu_cravg_sync(cpu, env->sync)); if (cpu_cost > stats->min_cost) cpu_cstate = cpu_rq(cpu)->cstate; if (env->need_idle) { stats->min_cost = cpu_cost; if (idle_cpu(cpu)) { if (cpu_cstate < stats->best_cpu_cstate || (cpu_cstate == stats->best_cpu_cstate && cpu == prev_cpu)) { 
stats->best_idle_cpu = cpu; stats->best_cpu_cstate = cpu_cstate; } } else { if (env->cpu_load < stats->min_load || (env->cpu_load == stats->min_load && cpu == prev_cpu)) { stats->least_loaded_cpu = cpu; stats->min_load = env->cpu_load; } } return; } if (cpu_cost < stats->min_cost) { stats->min_cost = cpu_cost; stats->best_cpu_cstate = cpu_cstate; stats->best_load = env->cpu_load; stats->best_cpu = cpu; return; } /* CPU cost is the same. Start breaking the tie by C-state */ if (cpu_cstate > stats->best_cpu_cstate) return; if (cpu_cstate < stats->best_cpu_cstate) { stats->best_cpu_cstate = cpu_cstate; stats->best_load = env->cpu_load; stats->best_cpu = cpu; return; } /* C-state is the same. Use prev CPU to break the tie */ if (cpu == prev_cpu) { stats->best_cpu = cpu; return; } if (stats->best_cpu != prev_cpu && ((cpu_cstate == 0 && env->cpu_load < stats->best_load) || (cpu_cstate > 0 && env->cpu_load > stats->best_load))) { stats->best_load = env->cpu_load; stats->best_cpu = cpu; } } #else /* CONFIG_SCHED_HMP_CSTATE_AWARE */ static void __update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, struct cpu_select_env *env, int cpu_cost) { int prev_cpu = env->prev_cpu; if (cpu != prev_cpu && cpus_share_cache(prev_cpu, cpu)) { if (stats->best_sibling_cpu_cost > cpu_cost || (stats->best_sibling_cpu_cost == cpu_cost && stats->best_sibling_cpu_load > env->cpu_load)) { stats->best_sibling_cpu_cost = cpu_cost; stats->best_sibling_cpu_load = env->cpu_load; stats->best_sibling_cpu = cpu; Loading @@ -3169,8 +3228,8 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, } if ((cpu_cost < stats->min_cost) || ((stats->best_cpu != prev_cpu && stats->min_load > env->cpu_load) || cpu == prev_cpu)) { ((stats->best_cpu != prev_cpu && stats->min_load > env->cpu_load) || cpu == prev_cpu)) { if (env->need_idle) { if (idle_cpu(cpu)) { stats->min_cost = cpu_cost; Loading @@ -3183,6 +3242,18 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats 
*stats, } } } #endif static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, struct cpu_select_env *env) { int cpu_cost; cpu_cost = power_cost(cpu, task_load(env->p) + cpu_cravg_sync(cpu, env->sync)); if (cpu_cost <= stats->min_cost) __update_cluster_stats(cpu, stats, env, cpu_cost); } static void find_best_cpu_in_cluster(struct sched_cluster *c, struct cpu_select_env *env, struct cluster_cpu_stats *stats) Loading Loading @@ -3224,6 +3295,9 @@ static inline void init_cluster_cpu_stats(struct cluster_cpu_stats *stats) stats->min_cost = stats->best_sibling_cpu_cost = INT_MAX; stats->min_load = stats->best_sibling_cpu_load = ULLONG_MAX; stats->highest_spare_capacity = 0; stats->least_loaded_cpu = -1; stats->best_cpu_cstate = INT_MAX; /* No need to initialize stats->best_load */ } /* Loading Loading @@ -3390,8 +3464,11 @@ retry: } while ((cluster = next_best_cluster(cluster, &env, &stats))); if (stats.best_idle_cpu >= 0) { if (env.need_idle) { if (stats.best_idle_cpu >= 0) target = stats.best_idle_cpu; else if (stats.least_loaded_cpu >= 0) target = stats.least_loaded_cpu; } else if (stats.best_cpu >= 0) { if (stats.best_cpu != task_cpu(p) && stats.min_cost == stats.best_sibling_cpu_cost) Loading Loading @@ -4076,6 +4153,7 @@ void init_new_task_load(struct task_struct *p) p->init_load_pct = 0; memset(&p->ravg, 0, sizeof(struct ravg)); p->cpu_cycles = 0; p->se.avg.decay_count = 0; rcu_assign_pointer(p->grp, NULL); INIT_LIST_HEAD(&p->grp_list); Loading Loading
include/linux/sched.h +12 −0 Original line number Diff line number Diff line Loading @@ -1330,6 +1330,7 @@ struct task_struct { #endif struct related_thread_group *grp; struct list_head grp_list; /* cycle-counter value last sampled for this task through the registered cpu_cycle_counter_cb; reset to 0 by init_new_task_load() */ u64 cpu_cycles; #endif #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; Loading Loading @@ -2216,6 +2217,8 @@ extern void sched_set_cpu_cstate(int cpu, int cstate, int wakeup_energy, int wakeup_latency); extern void sched_set_cluster_dstate(const cpumask_t *cluster_cpus, int dstate, int wakeup_energy, int wakeup_latency); /* Stores fmax as max_mitigated_freq on each cluster covering 'cpus' and recomputes cluster capacity when cpu_max_freq() changes as a result; fmin is not used by the core.c implementation. */ extern void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax); #ifdef CONFIG_SCHED_QHMP extern int sched_set_cpu_prefer_idle(int cpu, int prefer_idle); extern int sched_get_cpu_prefer_idle(int cpu); Loading @@ -2242,6 +2245,9 @@ static inline void sched_set_cluster_dstate(const cpumask_t *cluster_cpus, int dstate, int wakeup_energy, int wakeup_latency) { } static inline void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax) { } #endif #ifdef CONFIG_NO_HZ_COMMON Loading Loading @@ -3222,4 +3228,10 @@ static inline unsigned long rlimit_max(unsigned int limit) return task_rlimit_max(current, limit); } /* Cycle-counter hooks passed to register_cpu_cycle_counter_cb(). With CONFIG_SCHED_HMP the registration returns -EINVAL unless both pointers are set; without it the function is a stub that returns 0. */ struct cpu_cycle_counter_cb { /* returns a running cycle count for 'cpu'; the scheduler differences successive readings per task */ u64 (*get_cpu_cycle_counter)(int cpu); /* queried for a cluster's first CPU and used as the divisor when computing that cluster's cpu_cycle_max_scale_factor */ u32 (*get_cpu_cycles_max_per_us)(int cpu); }; int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb); #endif
include/trace/events/sched.h +37 −7 Original line number Diff line number Diff line Loading @@ -206,7 +206,6 @@ DECLARE_EVENT_CLASS(sched_cpu_load, __field(unsigned int, capacity ) __field( u64, cumulative_runnable_avg ) __field( u64, irqload ) __field(unsigned int, cur_freq ) __field(unsigned int, max_freq ) __field(unsigned int, power_cost ) __field( int, cstate ) Loading @@ -223,7 +222,6 @@ DECLARE_EVENT_CLASS(sched_cpu_load, __entry->capacity = cpu_capacity(rq->cpu); __entry->cumulative_runnable_avg = rq->hmp_stats.cumulative_runnable_avg; __entry->irqload = irqload; __entry->cur_freq = cpu_cur_freq(rq->cpu); __entry->max_freq = cpu_max_freq(rq->cpu); __entry->power_cost = power_cost; __entry->cstate = rq->cstate; Loading @@ -231,10 +229,10 @@ DECLARE_EVENT_CLASS(sched_cpu_load, __entry->temp = temp; ), TP_printk("cpu %u idle %d nr_run %u nr_big %u lsf %u capacity %u cr_avg %llu irqload %llu fcur %u fmax %u power_cost %u cstate %d dstate %d temp %d", TP_printk("cpu %u idle %d nr_run %u nr_big %u lsf %u capacity %u cr_avg %llu irqload %llu fmax %u power_cost %u cstate %d dstate %d temp %d", __entry->cpu, __entry->idle, __entry->nr_running, __entry->nr_big_tasks, __entry->load_scale_factor, __entry->capacity, __entry->cumulative_runnable_avg, __entry->irqload, __entry->cur_freq, __entry->cumulative_runnable_avg, __entry->irqload, __entry->max_freq, __entry->power_cost, __entry->cstate, __entry->dstate, __entry->temp) ); Loading Loading @@ -274,9 +272,9 @@ TRACE_EVENT(sched_set_boost, TRACE_EVENT(sched_update_task_ravg, TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt, u64 wallclock, u64 irqtime), u64 wallclock, u64 irqtime, u32 cycles, u32 exec_time), TP_ARGS(p, rq, evt, wallclock, irqtime), TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) Loading Loading @@ -312,7 +310,8 @@ TRACE_EVENT(sched_update_task_ravg, __entry->evt = evt; __entry->cpu = rq->cpu; __entry->cur_pid = 
rq->curr->pid; __entry->cur_freq = cpu_cur_freq(rq->cpu); __entry->cur_freq = cpu_cycles_to_freq(rq->cpu, cycles, exec_time); memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->mark_start = p->ravg.mark_start; Loading Loading @@ -351,6 +350,36 @@ TRACE_EVENT(sched_update_task_ravg, ) ); /* Records one cycle-counter sample: the cycles and execution time passed in, the frequency derived from them, and the legacy cpu_cur_freq() value. exec_time is u64 so it matches the u64 entry field and the u64 time the caller passes; a u32 argument truncated it before it was stored. */ TRACE_EVENT(sched_get_task_cpu_cycles, TP_PROTO(int cpu, int event, u64 cycles, u64 exec_time), TP_ARGS(cpu, event, cycles, exec_time), TP_STRUCT__entry( __field(int, cpu ) __field(int, event ) __field(u64, cycles ) __field(u64, exec_time ) __field(u32, freq ) __field(u32, legacy_freq ) ), TP_fast_assign( __entry->cpu = cpu; __entry->event = event; __entry->cycles = cycles; __entry->exec_time = exec_time; __entry->freq = cpu_cycles_to_freq(cpu, cycles, exec_time); __entry->legacy_freq = cpu_cur_freq(cpu); ), TP_printk("cpu=%d event=%d cycles=%llu exec_time=%llu freq=%u legacy_freq=%u", __entry->cpu, __entry->event, __entry->cycles, __entry->exec_time, __entry->freq, __entry->legacy_freq) ); TRACE_EVENT(sched_update_history, TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples, Loading Loading @@ -1194,6 +1223,7 @@ TRACE_EVENT(sched_get_nr_running_avg, TP_printk("avg=%d big_avg=%d iowait_avg=%d", __entry->avg, __entry->big_avg, __entry->iowait_avg) ); #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ Loading
init/Kconfig +8 −0 Original line number Diff line number Diff line Loading @@ -1164,6 +1164,14 @@ config SCHED_HMP in their instructions per-cycle capability or the maximum frequency they can attain. config SCHED_HMP_CSTATE_AWARE bool "CPU C-state aware scheduler" depends on SCHED_HMP help This feature lets the HMP scheduler take CPU C-states into account when placing tasks. If this is enabled, the scheduler places tasks onto the CPU in the shallowest C-state among the most power-efficient CPUs. config SCHED_QHMP bool "QHMP scheduler extensions" depends on SCHED_HMP Loading
kernel/sched/core.c +209 −62 Original line number Diff line number Diff line Loading @@ -835,6 +835,9 @@ void sched_set_cluster_dstate(const cpumask_t *cluster_cpus, int dstate, static ktime_t ktime_last; static bool sched_ktime_suspended; static bool use_cycle_counter; static struct cpu_cycle_counter_cb cpu_cycle_counter_cb; u64 sched_ktime_clock(void) { if (unlikely(sched_ktime_suspended)) Loading Loading @@ -1228,7 +1231,7 @@ capacity_scale_cpu_efficiency(struct sched_cluster *cluster) */ static unsigned long capacity_scale_cpu_freq(struct sched_cluster *cluster) { return (1024 * cluster->max_freq) / min_max_freq; return (1024 * cluster_max_freq(cluster)) / min_max_freq; } /* Loading @@ -1249,7 +1252,8 @@ load_scale_cpu_efficiency(struct sched_cluster *cluster) */ static inline unsigned long load_scale_cpu_freq(struct sched_cluster *cluster) { return DIV_ROUND_UP(1024 * max_possible_freq, cluster->max_freq); return DIV_ROUND_UP(1024 * max_possible_freq, cluster_max_freq(cluster)); } static int compute_capacity(struct sched_cluster *cluster) Loading Loading @@ -1315,8 +1319,10 @@ static struct sched_cluster init_cluster = { .load_scale_factor = 1024, .cur_freq = 1, .max_freq = 1, .max_mitigated_freq = UINT_MAX, .min_freq = 1, .max_possible_freq = 1, .cpu_cycle_max_scale_factor = 1, .dstate = 0, .dstate_wakeup_energy = 0, .dstate_wakeup_latency = 0, Loading Loading @@ -1463,8 +1469,10 @@ static struct sched_cluster *alloc_new_cluster(const struct cpumask *cpus) cluster->load_scale_factor = 1024; cluster->cur_freq = 1; cluster->max_freq = 1; cluster->max_mitigated_freq = UINT_MAX; cluster->min_freq = 1; cluster->max_possible_freq = 1; cluster->cpu_cycle_max_scale_factor = 1; cluster->dstate = 0; cluster->dstate_wakeup_energy = 0; cluster->dstate_wakeup_latency = 0; Loading Loading @@ -1529,6 +1537,44 @@ static void init_clusters(void) INIT_LIST_HEAD(&cluster_head); } static inline void __update_cpu_cycle_max_possible_freq(struct sched_cluster *cluster) { int cpu = 
cluster_first_cpu(cluster); cluster->cpu_cycle_max_scale_factor = div64_u64(cluster->max_possible_freq * NSEC_PER_USEC, cpu_cycle_counter_cb.get_cpu_cycles_max_per_us(cpu)); } static inline void update_cpu_cycle_max_possible_freq(struct sched_cluster *cluster) { if (!use_cycle_counter) return; __update_cpu_cycle_max_possible_freq(cluster); } int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb) { struct sched_cluster *cluster = NULL; mutex_lock(&cluster_lock); if (!cb->get_cpu_cycle_counter || !cb->get_cpu_cycles_max_per_us) { mutex_unlock(&cluster_lock); return -EINVAL; } cpu_cycle_counter_cb = *cb; for_each_sched_cluster(cluster) __update_cpu_cycle_max_possible_freq(cluster); use_cycle_counter = true; mutex_unlock(&cluster_lock); return 0; } static int __init set_sched_enable_hmp(char *str) { int enable_hmp = 0; Loading Loading @@ -1637,8 +1683,19 @@ static inline void clear_hmp_request(int cpu) { } static inline void update_cluster_topology(void) {} int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb) { return 0; } #endif /* CONFIG_SCHED_HMP */ #define SCHED_MIN_FREQ 1 struct cpu_cycle { u64 cycles; u64 time; }; #if defined(CONFIG_SCHED_HMP) /* Loading Loading @@ -1786,19 +1843,17 @@ update_window_start(struct rq *rq, u64 wallclock) rq->window_start += (u64)nr_windows * (u64)sched_ravg_window; } static inline u64 scale_exec_time(u64 delta, struct rq *rq) #define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y) static inline u64 scale_exec_time(u64 delta, struct rq *rq, const struct cpu_cycle *cc) { int cpu = cpu_of(rq); unsigned int cur_freq = cpu_cur_freq(cpu); int sf; if (unlikely(cur_freq > max_possible_freq)) cur_freq = max_possible_freq; /* round up div64 */ delta = div64_u64(delta * cur_freq + max_possible_freq - 1, max_possible_freq); delta = DIV64_U64_ROUNDUP(delta * cc->cycles * cpu_cycle_max_scale_factor(cpu), max_possible_freq * cc->time); sf = DIV_ROUND_UP(cpu_efficiency(cpu) * 1024, max_possible_efficiency); delta *= 
sf; Loading Loading @@ -1860,7 +1915,7 @@ static int send_notification(struct rq *rq, int check_pred) u64 prev = rq->old_busy_time; u64 predicted = rq->hmp_stats.pred_demands_sum; if (rq->cluster->cur_freq == rq->cluster->max_freq) if (rq->cluster->cur_freq == cpu_max_freq(cpu_of(rq))) return 0; prev = max(prev, rq->old_estimated_time); Loading Loading @@ -2172,7 +2227,8 @@ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum) */ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) int event, u64 wallclock, u64 irqtime, const struct cpu_cycle *cc) { int new_window, nr_full_windows = 0; int p_is_curr_task = (p == rq->curr); Loading Loading @@ -2262,7 +2318,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, delta = wallclock - mark_start; else delta = irqtime; delta = scale_exec_time(delta, rq); delta = scale_exec_time(delta, rq, cc); rq->curr_runnable_sum += delta; if (new_task) rq->nt_curr_runnable_sum += delta; Loading @@ -2287,14 +2343,15 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, if (!nr_full_windows) { /* A full window hasn't elapsed, account partial * contribution to previous completed window. */ delta = scale_exec_time(window_start - mark_start, rq); delta = scale_exec_time(window_start - mark_start, rq, cc); if (!exiting_task(p)) p->ravg.prev_window += delta; } else { /* Since at least one full window has elapsed, * the contribution to the previous window is the * full window (window_size). */ delta = scale_exec_time(window_size, rq); delta = scale_exec_time(window_size, rq, cc); if (!exiting_task(p)) p->ravg.prev_window = delta; } Loading @@ -2303,7 +2360,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, rq->nt_prev_runnable_sum += delta; /* Account piece of busy time in the current window. 
*/ delta = scale_exec_time(wallclock - window_start, rq); delta = scale_exec_time(wallclock - window_start, rq, cc); rq->curr_runnable_sum += delta; if (new_task) rq->nt_curr_runnable_sum += delta; Loading @@ -2329,7 +2386,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, if (!nr_full_windows) { /* A full window hasn't elapsed, account partial * contribution to previous completed window. */ delta = scale_exec_time(window_start - mark_start, rq); delta = scale_exec_time(window_start - mark_start, rq, cc); if (!is_idle_task(p) && !exiting_task(p)) p->ravg.prev_window += delta; Loading @@ -2342,7 +2400,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, /* Since at least one full window has elapsed, * the contribution to the previous window is the * full window (window_size). */ delta = scale_exec_time(window_size, rq); delta = scale_exec_time(window_size, rq, cc); if (!is_idle_task(p) && !exiting_task(p)) p->ravg.prev_window = delta; Loading @@ -2360,7 +2418,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, rq->prev_runnable_sum = delta; /* Account piece of busy time in the current window. 
*/ delta = scale_exec_time(wallclock - window_start, rq); delta = scale_exec_time(wallclock - window_start, rq, cc); rq->curr_runnable_sum = delta; if (new_task) rq->nt_curr_runnable_sum = delta; Loading Loading @@ -2392,7 +2450,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum; rq->nt_curr_runnable_sum = 0; if (mark_start > window_start) { rq->curr_runnable_sum = scale_exec_time(irqtime, rq); rq->curr_runnable_sum = scale_exec_time(irqtime, rq, cc); return; } Loading @@ -2401,12 +2460,12 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, delta = window_start - mark_start; if (delta > window_size) delta = window_size; delta = scale_exec_time(delta, rq); delta = scale_exec_time(delta, rq, cc); rq->prev_runnable_sum += delta; /* Process the remaining IRQ busy time in the current window. */ delta = wallclock - window_start; rq->curr_runnable_sum = scale_exec_time(delta, rq); rq->curr_runnable_sum = scale_exec_time(delta, rq, cc); return; } Loading Loading @@ -2526,7 +2585,7 @@ update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) } static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) int event, u64 wallclock, u64 irqtime, const struct cpu_cycle *cc) { } Loading @@ -2539,6 +2598,41 @@ static inline u32 predict_and_update_buckets(struct rq *rq, #endif /* CONFIG_SCHED_FREQ_INPUT */ static void update_task_cpu_cycles(struct task_struct *p, int cpu) { if (use_cycle_counter) p->cpu_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu); } static struct cpu_cycle get_task_cpu_cycles(struct task_struct *p, struct rq *rq, int event, u64 wallclock) { u64 cur_cycles; struct cpu_cycle cc; int cpu = cpu_of(rq); if (!use_cycle_counter) { cc.cycles = cpu_cur_freq(cpu); cc.time = 1; return cc; } cur_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu); if (unlikely(cur_cycles < p->cpu_cycles)) 
cc.cycles = cur_cycles + (U64_MAX - p->cpu_cycles); else cc.cycles = cur_cycles - p->cpu_cycles; cc.time = wallclock - p->ravg.mark_start; BUG_ON((s64)cc.time < 0); p->cpu_cycles = cur_cycles; trace_sched_get_task_cpu_cycles(cpu, event, cc.cycles, cc.time); return cc; } static int account_busy_for_task_demand(struct task_struct *p, int event) { /* No need to bother updating task demand for exiting tasks Loading Loading @@ -2625,9 +2719,9 @@ done: } static void add_to_task_demand(struct rq *rq, struct task_struct *p, u64 delta) u64 delta, const struct cpu_cycle *cc) { delta = scale_exec_time(delta, rq); delta = scale_exec_time(delta, rq, cc); p->ravg.sum += delta; if (unlikely(p->ravg.sum > sched_ravg_window)) p->ravg.sum = sched_ravg_window; Loading Loading @@ -2684,7 +2778,8 @@ static void add_to_task_demand(struct rq *rq, struct task_struct *p, * depends on it! */ static void update_task_demand(struct task_struct *p, struct rq *rq, int event, u64 wallclock) int event, u64 wallclock, const struct cpu_cycle *cc) { u64 mark_start = p->ravg.mark_start; u64 delta, window_start = rq->window_start; Loading @@ -2707,7 +2802,7 @@ static void update_task_demand(struct task_struct *p, struct rq *rq, if (!new_window) { /* The simple case - busy time contained within the existing * window. 
*/ add_to_task_demand(rq, p, wallclock - mark_start); add_to_task_demand(rq, p, wallclock - mark_start, cc); return; } Loading @@ -2718,12 +2813,12 @@ static void update_task_demand(struct task_struct *p, struct rq *rq, window_start -= (u64)nr_full_windows * (u64)window_size; /* Process (window_start - mark_start) first */ add_to_task_demand(rq, p, window_start - mark_start); add_to_task_demand(rq, p, window_start - mark_start, cc); /* Push new sample(s) into task's demand history */ update_history(rq, p, p->ravg.sum, 1, event); if (nr_full_windows) update_history(rq, p, scale_exec_time(window_size, rq), update_history(rq, p, scale_exec_time(window_size, rq, cc), nr_full_windows, event); /* Roll window_start back to current to process any remainder Loading @@ -2732,30 +2827,39 @@ static void update_task_demand(struct task_struct *p, struct rq *rq, /* Process (wallclock - window_start) next */ mark_start = window_start; add_to_task_demand(rq, p, wallclock - mark_start); add_to_task_demand(rq, p, wallclock - mark_start, cc); } /* Reflect task activity on its demand and cpu's busy time statistics */ static void update_task_ravg(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) static struct cpu_cycle update_task_ravg(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) { struct cpu_cycle cc = { .cycles = SCHED_MIN_FREQ, .time = 1 }; if (sched_use_pelt || !rq->window_start || sched_disable_window_stats) return; return cc; lockdep_assert_held(&rq->lock); update_window_start(rq, wallclock); if (!p->ravg.mark_start) if (!p->ravg.mark_start) { update_task_cpu_cycles(p, cpu_of(rq)); goto done; } update_task_demand(p, rq, event, wallclock); update_cpu_busy_time(p, rq, event, wallclock, irqtime); cc = get_task_cpu_cycles(p, rq, event, wallclock); update_task_demand(p, rq, event, wallclock, &cc); update_cpu_busy_time(p, rq, event, wallclock, irqtime, &cc); update_task_pred_demand(rq, p, event); done: 
trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime); trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime, cc.cycles, cc.time); p->ravg.mark_start = wallclock; return cc; } void sched_account_irqtime(int cpu, struct task_struct *curr, Loading Loading @@ -2824,6 +2928,7 @@ static inline void mark_task_starting(struct task_struct *p) p->ravg.mark_start = p->last_wake_ts = wallclock; p->last_cpu_selected_ts = wallclock; p->last_switch_out_ts = 0; update_task_cpu_cycles(p, cpu_of(rq)); } static inline void set_window_start(struct rq *rq) Loading Loading @@ -3041,6 +3146,7 @@ void sched_get_cpus_busy(struct sched_load *busy, int early_detection[cpus]; int cpu, i = 0; unsigned int window_size; struct cpu_cycle cc; if (unlikely(cpus == 0)) return; Loading @@ -3059,8 +3165,10 @@ void sched_get_cpus_busy(struct sched_load *busy, for_each_cpu(cpu, query_cpus) { rq = cpu_rq(cpu); update_task_ravg(rq->curr, rq, TASK_UPDATE, cc = update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_ktime_clock(), 0); cur_freq[i] = cpu_cycles_to_freq(i, cc.cycles, cc.time); load[i] = rq->old_busy_time = rq->prev_runnable_sum; nload[i] = rq->nt_prev_runnable_sum; pload[i] = rq->hmp_stats.pred_demands_sum; Loading @@ -3078,7 +3186,6 @@ void sched_get_cpus_busy(struct sched_load *busy, notifier_sent[i] = rq->notifier_sent; early_detection[i] = (rq->ed_task != NULL); rq->notifier_sent = 0; cur_freq[i] = cpu_cur_freq(cpu); max_freq[i] = cpu_max_freq(cpu); i++; } Loading Loading @@ -3224,6 +3331,8 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu) update_task_ravg(p, task_rq(p), TASK_MIGRATE, wallclock, 0); update_task_cpu_cycles(p, new_cpu); new_task = is_new_task(p); if (p->ravg.curr_window) { Loading Loading @@ -3532,6 +3641,53 @@ unsigned int sched_get_group_id(struct task_struct *p) return group_id; } static void update_cpu_cluster_capacity(const cpumask_t *cpus) { int i; struct sched_cluster *cluster; struct cpumask cpumask; cpumask_copy(&cpumask, cpus); 
pre_big_task_count_change(cpu_possible_mask); for_each_cpu(i, &cpumask) { cluster = cpu_rq(i)->cluster; cpumask_andnot(&cpumask, &cpumask, &cluster->cpus); cluster->capacity = compute_capacity(cluster); cluster->load_scale_factor = compute_load_scale_factor(cluster); /* 'cpus' can contain cpumask more than one cluster */ check_for_up_down_migrate_update(&cluster->cpus); } __update_min_max_capacity(); post_big_task_count_change(cpu_possible_mask); } void sched_update_cpu_freq_min_max(const cpumask_t *cpus, u32 fmin, u32 fmax) { struct cpumask cpumask; struct sched_cluster *cluster; unsigned int orig_max_freq; int i, update_capacity = 0; cpumask_copy(&cpumask, cpus); for_each_cpu(i, &cpumask) { cluster = cpu_rq(i)->cluster; cpumask_andnot(&cpumask, &cpumask, &cluster->cpus); orig_max_freq = cpu_max_freq(i); cluster->max_mitigated_freq = fmax; update_capacity += (orig_max_freq != cpu_max_freq(i)); } if (update_capacity) update_cpu_cluster_capacity(cpus); } static int cpufreq_notifier_policy(struct notifier_block *nb, unsigned long val, void *data) { Loading Loading @@ -3562,7 +3718,7 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, cpumask_andnot(&policy_cluster, &policy_cluster, &cluster->cpus); orig_max_freq = cluster->max_freq; orig_max_freq = cpu_max_freq(i); cluster->min_freq = policy->min; cluster->max_freq = policy->max; cluster->cur_freq = policy->cur; Loading @@ -3579,31 +3735,16 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, sort_clusters(); update_all_clusters_stats(); update_cpu_cycle_max_possible_freq(cluster); mutex_unlock(&cluster_lock); continue; } update_capacity += (orig_max_freq != policy->max); } if (!update_capacity) return 0; policy_cluster = *policy->related_cpus; pre_big_task_count_change(cpu_possible_mask); for_each_cpu(i, &policy_cluster) { cluster = cpu_rq(i)->cluster; cpumask_andnot(&policy_cluster, &policy_cluster, &cluster->cpus); cluster->capacity = compute_capacity(cluster); cluster->load_scale_factor = 
compute_load_scale_factor(cluster); update_capacity += (orig_max_freq != cpu_max_freq(i)); } __update_min_max_capacity(); check_for_up_down_migrate_update(policy->related_cpus); post_big_task_count_change(cpu_possible_mask); if (update_capacity) update_cpu_cluster_capacity(policy->related_cpus); return 0; } Loading Loading @@ -3733,10 +3874,16 @@ heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event) return 0; } static inline void static struct cpu_cycle update_task_ravg(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) { static const struct cpu_cycle cc = { .cycles = SCHED_MIN_FREQ, .time = 1 }; return cc; } static inline void mark_task_starting(struct task_struct *p) {} Loading
kernel/sched/fair.c +91 −13 Original line number Diff line number Diff line Loading @@ -2911,9 +2911,11 @@ struct cpu_select_env { }; struct cluster_cpu_stats { int best_idle_cpu, best_capacity_cpu, best_cpu, best_sibling_cpu; int best_idle_cpu, least_loaded_cpu; int best_capacity_cpu, best_cpu, best_sibling_cpu; int min_cost, best_sibling_cpu_cost; u64 min_load, best_sibling_cpu_load; int best_cpu_cstate; u64 min_load, best_load, best_sibling_cpu_load; s64 highest_spare_capacity; }; Loading Loading @@ -3146,22 +3148,79 @@ next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env, return next; } static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, struct cpu_select_env *env) #ifdef CONFIG_SCHED_HMP_CSTATE_AWARE static void __update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, struct cpu_select_env *env, int cpu_cost) { int cpu_cost; int cpu_cstate; int prev_cpu = env->prev_cpu; cpu_cost = power_cost(cpu, task_load(env->p) + cpu_cravg_sync(cpu, env->sync)); if (cpu_cost > stats->min_cost) cpu_cstate = cpu_rq(cpu)->cstate; if (env->need_idle) { stats->min_cost = cpu_cost; if (idle_cpu(cpu)) { if (cpu_cstate < stats->best_cpu_cstate || (cpu_cstate == stats->best_cpu_cstate && cpu == prev_cpu)) { stats->best_idle_cpu = cpu; stats->best_cpu_cstate = cpu_cstate; } } else { if (env->cpu_load < stats->min_load || (env->cpu_load == stats->min_load && cpu == prev_cpu)) { stats->least_loaded_cpu = cpu; stats->min_load = env->cpu_load; } } return; } if (cpu_cost < stats->min_cost) { stats->min_cost = cpu_cost; stats->best_cpu_cstate = cpu_cstate; stats->best_load = env->cpu_load; stats->best_cpu = cpu; return; } /* CPU cost is the same. Start breaking the tie by C-state */ if (cpu_cstate > stats->best_cpu_cstate) return; if (cpu_cstate < stats->best_cpu_cstate) { stats->best_cpu_cstate = cpu_cstate; stats->best_load = env->cpu_load; stats->best_cpu = cpu; return; } /* C-state is the same. 
Use prev CPU to break the tie */ if (cpu == prev_cpu) { stats->best_cpu = cpu; return; } if (stats->best_cpu != prev_cpu && ((cpu_cstate == 0 && env->cpu_load < stats->best_load) || (cpu_cstate > 0 && env->cpu_load > stats->best_load))) { stats->best_load = env->cpu_load; stats->best_cpu = cpu; } } #else /* CONFIG_SCHED_HMP_CSTATE_AWARE */ static void __update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, struct cpu_select_env *env, int cpu_cost) { int prev_cpu = env->prev_cpu; if (cpu != prev_cpu && cpus_share_cache(prev_cpu, cpu)) { if (stats->best_sibling_cpu_cost > cpu_cost || (stats->best_sibling_cpu_cost == cpu_cost && stats->best_sibling_cpu_load > env->cpu_load)) { stats->best_sibling_cpu_cost = cpu_cost; stats->best_sibling_cpu_load = env->cpu_load; stats->best_sibling_cpu = cpu; Loading @@ -3169,8 +3228,8 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, } if ((cpu_cost < stats->min_cost) || ((stats->best_cpu != prev_cpu && stats->min_load > env->cpu_load) || cpu == prev_cpu)) { ((stats->best_cpu != prev_cpu && stats->min_load > env->cpu_load) || cpu == prev_cpu)) { if (env->need_idle) { if (idle_cpu(cpu)) { stats->min_cost = cpu_cost; Loading @@ -3183,6 +3242,18 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, } } } #endif static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats, struct cpu_select_env *env) { int cpu_cost; cpu_cost = power_cost(cpu, task_load(env->p) + cpu_cravg_sync(cpu, env->sync)); if (cpu_cost <= stats->min_cost) __update_cluster_stats(cpu, stats, env, cpu_cost); } static void find_best_cpu_in_cluster(struct sched_cluster *c, struct cpu_select_env *env, struct cluster_cpu_stats *stats) Loading Loading @@ -3224,6 +3295,9 @@ static inline void init_cluster_cpu_stats(struct cluster_cpu_stats *stats) stats->min_cost = stats->best_sibling_cpu_cost = INT_MAX; stats->min_load = stats->best_sibling_cpu_load = ULLONG_MAX; stats->highest_spare_capacity = 0; 
stats->least_loaded_cpu = -1; stats->best_cpu_cstate = INT_MAX; /* No need to initialize stats->best_load */ } /* Loading Loading @@ -3390,8 +3464,11 @@ retry: } while ((cluster = next_best_cluster(cluster, &env, &stats))); if (stats.best_idle_cpu >= 0) { if (env.need_idle) { if (stats.best_idle_cpu >= 0) target = stats.best_idle_cpu; else if (stats.least_loaded_cpu >= 0) target = stats.least_loaded_cpu; } else if (stats.best_cpu >= 0) { if (stats.best_cpu != task_cpu(p) && stats.min_cost == stats.best_sibling_cpu_cost) Loading Loading @@ -4076,6 +4153,7 @@ void init_new_task_load(struct task_struct *p) p->init_load_pct = 0; memset(&p->ravg, 0, sizeof(struct ravg)); p->cpu_cycles = 0; p->se.avg.decay_count = 0; rcu_assign_pointer(p->grp, NULL); INIT_LIST_HEAD(&p->grp_list); Loading