Loading Documentation/scheduler/sched-zone.txt +19 −29 Original line number Diff line number Diff line Loading @@ -1238,35 +1238,6 @@ power mode. It ignores the actual D-state that a cluster may be in and assumes the worst case power cost of the highest D-state. It is a means of biasing task placement away from idle clusters when necessary. *** 7.21 sched_lowspill_freq Default value: 0 Appears at /proc/sys/kernel/sched_lowspill_freq This is the first of two tunables designed to govern the load balancer behavior at various frequency levels. This tunable defines the frequency of the little cluster below which the big cluster is not permitted to pull tasks from the little cluster as part of load balance. The idea is that below a certain frequency, a cluster has enough remaining capacity that may not necessitate migration of tasks. This helps in achieving consolidation of workload within the little cluster when needed. *** 7.22 sched_pack_freq Default value: UINT_MAX Appears at /proc/sys/kernel/sched_pack_freq This is the second of two tunables designed to govern the load balancer behavior at various frequency levels. This tunable defines the frequency of the little cluster beyond which the little cluster is not allowed to pull tasks from the big cluster as part of load balance. The idea is that above a certain frequency threshold the little cluster may not want to pull additional work from another cluster. This helps in achieving consolidation of workload within the big cluster when needed. *** 7.23 sched_early_detection_duration Default value: 9500000 Loading @@ -1278,6 +1249,25 @@ tick for it to be eligible for the scheduler's early detection feature under scheduler boost. For more information on the feature itself please refer to section 5.2.1. *** 7.24 sched_restrict_cluster_spill Default value: 0 Appears at /proc/sys/kernel/sched_restrict_cluster_spill This tunable can be used to restrict tasks spilling to the higher capacity (higher power) cluster. 
When this tunable is enabled, - Restrict the higher capacity cluster pulling tasks from the lower capacity cluster in the load balance path. The restriction is lifted if all of the CPUS in the lower capacity cluster are above spill. The power cost is used to break the ties if the capacity of clusters are same for applying this restriction. - The current CPU selection algorithm for RT tasks looks for the least loaded CPU across all clusters. When this tunable is enabled, the RT tasks are restricted to the lowest possible power cluster. ========================= 8. HMP SCHEDULER TRACE POINTS ========================= Loading include/linux/sched/sysctl.h +1 −2 Original line number Diff line number Diff line Loading @@ -72,10 +72,9 @@ extern unsigned int sysctl_early_detection_duration; extern unsigned int sysctl_sched_min_runtime; extern unsigned int sysctl_sched_small_task_pct; #else extern unsigned int sysctl_sched_lowspill_freq; extern unsigned int sysctl_sched_pack_freq; extern unsigned int sysctl_sched_select_prev_cpu_us; extern unsigned int sysctl_sched_enable_colocation; extern unsigned int sysctl_sched_restrict_cluster_spill; #if defined(CONFIG_SCHED_FREQ_INPUT) extern unsigned int sysctl_sched_new_task_windows; #endif Loading kernel/sched/core.c +13 −2 Original line number Diff line number Diff line Loading @@ -1301,6 +1301,7 @@ static struct sched_cluster init_cluster = { .list = LIST_HEAD_INIT(init_cluster.list), .id = 0, .max_power_cost = 1, .min_power_cost = 1, .capacity = 1024, .max_possible_capacity = 1024, .efficiency = 1, Loading @@ -1317,7 +1318,7 @@ static struct sched_cluster init_cluster = { void update_all_clusters_stats(void) { struct sched_cluster *cluster; u64 highest_mpc = 0; u64 highest_mpc = 0, lowest_mpc = U64_MAX; pre_big_task_count_change(cpu_possible_mask); Loading @@ -1331,9 +1332,13 @@ void update_all_clusters_stats(void) if (mpc > highest_mpc) highest_mpc = mpc; if (mpc < lowest_mpc) lowest_mpc = mpc; } max_possible_capacity = 
highest_mpc; min_max_possible_capacity = lowest_mpc; __update_min_max_capacity(); sched_update_freq_max_load(cpu_possible_mask); Loading Loading @@ -1397,9 +1402,12 @@ static void sort_clusters(void) INIT_LIST_HEAD(&new_head); for_each_sched_cluster(cluster) for_each_sched_cluster(cluster) { cluster->max_power_cost = power_cost(cluster_first_cpu(cluster), max_task_load()); cluster->min_power_cost = power_cost(cluster_first_cpu(cluster), 0); } move_list(&new_head, &cluster_head, true); Loading Loading @@ -1441,6 +1449,7 @@ static struct sched_cluster *alloc_new_cluster(const struct cpumask *cpus) INIT_LIST_HEAD(&cluster->list); cluster->max_power_cost = 1; cluster->min_power_cost = 1; cluster->capacity = 1024; cluster->max_possible_capacity = 1024; cluster->efficiency = 1; Loading Loading @@ -1699,6 +1708,8 @@ unsigned int min_max_freq = 1; unsigned int max_capacity = 1024; /* max(rq->capacity) */ unsigned int min_capacity = 1024; /* min(rq->capacity) */ unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */ unsigned int min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */ /* Window size (in ns) */ __read_mostly unsigned int sched_ravg_window = 10000000; Loading kernel/sched/fair.c +97 −32 Original line number Diff line number Diff line Loading @@ -2469,9 +2469,6 @@ unsigned int __read_mostly sysctl_sched_spill_nr_run = 10; */ unsigned int __read_mostly sysctl_sched_enable_power_aware = 0; unsigned int __read_mostly sysctl_sched_lowspill_freq; unsigned int __read_mostly sysctl_sched_pack_freq = UINT_MAX; /* * CPUs with load greater than the sched_spill_load_threshold are not * eligible for task placement. 
When all CPUs in a cluster achieve a Loading Loading @@ -2541,6 +2538,8 @@ static unsigned int __read_mostly sched_short_sleep_task_threshold = 2000 * NSEC_PER_USEC; unsigned int __read_mostly sysctl_sched_select_prev_cpu_us = 2000; unsigned int __read_mostly sysctl_sched_restrict_cluster_spill; void update_up_down_migrate(void) { unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct); Loading Loading @@ -3064,7 +3063,8 @@ struct cpu_select_env *env, struct cluster_cpu_stats *stats) } struct sched_cluster * next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env) next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env, struct cluster_cpu_stats *stats) { struct sched_cluster *next = NULL; Loading @@ -3078,9 +3078,16 @@ next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env) return NULL; next = next_candidate(env->candidate_list, 0, num_clusters); if (next) if (next) { if (next->min_power_cost > stats->min_cost) { clear_bit(next->id, env->candidate_list); next = NULL; continue; } if (skip_cluster(next, env)) next = NULL; } } while (!next); env->task_load = scale_load_to_cpu(task_load(env->p), Loading Loading @@ -3297,7 +3304,7 @@ retry: do { find_best_cpu_in_cluster(cluster, &env, &stats); } while ((cluster = next_best_cluster(cluster, &env))); } while ((cluster = next_best_cluster(cluster, &env, &stats))); if (stats.best_idle_cpu >= 0) { target = stats.best_idle_cpu; Loading Loading @@ -7716,9 +7723,10 @@ bail_inter_cluster_balance(struct lb_env *env, struct sd_lb_stats *sds) { int local_cpu, busiest_cpu; int local_capacity, busiest_capacity; unsigned int local_freq, busiest_freq, busiest_max_freq; int local_pwr_cost, busiest_pwr_cost; int nr_cpus; if (sched_boost()) if (!sysctl_sched_restrict_cluster_spill || sched_boost()) return 0; local_cpu = group_first_cpu(sds->local); Loading @@ -7726,21 +7734,24 @@ bail_inter_cluster_balance(struct lb_env *env, struct sd_lb_stats *sds) local_capacity = 
cpu_max_possible_capacity(local_cpu); busiest_capacity = cpu_max_possible_capacity(busiest_cpu); local_freq = cpu_cur_freq(local_cpu); busiest_freq = cpu_cur_freq(busiest_cpu); busiest_max_freq = cpu_max_freq(busiest_cpu); if (local_capacity < busiest_capacity) { if (local_freq >= sysctl_sched_pack_freq && busiest_freq < busiest_max_freq) return 1; } else if (local_capacity > busiest_capacity) { if (sds->busiest_stat.sum_nr_big_tasks) local_pwr_cost = cpu_max_power_cost(local_cpu); busiest_pwr_cost = cpu_max_power_cost(busiest_cpu); if (local_capacity < busiest_capacity || (local_capacity == busiest_capacity && local_pwr_cost <= busiest_pwr_cost)) return 0; if (busiest_freq <= sysctl_sched_lowspill_freq) if (local_capacity > busiest_capacity && sds->busiest_stat.sum_nr_big_tasks) return 0; nr_cpus = cpumask_weight(sched_group_cpus(sds->busiest)); if ((sds->busiest_stat.group_cpu_load < nr_cpus * sched_spill_load) && (sds->busiest_stat.sum_nr_running < nr_cpus * sysctl_sched_spill_nr_run)) return 1; } return 0; } Loading Loading @@ -9383,7 +9394,7 @@ static struct { } nohz ____cacheline_aligned; #ifdef CONFIG_SCHED_HMP static inline int find_new_hmp_ilb(void) static inline int find_new_hmp_ilb(int type) { int call_cpu = raw_smp_processor_id(); struct sched_domain *sd; Loading @@ -9395,7 +9406,12 @@ static inline int find_new_hmp_ilb(void) for_each_domain(call_cpu, sd) { for_each_cpu_and(ilb, nohz.idle_cpus_mask, sched_domain_span(sd)) { if (idle_cpu(ilb)) { if (idle_cpu(ilb) && (type != NOHZ_KICK_RESTRICT || (hmp_capable() && cpu_max_possible_capacity(ilb) <= cpu_max_possible_capacity(call_cpu)) || cpu_max_power_cost(ilb) <= cpu_max_power_cost(call_cpu))) { rcu_read_unlock(); reset_balance_interval(ilb); return ilb; Loading @@ -9413,12 +9429,12 @@ static inline int find_new_hmp_ilb(void) } #endif /* CONFIG_SCHED_HMP */ static inline int find_new_ilb(void) static inline int find_new_ilb(int type) { int ilb; if (sched_enable_hmp) return find_new_hmp_ilb(); return 
find_new_hmp_ilb(type); ilb = cpumask_first(nohz.idle_cpus_mask); Loading @@ -9433,13 +9449,13 @@ static inline int find_new_ilb(void) * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle * CPU (if there is one). */ static void nohz_balancer_kick(void) static void nohz_balancer_kick(int type) { int ilb_cpu; nohz.next_balance++; ilb_cpu = find_new_ilb(); ilb_cpu = find_new_ilb(type); if (ilb_cpu >= nr_cpu_ids) return; Loading Loading @@ -9698,7 +9714,51 @@ end: clear_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)); } static inline int _nohz_kick_needed(struct rq *rq, int cpu) #ifdef CONFIG_SCHED_HMP static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type) { struct sched_domain *sd; int i; if (rq->nr_running < 2) return 0; if (!sysctl_sched_restrict_cluster_spill) return 1; if (hmp_capable() && cpu_max_possible_capacity(cpu) == max_possible_capacity) return 1; rcu_read_lock(); sd = rcu_dereference_check_sched_domain(rq->sd); if (!sd) { rcu_read_unlock(); return 0; } for_each_cpu(i, sched_domain_span(sd)) { if (cpu_load(i) < sched_spill_load && cpu_rq(i)->nr_running < sysctl_sched_spill_nr_run) { /* Change the kick type to limit to CPUs that * are of equal or lower capacity. */ *type = NOHZ_KICK_RESTRICT; break; } } rcu_read_unlock(); return 1; } #else static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type) { return 0; } #endif static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type) { unsigned long now = jiffies; Loading @@ -9709,6 +9769,9 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu) if (likely(!atomic_read(&nohz.nr_cpus))) return 0; if (sched_enable_hmp) return _nohz_kick_needed_hmp(rq, cpu, type); if (time_before(now, nohz.next_balance)) return 0; Loading @@ -9724,7 +9787,7 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu) * - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler * domain span are idle. 
*/ static inline int nohz_kick_needed(struct rq *rq) static inline int nohz_kick_needed(struct rq *rq, int *type) { int cpu = rq->cpu; #ifndef CONFIG_SCHED_HMP Loading @@ -9743,7 +9806,7 @@ static inline int nohz_kick_needed(struct rq *rq) set_cpu_sd_state_busy(); nohz_balance_exit_idle(cpu); if (_nohz_kick_needed(rq, cpu)) if (_nohz_kick_needed(rq, cpu, type)) goto need_kick; #ifndef CONFIG_SCHED_HMP Loading Loading @@ -9805,6 +9868,8 @@ static void run_rebalance_domains(struct softirq_action *h) */ void trigger_load_balance(struct rq *rq) { int type = NOHZ_KICK_ANY; /* Don't need to rebalance while attached to NULL domain */ if (unlikely(on_null_domain(rq))) return; Loading @@ -9812,8 +9877,8 @@ void trigger_load_balance(struct rq *rq) if (time_after_eq(jiffies, rq->next_balance)) raise_softirq(SCHED_SOFTIRQ); #ifdef CONFIG_NO_HZ_COMMON if (nohz_kick_needed(rq)) nohz_balancer_kick(); if (nohz_kick_needed(rq, &type)) nohz_balancer_kick(type); #endif } Loading kernel/sched/rt.c +6 −2 Original line number Diff line number Diff line Loading @@ -1655,6 +1655,7 @@ static int find_lowest_rq_hmp(struct task_struct *task) int prev_cpu = task_cpu(task); u64 cpu_load, min_load = ULLONG_MAX; int i; int restrict_cluster = sysctl_sched_restrict_cluster_spill; /* Make sure the mask is initialized first */ if (unlikely(!lowest_mask)) Loading Loading @@ -1682,8 +1683,9 @@ static int find_lowest_rq_hmp(struct task_struct *task) if (sched_cpu_high_irqload(i)) continue; cpu_load = scale_load_to_cpu( cpu_rq(i)->hmp_stats.cumulative_runnable_avg, i); cpu_load = cpu_rq(i)->hmp_stats.cumulative_runnable_avg; if (!restrict_cluster) cpu_load = scale_load_to_cpu(cpu_load, i); if (cpu_load < min_load || (cpu_load == min_load && Loading @@ -1693,6 +1695,8 @@ static int find_lowest_rq_hmp(struct task_struct *task) best_cpu = i; } } if (restrict_cluster && best_cpu != -1) break; } return best_cpu; Loading Loading
Documentation/scheduler/sched-zone.txt +19 −29 Original line number Diff line number Diff line Loading @@ -1238,35 +1238,6 @@ power mode. It ignores the actual D-state that a cluster may be in and assumes the worst case power cost of the highest D-state. It is a means of biasing task placement away from idle clusters when necessary. *** 7.21 sched_lowspill_freq Default value: 0 Appears at /proc/sys/kernel/sched_lowspill_freq This is the first of two tunables designed to govern the load balancer behavior at various frequency levels. This tunable defines the frequency of the little cluster below which the big cluster is not permitted to pull tasks from the little cluster as part of load balance. The idea is that below a certain frequency, a cluster has enough remaining capacity that may not necessitate migration of tasks. This helps in achieving consolidation of workload within the little cluster when needed. *** 7.22 sched_pack_freq Default value: UINT_MAX Appears at /proc/sys/kernel/sched_pack_freq This is the second of two tunables designed to govern the load balancer behavior at various frequency levels. This tunable defines the frequency of the little cluster beyond which the little cluster is not allowed to pull tasks from the big cluster as part of load balance. The idea is that above a certain frequency threshold the little cluster may not want to pull additional work from another cluster. This helps in achieving consolidation of workload within the big cluster when needed. *** 7.23 sched_early_detection_duration Default value: 9500000 Loading @@ -1278,6 +1249,25 @@ tick for it to be eligible for the scheduler's early detection feature under scheduler boost. For more information on the feature itself please refer to section 5.2.1. *** 7.24 sched_restrict_cluster_spill Default value: 0 Appears at /proc/sys/kernel/sched_restrict_cluster_spill This tunable can be used to restrict tasks spilling to the higher capacity (higher power) cluster.
When this tunable is enabled, - Restrict the higher capacity cluster pulling tasks from the lower capacity cluster in the load balance path. The restriction is lifted if all of the CPUs in the lower capacity cluster are above the spill threshold. If the clusters have the same capacity, the power cost is used to break the tie when applying this restriction. - The current CPU selection algorithm for RT tasks looks for the least loaded CPU across all clusters. When this tunable is enabled, the RT tasks are restricted to the lowest possible power cluster. ========================= 8. HMP SCHEDULER TRACE POINTS ========================= Loading
include/linux/sched/sysctl.h +1 −2 Original line number Diff line number Diff line Loading @@ -72,10 +72,9 @@ extern unsigned int sysctl_early_detection_duration; extern unsigned int sysctl_sched_min_runtime; extern unsigned int sysctl_sched_small_task_pct; #else extern unsigned int sysctl_sched_lowspill_freq; extern unsigned int sysctl_sched_pack_freq; extern unsigned int sysctl_sched_select_prev_cpu_us; extern unsigned int sysctl_sched_enable_colocation; extern unsigned int sysctl_sched_restrict_cluster_spill; #if defined(CONFIG_SCHED_FREQ_INPUT) extern unsigned int sysctl_sched_new_task_windows; #endif Loading
kernel/sched/core.c +13 −2 Original line number Diff line number Diff line Loading @@ -1301,6 +1301,7 @@ static struct sched_cluster init_cluster = { .list = LIST_HEAD_INIT(init_cluster.list), .id = 0, .max_power_cost = 1, .min_power_cost = 1, .capacity = 1024, .max_possible_capacity = 1024, .efficiency = 1, Loading @@ -1317,7 +1318,7 @@ static struct sched_cluster init_cluster = { void update_all_clusters_stats(void) { struct sched_cluster *cluster; u64 highest_mpc = 0; u64 highest_mpc = 0, lowest_mpc = U64_MAX; pre_big_task_count_change(cpu_possible_mask); Loading @@ -1331,9 +1332,13 @@ void update_all_clusters_stats(void) if (mpc > highest_mpc) highest_mpc = mpc; if (mpc < lowest_mpc) lowest_mpc = mpc; } max_possible_capacity = highest_mpc; min_max_possible_capacity = lowest_mpc; __update_min_max_capacity(); sched_update_freq_max_load(cpu_possible_mask); Loading Loading @@ -1397,9 +1402,12 @@ static void sort_clusters(void) INIT_LIST_HEAD(&new_head); for_each_sched_cluster(cluster) for_each_sched_cluster(cluster) { cluster->max_power_cost = power_cost(cluster_first_cpu(cluster), max_task_load()); cluster->min_power_cost = power_cost(cluster_first_cpu(cluster), 0); } move_list(&new_head, &cluster_head, true); Loading Loading @@ -1441,6 +1449,7 @@ static struct sched_cluster *alloc_new_cluster(const struct cpumask *cpus) INIT_LIST_HEAD(&cluster->list); cluster->max_power_cost = 1; cluster->min_power_cost = 1; cluster->capacity = 1024; cluster->max_possible_capacity = 1024; cluster->efficiency = 1; Loading Loading @@ -1699,6 +1708,8 @@ unsigned int min_max_freq = 1; unsigned int max_capacity = 1024; /* max(rq->capacity) */ unsigned int min_capacity = 1024; /* min(rq->capacity) */ unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */ unsigned int min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */ /* Window size (in ns) */ __read_mostly unsigned int sched_ravg_window = 10000000; Loading
kernel/sched/fair.c +97 −32 Original line number Diff line number Diff line Loading @@ -2469,9 +2469,6 @@ unsigned int __read_mostly sysctl_sched_spill_nr_run = 10; */ unsigned int __read_mostly sysctl_sched_enable_power_aware = 0; unsigned int __read_mostly sysctl_sched_lowspill_freq; unsigned int __read_mostly sysctl_sched_pack_freq = UINT_MAX; /* * CPUs with load greater than the sched_spill_load_threshold are not * eligible for task placement. When all CPUs in a cluster achieve a Loading Loading @@ -2541,6 +2538,8 @@ static unsigned int __read_mostly sched_short_sleep_task_threshold = 2000 * NSEC_PER_USEC; unsigned int __read_mostly sysctl_sched_select_prev_cpu_us = 2000; unsigned int __read_mostly sysctl_sched_restrict_cluster_spill; void update_up_down_migrate(void) { unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct); Loading Loading @@ -3064,7 +3063,8 @@ struct cpu_select_env *env, struct cluster_cpu_stats *stats) } struct sched_cluster * next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env) next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env, struct cluster_cpu_stats *stats) { struct sched_cluster *next = NULL; Loading @@ -3078,9 +3078,16 @@ next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env) return NULL; next = next_candidate(env->candidate_list, 0, num_clusters); if (next) if (next) { if (next->min_power_cost > stats->min_cost) { clear_bit(next->id, env->candidate_list); next = NULL; continue; } if (skip_cluster(next, env)) next = NULL; } } while (!next); env->task_load = scale_load_to_cpu(task_load(env->p), Loading Loading @@ -3297,7 +3304,7 @@ retry: do { find_best_cpu_in_cluster(cluster, &env, &stats); } while ((cluster = next_best_cluster(cluster, &env))); } while ((cluster = next_best_cluster(cluster, &env, &stats))); if (stats.best_idle_cpu >= 0) { target = stats.best_idle_cpu; Loading Loading @@ -7716,9 +7723,10 @@ bail_inter_cluster_balance(struct lb_env *env, 
struct sd_lb_stats *sds) { int local_cpu, busiest_cpu; int local_capacity, busiest_capacity; unsigned int local_freq, busiest_freq, busiest_max_freq; int local_pwr_cost, busiest_pwr_cost; int nr_cpus; if (sched_boost()) if (!sysctl_sched_restrict_cluster_spill || sched_boost()) return 0; local_cpu = group_first_cpu(sds->local); Loading @@ -7726,21 +7734,24 @@ bail_inter_cluster_balance(struct lb_env *env, struct sd_lb_stats *sds) local_capacity = cpu_max_possible_capacity(local_cpu); busiest_capacity = cpu_max_possible_capacity(busiest_cpu); local_freq = cpu_cur_freq(local_cpu); busiest_freq = cpu_cur_freq(busiest_cpu); busiest_max_freq = cpu_max_freq(busiest_cpu); if (local_capacity < busiest_capacity) { if (local_freq >= sysctl_sched_pack_freq && busiest_freq < busiest_max_freq) return 1; } else if (local_capacity > busiest_capacity) { if (sds->busiest_stat.sum_nr_big_tasks) local_pwr_cost = cpu_max_power_cost(local_cpu); busiest_pwr_cost = cpu_max_power_cost(busiest_cpu); if (local_capacity < busiest_capacity || (local_capacity == busiest_capacity && local_pwr_cost <= busiest_pwr_cost)) return 0; if (busiest_freq <= sysctl_sched_lowspill_freq) if (local_capacity > busiest_capacity && sds->busiest_stat.sum_nr_big_tasks) return 0; nr_cpus = cpumask_weight(sched_group_cpus(sds->busiest)); if ((sds->busiest_stat.group_cpu_load < nr_cpus * sched_spill_load) && (sds->busiest_stat.sum_nr_running < nr_cpus * sysctl_sched_spill_nr_run)) return 1; } return 0; } Loading Loading @@ -9383,7 +9394,7 @@ static struct { } nohz ____cacheline_aligned; #ifdef CONFIG_SCHED_HMP static inline int find_new_hmp_ilb(void) static inline int find_new_hmp_ilb(int type) { int call_cpu = raw_smp_processor_id(); struct sched_domain *sd; Loading @@ -9395,7 +9406,12 @@ static inline int find_new_hmp_ilb(void) for_each_domain(call_cpu, sd) { for_each_cpu_and(ilb, nohz.idle_cpus_mask, sched_domain_span(sd)) { if (idle_cpu(ilb)) { if (idle_cpu(ilb) && (type != NOHZ_KICK_RESTRICT || (hmp_capable() 
&& cpu_max_possible_capacity(ilb) <= cpu_max_possible_capacity(call_cpu)) || cpu_max_power_cost(ilb) <= cpu_max_power_cost(call_cpu))) { rcu_read_unlock(); reset_balance_interval(ilb); return ilb; Loading @@ -9413,12 +9429,12 @@ static inline int find_new_hmp_ilb(void) } #endif /* CONFIG_SCHED_HMP */ static inline int find_new_ilb(void) static inline int find_new_ilb(int type) { int ilb; if (sched_enable_hmp) return find_new_hmp_ilb(); return find_new_hmp_ilb(type); ilb = cpumask_first(nohz.idle_cpus_mask); Loading @@ -9433,13 +9449,13 @@ static inline int find_new_ilb(void) * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle * CPU (if there is one). */ static void nohz_balancer_kick(void) static void nohz_balancer_kick(int type) { int ilb_cpu; nohz.next_balance++; ilb_cpu = find_new_ilb(); ilb_cpu = find_new_ilb(type); if (ilb_cpu >= nr_cpu_ids) return; Loading Loading @@ -9698,7 +9714,51 @@ end: clear_bit(NOHZ_BALANCE_KICK, nohz_flags(this_cpu)); } static inline int _nohz_kick_needed(struct rq *rq, int cpu) #ifdef CONFIG_SCHED_HMP static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type) { struct sched_domain *sd; int i; if (rq->nr_running < 2) return 0; if (!sysctl_sched_restrict_cluster_spill) return 1; if (hmp_capable() && cpu_max_possible_capacity(cpu) == max_possible_capacity) return 1; rcu_read_lock(); sd = rcu_dereference_check_sched_domain(rq->sd); if (!sd) { rcu_read_unlock(); return 0; } for_each_cpu(i, sched_domain_span(sd)) { if (cpu_load(i) < sched_spill_load && cpu_rq(i)->nr_running < sysctl_sched_spill_nr_run) { /* Change the kick type to limit to CPUs that * are of equal or lower capacity. 
*/ *type = NOHZ_KICK_RESTRICT; break; } } rcu_read_unlock(); return 1; } #else static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type) { return 0; } #endif static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type) { unsigned long now = jiffies; Loading @@ -9709,6 +9769,9 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu) if (likely(!atomic_read(&nohz.nr_cpus))) return 0; if (sched_enable_hmp) return _nohz_kick_needed_hmp(rq, cpu, type); if (time_before(now, nohz.next_balance)) return 0; Loading @@ -9724,7 +9787,7 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu) * - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler * domain span are idle. */ static inline int nohz_kick_needed(struct rq *rq) static inline int nohz_kick_needed(struct rq *rq, int *type) { int cpu = rq->cpu; #ifndef CONFIG_SCHED_HMP Loading @@ -9743,7 +9806,7 @@ static inline int nohz_kick_needed(struct rq *rq) set_cpu_sd_state_busy(); nohz_balance_exit_idle(cpu); if (_nohz_kick_needed(rq, cpu)) if (_nohz_kick_needed(rq, cpu, type)) goto need_kick; #ifndef CONFIG_SCHED_HMP Loading Loading @@ -9805,6 +9868,8 @@ static void run_rebalance_domains(struct softirq_action *h) */ void trigger_load_balance(struct rq *rq) { int type = NOHZ_KICK_ANY; /* Don't need to rebalance while attached to NULL domain */ if (unlikely(on_null_domain(rq))) return; Loading @@ -9812,8 +9877,8 @@ void trigger_load_balance(struct rq *rq) if (time_after_eq(jiffies, rq->next_balance)) raise_softirq(SCHED_SOFTIRQ); #ifdef CONFIG_NO_HZ_COMMON if (nohz_kick_needed(rq)) nohz_balancer_kick(); if (nohz_kick_needed(rq, &type)) nohz_balancer_kick(type); #endif } Loading
kernel/sched/rt.c +6 −2 Original line number Diff line number Diff line Loading @@ -1655,6 +1655,7 @@ static int find_lowest_rq_hmp(struct task_struct *task) int prev_cpu = task_cpu(task); u64 cpu_load, min_load = ULLONG_MAX; int i; int restrict_cluster = sysctl_sched_restrict_cluster_spill; /* Make sure the mask is initialized first */ if (unlikely(!lowest_mask)) Loading Loading @@ -1682,8 +1683,9 @@ static int find_lowest_rq_hmp(struct task_struct *task) if (sched_cpu_high_irqload(i)) continue; cpu_load = scale_load_to_cpu( cpu_rq(i)->hmp_stats.cumulative_runnable_avg, i); cpu_load = cpu_rq(i)->hmp_stats.cumulative_runnable_avg; if (!restrict_cluster) cpu_load = scale_load_to_cpu(cpu_load, i); if (cpu_load < min_load || (cpu_load == min_load && Loading @@ -1693,6 +1695,8 @@ static int find_lowest_rq_hmp(struct task_struct *task) best_cpu = i; } } if (restrict_cluster && best_cpu != -1) break; } return best_cpu; Loading