Loading include/linux/sched/sysctl.h +1 −0 Original line number Diff line number Diff line Loading @@ -53,6 +53,7 @@ extern unsigned int __weak sysctl_sched_window_stats_policy; extern unsigned int __weak sysctl_sched_ravg_window_nr_ticks; extern unsigned int __weak sysctl_sched_many_wakeup_threshold; extern unsigned int __weak sysctl_sched_dynamic_ravg_window_enable; extern unsigned int sysctl_sched_prefer_spread; extern int walt_proc_group_thresholds_handler(struct ctl_table *table, int write, Loading include/trace/events/sched.h +8 −6 Original line number Diff line number Diff line Loading @@ -276,11 +276,11 @@ TRACE_EVENT(sched_load_balance, unsigned long group_mask, int busiest_nr_running, unsigned long imbalance, unsigned int env_flags, int ld_moved, unsigned int balance_interval, int active_balance, int overutilized), int overutilized, int prefer_spread), TP_ARGS(cpu, idle, balance, group_mask, busiest_nr_running, imbalance, env_flags, ld_moved, balance_interval, active_balance, overutilized), active_balance, overutilized, prefer_spread), TP_STRUCT__entry( __field(int, cpu) Loading @@ -294,6 +294,7 @@ TRACE_EVENT(sched_load_balance, __field(unsigned int, balance_interval) __field(int, active_balance) __field(int, overutilized) __field(int, prefer_spread) ), TP_fast_assign( Loading @@ -308,9 +309,10 @@ TRACE_EVENT(sched_load_balance, __entry->balance_interval = balance_interval; __entry->active_balance = active_balance; __entry->overutilized = overutilized; __entry->prefer_spread = prefer_spread; ), TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d active_balance=%d sd_overutilized=%d", TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d active_balance=%d sd_overutilized=%d prefer_spread=%d", __entry->cpu, __entry->idle == CPU_IDLE ? "idle" : (__entry->idle == CPU_NEWLY_IDLE ? "newly_idle" : "busy"), Loading @@ -318,7 +320,7 @@ TRACE_EVENT(sched_load_balance, __entry->group_mask, __entry->busiest_nr_running, __entry->imbalance, __entry->env_flags, __entry->ld_moved, __entry->balance_interval, __entry->active_balance, __entry->overutilized) __entry->overutilized, __entry->prefer_spread) ); TRACE_EVENT(sched_load_balance_nohz_kick, Loading Loading @@ -994,7 +996,7 @@ TRACE_EVENT(sched_compute_energy, TRACE_EVENT(sched_task_util, TP_PROTO(struct task_struct *p, unsigned long candidates, int best_energy_cpu, bool sync, bool need_idle, int fastpath, int best_energy_cpu, bool sync, int need_idle, int fastpath, bool placement_boost, u64 start_t, bool stune_boosted, bool is_rtg, bool rtg_skip_min, int start_cpu), Loading @@ -1011,7 +1013,7 @@ TRACE_EVENT(sched_task_util, __field(int, prev_cpu) __field(int, best_energy_cpu) __field(bool, sync) __field(bool, need_idle) __field(int, need_idle) __field(int, fastpath) __field(int, placement_boost) __field(int, rtg_cpu) Loading kernel/sched/core.c +10 −3 Original line number Diff line number Diff line Loading @@ -5627,6 +5627,7 @@ unsigned int sched_lib_mask_force; bool is_sched_lib_based_app(pid_t pid) { const char *name = NULL; char *lib_list, *libname; struct vm_area_struct *vma; char path_buf[LIB_PATH_LENGTH]; bool found = false; Loading @@ -5636,11 +5637,14 @@ bool is_sched_lib_based_app(pid_t pid) if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0) return false; lib_list = kstrdup(sched_lib_name, GFP_KERNEL); rcu_read_lock(); p = find_process_by_pid(pid); if (!p) { rcu_read_unlock(); kfree(lib_list); return false; } Loading @@ -5660,19 +5664,22 @@ bool is_sched_lib_based_app(pid_t pid) if (IS_ERR(name)) goto release_sem; if (strnstr(name, sched_lib_name, while ((libname = strsep(&lib_list, ","))) { if (strnstr(name, libname, strnlen(name, LIB_PATH_LENGTH))) { found = true; break; } } } } release_sem: up_read(&mm->mmap_sem); mmput(mm); put_task_struct: put_task_struct(p); kfree(lib_list); return found; } Loading kernel/sched/fair.c +116 −26 Original line number Diff line number Diff line Loading @@ -131,6 +131,9 @@ unsigned int sched_capacity_margin_up[NR_CPUS] = { unsigned int sched_capacity_margin_down[NR_CPUS] = { [0 ... NR_CPUS-1] = 1205}; /* ~15% margin */ #ifdef CONFIG_SCHED_WALT __read_mostly unsigned int sysctl_sched_prefer_spread; #endif unsigned int sched_small_task_threshold = 102; static inline void update_load_add(struct load_weight *lw, unsigned long inc) Loading Loading @@ -3877,16 +3880,31 @@ static inline bool task_demand_fits(struct task_struct *p, int cpu) } struct find_best_target_env { bool is_rtg; int placement_boost; bool need_idle; bool boosted; int need_idle; int fastpath; int start_cpu; bool strict_max; int skip_cpu; bool is_rtg; bool boosted; bool strict_max; }; static inline bool prefer_spread_on_idle(int cpu) { #ifdef CONFIG_SCHED_WALT if (likely(!sysctl_sched_prefer_spread)) return false; if (is_min_capacity_cpu(cpu)) return sysctl_sched_prefer_spread >= 1; return sysctl_sched_prefer_spread > 1; #else return false; #endif } static inline void adjust_cpus_for_packing(struct task_struct *p, int *target_cpu, int *best_idle_cpu, int shallowest_idle_cstate, Loading @@ -3898,7 +3916,10 @@ static inline void adjust_cpus_for_packing(struct task_struct *p, if (*best_idle_cpu == -1 || *target_cpu == -1) return; if (task_placement_boost_enabled(p) || fbt_env->need_idle || boosted || if (prefer_spread_on_idle(*best_idle_cpu)) fbt_env->need_idle |= 2; if (fbt_env->need_idle || task_placement_boost_enabled(p) || boosted || shallowest_idle_cstate <= 0) { *target_cpu = -1; return; Loading Loading @@ -7024,6 +7045,7 @@ int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, curr_is_rtg = task_in_related_thread_group(cpu_rq(cpu)->curr); fbt_env.fastpath = 0; fbt_env.need_idle = need_idle; if (trace_sched_task_util_enabled()) start_t = sched_clock(); Loading Loading @@ -7070,7 +7092,6 @@ int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, if (sched_feat(FIND_BEST_TARGET)) { fbt_env.is_rtg = is_rtg; fbt_env.placement_boost = placement_boost; fbt_env.need_idle = need_idle; fbt_env.start_cpu = start_cpu; fbt_env.boosted = boosted; fbt_env.strict_max = is_rtg && Loading @@ -7096,8 +7117,8 @@ int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, if (p->state == TASK_WAKING) delta = task_util(p); #endif if (task_placement_boost_enabled(p) || need_idle || boosted || is_rtg || __cpu_overutilized(prev_cpu, delta) || if (task_placement_boost_enabled(p) || fbt_env.need_idle || boosted || is_rtg || __cpu_overutilized(prev_cpu, delta) || !task_fits_max(p, prev_cpu) || cpu_isolated(prev_cpu)) { best_energy_cpu = cpu; goto unlock; Loading Loading @@ -7231,8 +7252,9 @@ int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, done: trace_sched_task_util(p, cpumask_bits(candidates)[0], best_energy_cpu, sync, need_idle, fbt_env.fastpath, placement_boost, start_t, boosted, is_rtg, get_rtg_status(p), start_cpu); sync, fbt_env.need_idle, fbt_env.fastpath, placement_boost, start_t, boosted, is_rtg, get_rtg_status(p), start_cpu); return best_energy_cpu; Loading Loading @@ -7946,6 +7968,7 @@ struct lb_env { unsigned int loop; unsigned int loop_break; unsigned int loop_max; bool prefer_spread; enum fbq_type fbq_type; enum group_type src_grp_type; Loading Loading @@ -8119,7 +8142,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) struct root_domain *rd = env->dst_rq->rd; if ((rcu_dereference(rd->pd) && !sd_overutilized(env->sd)) && env->idle == CPU_NEWLY_IDLE && env->idle == CPU_NEWLY_IDLE && !env->prefer_spread && !task_in_related_thread_group(p)) { long util_cum_dst, util_cum_src; unsigned long demand; Loading Loading @@ -8282,7 +8305,21 @@ static int detach_tasks(struct lb_env *env) if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed) goto next; if ((load / 2) > env->imbalance) /* * p is not running task when we goes until here, so if p is one * of the 2 task in src cpu rq and not the running one, * that means it is the only task that can be balanced. * So only when there is other tasks can be balanced or * there is situation to ignore big task, it is needed * to skip the task load bigger than 2*imbalance. * * And load based checks are skipped for prefer_spread in * finding busiest group, ignore the task's h_load. */ if (!env->prefer_spread && ((cpu_rq(env->src_cpu)->nr_running > 2) || (env->flags & LBF_IGNORE_BIG_TASKS)) && ((load / 2) > env->imbalance)) goto next; detach_task(p, env); Loading Loading @@ -9082,6 +9119,11 @@ static bool update_sd_pick_busiest(struct lb_env *env, if (sgs->group_type < busiest->group_type) return false; if (env->prefer_spread && env->idle != CPU_NOT_IDLE && (sgs->sum_nr_running > busiest->sum_nr_running) && (sgs->group_util > busiest->group_util)) return true; if (sgs->avg_load <= busiest->avg_load) return false; Loading Loading @@ -9115,6 +9157,11 @@ static bool update_sd_pick_busiest(struct lb_env *env, return false; asym_packing: if (env->prefer_spread && (sgs->sum_nr_running < busiest->sum_nr_running)) return false; /* This is the busiest node in its class. */ if (!(env->sd->flags & SD_ASYM_PACKING)) return true; Loading Loading @@ -9595,6 +9642,15 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s return fix_small_imbalance(env, sds); } /* * If we couldn't find any imbalance, then boost the imbalance * with the group util. */ if (env->prefer_spread && !env->imbalance && env->idle != CPU_NOT_IDLE && busiest->sum_nr_running > busiest->group_weight) env->imbalance = busiest->group_util; } /******* find_busiest_group() helpers end here *********************/ Loading Loading @@ -9988,6 +10044,15 @@ static int load_balance(int this_cpu, struct rq *this_rq, .loop = 0, }; #ifdef CONFIG_SCHED_WALT env.prefer_spread = (prefer_spread_on_idle(this_cpu) && !((sd->flags & SD_ASYM_CPUCAPACITY) && !cpumask_test_cpu(this_cpu, &asym_cap_sibling_cpus))); #else env.prefer_spread = false; #endif cpumask_and(cpus, sched_domain_span(sd), cpu_active_mask); schedstat_inc(sd->lb_count[idle]); Loading Loading @@ -10277,10 +10342,11 @@ static int load_balance(int this_cpu, struct rq *this_rq, env.imbalance, env.flags, ld_moved, sd->balance_interval, active_balance, #ifdef CONFIG_SCHED_WALT sd_overutilized(sd)); sd_overutilized(sd), #else READ_ONCE(this_rq->rd->overutilized)); READ_ONCE(this_rq->rd->overutilized), #endif env.prefer_spread); return ld_moved; } Loading Loading @@ -10525,7 +10591,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) max_cost += sd->max_newidle_lb_cost; #ifdef CONFIG_SCHED_WALT if (!sd_overutilized(sd)) if (!sd_overutilized(sd) && !prefer_spread_on_idle(cpu)) continue; #endif Loading Loading @@ -10615,7 +10681,7 @@ static inline int find_energy_aware_new_ilb(void) int ilb = nr_cpu_ids; struct sched_domain *sd; int cpu = raw_smp_processor_id(); cpumask_t avail_cpus, tmp_cpus; cpumask_t idle_cpus, tmp_cpus; struct sched_group *sg; unsigned long ref_cap = capacity_orig_of(cpu); unsigned long best_cap = 0, best_cap_cpu = -1; Loading @@ -10625,16 +10691,16 @@ static inline int find_energy_aware_new_ilb(void) if (!sd) goto out; cpumask_and(&avail_cpus, nohz.idle_cpus_mask, cpumask_and(&idle_cpus, nohz.idle_cpus_mask, housekeeping_cpumask(HK_FLAG_MISC)); cpumask_andnot(&avail_cpus, &avail_cpus, cpu_isolated_mask); cpumask_andnot(&idle_cpus, &idle_cpus, cpu_isolated_mask); sg = sd->groups; do { int i; unsigned long cap; cpumask_and(&tmp_cpus, &avail_cpus, sched_group_span(sg)); cpumask_and(&tmp_cpus, &idle_cpus, sched_group_span(sg)); i = cpumask_first(&tmp_cpus); /* This sg did not have any idle CPUs */ Loading @@ -10649,7 +10715,18 @@ static inline int find_energy_aware_new_ilb(void) break; } /* The back up CPU is selected from the best capacity CPUs */ /* * When there are no idle CPUs in the same capacity group, * we find the next best capacity CPU. */ if (best_cap > ref_cap) { if (cap > ref_cap && cap < best_cap) { best_cap = cap; best_cap_cpu = i; } continue; } if (cap > best_cap) { best_cap = cap; best_cap_cpu = i; Loading Loading @@ -10761,7 +10838,8 @@ static void nohz_balancer_kick(struct rq *rq) * happens from the tickpath. */ if (sched_energy_enabled()) { if (rq->nr_running >= 2 && cpu_overutilized(cpu)) if (rq->nr_running >= 2 && (cpu_overutilized(cpu) || prefer_spread_on_idle(cpu))) flags = NOHZ_KICK_MASK; goto out; } Loading Loading @@ -11166,6 +11244,11 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) int pulled_task = 0; u64 curr_cost = 0; u64 avg_idle = this_rq->avg_idle; bool prefer_spread = prefer_spread_on_idle(this_cpu); bool force_lb = (!is_min_capacity_cpu(this_cpu) && silver_has_big_tasks() && (atomic_read(&this_rq->nr_iowait) == 0)); if (cpu_isolated(this_cpu)) return 0; Loading @@ -11182,8 +11265,8 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) */ if (!cpu_active(this_cpu)) return 0; if (!is_min_capacity_cpu(this_cpu) && silver_has_big_tasks() && (atomic_read(&this_rq->nr_iowait) == 0)) if (force_lb || prefer_spread) avg_idle = ULLONG_MAX; /* * This is OK, because current is on_cpu, which avoids it being picked Loading Loading @@ -11218,6 +11301,13 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) if (!(sd->flags & SD_LOAD_BALANCE)) continue; #ifdef CONFIG_SCHED_WALT if (prefer_spread && !force_lb && (sd->flags & SD_ASYM_CPUCAPACITY) && !(cpumask_test_cpu(this_cpu, &asym_cap_sibling_cpus))) avg_idle = this_rq->avg_idle; #endif if (avg_idle < curr_cost + sd->max_newidle_lb_cost) { update_next_balance(sd, &next_balance); break; Loading kernel/sysctl.c +9 −0 Original line number Diff line number Diff line Loading @@ -583,6 +583,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sched_updown_migrate_handler, }, { .procname = "sched_prefer_spread", .data = &sysctl_sched_prefer_spread, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, }, #endif #ifdef CONFIG_SCHED_DEBUG { Loading Loading
include/linux/sched/sysctl.h +1 −0 Original line number Diff line number Diff line Loading @@ -53,6 +53,7 @@ extern unsigned int __weak sysctl_sched_window_stats_policy; extern unsigned int __weak sysctl_sched_ravg_window_nr_ticks; extern unsigned int __weak sysctl_sched_many_wakeup_threshold; extern unsigned int __weak sysctl_sched_dynamic_ravg_window_enable; extern unsigned int sysctl_sched_prefer_spread; extern int walt_proc_group_thresholds_handler(struct ctl_table *table, int write, Loading
include/trace/events/sched.h +8 −6 Original line number Diff line number Diff line Loading @@ -276,11 +276,11 @@ TRACE_EVENT(sched_load_balance, unsigned long group_mask, int busiest_nr_running, unsigned long imbalance, unsigned int env_flags, int ld_moved, unsigned int balance_interval, int active_balance, int overutilized), int overutilized, int prefer_spread), TP_ARGS(cpu, idle, balance, group_mask, busiest_nr_running, imbalance, env_flags, ld_moved, balance_interval, active_balance, overutilized), active_balance, overutilized, prefer_spread), TP_STRUCT__entry( __field(int, cpu) Loading @@ -294,6 +294,7 @@ TRACE_EVENT(sched_load_balance, __field(unsigned int, balance_interval) __field(int, active_balance) __field(int, overutilized) __field(int, prefer_spread) ), TP_fast_assign( Loading @@ -308,9 +309,10 @@ TRACE_EVENT(sched_load_balance, __entry->balance_interval = balance_interval; __entry->active_balance = active_balance; __entry->overutilized = overutilized; __entry->prefer_spread = prefer_spread; ), TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d active_balance=%d sd_overutilized=%d", TP_printk("cpu=%d state=%s balance=%d group=%#lx busy_nr=%d imbalance=%ld flags=%#x ld_moved=%d bal_int=%d active_balance=%d sd_overutilized=%d prefer_spread=%d", __entry->cpu, __entry->idle == CPU_IDLE ? "idle" : (__entry->idle == CPU_NEWLY_IDLE ? "newly_idle" : "busy"), Loading @@ -318,7 +320,7 @@ TRACE_EVENT(sched_load_balance, __entry->group_mask, __entry->busiest_nr_running, __entry->imbalance, __entry->env_flags, __entry->ld_moved, __entry->balance_interval, __entry->active_balance, __entry->overutilized) __entry->overutilized, __entry->prefer_spread) ); TRACE_EVENT(sched_load_balance_nohz_kick, Loading Loading @@ -994,7 +996,7 @@ TRACE_EVENT(sched_compute_energy, TRACE_EVENT(sched_task_util, TP_PROTO(struct task_struct *p, unsigned long candidates, int best_energy_cpu, bool sync, bool need_idle, int fastpath, int best_energy_cpu, bool sync, int need_idle, int fastpath, bool placement_boost, u64 start_t, bool stune_boosted, bool is_rtg, bool rtg_skip_min, int start_cpu), Loading @@ -1011,7 +1013,7 @@ TRACE_EVENT(sched_task_util, __field(int, prev_cpu) __field(int, best_energy_cpu) __field(bool, sync) __field(bool, need_idle) __field(int, need_idle) __field(int, fastpath) __field(int, placement_boost) __field(int, rtg_cpu) Loading
kernel/sched/core.c +10 −3 Original line number Diff line number Diff line Loading @@ -5627,6 +5627,7 @@ unsigned int sched_lib_mask_force; bool is_sched_lib_based_app(pid_t pid) { const char *name = NULL; char *lib_list, *libname; struct vm_area_struct *vma; char path_buf[LIB_PATH_LENGTH]; bool found = false; Loading @@ -5636,11 +5637,14 @@ bool is_sched_lib_based_app(pid_t pid) if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0) return false; lib_list = kstrdup(sched_lib_name, GFP_KERNEL); rcu_read_lock(); p = find_process_by_pid(pid); if (!p) { rcu_read_unlock(); kfree(lib_list); return false; } Loading @@ -5660,19 +5664,22 @@ bool is_sched_lib_based_app(pid_t pid) if (IS_ERR(name)) goto release_sem; if (strnstr(name, sched_lib_name, while ((libname = strsep(&lib_list, ","))) { if (strnstr(name, libname, strnlen(name, LIB_PATH_LENGTH))) { found = true; break; } } } } release_sem: up_read(&mm->mmap_sem); mmput(mm); put_task_struct: put_task_struct(p); kfree(lib_list); return found; } Loading
kernel/sched/fair.c +116 −26 Original line number Diff line number Diff line Loading @@ -131,6 +131,9 @@ unsigned int sched_capacity_margin_up[NR_CPUS] = { unsigned int sched_capacity_margin_down[NR_CPUS] = { [0 ... NR_CPUS-1] = 1205}; /* ~15% margin */ #ifdef CONFIG_SCHED_WALT __read_mostly unsigned int sysctl_sched_prefer_spread; #endif unsigned int sched_small_task_threshold = 102; static inline void update_load_add(struct load_weight *lw, unsigned long inc) Loading Loading @@ -3877,16 +3880,31 @@ static inline bool task_demand_fits(struct task_struct *p, int cpu) } struct find_best_target_env { bool is_rtg; int placement_boost; bool need_idle; bool boosted; int need_idle; int fastpath; int start_cpu; bool strict_max; int skip_cpu; bool is_rtg; bool boosted; bool strict_max; }; static inline bool prefer_spread_on_idle(int cpu) { #ifdef CONFIG_SCHED_WALT if (likely(!sysctl_sched_prefer_spread)) return false; if (is_min_capacity_cpu(cpu)) return sysctl_sched_prefer_spread >= 1; return sysctl_sched_prefer_spread > 1; #else return false; #endif } static inline void adjust_cpus_for_packing(struct task_struct *p, int *target_cpu, int *best_idle_cpu, int shallowest_idle_cstate, Loading @@ -3898,7 +3916,10 @@ static inline void adjust_cpus_for_packing(struct task_struct *p, if (*best_idle_cpu == -1 || *target_cpu == -1) return; if (task_placement_boost_enabled(p) || fbt_env->need_idle || boosted || if (prefer_spread_on_idle(*best_idle_cpu)) fbt_env->need_idle |= 2; if (fbt_env->need_idle || task_placement_boost_enabled(p) || boosted || shallowest_idle_cstate <= 0) { *target_cpu = -1; return; Loading Loading @@ -7024,6 +7045,7 @@ int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, curr_is_rtg = task_in_related_thread_group(cpu_rq(cpu)->curr); fbt_env.fastpath = 0; fbt_env.need_idle = need_idle; if (trace_sched_task_util_enabled()) start_t = sched_clock(); Loading Loading @@ -7070,7 +7092,6 @@ int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, if (sched_feat(FIND_BEST_TARGET)) { fbt_env.is_rtg = is_rtg; fbt_env.placement_boost = placement_boost; fbt_env.need_idle = need_idle; fbt_env.start_cpu = start_cpu; fbt_env.boosted = boosted; fbt_env.strict_max = is_rtg && Loading @@ -7096,8 +7117,8 @@ int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, if (p->state == TASK_WAKING) delta = task_util(p); #endif if (task_placement_boost_enabled(p) || need_idle || boosted || is_rtg || __cpu_overutilized(prev_cpu, delta) || if (task_placement_boost_enabled(p) || fbt_env.need_idle || boosted || is_rtg || __cpu_overutilized(prev_cpu, delta) || !task_fits_max(p, prev_cpu) || cpu_isolated(prev_cpu)) { best_energy_cpu = cpu; goto unlock; Loading Loading @@ -7231,8 +7252,9 @@ int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, done: trace_sched_task_util(p, cpumask_bits(candidates)[0], best_energy_cpu, sync, need_idle, fbt_env.fastpath, placement_boost, start_t, boosted, is_rtg, get_rtg_status(p), start_cpu); sync, fbt_env.need_idle, fbt_env.fastpath, placement_boost, start_t, boosted, is_rtg, get_rtg_status(p), start_cpu); return best_energy_cpu; Loading Loading @@ -7946,6 +7968,7 @@ struct lb_env { unsigned int loop; unsigned int loop_break; unsigned int loop_max; bool prefer_spread; enum fbq_type fbq_type; enum group_type src_grp_type; Loading Loading @@ -8119,7 +8142,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) struct root_domain *rd = env->dst_rq->rd; if ((rcu_dereference(rd->pd) && !sd_overutilized(env->sd)) && env->idle == CPU_NEWLY_IDLE && env->idle == CPU_NEWLY_IDLE && !env->prefer_spread && !task_in_related_thread_group(p)) { long util_cum_dst, util_cum_src; unsigned long demand; Loading Loading @@ -8282,7 +8305,21 @@ static int detach_tasks(struct lb_env *env) if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed) goto next; if ((load / 2) > env->imbalance) /* * p is not running task when we goes until here, so if p is one * of the 2 task in src cpu rq and not the running one, * that means it is the only task that can be balanced. * So only when there is other tasks can be balanced or * there is situation to ignore big task, it is needed * to skip the task load bigger than 2*imbalance. * * And load based checks are skipped for prefer_spread in * finding busiest group, ignore the task's h_load. */ if (!env->prefer_spread && ((cpu_rq(env->src_cpu)->nr_running > 2) || (env->flags & LBF_IGNORE_BIG_TASKS)) && ((load / 2) > env->imbalance)) goto next; detach_task(p, env); Loading Loading @@ -9082,6 +9119,11 @@ static bool update_sd_pick_busiest(struct lb_env *env, if (sgs->group_type < busiest->group_type) return false; if (env->prefer_spread && env->idle != CPU_NOT_IDLE && (sgs->sum_nr_running > busiest->sum_nr_running) && (sgs->group_util > busiest->group_util)) return true; if (sgs->avg_load <= busiest->avg_load) return false; Loading Loading @@ -9115,6 +9157,11 @@ static bool update_sd_pick_busiest(struct lb_env *env, return false; asym_packing: if (env->prefer_spread && (sgs->sum_nr_running < busiest->sum_nr_running)) return false; /* This is the busiest node in its class. */ if (!(env->sd->flags & SD_ASYM_PACKING)) return true; Loading Loading @@ -9595,6 +9642,15 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s return fix_small_imbalance(env, sds); } /* * If we couldn't find any imbalance, then boost the imbalance * with the group util. */ if (env->prefer_spread && !env->imbalance && env->idle != CPU_NOT_IDLE && busiest->sum_nr_running > busiest->group_weight) env->imbalance = busiest->group_util; } /******* find_busiest_group() helpers end here *********************/ Loading Loading @@ -9988,6 +10044,15 @@ static int load_balance(int this_cpu, struct rq *this_rq, .loop = 0, }; #ifdef CONFIG_SCHED_WALT env.prefer_spread = (prefer_spread_on_idle(this_cpu) && !((sd->flags & SD_ASYM_CPUCAPACITY) && !cpumask_test_cpu(this_cpu, &asym_cap_sibling_cpus))); #else env.prefer_spread = false; #endif cpumask_and(cpus, sched_domain_span(sd), cpu_active_mask); schedstat_inc(sd->lb_count[idle]); Loading Loading @@ -10277,10 +10342,11 @@ static int load_balance(int this_cpu, struct rq *this_rq, env.imbalance, env.flags, ld_moved, sd->balance_interval, active_balance, #ifdef CONFIG_SCHED_WALT sd_overutilized(sd)); sd_overutilized(sd), #else READ_ONCE(this_rq->rd->overutilized)); READ_ONCE(this_rq->rd->overutilized), #endif env.prefer_spread); return ld_moved; } Loading Loading @@ -10525,7 +10591,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) max_cost += sd->max_newidle_lb_cost; #ifdef CONFIG_SCHED_WALT if (!sd_overutilized(sd)) if (!sd_overutilized(sd) && !prefer_spread_on_idle(cpu)) continue; #endif Loading Loading @@ -10615,7 +10681,7 @@ static inline int find_energy_aware_new_ilb(void) int ilb = nr_cpu_ids; struct sched_domain *sd; int cpu = raw_smp_processor_id(); cpumask_t avail_cpus, tmp_cpus; cpumask_t idle_cpus, tmp_cpus; struct sched_group *sg; unsigned long ref_cap = capacity_orig_of(cpu); unsigned long best_cap = 0, best_cap_cpu = -1; Loading @@ -10625,16 +10691,16 @@ static inline int find_energy_aware_new_ilb(void) if (!sd) goto out; cpumask_and(&avail_cpus, nohz.idle_cpus_mask, cpumask_and(&idle_cpus, nohz.idle_cpus_mask, housekeeping_cpumask(HK_FLAG_MISC)); cpumask_andnot(&avail_cpus, &avail_cpus, cpu_isolated_mask); cpumask_andnot(&idle_cpus, &idle_cpus, cpu_isolated_mask); sg = sd->groups; do { int i; unsigned long cap; cpumask_and(&tmp_cpus, &avail_cpus, sched_group_span(sg)); cpumask_and(&tmp_cpus, &idle_cpus, sched_group_span(sg)); i = cpumask_first(&tmp_cpus); /* This sg did not have any idle CPUs */ Loading @@ -10649,7 +10715,18 @@ static inline int find_energy_aware_new_ilb(void) break; } /* The back up CPU is selected from the best capacity CPUs */ /* * When there are no idle CPUs in the same capacity group, * we find the next best capacity CPU. */ if (best_cap > ref_cap) { if (cap > ref_cap && cap < best_cap) { best_cap = cap; best_cap_cpu = i; } continue; } if (cap > best_cap) { best_cap = cap; best_cap_cpu = i; Loading Loading @@ -10761,7 +10838,8 @@ static void nohz_balancer_kick(struct rq *rq) * happens from the tickpath. */ if (sched_energy_enabled()) { if (rq->nr_running >= 2 && cpu_overutilized(cpu)) if (rq->nr_running >= 2 && (cpu_overutilized(cpu) || prefer_spread_on_idle(cpu))) flags = NOHZ_KICK_MASK; goto out; } Loading Loading @@ -11166,6 +11244,11 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) int pulled_task = 0; u64 curr_cost = 0; u64 avg_idle = this_rq->avg_idle; bool prefer_spread = prefer_spread_on_idle(this_cpu); bool force_lb = (!is_min_capacity_cpu(this_cpu) && silver_has_big_tasks() && (atomic_read(&this_rq->nr_iowait) == 0)); if (cpu_isolated(this_cpu)) return 0; Loading @@ -11182,8 +11265,8 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) */ if (!cpu_active(this_cpu)) return 0; if (!is_min_capacity_cpu(this_cpu) && silver_has_big_tasks() && (atomic_read(&this_rq->nr_iowait) == 0)) if (force_lb || prefer_spread) avg_idle = ULLONG_MAX; /* * This is OK, because current is on_cpu, which avoids it being picked Loading Loading @@ -11218,6 +11301,13 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) if (!(sd->flags & SD_LOAD_BALANCE)) continue; #ifdef CONFIG_SCHED_WALT if (prefer_spread && !force_lb && (sd->flags & SD_ASYM_CPUCAPACITY) && !(cpumask_test_cpu(this_cpu, &asym_cap_sibling_cpus))) avg_idle = this_rq->avg_idle; #endif if (avg_idle < curr_cost + sd->max_newidle_lb_cost) { update_next_balance(sd, &next_balance); break; Loading
kernel/sysctl.c +9 −0 Original line number Diff line number Diff line Loading @@ -583,6 +583,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sched_updown_migrate_handler, }, { .procname = "sched_prefer_spread", .data = &sysctl_sched_prefer_spread, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, }, #endif #ifdef CONFIG_SCHED_DEBUG { Loading