Loading include/linux/sched/topology.h +2 −0 Original line number Diff line number Diff line Loading @@ -74,6 +74,8 @@ struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; bool overutilized; }; struct sched_domain { Loading kernel/sched/fair.c +75 −13 Original line number Diff line number Diff line Loading @@ -5278,12 +5278,31 @@ bool cpu_overutilized(int cpu) return __cpu_overutilized(cpu, 0); } static inline void update_overutilized_status(struct rq *rq) static bool sd_overutilized(struct sched_domain *sd) { if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) { WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED); trace_sched_overutilized(1); return sd->shared->overutilized; } static void set_sd_overutilized(struct sched_domain *sd) { sd->shared->overutilized = true; } static void clear_sd_overutilized(struct sched_domain *sd) { sd->shared->overutilized = false; } static inline void update_overutilized_status(struct rq *rq) { struct sched_domain *sd; rcu_read_lock(); sd = rcu_dereference(rq->sd); if (sd && !sd_overutilized(sd) && cpu_overutilized(rq->cpu)) set_sd_overutilized(sd); rcu_read_unlock(); } #else static inline void update_overutilized_status(struct rq *rq) { } Loading Loading @@ -8410,7 +8429,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) if (static_branch_unlikely(&sched_energy_present)) { struct root_domain *rd = env->dst_rq->rd; if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized) && if ((rcu_dereference(rd->pd) && !sd_overutilized(env->sd)) && env->idle == CPU_NEWLY_IDLE && !task_in_related_thread_group(p)) { long util_cum_dst, util_cum_src; Loading Loading @@ -8879,6 +8898,7 @@ struct sd_lb_stats { unsigned long total_running; unsigned long total_load; /* Total load of all groups in sd */ unsigned long total_capacity; /* Total capacity of all groups in sd */ unsigned long total_util; /* Total util of all groups in sd */ unsigned long avg_load; /* Average load across all 
groups in sd */ struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ Loading @@ -8899,6 +8919,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) .total_running = 0UL, .total_load = 0UL, .total_capacity = 0UL, .total_util = 0UL, .busiest_stat = { .avg_load = 0UL, .sum_nr_running = 0, Loading Loading @@ -9298,9 +9319,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, if (nr_running > 1) *sg_status |= SG_OVERLOAD; if (cpu_overutilized(i)) if (cpu_overutilized(i)) { *sg_status |= SG_OVERUTILIZED; if (rq->misfit_task_load) *sg_status |= SG_HAS_MISFIT_TASK; } #ifdef CONFIG_NUMA_BALANCING sgs->nr_numa_running += rq->nr_numa_running; sgs->nr_preferred_running += rq->nr_preferred_running; Loading Loading @@ -9534,6 +9559,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd sds->total_running += sgs->sum_nr_running; sds->total_load += sgs->group_load; sds->total_capacity += sgs->group_capacity; sds->total_util += sgs->group_util; trace_sched_load_balance_sg_stats(sg->cpumask[0], sgs->group_type, sgs->idle_cpus, Loading Loading @@ -9565,14 +9591,47 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /* update overload indicator if we are at root domain */ WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD); } /* Update over-utilization (tipping point, U >= 0) indicator */ WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED); trace_sched_overutilized(!!(sg_status & SG_OVERUTILIZED)); } else if (sg_status & SG_OVERUTILIZED) { WRITE_ONCE(env->dst_rq->rd->overutilized, SG_OVERUTILIZED); trace_sched_overutilized(1); if (sg_status & SG_OVERUTILIZED) set_sd_overutilized(env->sd); else clear_sd_overutilized(env->sd); /* * If there is a misfit task in one cpu in this sched_domain * it is likely that the imbalance cannot be sorted out among * the cpu's in this sched_domain. In this case set the * overutilized flag at the parent sched_domain. 
*/ if (sg_status & SG_HAS_MISFIT_TASK) { struct sched_domain *sd = env->sd->parent; /* * In case of a misfit task, load balance at the parent * sched domain level will make sense only if the cpus * have a different capacity. If cpus at a domain level have * the same capacity, the misfit task cannot be well * accommodated in any of the cpus and there is no point in * trying a load balance at this level */ while (sd) { if (sd->flags & SD_ASYM_CPUCAPACITY) { set_sd_overutilized(sd); break; } sd = sd->parent; } } /* * If the domain util is greater than domain capacity, load balancing * needs to be done at the next sched domain level as well. */ if (env->sd->parent && sds->total_capacity * 1024 < sds->total_util * sched_capacity_margin_up[group_first_cpu(sds->local)]) set_sd_overutilized(env->sd->parent); } Loading Loading @@ -9867,7 +9926,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (static_branch_unlikely(&sched_energy_present)) { struct root_domain *rd = env->dst_rq->rd; if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized)) { if (rcu_dereference(rd->pd) && !sd_overutilized(env->sd)) { int cpu_local, cpu_busiest; unsigned long capacity_local, capacity_busiest; Loading Loading @@ -10711,6 +10770,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) } max_cost += sd->max_newidle_lb_cost; if (!sd_overutilized(sd)) continue; if (!(sd->flags & SD_LOAD_BALANCE)) continue; Loading kernel/sched/sched.h +1 −3 Original line number Diff line number Diff line Loading @@ -790,6 +790,7 @@ struct max_cpu_capacity { /* Scheduling group status flags */ #define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */ #define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */ #define SG_HAS_MISFIT_TASK 0x4 /* Group has misfit task. 
*/ /* * We add the notion of a root-domain which will be used to define per-domain Loading @@ -813,9 +814,6 @@ struct root_domain { */ int overload; /* Indicate one or more cpus over-utilized (tipping point) */ int overutilized; /* * The bit corresponding to a CPU gets set here if such CPU has more * than one runnable -deadline task (as it is below for RT tasks). Loading kernel/sched/topology.c +4 −8 Original line number Diff line number Diff line Loading @@ -1380,15 +1380,11 @@ sd_init(struct sched_domain_topology_level *tl, sd->idle_idx = 1; } /* * For all levels sharing cache; connect a sched_domain_shared * instance. */ if (sd->flags & SD_SHARE_PKG_RESOURCES) { sd->shared = *per_cpu_ptr(sdd->sds, sd_id); atomic_inc(&sd->shared->ref); if (sd->flags & SD_SHARE_PKG_RESOURCES) atomic_set(&sd->shared->nr_busy_cpus, sd_weight); } sd->private = sdd; Loading Loading
include/linux/sched/topology.h +2 −0 Original line number Diff line number Diff line Loading @@ -74,6 +74,8 @@ struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; bool overutilized; }; struct sched_domain { Loading
kernel/sched/fair.c +75 −13 Original line number Diff line number Diff line Loading @@ -5278,12 +5278,31 @@ bool cpu_overutilized(int cpu) return __cpu_overutilized(cpu, 0); } static inline void update_overutilized_status(struct rq *rq) static bool sd_overutilized(struct sched_domain *sd) { if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) { WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED); trace_sched_overutilized(1); return sd->shared->overutilized; } static void set_sd_overutilized(struct sched_domain *sd) { sd->shared->overutilized = true; } static void clear_sd_overutilized(struct sched_domain *sd) { sd->shared->overutilized = false; } static inline void update_overutilized_status(struct rq *rq) { struct sched_domain *sd; rcu_read_lock(); sd = rcu_dereference(rq->sd); if (sd && !sd_overutilized(sd) && cpu_overutilized(rq->cpu)) set_sd_overutilized(sd); rcu_read_unlock(); } #else static inline void update_overutilized_status(struct rq *rq) { } Loading Loading @@ -8410,7 +8429,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) if (static_branch_unlikely(&sched_energy_present)) { struct root_domain *rd = env->dst_rq->rd; if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized) && if ((rcu_dereference(rd->pd) && !sd_overutilized(env->sd)) && env->idle == CPU_NEWLY_IDLE && !task_in_related_thread_group(p)) { long util_cum_dst, util_cum_src; Loading Loading @@ -8879,6 +8898,7 @@ struct sd_lb_stats { unsigned long total_running; unsigned long total_load; /* Total load of all groups in sd */ unsigned long total_capacity; /* Total capacity of all groups in sd */ unsigned long total_util; /* Total util of all groups in sd */ unsigned long avg_load; /* Average load across all groups in sd */ struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ Loading @@ -8899,6 +8919,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) .total_running = 0UL, .total_load = 0UL, .total_capacity = 0UL, .total_util = 0UL, 
.busiest_stat = { .avg_load = 0UL, .sum_nr_running = 0, Loading Loading @@ -9298,9 +9319,13 @@ static inline void update_sg_lb_stats(struct lb_env *env, if (nr_running > 1) *sg_status |= SG_OVERLOAD; if (cpu_overutilized(i)) if (cpu_overutilized(i)) { *sg_status |= SG_OVERUTILIZED; if (rq->misfit_task_load) *sg_status |= SG_HAS_MISFIT_TASK; } #ifdef CONFIG_NUMA_BALANCING sgs->nr_numa_running += rq->nr_numa_running; sgs->nr_preferred_running += rq->nr_preferred_running; Loading Loading @@ -9534,6 +9559,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd sds->total_running += sgs->sum_nr_running; sds->total_load += sgs->group_load; sds->total_capacity += sgs->group_capacity; sds->total_util += sgs->group_util; trace_sched_load_balance_sg_stats(sg->cpumask[0], sgs->group_type, sgs->idle_cpus, Loading Loading @@ -9565,14 +9591,47 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /* update overload indicator if we are at root domain */ WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD); } /* Update over-utilization (tipping point, U >= 0) indicator */ WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED); trace_sched_overutilized(!!(sg_status & SG_OVERUTILIZED)); } else if (sg_status & SG_OVERUTILIZED) { WRITE_ONCE(env->dst_rq->rd->overutilized, SG_OVERUTILIZED); trace_sched_overutilized(1); if (sg_status & SG_OVERUTILIZED) set_sd_overutilized(env->sd); else clear_sd_overutilized(env->sd); /* * If there is a misfit task in one cpu in this sched_domain * it is likely that the imbalance cannot be sorted out among * the cpu's in this sched_domain. In this case set the * overutilized flag at the parent sched_domain. */ if (sg_status & SG_HAS_MISFIT_TASK) { struct sched_domain *sd = env->sd->parent; /* * In case of a misfit task, load balance at the parent * sched domain level will make sense only if the cpus * have a different capacity. 
If cpus at a domain level have * the same capacity, the misfit task cannot be well * accommodated in any of the cpus and there is no point in * trying a load balance at this level */ while (sd) { if (sd->flags & SD_ASYM_CPUCAPACITY) { set_sd_overutilized(sd); break; } sd = sd->parent; } } /* * If the domain util is greater than domain capacity, load balancing * needs to be done at the next sched domain level as well. */ if (env->sd->parent && sds->total_capacity * 1024 < sds->total_util * sched_capacity_margin_up[group_first_cpu(sds->local)]) set_sd_overutilized(env->sd->parent); } Loading Loading @@ -9867,7 +9926,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (static_branch_unlikely(&sched_energy_present)) { struct root_domain *rd = env->dst_rq->rd; if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized)) { if (rcu_dereference(rd->pd) && !sd_overutilized(env->sd)) { int cpu_local, cpu_busiest; unsigned long capacity_local, capacity_busiest; Loading Loading @@ -10711,6 +10770,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) } max_cost += sd->max_newidle_lb_cost; if (!sd_overutilized(sd)) continue; if (!(sd->flags & SD_LOAD_BALANCE)) continue; Loading
kernel/sched/sched.h +1 −3 Original line number Diff line number Diff line Loading @@ -790,6 +790,7 @@ struct max_cpu_capacity { /* Scheduling group status flags */ #define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */ #define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */ #define SG_HAS_MISFIT_TASK 0x4 /* Group has misfit task. */ /* * We add the notion of a root-domain which will be used to define per-domain Loading @@ -813,9 +814,6 @@ struct root_domain { */ int overload; /* Indicate one or more cpus over-utilized (tipping point) */ int overutilized; /* * The bit corresponding to a CPU gets set here if such CPU has more * than one runnable -deadline task (as it is below for RT tasks). Loading
kernel/sched/topology.c +4 −8 Original line number Diff line number Diff line Loading @@ -1380,15 +1380,11 @@ sd_init(struct sched_domain_topology_level *tl, sd->idle_idx = 1; } /* * For all levels sharing cache; connect a sched_domain_shared * instance. */ if (sd->flags & SD_SHARE_PKG_RESOURCES) { sd->shared = *per_cpu_ptr(sdd->sds, sd_id); atomic_inc(&sd->shared->ref); if (sd->flags & SD_SHARE_PKG_RESOURCES) atomic_set(&sd->shared->nr_busy_cpus, sd_weight); } sd->private = sdd; Loading