Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 88532b42 authored by qctecmdr Service, committed by Gerrit - the friendly Code Review server
Browse files

Merge "ANDROID: sched/fair: Don't balance misfits if it would overload local group"

parents 393a5471 4ca7dd5b
Loading
Loading
Loading
Loading
+73 −21
Original line number Diff line number Diff line
@@ -9332,6 +9332,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
	cpu_rq(cpu)->cpu_capacity = capacity;
	sdg->sgc->capacity = capacity;
	sdg->sgc->min_capacity = capacity;
	sdg->sgc->max_capacity = capacity;
}

void update_group_capacity(struct sched_domain *sd, int cpu)
@@ -9507,17 +9508,29 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
}

/*
 * group_smaller_cpu_capacity: Returns true if sched_group sg has smaller
 * group_smaller_min_cpu_capacity: Returns true if sched_group sg has smaller
 * per-CPU capacity than sched_group ref.
 */
static inline bool
group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
{
	return sg->sgc->min_capacity *
				sched_capacity_margin_up[group_first_cpu(sg)] <
						ref->sgc->min_capacity * 1024;
}

/*
 * group_smaller_max_cpu_capacity: Returns true if sched_group sg has smaller
 * per-CPU capacity_orig than sched_group ref.
 *
 * The check is margin-based: sg's max_capacity is scaled by the per-cpu
 * up-migration margin while ref's is scaled by 1024 (presumably the
 * SCHED_CAPACITY_SCALE fixed-point 1.0 — confirm against the definition of
 * sched_capacity_margin_up), so sg only counts as smaller when the capacity
 * gap exceeds that margin.
 */
static inline bool
group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
{
	return ref->sgc->max_capacity * 1024 >
			sg->sgc->max_capacity *
				sched_capacity_margin_up[group_first_cpu(sg)];
}

/*
 * group_similar_cpu_capacity: Returns true if the minimum capacities of the
 * compared groups differ by less than 12.5%.
@@ -9554,7 +9567,7 @@ group_type group_classify(struct sched_group *group,
 * @load_idx: Load index of sched_domain of this_cpu for load calc.
 * @local_group: Does group contain this_cpu.
 * @sgs: variable to hold the statistics for this group.
 * @overload: Indicate more than one runnable task for any CPU.
 * @overload: Indicate pullable load (e.g. >1 runnable task).
 * @overutilized: Indicate overutilization for any CPU.
 */
static inline void update_sg_lb_stats(struct lb_env *env,
@@ -9599,8 +9612,11 @@ static inline void update_sg_lb_stats(struct lb_env *env,
			sgs->idle_cpus++;

		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
		    sgs->group_misfit_task_load < rq->misfit_task_load)
		    sgs->group_misfit_task_load < rq->misfit_task_load) {
			sgs->group_misfit_task_load = rq->misfit_task_load;
			*overload = 1;
		}


		if (cpu_overutilized(i)) {
			*overutilized = true;
@@ -9656,9 +9672,12 @@ static bool update_sd_pick_busiest(struct lb_env *env,

	/*
	 * Don't try to pull misfit tasks we can't help.
	 * We can use max_capacity here as reduction in capacity on some
	 * cpus in the group should either be possible to resolve
	 * internally or be covered by avg_load imbalance (eventually).
	 */
	if (sgs->group_type == group_misfit_task &&
	    (!group_smaller_cpu_capacity(sg, sds->local) ||
	    (!group_smaller_max_cpu_capacity(sg, sds->local) ||
	     !group_has_capacity(env, &sds->local_stat)))
		return false;

@@ -9681,7 +9700,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
	 * power/energy consequences are not considered.
	 */
	if (sgs->sum_nr_running <= sgs->group_weight &&
	    group_smaller_cpu_capacity(sds->local, sg))
	    group_smaller_min_cpu_capacity(sds->local, sg))
		return false;

	/*
@@ -9693,6 +9712,13 @@ static bool update_sd_pick_busiest(struct lb_env *env,
		!group_similar_cpu_capacity(sds->local, sg))
		return false;

	/*
	 * If we have more than one misfit sg, go with the biggest misfit.
	 */
	if (sgs->group_type == group_misfit_task &&
	    sgs->group_misfit_task_load < busiest->group_misfit_task_load)
		return false;

asym_packing:
	/* This is the busiest node in its class. */
	if (!(env->sd->flags & SD_ASYM_PACKING))
@@ -9773,11 +9799,9 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
	struct sched_group *sg = env->sd->groups;
	struct sg_lb_stats *local = &sds->local_stat;
	struct sg_lb_stats tmp_sgs;
	int load_idx, prefer_sibling = 0;
	int load_idx;
	bool overload = false, overutilized = false, misfit_task = false;

	if (child && child->flags & SD_PREFER_SIBLING)
		prefer_sibling = 1;
	bool prefer_sibling = child && child->flags & SD_PREFER_SIBLING;

#ifdef CONFIG_NO_HZ_COMMON
	if (env->idle == CPU_NEWLY_IDLE) {
@@ -9865,8 +9889,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd

	if (!lb_sd_parent(env->sd)) {
		/* update overload indicator if we are at root domain */
		if (env->dst_rq->rd->overload != overload)
			env->dst_rq->rd->overload = overload;
		if (READ_ONCE(env->dst_rq->rd->overload) != overload)
			WRITE_ONCE(env->dst_rq->rd->overload, overload);
	}

	if (overutilized)
@@ -10112,8 +10136,18 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
		(sds->avg_load - local->avg_load) * local->group_capacity
	) / SCHED_CAPACITY_SCALE;

	/* Boost imbalance to allow misfit task to be balanced. */
	if (busiest->group_type == group_misfit_task) {
	/* Boost imbalance to allow misfit task to be balanced.
	 * Always do this if we are doing a NEWLY_IDLE balance
	 * on the assumption that any tasks we have must not be
	 * long-running (and hence we cannot rely upon load).
	 * However if we are not idle, we should assume the tasks
	 * we have are longer running and not override load-based
	 * calculations above unless we are sure that the local
	 * group is underutilized.
	 */
	if (busiest->group_type == group_misfit_task &&
	    (env->idle == CPU_NEWLY_IDLE ||
	     local->sum_nr_running < local->group_weight)) {
		env->imbalance = max_t(long, env->imbalance,
				       busiest->group_misfit_task_load);
	}
@@ -10214,7 +10248,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
	    busiest->group_no_capacity)
		goto force_balance;

	/* Misfitting tasks should be dealt with regardless of the avg load */
	/* Misfit tasks should be dealt with regardless of the avg load */
	if (busiest->group_type == group_misfit_task)
		goto force_balance;

@@ -10304,15 +10338,30 @@ static struct rq *find_busiest_queue(struct lb_env *env,
			continue;

		/*
		 * For ASYM_CPUCAPACITY domains with misfit tasks we ignore
		 * load.
		 * For ASYM_CPUCAPACITY domains with misfit tasks we simply
		 * seek the "biggest" misfit task.
		 */
		if (env->src_grp_type == group_misfit_task &&
		    rq->misfit_task_load)
			return rq;
		if (env->src_grp_type == group_misfit_task) {
			if (rq->misfit_task_load > busiest_load) {
				busiest_load = rq->misfit_task_load;
				busiest = rq;
			}
			continue;
		}

		capacity = capacity_of(i);

		/*
		 * For ASYM_CPUCAPACITY domains, don't pick a cpu that could
		 * eventually lead to active_balancing high->low capacity.
		 * Higher per-cpu capacity is considered better than balancing
		 * average load.
		 */
		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
		    capacity_of(env->dst_cpu) < capacity &&
		    rq->nr_running == 1)
			continue;

		wl = weighted_cpuload(rq);

		/*
@@ -10390,6 +10439,9 @@ static int need_active_balance(struct lb_env *env)
			return 1;
	}

	if (env->src_grp_type == group_misfit_task)
		return 1;

	return unlikely(sd->nr_balance_failed >
			sd->cache_nice_tries + NEED_ACTIVE_BALANCE_THRESHOLD);
}
@@ -10814,7 +10866,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
	rq_unpin_lock(this_rq, rf);

	if (this_rq->avg_idle < sysctl_sched_migration_cost ||
	    !this_rq->rd->overload) {
	    !READ_ONCE(this_rq->rd->overload)) {
		rcu_read_lock();
		sd = rcu_dereference_check_sched_domain(this_rq->sd);
		if (sd)
+8 −4
Original line number Diff line number Diff line
@@ -695,8 +695,12 @@ struct root_domain {
	cpumask_var_t span;
	cpumask_var_t online;

	/* Indicate more than one runnable task for any CPU */
	bool overload;
	/*
	 * Indicate pullable load on at least one CPU, e.g:
	 * - More than one runnable task
	 * - Running task is misfit
	 */
	int overload;

	/*
	 * The bit corresponding to a CPU gets set here if such CPU has more
@@ -1761,8 +1765,8 @@ static inline void add_nr_running(struct rq *rq, unsigned count)

	if (prev_nr < 2 && rq->nr_running >= 2) {
#ifdef CONFIG_SMP
		if (!rq->rd->overload)
			rq->rd->overload = true;
		if (!READ_ONCE(rq->rd->overload))
			WRITE_ONCE(rq->rd->overload, 1);
#endif
	}

+29 −4
Original line number Diff line number Diff line
@@ -915,6 +915,7 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd)

	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;

	return sg;
}
@@ -1290,7 +1291,7 @@ sd_init(struct sched_domain_topology_level *tl,
					| 0*SD_SHARE_CPUCAPACITY
					| 0*SD_SHARE_PKG_RESOURCES
					| 0*SD_SERIALIZE
					| 0*SD_PREFER_SIBLING
					| 1*SD_PREFER_SIBLING
					| 0*SD_NUMA
					| sd_flags
					,
@@ -1309,6 +1310,26 @@ sd_init(struct sched_domain_topology_level *tl,
	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
	sd_id = cpumask_first(sched_domain_span(sd));

	/*
	 * Check if cpu_map eclipses cpu capacity asymmetry.
	 */

	if (sd->flags & SD_ASYM_CPUCAPACITY) {
		long capacity = arch_scale_cpu_capacity(NULL, sd_id);
		bool disable = true;
		int i;

		for_each_cpu(i, sched_domain_span(sd)) {
			if (capacity != arch_scale_cpu_capacity(NULL, i)) {
				disable = false;
				break;
			}
		}

		if (disable)
			sd->flags &= ~SD_ASYM_CPUCAPACITY;
	}

	/*
	 * Convert topological properties into behaviour.
	 */
@@ -1316,12 +1337,17 @@ sd_init(struct sched_domain_topology_level *tl,
	if (sd->flags & SD_ASYM_CPUCAPACITY) {
		struct sched_domain *t = sd;

		/*
		 * Don't attempt to spread across cpus of different capacities.
		 */
		if (sd->child)
			sd->child->flags &= ~SD_PREFER_SIBLING;

		for_each_lower_domain(t)
			t->flags |= SD_BALANCE_WAKE;
	}

	if (sd->flags & SD_SHARE_CPUCAPACITY) {
		sd->flags |= SD_PREFER_SIBLING;
		sd->imbalance_pct = 110;
		sd->smt_gain = 1178; /* ~15% */

@@ -1336,6 +1362,7 @@ sd_init(struct sched_domain_topology_level *tl,
		sd->busy_idx = 3;
		sd->idle_idx = 2;

		sd->flags &= ~SD_PREFER_SIBLING;
		sd->flags |= SD_SERIALIZE;
		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
			sd->flags &= ~(SD_BALANCE_EXEC |
@@ -1345,7 +1372,6 @@ sd_init(struct sched_domain_topology_level *tl,

#endif
	} else {
		sd->flags |= SD_PREFER_SIBLING;
		sd->cache_nice_tries = 1;
		sd->busy_idx = 2;
		sd->idle_idx = 1;
@@ -2102,4 +2128,3 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],

	mutex_unlock(&sched_domains_mutex);
}