Merge "ANDROID: sched/fair: Don't balance misfits if it would overload local group" (88532b42) · Commits · e / devices / android_kernel_oneplus_sm8150

kernel/sched/fair.c

+73 −21

Original line number	Diff line number	Diff line
		@@ -9332,6 +9332,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
		cpu_rq(cpu)->cpu_capacity = capacity;
		sdg->sgc->capacity = capacity;
		sdg->sgc->min_capacity = capacity;
		sdg->sgc->max_capacity = capacity;
		}

		void update_group_capacity(struct sched_domain *sd, int cpu)
		@@ -9507,17 +9508,29 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
		}

		/*
		* group_smaller_cpu_capacity: Returns true if sched_group sg has smaller
		* group_smaller_min_cpu_capacity: Returns true if sched_group sg has smaller
		* per-CPU capacity than sched_group ref.
		*/
		static inline bool
		group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
		group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
		{
		return sg->sgc->min_capacity *
		sched_capacity_margin_up[group_first_cpu(sg)] <
		ref->sgc->min_capacity * 1024;
		}

		/*
		* group_smaller_max_cpu_capacity: Returns true if sched_group sg has smaller
		* per-CPU capacity_orig than sched_group ref.
		*/
		static inline bool
		group_smaller_max_cpu_capacity(struct sched_group sg, struct sched_group ref)
		{
		return sg->sgc->max_capacity *
		sched_capacity_margin_up[group_first_cpu(sg)] <
		ref->sgc->max_capacity * 1024;
		}

		/*
		* group_similar_cpu_capacity: Returns true if the minimum capacity of the
		* compared groups differ by less than 12.5%.
		@@ -9554,7 +9567,7 @@ group_type group_classify(struct sched_group *group,
		* @load_idx: Load index of sched_domain of this_cpu for load calc.
		* @local_group: Does group contain this_cpu.
		* @sgs: variable to hold the statistics for this group.
		* @overload: Indicate more than one runnable task for any CPU.
		* @overload: Indicate pullable load (e.g. >1 runnable task).
		* @overutilized: Indicate overutilization for any CPU.
		*/
		static inline void update_sg_lb_stats(struct lb_env *env,
		@@ -9599,8 +9612,11 @@ static inline void update_sg_lb_stats(struct lb_env *env,
		sgs->idle_cpus++;

		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
		sgs->group_misfit_task_load < rq->misfit_task_load)
		sgs->group_misfit_task_load < rq->misfit_task_load) {
		sgs->group_misfit_task_load = rq->misfit_task_load;
		*overload = 1;
		}


		if (cpu_overutilized(i)) {
		*overutilized = true;
		@@ -9656,9 +9672,12 @@ static bool update_sd_pick_busiest(struct lb_env *env,

		/*
		* Don't try to pull misfit tasks we can't help.
		* We can use max_capacity here as reduction in capacity on some
		* cpus in the group should either be possible to resolve
		* internally or be covered by avg_load imbalance (eventually).
		*/
		if (sgs->group_type == group_misfit_task &&
		(!group_smaller_cpu_capacity(sg, sds->local) ||
		(!group_smaller_max_cpu_capacity(sg, sds->local) ||
		!group_has_capacity(env, &sds->local_stat)))
		return false;

		@@ -9681,7 +9700,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
		* power/energy consequences are not considered.
		*/
		if (sgs->sum_nr_running <= sgs->group_weight &&
		group_smaller_cpu_capacity(sds->local, sg))
		group_smaller_min_cpu_capacity(sds->local, sg))
		return false;

		/*
		@@ -9693,6 +9712,13 @@ static bool update_sd_pick_busiest(struct lb_env *env,
		!group_similar_cpu_capacity(sds->local, sg))
		return false;

		/*
		* If we have more than one misfit sg go with the biggest misfit.
		*/
		if (sgs->group_type == group_misfit_task &&
		sgs->group_misfit_task_load < busiest->group_misfit_task_load)
		return false;

		asym_packing:
		/* This is the busiest node in its class. */
		if (!(env->sd->flags & SD_ASYM_PACKING))
		@@ -9773,11 +9799,9 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
		struct sched_group *sg = env->sd->groups;
		struct sg_lb_stats *local = &sds->local_stat;
		struct sg_lb_stats tmp_sgs;
		int load_idx, prefer_sibling = 0;
		int load_idx;
		bool overload = false, overutilized = false, misfit_task = false;

		if (child && child->flags & SD_PREFER_SIBLING)
		prefer_sibling = 1;
		bool prefer_sibling = child && child->flags & SD_PREFER_SIBLING;

		#ifdef CONFIG_NO_HZ_COMMON
		if (env->idle == CPU_NEWLY_IDLE) {
		@@ -9865,8 +9889,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd

		if (!lb_sd_parent(env->sd)) {
		/* update overload indicator if we are at root domain */
		if (env->dst_rq->rd->overload != overload)
		env->dst_rq->rd->overload = overload;
		if (READ_ONCE(env->dst_rq->rd->overload) != overload)
		WRITE_ONCE(env->dst_rq->rd->overload, overload);
		}

		if (overutilized)
		@@ -10112,8 +10136,18 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
		(sds->avg_load - local->avg_load) * local->group_capacity
		) / SCHED_CAPACITY_SCALE;

		/* Boost imbalance to allow misfit task to be balanced. */
		if (busiest->group_type == group_misfit_task) {
		/* Boost imbalance to allow misfit task to be balanced.
		* Always do this if we are doing a NEWLY_IDLE balance
		* on the assumption that any tasks we have must not be
		* long-running (and hence we cannot rely upon load).
		* However if we are not idle, we should assume the tasks
		* we have are longer running and not override load-based
		* calculations above unless we are sure that the local
		* group is underutilized.
		*/
		if (busiest->group_type == group_misfit_task &&
		(env->idle == CPU_NEWLY_IDLE ||
		local->sum_nr_running < local->group_weight)) {
		env->imbalance = max_t(long, env->imbalance,
		busiest->group_misfit_task_load);
		}
		@@ -10214,7 +10248,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
		busiest->group_no_capacity)
		goto force_balance;

		/* Misfitting tasks should be dealt with regardless of the avg load */
		/* Misfit tasks should be dealt with regardless of the avg load */
		if (busiest->group_type == group_misfit_task)
		goto force_balance;

		@@ -10304,15 +10338,30 @@ static struct rq *find_busiest_queue(struct lb_env *env,
		continue;

		/*
		* For ASYM_CPUCAPACITY domains with misfit tasks we ignore
		* load.
		* For ASYM_CPUCAPACITY domains with misfit tasks we simply
		* seek the "biggest" misfit task.
		*/
		if (env->src_grp_type == group_misfit_task &&
		rq->misfit_task_load)
		return rq;
		if (env->src_grp_type == group_misfit_task) {
		if (rq->misfit_task_load > busiest_load) {
		busiest_load = rq->misfit_task_load;
		busiest = rq;
		}
		continue;
		}

		capacity = capacity_of(i);

		/*
		* For ASYM_CPUCAPACITY domains, don't pick a cpu that could
		* eventually lead to active_balancing high->low capacity.
		* Higher per-cpu capacity is considered better than balancing
		* average load.
		*/
		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
		capacity_of(env->dst_cpu) < capacity &&
		rq->nr_running == 1)
		continue;

		wl = weighted_cpuload(rq);

		/*
		@@ -10390,6 +10439,9 @@ static int need_active_balance(struct lb_env *env)
		return 1;
		}

		if (env->src_grp_type == group_misfit_task)
		return 1;

		return unlikely(sd->nr_balance_failed >
		sd->cache_nice_tries + NEED_ACTIVE_BALANCE_THRESHOLD);
		}
		@@ -10814,7 +10866,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
		rq_unpin_lock(this_rq, rf);

		if (this_rq->avg_idle < sysctl_sched_migration_cost ||
		!this_rq->rd->overload) {
		!READ_ONCE(this_rq->rd->overload)) {
		rcu_read_lock();
		sd = rcu_dereference_check_sched_domain(this_rq->sd);
		if (sd)

kernel/sched/sched.h

+8 −4

Original line number	Diff line number	Diff line
		@@ -695,8 +695,12 @@ struct root_domain {
		cpumask_var_t span;
		cpumask_var_t online;

		/* Indicate more than one runnable task for any CPU */
		bool overload;
		/*
		* Indicate pullable load on at least one CPU, e.g:
		* - More than one runnable task
		* - Running task is misfit
		*/
		int overload;

		/*
		* The bit corresponding to a CPU gets set here if such CPU has more
		@@ -1761,8 +1765,8 @@ static inline void add_nr_running(struct rq *rq, unsigned count)

		if (prev_nr < 2 && rq->nr_running >= 2) {
		#ifdef CONFIG_SMP
		if (!rq->rd->overload)
		rq->rd->overload = true;
		if (!READ_ONCE(rq->rd->overload))
		WRITE_ONCE(rq->rd->overload, 1);
		#endif
		}

kernel/sched/topology.c

+29 −4

Original line number	Diff line number	Diff line
		@@ -915,6 +915,7 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd)

		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
		sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
		sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;

		return sg;
		}
		@@ -1290,7 +1291,7 @@ sd_init(struct sched_domain_topology_level *tl,
		| 0*SD_SHARE_CPUCAPACITY
		| 0*SD_SHARE_PKG_RESOURCES
		| 0*SD_SERIALIZE
		| 0*SD_PREFER_SIBLING
		| 1*SD_PREFER_SIBLING
		| 0*SD_NUMA
		| sd_flags
		,
		@@ -1309,6 +1310,26 @@ sd_init(struct sched_domain_topology_level *tl,
		cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
		sd_id = cpumask_first(sched_domain_span(sd));

		/*
		* Check if cpu_map eclipses cpu capacity asymmetry.
		*/

		if (sd->flags & SD_ASYM_CPUCAPACITY) {
		long capacity = arch_scale_cpu_capacity(NULL, sd_id);
		bool disable = true;
		int i;

		for_each_cpu(i, sched_domain_span(sd)) {
		if (capacity != arch_scale_cpu_capacity(NULL, i)) {
		disable = false;
		break;
		}
		}

		if (disable)
		sd->flags &= ~SD_ASYM_CPUCAPACITY;
		}

		/*
		* Convert topological properties into behaviour.
		*/
		@@ -1316,12 +1337,17 @@ sd_init(struct sched_domain_topology_level *tl,
		if (sd->flags & SD_ASYM_CPUCAPACITY) {
		struct sched_domain *t = sd;

		/*
		* Don't attempt to spread across cpus of different capacities.
		*/
		if (sd->child)
		sd->child->flags &= ~SD_PREFER_SIBLING;

		for_each_lower_domain(t)
		t->flags |= SD_BALANCE_WAKE;
		}

		if (sd->flags & SD_SHARE_CPUCAPACITY) {
		sd->flags |= SD_PREFER_SIBLING;
		sd->imbalance_pct = 110;
		sd->smt_gain = 1178; /* ~15% */

		@@ -1336,6 +1362,7 @@ sd_init(struct sched_domain_topology_level *tl,
		sd->busy_idx = 3;
		sd->idle_idx = 2;

		sd->flags &= ~SD_PREFER_SIBLING;
		sd->flags |= SD_SERIALIZE;
		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
		sd->flags &= ~(SD_BALANCE_EXEC |
		@@ -1345,7 +1372,6 @@ sd_init(struct sched_domain_topology_level *tl,

		#endif
		} else {
		sd->flags |= SD_PREFER_SIBLING;
		sd->cache_nice_tries = 1;
		sd->busy_idx = 2;
		sd->idle_idx = 1;
		@@ -2102,4 +2128,3 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],

		mutex_unlock(&sched_domains_mutex);
		}