Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (a339b351) · Commits · e / devices / android_kernel_fairphone_FP4

include/linux/sched/topology.h

+0 −8

Original line number	Original line	Diff line number	Diff line
	@@ -71,14 +71,6 @@ struct sched_domain_shared {
	atomic_t ref;		atomic_t ref;
	atomic_t nr_busy_cpus;		atomic_t nr_busy_cpus;
	int has_idle_cores;		int has_idle_cores;

	/*
	* Some variables from the most recent sd_lb_stats for this domain,
	* used by wake_affine().
	*/
	unsigned long nr_running;
	unsigned long load;
	unsigned long capacity;
	};		};

	struct sched_domain {		struct sched_domain {

kernel/sched/fair.c

+46 −94

Original line number	Original line	Diff line number	Diff line
	@@ -5356,91 +5356,62 @@ static int wake_wide(struct task_struct *p)
	return 1;		return 1;
	}		}

	struct llc_stats {		/*
	unsigned long nr_running;		* The purpose of wake_affine() is to quickly determine on which CPU we can run
	unsigned long load;		* soonest. For the purpose of speed we only consider the waking and previous
	unsigned long capacity;		* CPU.
	int has_capacity;		*
	};		* wake_affine_idle() - only considers 'now', it check if the waking CPU is (or
			* will be) idle.
			*
			* wake_affine_weight() - considers the weight to reflect the average
			* scheduling latency of the CPUs. This seems to work
			* for the overloaded case.
			*/

	static bool get_llc_stats(struct llc_stats *stats, int cpu)		static bool
			wake_affine_idle(struct sched_domain sd, struct task_struct p,
			int this_cpu, int prev_cpu, int sync)
	{		{
	struct sched_domain_shared *sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));		if (idle_cpu(this_cpu))
			return true;
	if (!sds)
	return false;

	stats->nr_running = READ_ONCE(sds->nr_running);
	stats->load = READ_ONCE(sds->load);
	stats->capacity = READ_ONCE(sds->capacity);
	stats->has_capacity = stats->nr_running < per_cpu(sd_llc_size, cpu);

			if (sync && cpu_rq(this_cpu)->nr_running == 1)
	return true;		return true;

			return false;
	}		}

	/*
	* Can a task be moved from prev_cpu to this_cpu without causing a load
	* imbalance that would trigger the load balancer?
	*
	* Since we're running on 'stale' values, we might in fact create an imbalance
	* but recomputing these values is expensive, as that'd mean iteration 2 cache
	* domains worth of CPUs.
	*/
	static bool		static bool
	wake_affine_llc(struct sched_domain sd, struct task_struct p,		wake_affine_weight(struct sched_domain sd, struct task_struct p,
	int this_cpu, int prev_cpu, int sync)		int this_cpu, int prev_cpu, int sync)
	{		{
	struct llc_stats prev_stats, this_stats;
	s64 this_eff_load, prev_eff_load;		s64 this_eff_load, prev_eff_load;
	unsigned long task_load;		unsigned long task_load;

	if (!get_llc_stats(&prev_stats, prev_cpu) \|\|		this_eff_load = target_load(this_cpu, sd->wake_idx);
	!get_llc_stats(&this_stats, this_cpu))		prev_eff_load = source_load(prev_cpu, sd->wake_idx);
	return false;

	/*
	* If sync wakeup then subtract the (maximum possible)
	* effect of the currently running task from the load
	* of the current LLC.
	*/
	if (sync) {		if (sync) {
	unsigned long current_load = task_h_load(current);		unsigned long current_load = task_h_load(current);

	/* in this case load hits 0 and this LLC is considered 'idle' */		if (current_load > this_eff_load)
	if (current_load > this_stats.load)
	return true;		return true;

	this_stats.load -= current_load;		this_eff_load -= current_load;
	}		}

	/*
	* The has_capacity stuff is not SMT aware, but by trying to balance
	* the nr_running on both ends we try and fill the domain at equal
	* rates, thereby first consuming cores before siblings.
	*/

	/* if the old cache has capacity, stay there */
	if (prev_stats.has_capacity && prev_stats.nr_running < this_stats.nr_running+1)
	return false;

	/* if this cache has capacity, come here */
	if (this_stats.has_capacity && this_stats.nr_running+1 < prev_stats.nr_running)
	return true;

	/*
	* Check to see if we can move the load without causing too much
	* imbalance.
	*/
	task_load = task_h_load(p);		task_load = task_h_load(p);

	this_eff_load = 100;		this_eff_load += task_load;
	this_eff_load *= prev_stats.capacity;		if (sched_feat(WA_BIAS))
			this_eff_load *= 100;
	prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;		this_eff_load *= capacity_of(prev_cpu);
	prev_eff_load *= this_stats.capacity;

	this_eff_load *= this_stats.load + task_load;		prev_eff_load -= task_load;
	prev_eff_load *= prev_stats.load - task_load;		if (sched_feat(WA_BIAS))
			prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
			prev_eff_load *= capacity_of(this_cpu);

	return this_eff_load <= prev_eff_load;		return this_eff_load <= prev_eff_load;
	}		}
	@@ -5449,22 +5420,13 @@ static int wake_affine(struct sched_domain sd, struct task_struct p,
	int prev_cpu, int sync)		int prev_cpu, int sync)
	{		{
	int this_cpu = smp_processor_id();		int this_cpu = smp_processor_id();
	bool affine;		bool affine = false;

	/*		if (sched_feat(WA_IDLE) && !affine)
	* Default to no affine wakeups; wake_affine() should not effect a task		affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync);
	* placement the load-balancer feels inclined to undo. The conservative
	* option is therefore to not move tasks when they wake up.
	*/
	affine = false;

	/*		if (sched_feat(WA_WEIGHT) && !affine)
	* If the wakeup is across cache domains, try to evaluate if movement		affine = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
	* makes sense, otherwise rely on select_idle_siblings() to do
	* placement inside the cache domain.
	*/
	if (!cpus_share_cache(prev_cpu, this_cpu))
	affine = wake_affine_llc(sd, p, this_cpu, prev_cpu, sync);

	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);		schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
	if (affine) {		if (affine) {
	@@ -7600,7 +7562,6 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq)
	*/		*/
	static inline void update_sd_lb_stats(struct lb_env env, struct sd_lb_stats sds)		static inline void update_sd_lb_stats(struct lb_env env, struct sd_lb_stats sds)
	{		{
	struct sched_domain_shared *shared = env->sd->shared;
	struct sched_domain *child = env->sd->child;		struct sched_domain *child = env->sd->child;
	struct sched_group *sg = env->sd->groups;		struct sched_group *sg = env->sd->groups;
	struct sg_lb_stats *local = &sds->local_stat;		struct sg_lb_stats *local = &sds->local_stat;
	@@ -7672,22 +7633,6 @@ static inline void update_sd_lb_stats(struct lb_env env, struct sd_lb_stats sd
	if (env->dst_rq->rd->overload != overload)		if (env->dst_rq->rd->overload != overload)
	env->dst_rq->rd->overload = overload;		env->dst_rq->rd->overload = overload;
	}		}

	if (!shared)
	return;

	/*
	* Since these are sums over groups they can contain some CPUs
	* multiple times for the NUMA domains.
	*
	* Currently only wake_affine_llc() and find_busiest_group()
	* uses these numbers, only the last is affected by this problem.
	*
	* XXX fix that.
	*/
	WRITE_ONCE(shared->nr_running, sds->total_running);
	WRITE_ONCE(shared->load, sds->total_load);
	WRITE_ONCE(shared->capacity, sds->total_capacity);
	}		}

	/**		/**
	@@ -8097,6 +8042,13 @@ static int should_we_balance(struct lb_env *env)
	struct sched_group *sg = env->sd->groups;		struct sched_group *sg = env->sd->groups;
	int cpu, balance_cpu = -1;		int cpu, balance_cpu = -1;

			/*
			* Ensure the balancing environment is consistent; can happen
			* when the softirq triggers 'during' hotplug.
			*/
			if (!cpumask_test_cpu(env->dst_cpu, env->cpus))
			return 0;

	/*		/*
	* In the newly idle case, we will allow all the cpu's		* In the newly idle case, we will allow all the cpu's
	* to do the newly idle load balance.		* to do the newly idle load balance.

kernel/sched/features.h

+3 −0

Original line number	Original line	Diff line number	Diff line
	@@ -81,3 +81,6 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
	SCHED_FEAT(LB_MIN, false)		SCHED_FEAT(LB_MIN, false)
	SCHED_FEAT(ATTACH_AGE_LOAD, true)		SCHED_FEAT(ATTACH_AGE_LOAD, true)

			SCHED_FEAT(WA_IDLE, true)
			SCHED_FEAT(WA_WEIGHT, true)
			SCHED_FEAT(WA_BIAS, true)