Merge "genirq: Honour IRQ's affinity hint during migration" (feabd355) · Commits · e / devices / android_kernel_fairphone_FP4

kernel/cpu.c

+15 −0

Original line number	Diff line number	Diff line
		@@ -31,6 +31,7 @@
		#include <linux/slab.h>
		#include <linux/percpu-rwsem.h>
		#include <uapi/linux/sched/types.h>
		#include <linux/cpuset.h>

		#include <trace/events/power.h>
		#define CREATE_TRACE_POINTS
		@@ -1014,6 +1015,18 @@ static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
		{
		int err;

		/*
		* When cpusets are enabled, the rebuilding of the scheduling
		* domains is deferred to a workqueue context. Make sure
		* that the work is completed before proceeding to the next
		* hotplug. Otherwise the scheduler observes an inconsistent
		* view of online and offline CPUs in the root domain. If
		* the online CPUs are still stuck in the offline (default)
		* domain, those CPUs would not be visible when scheduling
		* happens from other CPUs in the root domain.
		*/
		cpuset_wait_for_hotplug();

		cpu_maps_update_begin();
		err = cpu_down_maps_locked(cpu, target);
		cpu_maps_update_done();
		@@ -1177,6 +1190,8 @@ static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
		return -EINVAL;
		}

		cpuset_wait_for_hotplug();

		switch_err = switch_to_rt_policy();
		if (switch_err < 0)
		return switch_err;

kernel/irq/cpuhotplug.c

+6 −2

Original line number	Diff line number	Diff line
		@@ -116,6 +116,8 @@ static bool migrate_one_irq(struct irq_desc *desc)
		affinity = &available_cpus;

		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
		const struct cpumask *default_affinity;

		/*
		* If the interrupt is managed, then shut it down and leave
		* the affinity untouched.
		@@ -125,6 +127,8 @@ static bool migrate_one_irq(struct irq_desc *desc)
		irq_shutdown(desc);
		return false;
		}

		default_affinity = desc->affinity_hint ? : irq_default_affinity;
		/*
		* The order of preference for selecting a fallback CPU is
		*
		@@ -134,9 +138,9 @@ static bool migrate_one_irq(struct irq_desc *desc)
		*/
		cpumask_andnot(&available_cpus, cpu_online_mask,
		cpu_isolated_mask);
		if (cpumask_intersects(&available_cpus, irq_default_affinity))
		if (cpumask_intersects(&available_cpus, default_affinity))
		cpumask_and(&available_cpus, &available_cpus,
		irq_default_affinity);
		default_affinity);
		else if (cpumask_empty(&available_cpus))
		affinity = cpu_online_mask;

kernel/sched/fair.c

+36 −21

Original line number	Diff line number	Diff line
		@@ -8333,7 +8333,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
		struct root_domain *rd = env->dst_rq->rd;

		if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized) &&
		env->idle == CPU_NEWLY_IDLE) {
		env->idle == CPU_NEWLY_IDLE &&
		!task_in_related_thread_group(p)) {
		long util_cum_dst, util_cum_src;
		unsigned long demand;

		@@ -9631,6 +9632,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
		{
		unsigned long max_pull, load_above_capacity = ~0UL;
		struct sg_lb_stats *local, *busiest;
		bool no_imbalance = false;

		local = &sds->local_stat;
		busiest = &sds->busiest_stat;
		@@ -9650,9 +9652,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
		* factors in sg capacity and sgs with smaller group_type are
		* skipped when updating the busiest sg:
		*/
		if (busiest->group_type != group_misfit_task &&
		(busiest->avg_load <= sds->avg_load ||
		local->avg_load >= sds->avg_load)) {
		if (busiest->avg_load <= sds->avg_load ||
		local->avg_load >= sds->avg_load)
		no_imbalance = true;

		if (busiest->group_type != group_misfit_task && no_imbalance) {
		env->imbalance = 0;
		if (busiest->group_type == group_overloaded &&
		local->group_type <= group_misfit_task) {
		@@ -9677,19 +9681,35 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
		}

		/*
		* We're trying to get all the CPUs to the average_load, so we don't
		* want to push ourselves above the average load, nor do we wish to
		* reduce the max loaded CPU below the average load. At the same time,
		* we also don't want to reduce the group load below the group
		* capacity. Thus we look for the minimum possible imbalance.
		* In case of a misfit task, we load balance at the parent sched
		* domain level on B.L systems independent of the avg loads, so the
		* busiest group's avg load can be less than the sd avg load.
		* In that case, skip calculating load based imbalance between groups.
		*/
		if (!no_imbalance) {
		/*
		* We're trying to get all the cpus to the average_load,
		* so we don't want to push ourselves above the average load,
		* nor do we wish to reduce the max loaded cpu below the average
		* load. At the same time, we also don't want to reduce the
		* group load below the group capacity.
		* Thus we look for the minimum possible imbalance.
		*/
		max_pull = min(busiest->avg_load - sds->avg_load, load_above_capacity);
		max_pull = min(busiest->avg_load - sds->avg_load,
		load_above_capacity);

		/* How much load to actually move to equalise the imbalance */
		env->imbalance = min(
		max_pull * busiest->group_capacity,
		(sds->avg_load - local->avg_load) * local->group_capacity
		) / SCHED_CAPACITY_SCALE;
		env->imbalance = min(max_pull * busiest->group_capacity,
		(sds->avg_load - local->avg_load) *
		local->group_capacity) /
		SCHED_CAPACITY_SCALE;
		} else {
		/*
		* Skipped load based imbalance calculations, but let's find
		* imbalance based on busiest group type or fix small imbalance.
		*/
		env->imbalance = 0;
		}

		/* Boost imbalance to allow misfit task to be balanced.
		* Always do this if we are doing a NEWLY_IDLE balance
		@@ -9763,7 +9783,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env)

		if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized)) {
		int cpu_local, cpu_busiest;
		long util_cum;
		unsigned long capacity_local, capacity_busiest;

		if (env->idle != CPU_NEWLY_IDLE)
		@@ -9783,10 +9802,6 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
		} else if (capacity_local == capacity_busiest) {
		if (cpu_rq(cpu_busiest)->nr_running < 2)
		goto out_balanced;

		util_cum = cpu_util_cum(cpu_busiest, 0);
		if (util_cum < cpu_util_cum(cpu_local, 0))
		goto out_balanced;
		}
		}
		}