Speed up divides by cpu_power in scheduler (5517d86b) · Commits · e / devices / android_kernel_fairphone_FP4

include/linux/sched.h

+7 −1

Original line number	Diff line number	Diff line
		@@ -680,8 +680,14 @@ struct sched_group {
		/*
		* CPU power of this group, SCHED_LOAD_SCALE being max power for a
		* single CPU. This is read only (except for setup, hotplug CPU).
		* Note: never change cpu_power without recomputing its reciprocal
		*/
		unsigned long cpu_power;
		unsigned int __cpu_power;
		/*
		* reciprocal value of cpu_power to avoid expensive divides
		* (see include/linux/reciprocal_div.h)
		*/
		u32 reciprocal_cpu_power;
		};

		struct sched_domain {

kernel/sched.c

+54 −29

Original line number	Diff line number	Diff line
		@@ -52,8 +52,9 @@
		#include <linux/tsacct_kern.h>
		#include <linux/kprobes.h>
		#include <linux/delayacct.h>
		#include <asm/tlb.h>
		#include <linux/reciprocal_div.h>

		#include <asm/tlb.h>
		#include <asm/unistd.h>

		/*
		@@ -181,6 +182,27 @@ static unsigned int static_prio_timeslice(int static_prio)
		return SCALE_PRIO(DEF_TIMESLICE, static_prio);
		}

		#ifdef CONFIG_SMP
		/*
		 * sg_div_cpu_power - divide a load by a sched group's cpu_power.
		 *
		 * Stands in for (load / sg->__cpu_power).  The group's cpu_power is
		 * treated as a 'constant', so the division is carried out with
		 * reciprocal_divide() on the cached sg->reciprocal_cpu_power instead
		 * of a real divide.  The cached reciprocal has to match the current
		 * __cpu_power for the result to be meaningful.
		 */
		static inline u32 sg_div_cpu_power(const struct sched_group *sg, u32 load)
		{
			const u32 recip = sg->reciprocal_cpu_power;

			return reciprocal_divide(load, recip);
		}

		/*
		 * sg_inc_cpu_power - add @val to a sched group's __cpu_power.
		 *
		 * The cached reciprocal is recomputed from the new total in the same
		 * call, because every change to __cpu_power must be followed by a
		 * refresh of reciprocal_cpu_power.
		 */
		static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
		{
			const u32 power = sg->__cpu_power + val;

			sg->__cpu_power = power;
			sg->reciprocal_cpu_power = reciprocal_value(power);
		}
		#endif

		/*
		* task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
		* to time slice values: [800ms ... 100ms ... 5ms]
		@@ -1256,7 +1278,8 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
		}

		/* Adjust by relative CPU power of the group */
		avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
		avg_load = sg_div_cpu_power(group,
		avg_load * SCHED_LOAD_SCALE);

		if (local_group) {
		this_load = avg_load;
		@@ -2367,12 +2390,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
		}

		total_load += avg_load;
		total_pwr += group->cpu_power;
		total_pwr += group->__cpu_power;

		/* Adjust by relative CPU power of the group */
		avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
		avg_load = sg_div_cpu_power(group,
		avg_load * SCHED_LOAD_SCALE);

		group_capacity = group->cpu_power / SCHED_LOAD_SCALE;
		group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;

		if (local_group) {
		this_load = avg_load;
		@@ -2483,8 +2507,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
		max_pull = min(max_load - avg_load, max_load - busiest_load_per_task);

		/* How much load to actually move to equalise the imbalance */
		imbalance = min(max_pull * busiest->cpu_power,
		(avg_load - this_load) * this->cpu_power)
		imbalance = min(max_pull * busiest->__cpu_power,
		(avg_load - this_load) * this->__cpu_power)
		/ SCHED_LOAD_SCALE;

		/*
		@@ -2518,27 +2542,28 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
		* moving them.
		*/

		pwr_now += busiest->cpu_power *
		pwr_now += busiest->__cpu_power *
		min(busiest_load_per_task, max_load);
		pwr_now += this->cpu_power *
		pwr_now += this->__cpu_power *
		min(this_load_per_task, this_load);
		pwr_now /= SCHED_LOAD_SCALE;

		/* Amount of load we'd subtract */
		tmp = busiest_load_per_task * SCHED_LOAD_SCALE /
		busiest->cpu_power;
		tmp = sg_div_cpu_power(busiest,
		busiest_load_per_task * SCHED_LOAD_SCALE);
		if (max_load > tmp)
		pwr_move += busiest->cpu_power *
		pwr_move += busiest->__cpu_power *
		min(busiest_load_per_task, max_load - tmp);

		/* Amount of load we'd add */
		if (max_load * busiest->cpu_power <
		if (max_load * busiest->__cpu_power <
		busiest_load_per_task * SCHED_LOAD_SCALE)
		tmp = max_load * busiest->cpu_power / this->cpu_power;
		tmp = sg_div_cpu_power(this,
		max_load * busiest->__cpu_power);
		else
		tmp = busiest_load_per_task * SCHED_LOAD_SCALE /
		this->cpu_power;
		pwr_move += this->cpu_power *
		tmp = sg_div_cpu_power(this,
		busiest_load_per_task * SCHED_LOAD_SCALE);
		pwr_move += this->__cpu_power *
		min(this_load_per_task, this_load + tmp);
		pwr_move /= SCHED_LOAD_SCALE;

		@@ -5501,7 +5526,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
		break;
		}

		if (!group->cpu_power) {
		if (!group->__cpu_power) {
		printk("\n");
		printk(KERN_ERR "ERROR: domain->cpu_power not "
		"set\n");
		@@ -5678,7 +5703,7 @@ init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map,
		continue;

		sg->cpumask = CPU_MASK_NONE;
		sg->cpu_power = 0;
		sg->__cpu_power = 0;

		for_each_cpu_mask(j, span) {
		if (group_fn(j, cpu_map, NULL) != group)
		@@ -6367,7 +6392,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
		continue;
		}

		sg->cpu_power += sd->groups->cpu_power;
		sg_inc_cpu_power(sg, sd->groups->__cpu_power);
		}
		sg = sg->next;
		if (sg != group_head)
		@@ -6442,6 +6467,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)

		child = sd->child;

		sd->groups->__cpu_power = 0;

		/*
		* For perf policy, if the groups in child domain share resources
		* (for example cores sharing some portions of the cache hierarchy
		@@ -6452,18 +6479,16 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
		if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
		(child->flags &
		(SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
		sd->groups->cpu_power = SCHED_LOAD_SCALE;
		sg_inc_cpu_power(sd->groups, SCHED_LOAD_SCALE);
		return;
		}

		sd->groups->cpu_power = 0;

		/*
		* add cpu_power of each child group to this groups cpu_power
		*/
		group = child->groups;
		do {
		sd->groups->cpu_power += group->cpu_power;
		sg_inc_cpu_power(sd->groups, group->__cpu_power);
		group = group->next;
		} while (group != child->groups);
		}
		@@ -6623,7 +6648,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
		sd = &per_cpu(node_domains, j);
		sd->groups = sg;
		}
		sg->cpu_power = 0;
		sg->__cpu_power = 0;
		sg->cpumask = nodemask;
		sg->next = sg;
		cpus_or(covered, covered, nodemask);
		@@ -6651,7 +6676,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
		"Can not alloc domain group for node %d\n", j);
		goto error;
		}
		sg->cpu_power = 0;
		sg->__cpu_power = 0;
		sg->cpumask = tmp;
		sg->next = prev->next;
		cpus_or(covered, covered, tmp);