Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b4f436ae authored by Dietmar Eggemann, committed by Gerrit - the friendly Code Review server
Browse files

ANDROID: sched: Update max cpu capacity in case of max frequency constraints



Wakeup balancing uses cpu capacity awareness and needs to know the
system-wide maximum cpu capacity.

Patch "sched: Store system-wide maximum cpu capacity in root domain"
finds the system-wide maximum cpu capacity during scheduler domain
hierarchy setup. This is sufficient as long as maximum frequency
invariance is not enabled.

If it is enabled, the system-wide maximum cpu capacity can change
between scheduler domain hierarchy setups due to frequency capping.

The cpu capacity is changed in update_cpu_capacity() which is called in
load balance on the lowest scheduler domain hierarchy level. To be able
to know if a change in cpu capacity for a certain cpu also has an effect
on the system-wide maximum cpu capacity it is normally necessary to
iterate over all cpus. This would be way too costly. That's why this
patch follows a different approach.

The unsigned long max_cpu_capacity value in struct root_domain is
replaced with a struct max_cpu_capacity, containing value (the
max_cpu_capacity) and cpu (the cpu index of the cpu providing the
maximum cpu_capacity).

Changes to the system-wide maximum cpu capacity and the cpu index are
made if:

 1 System-wide maximum cpu capacity < cpu capacity
 2 System-wide maximum cpu capacity > cpu capacity and cpu index == cpu

There are no changes to the system-wide maximum cpu capacity in all
other cases.

Atomic read and write access to the pair (max_cpu_capacity.val,
max_cpu_capacity.cpu) is enforced by max_cpu_capacity.lock.

The access to max_cpu_capacity.val in task_fits_max() is still performed
without taking the max_cpu_capacity.lock.

The code to set max cpu capacity in build_sched_domains() has been
removed because the whole functionality is now provided by
update_cpu_capacity() instead.

This approach can introduce errors temporarily, e.g. in case the cpu
currently providing the max cpu capacity has its cpu capacity lowered
due to frequency capping and calls update_cpu_capacity() before any cpu
which might provide the max cpu now.

Change-Id: Idaa7a16723001e222e476de34df332558e48dd13
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com>
Git-commit: 2cc3df5e
Git-repo: https://android.googlesource.com/kernel/common/


[satyap@codeaurora.org:
1. Replace max_cpu_capacity with max_cpu_capacity.val
2. Replace pr_info() with printk_deferred()
3. Fix below warning introduced as part of the commit to
   avoid compilation issue.
   kernel/sched/fair.c:9324:30: warning: '&&' within '||' \
				[-Wlogical-op-parentheses]
]
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
parent 76eb5bbd
Loading
Loading
Loading
Loading
+34 −3
Original line number Diff line number Diff line
@@ -7021,7 +7021,7 @@ static inline bool task_fits_capacity(struct task_struct *p,
static inline bool task_fits_max(struct task_struct *p, int cpu)
{
	unsigned long capacity = capacity_orig_of(cpu);
	unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity;
	unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val;

	if (capacity == max_capacity)
		return true;
@@ -7567,7 +7567,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
		return 0;

	min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;

	/* Minimum capacity is close to max, no need to abort wake_affine */
	if (max_cap - min_cap < max_cap >> 3)
@@ -9311,10 +9311,21 @@ static unsigned long scale_rt_capacity(int cpu)
	return 1;
}

/*
 * Reset a max_cpu_capacity tracker to its pristine state: no capacity
 * observed yet (val = 0) and no owning cpu (cpu = -1), with the lock
 * ready for the first update from update_cpu_capacity().
 */
void init_max_cpu_capacity(struct max_cpu_capacity *mcc)
{
	mcc->cpu = -1;
	mcc->val = 0;
	raw_spin_lock_init(&mcc->lock);
}

static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
	unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
	struct sched_group *sdg = sd->groups;
	struct max_cpu_capacity *mcc;
	unsigned long max_capacity;
	int max_cap_cpu;
	unsigned long flags;

	capacity = min(capacity, thermal_cap(cpu));

@@ -9323,6 +9334,26 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
	capacity *= arch_scale_max_freq_capacity(sd, cpu);
	capacity >>= SCHED_CAPACITY_SHIFT;

	mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;

	raw_spin_lock_irqsave(&mcc->lock, flags);
	max_capacity = mcc->val;
	max_cap_cpu = mcc->cpu;

	if ((max_capacity > capacity && max_cap_cpu == cpu) ||
	    max_capacity < capacity) {
		mcc->val = capacity;
		mcc->cpu = cpu;
#ifdef CONFIG_SCHED_DEBUG
		raw_spin_unlock_irqrestore(&mcc->lock, flags);
		printk_deferred("CPU%d: update max cpu_capacity %lu\n",
							cpu, capacity);
		goto skip_unlock;
#endif
	}
	raw_spin_unlock_irqrestore(&mcc->lock, flags);

skip_unlock: __attribute__ ((unused));
	capacity *= scale_rt_capacity(cpu);
	capacity >>= SCHED_CAPACITY_SHIFT;

@@ -11123,7 +11154,7 @@ static inline int find_new_ilb(void)
	if (sd && (ilb >= nr_cpu_ids || !idle_cpu(ilb))) {
		if (!energy_aware() ||
				(capacity_orig_of(cpu) ==
				cpu_rq(cpu)->rd->max_cpu_capacity ||
				cpu_rq(cpu)->rd->max_cpu_capacity.val ||
				cpu_overutilized(cpu))) {
			cpumask_andnot(&cpumask, nohz.idle_cpus_mask,
					cpu_isolated_mask);
+9 −1
Original line number Diff line number Diff line
@@ -681,6 +681,12 @@ static inline bool sched_asym_prefer(int a, int b)
	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
}

/*
 * System-wide maximum cpu capacity together with the cpu providing it.
 * lock serializes paired updates of (val, cpu) in update_cpu_capacity();
 * readers such as task_fits_max() may read val without taking the lock,
 * so a stale value can briefly be observed (see commit message).
 */
struct max_cpu_capacity {
	raw_spinlock_t lock;
	unsigned long val;	/* maximum cpu capacity in the system */
	int cpu;		/* cpu index currently providing val, -1 if none */
};

/*
 * We add the notion of a root-domain which will be used to define per-domain
 * variables. Each exclusive cpuset essentially defines an island domain by
@@ -732,7 +738,8 @@ struct root_domain {
	cpumask_var_t rto_mask;
	struct cpupri cpupri;

	unsigned long max_cpu_capacity;
	/* Maximum cpu capacity in the system. */
	struct max_cpu_capacity max_cpu_capacity;

	/* First cpu with maximum and minimum original capacity */
	int max_cap_orig_cpu, min_cap_orig_cpu;
@@ -744,6 +751,7 @@ extern struct root_domain def_root_domain;
extern struct mutex sched_domains_mutex;

extern void init_defrootdomain(void);
extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
extern int sched_init_domains(const struct cpumask *cpu_map);
extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
extern void sched_get_rd(struct root_domain *rd);
+2 −11
Original line number Diff line number Diff line
@@ -305,6 +305,8 @@ static int init_rootdomain(struct root_domain *rd)
	rd->max_cap_orig_cpu = rd->min_cap_orig_cpu = -1;
	rd->mid_cap_orig_cpu = -1;

	init_max_cpu_capacity(&rd->max_cpu_capacity);

	return 0;

free_cpudl:
@@ -1822,7 +1824,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
	enum s_alloc alloc_state;
	struct sched_domain *sd;
	struct s_data d;
	struct rq *rq = NULL;
	int i, ret = -ENOMEM;

	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@ -1877,13 +1878,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
		int max_cpu = READ_ONCE(d.rd->max_cap_orig_cpu);
		int min_cpu = READ_ONCE(d.rd->min_cap_orig_cpu);

		rq = cpu_rq(i);
		sd = *per_cpu_ptr(d.sd, i);

		/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
		if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
			WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);

		if ((max_cpu < 0) || (cpu_rq(i)->cpu_capacity_orig >
		    cpu_rq(max_cpu)->cpu_capacity_orig))
			WRITE_ONCE(d.rd->max_cap_orig_cpu, i);
@@ -1913,11 +1909,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
	if (!cpumask_empty(cpu_map))
		update_asym_cpucapacity(cpumask_first(cpu_map));

	if (rq && sched_debug_enabled) {
		pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
			cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
	}

	ret = 0;
error:
	__free_domain_allocs(&d, alloc_state, cpu_map);