
Commit 713997b7 authored by Satya Durga Srinivasu Prabhala

sched/core_ctl: Add multicluster awareness to misfit accounting



Currently, the misfit task accounting isn't multicluster aware;
it only picks out misfits on the smallest cluster.
Fix this so that we have per-CPU and per-cluster accounting.
While at it, update the code that consumes this data to unisolate
a CPU if there is a misfit task in the previous cluster.

Change-Id: I9a2c30bf2c9da3ace1ffd7ea22d6c6e22bbddacc
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
parent 3afeb98f
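The accounting scheme this patch introduces can be sketched in isolation. The standalone C program below is an editorial illustration, not part of the patch; the cluster layout and task counts are invented to match the dual-cluster example used in the patch's own code comments. Each cluster's need is its own running tasks plus everything running on higher-capacity clusters, and misfit tasks on the next-smaller cluster additionally argue for unisolating a CPU on this cluster.

#include <stdio.h>

#define NR_CLUSTERS		2
#define CPUS_PER_CLUSTER	4

struct cpu_stats {
	int nr;        /* time-averaged runnable tasks on this CPU */
	int nr_misfit; /* tasks too big for this CPU's capacity */
};

/* Clusters ordered by capacity: index 0 is the min capacity cluster. */
static struct cpu_stats stats[NR_CLUSTERS][CPUS_PER_CLUSTER] = {
	{ {1, 0}, {1, 0}, {1, 1}, {1, 1} }, /* 4 tasks, 2 of them misfits */
	{ {1, 0}, {1, 0}, {0, 0}, {0, 0} }, /* 2 big tasks */
};

/* Tasks on this cluster plus tasks on every higher capacity cluster. */
static int cluster_nr_need(int index)
{
	int i, cpu, need = 0;

	for (i = index; i < NR_CLUSTERS; i++)
		for (cpu = 0; cpu < CPUS_PER_CLUSTER; cpu++)
			need += stats[i][cpu].nr;
	return need;
}

/* Misfit tasks stuck on the next smaller cluster. */
static int prev_cluster_misfit_need(int index)
{
	int cpu, need = 0;

	if (index == 0) /* the smallest cluster absorbs no misfits */
		return 0;
	for (cpu = 0; cpu < CPUS_PER_CLUSTER; cpu++)
		need += stats[index - 1][cpu].nr_misfit;
	return need;
}

int main(void)
{
	int i;

	for (i = 0; i < NR_CLUSTERS; i++)
		printf("cluster %d: nrrun = %d + %d = %d\n", i,
		       cluster_nr_need(i), prev_cluster_misfit_need(i),
		       cluster_nr_need(i) + prev_cluster_misfit_need(i));
	return 0;
}

Compiled and run, this prints nrrun = 6 + 0 = 6 for the little cluster and 2 + 2 = 4 for the big cluster, matching the nr_need values of 6 and 2 worked through in the comments added to core_ctl.c below.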
+0 −8
@@ -24,20 +24,12 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);

#ifdef CONFIG_SMP
extern void sched_update_nr_prod(int cpu, long delta, bool inc);
-extern void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg,
-					unsigned int *max_nr,
-					unsigned int *big_max_nr);
extern unsigned int sched_get_cpu_util(int cpu);
extern u64 sched_get_cpu_last_busy_time(int cpu);
#else
static inline void sched_update_nr_prod(int cpu, long delta, bool inc)
{
}
-static inline void sched_get_nr_running_avg(int *avg, int *iowait_avg,
-				int *big_avg, unsigned int *max_nr,
-				unsigned int *big_max_nr)
-{
-}
static inline unsigned int sched_get_cpu_util(int cpu)
{
	return 0;
+40 −16
@@ -1002,6 +1002,34 @@ TRACE_EVENT(core_ctl_set_boost,
	TP_printk("refcount=%u, ret=%d", __entry->refcount, __entry->ret)
);

+TRACE_EVENT(core_ctl_update_nr_need,
+
+	TP_PROTO(int cpu, int nr_need, int prev_misfit_need,
+		int nrrun, int max_nr),
+
+	TP_ARGS(cpu, nr_need, prev_misfit_need, nrrun, max_nr),
+
+	TP_STRUCT__entry(
+		__field( int, cpu)
+		__field( int, nr_need)
+		__field( int, prev_misfit_need)
+		__field( int, nrrun)
+		__field( int, max_nr)
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu;
+		__entry->nr_need = nr_need;
+		__entry->prev_misfit_need = prev_misfit_need;
+		__entry->nrrun = nrrun;
+		__entry->max_nr = max_nr;
+	),
+
+	TP_printk("cpu=%d nr_need=%d prev_misfit_need=%d nrrun=%d max_nr=%d",
+		__entry->cpu, __entry->nr_need, __entry->prev_misfit_need,
+		__entry->nrrun, __entry->max_nr)
+);

/*
 * Tracepoint for schedtune_tasks_update
 */
@@ -1236,30 +1264,26 @@ TRACE_EVENT(sched_energy_diff,
 */
TRACE_EVENT(sched_get_nr_running_avg,

-	TP_PROTO(int avg, int big_avg, int iowait_avg,
-		unsigned int max_nr, unsigned int big_max_nr),
+	TP_PROTO(int cpu, int nr, int nr_misfit, int nr_max),

-	TP_ARGS(avg, big_avg, iowait_avg, max_nr, big_max_nr),
+	TP_ARGS(cpu, nr, nr_misfit, nr_max),

	TP_STRUCT__entry(
-		__field( int,   avg                     )
-		__field( int,   big_avg                 )
-		__field( int,   iowait_avg              )
-		__field( unsigned int,  max_nr          )
-		__field( unsigned int,  big_max_nr      )
+		__field( int, cpu)
+		__field( int, nr)
+		__field( int, nr_misfit)
+		__field( int, nr_max)
	),

	TP_fast_assign(
-		__entry->avg            = avg;
-		__entry->big_avg        = big_avg;
-		__entry->iowait_avg     = iowait_avg;
-		__entry->max_nr         = max_nr;
-		__entry->big_max_nr     = big_max_nr;
+		__entry->cpu = cpu;
+		__entry->nr = nr;
+		__entry->nr_misfit = nr_misfit;
+		__entry->nr_max = nr_max;
	),

-	TP_printk("avg=%d big_avg=%d iowait_avg=%d max_nr=%u big_max_nr=%u",
-		__entry->avg, __entry->big_avg, __entry->iowait_avg,
-		__entry->max_nr, __entry->big_max_nr)
+	TP_printk("cpu=%d nr=%d nr_misfit=%d nr_max=%d",
+		__entry->cpu, __entry->nr, __entry->nr_misfit, __entry->nr_max)
);
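For reference, a line emitted by the reworked tracepoint would follow the TP_printk() format above; with invented values it would read in the ftrace buffer as:

sched_get_nr_running_avg: cpu=0 nr=1 nr_misfit=0 nr_max=2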

/*
+119 −27
@@ -48,7 +48,6 @@ struct cluster_data {
	struct list_head lru;
	bool pending;
	spinlock_t pending_lock;
-	bool is_big_cluster;
	bool enable;
	int nrrun;
	struct task_struct *core_ctl_thread;
@@ -239,23 +238,6 @@ static ssize_t show_busy_down_thres(const struct cluster_data *state, char *buf)
	return count;
}

-static ssize_t store_is_big_cluster(struct cluster_data *state,
-				const char *buf, size_t count)
-{
-	unsigned int val;
-
-	if (sscanf(buf, "%u\n", &val) != 1)
-		return -EINVAL;
-
-	state->is_big_cluster = val ? 1 : 0;
-	return count;
-}
-
-static ssize_t show_is_big_cluster(const struct cluster_data *state, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%u\n", state->is_big_cluster);
-}

static ssize_t store_enable(struct cluster_data *state,
				const char *buf, size_t count)
{
@@ -407,7 +389,6 @@ core_ctl_attr_rw(offline_delay_ms);
core_ctl_attr_rw(busy_up_thres);
core_ctl_attr_rw(busy_down_thres);
core_ctl_attr_rw(task_thres);
-core_ctl_attr_rw(is_big_cluster);
core_ctl_attr_ro(need_cpus);
core_ctl_attr_ro(active_cpus);
core_ctl_attr_ro(global_state);
@@ -421,7 +402,6 @@ static struct attribute *default_attrs[] = {
	&busy_up_thres.attr,
	&busy_down_thres.attr,
	&task_thres.attr,
-	&is_big_cluster.attr,
	&enable.attr,
	&need_cpus.attr,
	&active_cpus.attr,
@@ -469,26 +449,138 @@ static struct kobj_type ktype_core_ctl = {

/* ==================== runqueue based core count =================== */

+static struct sched_avg_stats nr_stats[NR_CPUS];
+
+/*
+ * nr_need:
+ *   Number of tasks running on this cluster plus
+ *   tasks running on higher capacity clusters.
+ *   To find out CPUs needed from this cluster.
+ *
+ * For example:
+ *   On dual cluster system with 4 min capacity
+ *   CPUs and 4 max capacity CPUs, if there are
+ *   4 small tasks running on min capacity CPUs
+ *   and 2 big tasks running on 2 max capacity
+ *   CPUs, nr_need has to be 6 for min capacity
+ *   cluster and 2 for max capacity cluster.
+ *   This is because, min capacity cluster has to
+ *   account for tasks running on max capacity
+ *   cluster, so that, the min capacity cluster
+ *   can be ready to accommodate tasks running on max
+ *   capacity CPUs if the demand of tasks goes down.
+ */
+static int compute_cluster_nr_need(int index)
+{
+	int cpu;
+	struct cluster_data *cluster;
+	int nr_need = 0;
+
+	for_each_cluster(cluster, index) {
+		for_each_cpu(cpu, &cluster->cpu_mask)
+			nr_need += nr_stats[cpu].nr;
+	}
+
+	return nr_need;
+}
+
+/*
+ * prev_misfit_need:
+ *   Tasks running on smaller capacity cluster which
+ *   needs to be migrated to higher capacity cluster.
+ *   To find out how many tasks need higher capacity CPUs.
+ *
+ * For example:
+ *   On dual cluster system with 4 min capacity
+ *   CPUs and 4 max capacity CPUs, if there are
+ *   2 small tasks and 2 big tasks running on
+ *   min capacity CPUs and no tasks running on
+ *   max capacity, prev_misfit_need of min capacity
+ *   cluster will be 0 and prev_misfit_need of
+ *   max capacity cluster will be 2.
+ */
+static int compute_prev_cluster_misfit_need(int index)
+{
+	int cpu;
+	struct cluster_data *prev_cluster;
+	int prev_misfit_need = 0;
+
+	/*
+	 * Lowest capacity cluster does not have to
+	 * accommodate any misfit tasks.
+	 */
+	if (index == 0)
+		return 0;
+
+	prev_cluster = &cluster_state[index - 1];
+
+	for_each_cpu(cpu, &prev_cluster->cpu_mask)
+		prev_misfit_need += nr_stats[cpu].nr_misfit;
+
+	return prev_misfit_need;
+}
+
+static int compute_cluster_max_nr(int index)
+{
+	int cpu;
+	struct cluster_data *cluster = &cluster_state[index];
+	int max_nr = 0;
+
+	for_each_cpu(cpu, &cluster->cpu_mask)
+		max_nr = max(max_nr, nr_stats[cpu].nr_max);
+
+	return max_nr;
+}
+
+static int cluster_real_big_tasks(int index)
+{
+	int nr_big = 0;
+	int cpu;
+	struct cluster_data *cluster = &cluster_state[index];
+
+	if (index == 0) {
+		for_each_cpu(cpu, &cluster->cpu_mask)
+			nr_big += nr_stats[cpu].nr_misfit;
+	} else {
+		for_each_cpu(cpu, &cluster->cpu_mask)
+			nr_big += nr_stats[cpu].nr;
+	}
+
+	return nr_big;
+}
+
static void update_running_avg(void)
{
-	int avg, iowait_avg, big_avg;
-	int max_nr, big_max_nr;
	struct cluster_data *cluster;
	unsigned int index = 0;
	unsigned long flags;
+	int big_avg = 0;

-	sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg,
-				 &max_nr, &big_max_nr);
-	walt_rotation_checkpoint(big_avg);
+	sched_get_nr_running_avg(nr_stats);

	spin_lock_irqsave(&state_lock, flags);
	for_each_cluster(cluster, index) {
+		int nr_need, prev_misfit_need;
+
		if (!cluster->inited)
			continue;
-		cluster->nrrun = cluster->is_big_cluster ? big_avg : avg;
-		cluster->max_nr = cluster->is_big_cluster ? big_max_nr : max_nr;
+
+		nr_need = compute_cluster_nr_need(index);
+		prev_misfit_need = compute_prev_cluster_misfit_need(index);
+
+		cluster->nrrun = nr_need + prev_misfit_need;
+		cluster->max_nr = compute_cluster_max_nr(index);
+
+		trace_core_ctl_update_nr_need(cluster->first_cpu, nr_need,
+					prev_misfit_need,
+					cluster->nrrun, cluster->max_nr);
+
+		big_avg += cluster_real_big_tasks(index);
	}
	spin_unlock_irqrestore(&state_lock, flags);
+
+	walt_rotation_checkpoint(big_avg);
}

#define MAX_NR_THRESHOLD	4
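The big_avg handed to walt_rotation_checkpoint() above is now the sum of per-cluster "real big" counts: misfit tasks still sitting on the lowest-capacity cluster plus every task already running on a higher-capacity cluster. A standalone check of that arithmetic (not patch code; the per-CPU numbers are invented):

#include <stdio.h>

struct sched_avg_stats { int nr; int nr_misfit; int nr_max; };

int main(void)
{
	/* Invented stats: 4 little CPUs, 4 big CPUs. */
	struct sched_avg_stats little[4] = {
		{1, 0, 1}, {1, 0, 1}, {1, 1, 2}, {1, 1, 2} };
	struct sched_avg_stats big[4] = {
		{1, 0, 1}, {1, 0, 1}, {0, 0, 0}, {0, 0, 0} };
	int cpu, big_avg = 0;

	for (cpu = 0; cpu < 4; cpu++)
		big_avg += little[cpu].nr_misfit; /* cluster 0: misfits only */
	for (cpu = 0; cpu < 4; cpu++)
		big_avg += big[cpu].nr;           /* higher clusters: all tasks */

	printf("big_avg = %d\n", big_avg);        /* 2 misfits + 2 big = 4 */
	return 0;
}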
+7 −0
@@ -3047,3 +3047,10 @@ static inline bool energy_aware(void)
{
	return sched_feat(ENERGY_AWARE);
}

+struct sched_avg_stats {
+	int nr;
+	int nr_misfit;
+	int nr_max;
+};
+extern void sched_get_nr_running_avg(struct sched_avg_stats *stats);
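A caller of the new interface supplies one sched_avg_stats slot per possible CPU and reads back per-CPU averages in a single call. The following is a hedged sketch of such a consumer, modeled on update_running_avg() in core_ctl.c above; it is not part of the patch and the function name poll_nr_running is hypothetical:

/* Sketch only: a periodic consumer of the per-CPU stats. */
static struct sched_avg_stats stats[NR_CPUS];

static void poll_nr_running(void)
{
	int cpu;

	/* One call fills nr/nr_misfit/nr_max for every possible CPU. */
	sched_get_nr_running_avg(stats);

	for_each_possible_cpu(cpu)
		pr_debug("cpu%d: nr=%d nr_misfit=%d nr_max=%d\n", cpu,
			 stats[cpu].nr, stats[cpu].nr_misfit,
			 stats[cpu].nr_max);
}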
+28 −48
@@ -35,7 +35,8 @@ static s64 last_get_time;

static DEFINE_PER_CPU(atomic64_t, last_busy_time) = ATOMIC64_INIT(0);

-#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y)
+#define NR_THRESHOLD_PCT		85

/**
 * sched_get_nr_running_avg
 * @return: Average nr_running, iowait and nr_big_tasks value since last poll.
@@ -45,80 +46,59 @@ static DEFINE_PER_CPU(atomic64_t, last_busy_time) = ATOMIC64_INIT(0);
 * Obtains the average nr_running value since the last poll.
 * This function may not be called concurrently with itself
 */
-void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg,
-			      unsigned int *max_nr, unsigned int *big_max_nr)
+void sched_get_nr_running_avg(struct sched_avg_stats *stats)
{
	int cpu;
	u64 curr_time = sched_clock();
-	u64 diff = curr_time - last_get_time;
-	u64 tmp_avg = 0, tmp_iowait = 0, tmp_big_avg = 0;
-
-	*avg = 0;
-	*iowait_avg = 0;
-	*big_avg = 0;
-	*max_nr = 0;
-	*big_max_nr = 0;
+	u64 period = curr_time - last_get_time;
+	u64 tmp_nr, tmp_misfit;

-	if (!diff)
+	if (!period)
		return;

	/* read and reset nr_running counts */
	for_each_possible_cpu(cpu) {
		unsigned long flags;
+		u64 diff;

		spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
		curr_time = sched_clock();
		diff = curr_time - per_cpu(last_time, cpu);
		BUG_ON((s64)diff < 0);

-		tmp_avg += per_cpu(nr_prod_sum, cpu);
-		tmp_avg += per_cpu(nr, cpu) * diff;
+		tmp_nr = per_cpu(nr_prod_sum, cpu);
+		tmp_nr += per_cpu(nr, cpu) * diff;
+		tmp_nr = div64_u64((tmp_nr * 100), period);
+
+		tmp_misfit = per_cpu(nr_big_prod_sum, cpu);
+		tmp_misfit += walt_big_tasks(cpu) * diff;
+		tmp_misfit = div64_u64((tmp_misfit * 100), period);

-		tmp_big_avg += per_cpu(nr_big_prod_sum, cpu);
-		tmp_big_avg += nr_eligible_big_tasks(cpu) * diff;
+		/*
+		 * NR_THRESHOLD_PCT is to make sure that the task ran
+		 * at least 15% in the last window to compensate any
+		 * over estimating being done.
+		 */
+		stats[cpu].nr = (int)div64_u64((tmp_nr + NR_THRESHOLD_PCT),
+								100);
+		stats[cpu].nr_misfit = (int)div64_u64((tmp_misfit +
+						NR_THRESHOLD_PCT), 100);
+		stats[cpu].nr_max = per_cpu(nr_max, cpu);

-		tmp_iowait += per_cpu(iowait_prod_sum, cpu);
-		tmp_iowait +=  nr_iowait_cpu(cpu) * diff;
+		trace_sched_get_nr_running_avg(cpu, stats[cpu].nr,
+				stats[cpu].nr_misfit, stats[cpu].nr_max);

		per_cpu(last_time, cpu) = curr_time;

		per_cpu(nr_prod_sum, cpu) = 0;
		per_cpu(nr_big_prod_sum, cpu) = 0;
		per_cpu(iowait_prod_sum, cpu) = 0;

-		if (*max_nr < per_cpu(nr_max, cpu))
-			*max_nr = per_cpu(nr_max, cpu);
-
-		if (!is_min_capacity_cpu(cpu)) {
-			if (*big_max_nr < per_cpu(nr_max, cpu))
-				*big_max_nr = per_cpu(nr_max, cpu);
-		}
-
		per_cpu(nr_max, cpu) = per_cpu(nr, cpu);

		spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
	}

-	diff = curr_time - last_get_time;
	last_get_time = curr_time;

-	/*
-	 * Any task running on BIG cluster and BIG tasks running on little
-	 * cluster contributes to big_avg. Small or medium tasks can also
-	 * run on BIG cluster when co-location and scheduler boost features
-	 * are activated. We don't want these tasks to downmigrate to little
-	 * cluster when BIG CPUs are available but isolated. Round up the
-	 * average values so that core_ctl aggressively unisolate BIG CPUs.
-	 */
-	*avg = (int)DIV64_U64_ROUNDUP(tmp_avg, diff);
-	*big_avg = (int)DIV64_U64_ROUNDUP(tmp_big_avg, diff);
-	*iowait_avg = (int)DIV64_U64_ROUNDUP(tmp_iowait, diff);
-
-	trace_sched_get_nr_running_avg(*avg, *big_avg, *iowait_avg,
-				       *max_nr, *big_max_nr);
-
-	BUG_ON(*avg < 0 || *big_avg < 0 || *iowait_avg < 0);
-	pr_debug("%s - avg:%d big_avg:%d iowait_avg:%d\n",
-				 __func__, *avg, *big_avg, *iowait_avg);
}
EXPORT_SYMBOL(sched_get_nr_running_avg);

@@ -174,7 +154,7 @@ void sched_update_nr_prod(int cpu, long delta, bool inc)
	update_last_busy_time(cpu, !inc, nr_running, curr_time);

	per_cpu(nr_prod_sum, cpu) += nr_running * diff;
-	per_cpu(nr_big_prod_sum, cpu) += nr_eligible_big_tasks(cpu) * diff;
+	per_cpu(nr_big_prod_sum, cpu) += walt_big_tasks(cpu) * diff;
	per_cpu(iowait_prod_sum, cpu) += nr_iowait_cpu(cpu) * diff;
	spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
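The NR_THRESHOLD_PCT rounding introduced above can be checked in isolation: a CPU's time-weighted task count is first scaled to a percentage of the polling window, and (pct + 85) / 100 then counts a task only if it ran at least 15% of the window. A standalone sketch of that arithmetic (window size and runtimes invented):

#include <stdio.h>
#include <stdint.h>

#define NR_THRESHOLD_PCT 85

/* Mirrors the in-kernel math, with div64_u64 replaced by plain division. */
static int window_nr(uint64_t weighted_sum, uint64_t period)
{
	uint64_t pct = (weighted_sum * 100) / period;

	return (int)((pct + NR_THRESHOLD_PCT) / 100);
}

int main(void)
{
	uint64_t period = 8000000;	/* invented 8 ms polling window */

	printf("%d\n", window_nr(1600000, period)); /* ran 20% of window -> 1 */
	printf("%d\n", window_nr(800000, period));  /* ran 10% -> 0, filtered */
	printf("%d\n", window_nr(9600000, period)); /* 120%, >1 task -> 2 */
	return 0;
}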