
Commit 713997b7 authored by Satya Durga Srinivasu Prabhala

sched/core_ctl: Add multicluster awareness to misfit accounting



Currently, the misfit task accounting isn't multicluster aware;
it only picks out misfits on the smallest cluster.
Fix this so that we have per-CPU and per-cluster accounting.
While at it, update the code that consumes this data to unisolate
a CPU if there is a misfit task in the previous cluster.

Change-Id: I9a2c30bf2c9da3ace1ffd7ea22d6c6e22bbddacc
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
Signed-off-by: Satya Durga Srinivasu Prabhala <satyap@codeaurora.org>
parent 3afeb98f
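The accounting scheme this patch introduces can be sketched in isolation. The standalone C program below is an editorial illustration, not part of the patch; the cluster layout and task counts are invented to match the dual-cluster example used in the patch's own code comments. Each cluster's need is its own running tasks plus everything running on higher-capacity clusters, and misfit tasks on the next-smaller cluster additionally argue for unisolating a CPU on this cluster.

#include <stdio.h>

#define NR_CLUSTERS		2
#define CPUS_PER_CLUSTER	4

struct cpu_stats {
	int nr;        /* time-averaged runnable tasks on this CPU */
	int nr_misfit; /* tasks too big for this CPU's capacity */
};

/* Clusters ordered by capacity: index 0 is the min capacity cluster. */
static struct cpu_stats stats[NR_CLUSTERS][CPUS_PER_CLUSTER] = {
	{ {1, 0}, {1, 0}, {1, 1}, {1, 1} }, /* 4 tasks, 2 of them misfits */
	{ {1, 0}, {1, 0}, {0, 0}, {0, 0} }, /* 2 big tasks */
};

/* Tasks on this cluster plus tasks on every higher capacity cluster. */
static int cluster_nr_need(int index)
{
	int i, cpu, need = 0;

	for (i = index; i < NR_CLUSTERS; i++)
		for (cpu = 0; cpu < CPUS_PER_CLUSTER; cpu++)
			need += stats[i][cpu].nr;
	return need;
}

/* Misfit tasks stuck on the next smaller cluster. */
static int prev_cluster_misfit_need(int index)
{
	int cpu, need = 0;

	if (index == 0) /* the smallest cluster absorbs no misfits */
		return 0;
	for (cpu = 0; cpu < CPUS_PER_CLUSTER; cpu++)
		need += stats[index - 1][cpu].nr_misfit;
	return need;
}

int main(void)
{
	int i;

	for (i = 0; i < NR_CLUSTERS; i++)
		printf("cluster %d: nrrun = %d + %d = %d\n", i,
		       cluster_nr_need(i), prev_cluster_misfit_need(i),
		       cluster_nr_need(i) + prev_cluster_misfit_need(i));
	return 0;
}

Compiled and run, this prints nrrun = 6 + 0 = 6 for the little cluster and 2 + 2 = 4 for the big cluster, matching the nr_need values of 6 and 2 worked through in the comments added to core_ctl.c below.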
+0 −8
@@ -24,20 +24,12 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);

#ifdef CONFIG_SMP
extern void sched_update_nr_prod(int cpu, long delta, bool inc);
-extern void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg,
-					unsigned int *max_nr,
-					unsigned int *big_max_nr);
extern unsigned int sched_get_cpu_util(int cpu);
extern u64 sched_get_cpu_last_busy_time(int cpu);
#else
static inline void sched_update_nr_prod(int cpu, long delta, bool inc)
{
}
-static inline void sched_get_nr_running_avg(int *avg, int *iowait_avg,
-				int *big_avg, unsigned int *max_nr,
-				unsigned int *big_max_nr)
-{
-}
static inline unsigned int sched_get_cpu_util(int cpu)
{
	return 0;
+40 −16
@@ -1002,6 +1002,34 @@ TRACE_EVENT(core_ctl_set_boost,
	TP_printk("refcount=%u, ret=%d", __entry->refcount, __entry->ret)
);

+TRACE_EVENT(core_ctl_update_nr_need,
+
+	TP_PROTO(int cpu, int nr_need, int prev_misfit_need,
+		int nrrun, int max_nr),
+
+	TP_ARGS(cpu, nr_need, prev_misfit_need, nrrun, max_nr),
+
+	TP_STRUCT__entry(
+		__field( int, cpu)
+		__field( int, nr_need)
+		__field( int, prev_misfit_need)
+		__field( int, nrrun)
+		__field( int, max_nr)
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu;
+		__entry->nr_need = nr_need;
+		__entry->prev_misfit_need = prev_misfit_need;
+		__entry->nrrun = nrrun;
+		__entry->max_nr = max_nr;
+	),
+
+	TP_printk("cpu=%d nr_need=%d prev_misfit_need=%d nrrun=%d max_nr=%d",
+		__entry->cpu, __entry->nr_need, __entry->prev_misfit_need,
+		__entry->nrrun, __entry->max_nr)
+);

/*
 * Tracepoint for schedtune_tasks_update
 */
@@ -1236,30 +1264,26 @@ TRACE_EVENT(sched_energy_diff,
 */
TRACE_EVENT(sched_get_nr_running_avg,

-	TP_PROTO(int avg, int big_avg, int iowait_avg,
-		unsigned int max_nr, unsigned int big_max_nr),
+	TP_PROTO(int cpu, int nr, int nr_misfit, int nr_max),

-	TP_ARGS(avg, big_avg, iowait_avg, max_nr, big_max_nr),
+	TP_ARGS(cpu, nr, nr_misfit, nr_max),

	TP_STRUCT__entry(
-		__field( int,   avg                     )
-		__field( int,   big_avg                 )
-		__field( int,   iowait_avg              )
-		__field( unsigned int,  max_nr          )
-		__field( unsigned int,  big_max_nr      )
+		__field( int, cpu)
+		__field( int, nr)
+		__field( int, nr_misfit)
+		__field( int, nr_max)
	),

	TP_fast_assign(
-		__entry->avg            = avg;
-		__entry->big_avg        = big_avg;
-		__entry->iowait_avg     = iowait_avg;
-		__entry->max_nr         = max_nr;
-		__entry->big_max_nr     = big_max_nr;
+		__entry->cpu = cpu;
+		__entry->nr = nr;
+		__entry->nr_misfit = nr_misfit;
+		__entry->nr_max = nr_max;
	),

-	TP_printk("avg=%d big_avg=%d iowait_avg=%d max_nr=%u big_max_nr=%u",
-		__entry->avg, __entry->big_avg, __entry->iowait_avg,
-		__entry->max_nr, __entry->big_max_nr)
+	TP_printk("cpu=%d nr=%d nr_misfit=%d nr_max=%d",
+		__entry->cpu, __entry->nr, __entry->nr_misfit, __entry->nr_max)
);
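For reference, a line emitted by the reworked tracepoint would follow the TP_printk() format above; with invented values it would read in the ftrace buffer as:

sched_get_nr_running_avg: cpu=0 nr=1 nr_misfit=0 nr_max=2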

/*
+119 −27
@@ -48,7 +48,6 @@ struct cluster_data {
	struct list_head lru;
	bool pending;
	spinlock_t pending_lock;
-	bool is_big_cluster;
	bool enable;
	int nrrun;
	struct task_struct *core_ctl_thread;
@@ -239,23 +238,6 @@ static ssize_t show_busy_down_thres(const struct cluster_data *state, char *buf)
	return count;
}

-static ssize_t store_is_big_cluster(struct cluster_data *state,
-				const char *buf, size_t count)
-{
-	unsigned int val;
-
-	if (sscanf(buf, "%u\n", &val) != 1)
-		return -EINVAL;
-
-	state->is_big_cluster = val ? 1 : 0;
-	return count;
-}
-
-static ssize_t show_is_big_cluster(const struct cluster_data *state, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%u\n", state->is_big_cluster);
-}

static ssize_t store_enable(struct cluster_data *state,
				const char *buf, size_t count)
{
@@ -407,7 +389,6 @@ core_ctl_attr_rw(offline_delay_ms);
core_ctl_attr_rw(busy_up_thres);
core_ctl_attr_rw(busy_down_thres);
core_ctl_attr_rw(task_thres);
-core_ctl_attr_rw(is_big_cluster);
core_ctl_attr_ro(need_cpus);
core_ctl_attr_ro(active_cpus);
core_ctl_attr_ro(global_state);
@@ -421,7 +402,6 @@ static struct attribute *default_attrs[] = {
	&busy_up_thres.attr,
	&busy_down_thres.attr,
	&task_thres.attr,
-	&is_big_cluster.attr,
	&enable.attr,
	&need_cpus.attr,
	&active_cpus.attr,
@@ -469,26 +449,138 @@ static struct kobj_type ktype_core_ctl = {

/* ==================== runqueue based core count =================== */

+static struct sched_avg_stats nr_stats[NR_CPUS];
+
+/*
+ * nr_need:
+ *   Number of tasks running on this cluster plus
+ *   tasks running on higher capacity clusters.
+ *   To find out CPUs needed from this cluster.
+ *
+ * For example:
+ *   On dual cluster system with 4 min capacity
+ *   CPUs and 4 max capacity CPUs, if there are
+ *   4 small tasks running on min capacity CPUs
+ *   and 2 big tasks running on 2 max capacity
+ *   CPUs, nr_need has to be 6 for min capacity
+ *   cluster and 2 for max capacity cluster.
+ *   This is because, min capacity cluster has to
+ *   account for tasks running on max capacity
+ *   cluster, so that, the min capacity cluster
+ *   can be ready to accommodate tasks running on max
+ *   capacity CPUs if the demand of tasks goes down.
+ */
+static int compute_cluster_nr_need(int index)
+{
+	int cpu;
+	struct cluster_data *cluster;
+	int nr_need = 0;
+
+	for_each_cluster(cluster, index) {
+		for_each_cpu(cpu, &cluster->cpu_mask)
+			nr_need += nr_stats[cpu].nr;
+	}
+
+	return nr_need;
+}
+
+/*
+ * prev_misfit_need:
+ *   Tasks running on smaller capacity cluster which
+ *   needs to be migrated to higher capacity cluster.
+ *   To find out how many tasks need higher capacity CPUs.
+ *
+ * For example:
+ *   On dual cluster system with 4 min capacity
+ *   CPUs and 4 max capacity CPUs, if there are
+ *   2 small tasks and 2 big tasks running on
+ *   min capacity CPUs and no tasks running on
+ *   max capacity, prev_misfit_need of min capacity
+ *   cluster will be 0 and prev_misfit_need of
+ *   max capacity cluster will be 2.
+ */
+static int compute_prev_cluster_misfit_need(int index)
+{
+	int cpu;
+	struct cluster_data *prev_cluster;
+	int prev_misfit_need = 0;
+
+	/*
+	 * Lowest capacity cluster does not have to
+	 * accommodate any misfit tasks.
+	 */
+	if (index == 0)
+		return 0;
+
+	prev_cluster = &cluster_state[index - 1];
+
+	for_each_cpu(cpu, &prev_cluster->cpu_mask)
+		prev_misfit_need += nr_stats[cpu].nr_misfit;
+
+	return prev_misfit_need;
+}
+
+static int compute_cluster_max_nr(int index)
+{
+	int cpu;
+	struct cluster_data *cluster = &cluster_state[index];
+	int max_nr = 0;
+
+	for_each_cpu(cpu, &cluster->cpu_mask)
+		max_nr = max(max_nr, nr_stats[cpu].nr_max);
+
+	return max_nr;
+}
+
+static int cluster_real_big_tasks(int index)
+{
+	int nr_big = 0;
+	int cpu;
+	struct cluster_data *cluster = &cluster_state[index];
+
+	if (index == 0) {
+		for_each_cpu(cpu, &cluster->cpu_mask)
+			nr_big += nr_stats[cpu].nr_misfit;
+	} else {
+		for_each_cpu(cpu, &cluster->cpu_mask)
+			nr_big += nr_stats[cpu].nr;
+	}
+
+	return nr_big;
+}
+
static void update_running_avg(void)
{
-	int avg, iowait_avg, big_avg;
-	int max_nr, big_max_nr;
	struct cluster_data *cluster;
	unsigned int index = 0;
	unsigned long flags;
+	int big_avg = 0;

-	sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg,
-				 &max_nr, &big_max_nr);
-	walt_rotation_checkpoint(big_avg);
+	sched_get_nr_running_avg(nr_stats);

	spin_lock_irqsave(&state_lock, flags);
	for_each_cluster(cluster, index) {
+		int nr_need, prev_misfit_need;
+
		if (!cluster->inited)
			continue;
-		cluster->nrrun = cluster->is_big_cluster ? big_avg : avg;
-		cluster->max_nr = cluster->is_big_cluster ? big_max_nr : max_nr;
+
+		nr_need = compute_cluster_nr_need(index);
+		prev_misfit_need = compute_prev_cluster_misfit_need(index);
+
+		cluster->nrrun = nr_need + prev_misfit_need;
+		cluster->max_nr = compute_cluster_max_nr(index);
+
+		trace_core_ctl_update_nr_need(cluster->first_cpu, nr_need,
+					prev_misfit_need,
+					cluster->nrrun, cluster->max_nr);
+
+		big_avg += cluster_real_big_tasks(index);
	}
	spin_unlock_irqrestore(&state_lock, flags);
+
+	walt_rotation_checkpoint(big_avg);
}

#define MAX_NR_THRESHOLD	4
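The big_avg handed to walt_rotation_checkpoint() above is now the sum of per-cluster "real big" counts: misfit tasks still sitting on the lowest-capacity cluster plus every task already running on a higher-capacity cluster. A standalone check of that arithmetic (not patch code; the per-CPU numbers are invented):

#include <stdio.h>

struct sched_avg_stats { int nr; int nr_misfit; int nr_max; };

int main(void)
{
	/* Invented stats: 4 little CPUs, 4 big CPUs. */
	struct sched_avg_stats little[4] = {
		{1, 0, 1}, {1, 0, 1}, {1, 1, 2}, {1, 1, 2} };
	struct sched_avg_stats big[4] = {
		{1, 0, 1}, {1, 0, 1}, {0, 0, 0}, {0, 0, 0} };
	int cpu, big_avg = 0;

	for (cpu = 0; cpu < 4; cpu++)
		big_avg += little[cpu].nr_misfit; /* cluster 0: misfits only */
	for (cpu = 0; cpu < 4; cpu++)
		big_avg += big[cpu].nr;           /* higher clusters: all tasks */

	printf("big_avg = %d\n", big_avg);        /* 2 misfits + 2 big = 4 */
	return 0;
}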
+7 −0
@@ -3047,3 +3047,10 @@ static inline bool energy_aware(void)
{
	return sched_feat(ENERGY_AWARE);
}

+struct sched_avg_stats {
+	int nr;
+	int nr_misfit;
+	int nr_max;
+};
+extern void sched_get_nr_running_avg(struct sched_avg_stats *stats);
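A caller of the new interface supplies one sched_avg_stats slot per possible CPU and reads back per-CPU averages in a single call. The following is a hedged sketch of such a consumer, modeled on update_running_avg() in core_ctl.c above; it is not part of the patch and the function name poll_nr_running is hypothetical:

/* Sketch only: a periodic consumer of the per-CPU stats. */
static struct sched_avg_stats stats[NR_CPUS];

static void poll_nr_running(void)
{
	int cpu;

	/* One call fills nr/nr_misfit/nr_max for every possible CPU. */
	sched_get_nr_running_avg(stats);

	for_each_possible_cpu(cpu)
		pr_debug("cpu%d: nr=%d nr_misfit=%d nr_max=%d\n", cpu,
			 stats[cpu].nr, stats[cpu].nr_misfit,
			 stats[cpu].nr_max);
}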
+28 −48
@@ -35,7 +35,8 @@ static s64 last_get_time;

static DEFINE_PER_CPU(atomic64_t, last_busy_time) = ATOMIC64_INIT(0);

-#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y)
+#define NR_THRESHOLD_PCT		85

/**
 * sched_get_nr_running_avg
 * @return: Average nr_running, iowait and nr_big_tasks value since last poll.
@@ -45,80 +46,59 @@ static DEFINE_PER_CPU(atomic64_t, last_busy_time) = ATOMIC64_INIT(0);
 * Obtains the average nr_running value since the last poll.
 * This function may not be called concurrently with itself
 */
-void sched_get_nr_running_avg(int *avg, int *iowait_avg, int *big_avg,
-			      unsigned int *max_nr, unsigned int *big_max_nr)
+void sched_get_nr_running_avg(struct sched_avg_stats *stats)
{
	int cpu;
	u64 curr_time = sched_clock();
-	u64 diff = curr_time - last_get_time;
-	u64 tmp_avg = 0, tmp_iowait = 0, tmp_big_avg = 0;
-
-	*avg = 0;
-	*iowait_avg = 0;
-	*big_avg = 0;
-	*max_nr = 0;
-	*big_max_nr = 0;
+	u64 period = curr_time - last_get_time;
+	u64 tmp_nr, tmp_misfit;

-	if (!diff)
+	if (!period)
		return;

	/* read and reset nr_running counts */
	for_each_possible_cpu(cpu) {
		unsigned long flags;
+		u64 diff;

		spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags);
		curr_time = sched_clock();
		diff = curr_time - per_cpu(last_time, cpu);
		BUG_ON((s64)diff < 0);

-		tmp_avg += per_cpu(nr_prod_sum, cpu);
-		tmp_avg += per_cpu(nr, cpu) * diff;
+		tmp_nr = per_cpu(nr_prod_sum, cpu);
+		tmp_nr += per_cpu(nr, cpu) * diff;
+		tmp_nr = div64_u64((tmp_nr * 100), period);
+
+		tmp_misfit = per_cpu(nr_big_prod_sum, cpu);
+		tmp_misfit += walt_big_tasks(cpu) * diff;
+		tmp_misfit = div64_u64((tmp_misfit * 100), period);

-		tmp_big_avg += per_cpu(nr_big_prod_sum, cpu);
-		tmp_big_avg += nr_eligible_big_tasks(cpu) * diff;
+		/*
+		 * NR_THRESHOLD_PCT is to make sure that the task ran
+		 * at least 15% in the last window to compensate any
+		 * over estimating being done.
+		 */
+		stats[cpu].nr = (int)div64_u64((tmp_nr + NR_THRESHOLD_PCT),
+								100);
+		stats[cpu].nr_misfit = (int)div64_u64((tmp_misfit +
+						NR_THRESHOLD_PCT), 100);
+		stats[cpu].nr_max = per_cpu(nr_max, cpu);

-		tmp_iowait += per_cpu(iowait_prod_sum, cpu);
-		tmp_iowait +=  nr_iowait_cpu(cpu) * diff;
+		trace_sched_get_nr_running_avg(cpu, stats[cpu].nr,
+				stats[cpu].nr_misfit, stats[cpu].nr_max);

		per_cpu(last_time, cpu) = curr_time;

		per_cpu(nr_prod_sum, cpu) = 0;
		per_cpu(nr_big_prod_sum, cpu) = 0;
		per_cpu(iowait_prod_sum, cpu) = 0;

-		if (*max_nr < per_cpu(nr_max, cpu))
-			*max_nr = per_cpu(nr_max, cpu);
-
-		if (!is_min_capacity_cpu(cpu)) {
-			if (*big_max_nr < per_cpu(nr_max, cpu))
-				*big_max_nr = per_cpu(nr_max, cpu);
-		}
-
		per_cpu(nr_max, cpu) = per_cpu(nr, cpu);

		spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
	}

-	diff = curr_time - last_get_time;
	last_get_time = curr_time;

-	/*
-	 * Any task running on BIG cluster and BIG tasks running on little
-	 * cluster contributes to big_avg. Small or medium tasks can also
-	 * run on BIG cluster when co-location and scheduler boost features
-	 * are activated. We don't want these tasks to downmigrate to little
-	 * cluster when BIG CPUs are available but isolated. Round up the
-	 * average values so that core_ctl aggressively unisolate BIG CPUs.
-	 */
-	*avg = (int)DIV64_U64_ROUNDUP(tmp_avg, diff);
-	*big_avg = (int)DIV64_U64_ROUNDUP(tmp_big_avg, diff);
-	*iowait_avg = (int)DIV64_U64_ROUNDUP(tmp_iowait, diff);
-
-	trace_sched_get_nr_running_avg(*avg, *big_avg, *iowait_avg,
-				       *max_nr, *big_max_nr);
-
-	BUG_ON(*avg < 0 || *big_avg < 0 || *iowait_avg < 0);
-	pr_debug("%s - avg:%d big_avg:%d iowait_avg:%d\n",
-				 __func__, *avg, *big_avg, *iowait_avg);
}
EXPORT_SYMBOL(sched_get_nr_running_avg);

@@ -174,7 +154,7 @@ void sched_update_nr_prod(int cpu, long delta, bool inc)
	update_last_busy_time(cpu, !inc, nr_running, curr_time);

	per_cpu(nr_prod_sum, cpu) += nr_running * diff;
-	per_cpu(nr_big_prod_sum, cpu) += nr_eligible_big_tasks(cpu) * diff;
+	per_cpu(nr_big_prod_sum, cpu) += walt_big_tasks(cpu) * diff;
	per_cpu(iowait_prod_sum, cpu) += nr_iowait_cpu(cpu) * diff;
	spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags);
}
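The NR_THRESHOLD_PCT rounding introduced above can be checked in isolation: a CPU's time-weighted task count is first scaled to a percentage of the polling window, and (pct + 85) / 100 then counts a task only if it ran at least 15% of the window. A standalone sketch of that arithmetic (window size and runtimes invented):

#include <stdio.h>
#include <stdint.h>

#define NR_THRESHOLD_PCT 85

/* Mirrors the in-kernel math, with div64_u64 replaced by plain division. */
static int window_nr(uint64_t weighted_sum, uint64_t period)
{
	uint64_t pct = (weighted_sum * 100) / period;

	return (int)((pct + NR_THRESHOLD_PCT) / 100);
}

int main(void)
{
	uint64_t period = 8000000;	/* invented 8 ms polling window */

	printf("%d\n", window_nr(1600000, period)); /* ran 20% of window -> 1 */
	printf("%d\n", window_nr(800000, period));  /* ran 10% -> 0, filtered */
	printf("%d\n", window_nr(9600000, period)); /* 120%, >1 task -> 2 */
	return 0;
}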