
Commit eb35eebc authored by Pavankumar Kondeti

sched: walt: Optimize cpu_util() and cpu_util_cum()



The task demand in 1024 units is readily available in task_struct, so
use it directly for the cumulative_runnable_avg and cum_window_demand
accounting. The cpu_util() and cpu_util_cum() functions, which are
called multiple times during task placement, can then return the
scaled values without doing any math.
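
A rough sketch of the math this removes from the hot path (not the exact
kernel code; the helper name scale_demand() and the 20 ms window are
assumptions for illustration): the scaled demand is the per-window demand
in nanoseconds normalized to SCHED_CAPACITY_SCALE (1024) units, so
cpu_util() can return the cached, already-scaled sum instead of shifting
and dividing on every call.

#include <stdint.h>

#define SCHED_CAPACITY_SHIFT	10	/* 1 << 10 == 1024 capacity units */

uint64_t sched_ravg_window = 20000000ULL;	/* assumed 20 ms WALT window, in ns */

/* Convert one window's demand in ns to 0..1024 capacity units. */
uint16_t scale_demand(uint64_t demand_ns)
{
	return (uint16_t)(demand_ns / (sched_ravg_window >> SCHED_CAPACITY_SHIFT));
}

/* Old style: scale the nanosecond sum on every cpu_util() call. */
unsigned long cpu_util_old(uint64_t cumulative_runnable_avg_ns,
			   unsigned long capacity_orig)
{
	uint64_t util = (cumulative_runnable_avg_ns << SCHED_CAPACITY_SHIFT)
			/ sched_ravg_window;
	return util < capacity_orig ? (unsigned long)util : capacity_orig;
}

/* New style: the sum is already kept in scaled units; only clamp to capacity. */
unsigned long cpu_util_new(uint64_t cumulative_runnable_avg_scaled,
			   unsigned long capacity_orig)
{
	return cumulative_runnable_avg_scaled < capacity_orig ?
		(unsigned long)cumulative_runnable_avg_scaled : capacity_orig;
}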

Scaling the sum of the tasks' unscaled demands is more accurate than
summing their already-scaled demands, but the latter is good enough for
task placement decisions.
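
A toy calculation (again assuming a 20 ms window) makes that trade-off
concrete: when pre-scaled demands are summed, truncation happens once per
task instead of once for the whole sum, so the result can undercount by
up to one capacity unit per runnable task, i.e. roughly 0.1% each out of 1024.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t window_ns = 20000000ULL;		/* sched_ravg_window */
	const uint64_t unit_ns = window_ns >> 10;	/* ns per capacity unit */
	const uint64_t demand_ns = 29296ULL;		/* one task's window demand */
	const int ntasks = 4;

	uint64_t sum_of_scaled = 0, sum_ns = 0;
	for (int i = 0; i < ntasks; i++) {
		sum_of_scaled += demand_ns / unit_ns;	/* truncates per task */
		sum_ns += demand_ns;
	}
	uint64_t scaled_sum = (sum_ns << 10) / window_ns;	/* truncates once */

	/* Prints 4 vs 5: a one-unit difference out of 1024 */
	printf("sum of scaled demands = %llu\n",
	       (unsigned long long)sum_of_scaled);
	printf("scaled sum of demands = %llu\n",
	       (unsigned long long)scaled_sum);
	return 0;
}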

Change-Id: Iba4be93cd34f130bed1cb533ecaa52ab8bae5f3d
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
parent 289dd294
+1 −0
@@ -604,6 +604,7 @@ struct ravg {
	u32 pred_demand;
	u8 busy_buckets[NUM_BUSY_BUCKETS];
	u16 demand_scaled;
	u16 pred_demand_scaled;
};
#else
static inline void sched_exit(struct task_struct *p) { }
+2 −2
@@ -470,7 +470,7 @@ DECLARE_EVENT_CLASS(sched_cpu_load,
		__entry->nr_big_tasks		= rq->walt_stats.nr_big_tasks;
		__entry->load_scale_factor	= cpu_load_scale_factor(rq->cpu);
		__entry->capacity		= cpu_capacity(rq->cpu);
		__entry->cumulative_runnable_avg = rq->walt_stats.cumulative_runnable_avg;
		__entry->cumulative_runnable_avg = rq->walt_stats.cumulative_runnable_avg_scaled;
		__entry->irqload		= irqload;
		__entry->max_freq		= cpu_max_freq(rq->cpu);
		__entry->power_cost		= power_cost;
@@ -532,7 +532,7 @@ TRACE_EVENT(sched_load_to_gov,
		__entry->grp_rq_ps	= rq->grp_time.prev_runnable_sum;
		__entry->nt_ps		= rq->nt_prev_runnable_sum;
		__entry->grp_nt_ps	= rq->grp_time.nt_prev_runnable_sum;
		__entry->pl		= rq->walt_stats.pred_demands_sum;
		__entry->pl		= rq->walt_stats.pred_demands_sum_scaled;
		__entry->load		= load;
		__entry->big_task_rotation = big_task_rotation;
		__entry->sysctl_sched_little_cluster_coloc_fmin_khz =
+1 −1
@@ -759,7 +759,7 @@ do { \
	P(cluster->exec_scale_factor);
	P(walt_stats.nr_big_tasks);
	SEQ_printf(m, "  .%-30s: %llu\n", "walt_stats.cumulative_runnable_avg",
			rq->walt_stats.cumulative_runnable_avg);
			rq->walt_stats.cumulative_runnable_avg_scaled);
#endif
#undef P
#undef PN
+25 −17
@@ -47,7 +47,8 @@ static inline bool task_fits_max(struct task_struct *p, int cpu);
#ifdef CONFIG_SCHED_WALT

static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p,
					u32 new_task_load, u32 new_pred_demand);
					u16 updated_demand_scaled,
					u16 updated_pred_demand_scaled);
static void walt_fixup_nr_big_tasks(struct rq *rq, struct task_struct *p,
					int delta, bool inc);
#endif /* CONFIG_SCHED_WALT */
@@ -12505,22 +12506,24 @@ __init void init_sched_fair_class(void)
static void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq)
{
	cfs_rq->walt_stats.nr_big_tasks = 0;
	cfs_rq->walt_stats.cumulative_runnable_avg = 0;
	cfs_rq->walt_stats.cumulative_runnable_avg_scaled = 0;
	cfs_rq->walt_stats.pred_demands_sum = 0;
}

static void walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p)
{
	inc_nr_big_task(&cfs_rq->walt_stats, p);
	fixup_cumulative_runnable_avg(&cfs_rq->walt_stats, p->ravg.demand,
				      p->ravg.pred_demand);
	fixup_cumulative_runnable_avg(&cfs_rq->walt_stats,
				      p->ravg.demand_scaled,
				      p->ravg.pred_demand_scaled);
}

static void walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p)
{
	dec_nr_big_task(&cfs_rq->walt_stats, p);
	fixup_cumulative_runnable_avg(&cfs_rq->walt_stats, -(s64)p->ravg.demand,
				      -(s64)p->ravg.pred_demand);
	fixup_cumulative_runnable_avg(&cfs_rq->walt_stats,
				      -(s64)p->ravg.demand_scaled,
				      -(s64)p->ravg.pred_demand_scaled);
}

static void walt_inc_throttled_cfs_rq_stats(struct walt_sched_stats *stats,
@@ -12530,12 +12533,12 @@ static void walt_inc_throttled_cfs_rq_stats(struct walt_sched_stats *stats,

	stats->nr_big_tasks += tcfs_rq->walt_stats.nr_big_tasks;
	fixup_cumulative_runnable_avg(stats,
				tcfs_rq->walt_stats.cumulative_runnable_avg,
				tcfs_rq->walt_stats.pred_demands_sum);
			tcfs_rq->walt_stats.cumulative_runnable_avg_scaled,
			tcfs_rq->walt_stats.pred_demands_sum_scaled);

	if (stats == &rq->walt_stats)
		walt_fixup_cum_window_demand(rq,
			tcfs_rq->walt_stats.cumulative_runnable_avg);
			tcfs_rq->walt_stats.cumulative_runnable_avg_scaled);

}

@@ -12546,8 +12549,8 @@ static void walt_dec_throttled_cfs_rq_stats(struct walt_sched_stats *stats,

	stats->nr_big_tasks -= tcfs_rq->walt_stats.nr_big_tasks;
	fixup_cumulative_runnable_avg(stats,
				-tcfs_rq->walt_stats.cumulative_runnable_avg,
				-tcfs_rq->walt_stats.pred_demands_sum);
			-tcfs_rq->walt_stats.cumulative_runnable_avg_scaled,
			-tcfs_rq->walt_stats.pred_demands_sum_scaled);

	/*
	 * We remove the throttled cfs_rq's tasks's contribution from the
@@ -12556,16 +12559,19 @@ static void walt_dec_throttled_cfs_rq_stats(struct walt_sched_stats *stats,
	 */
	if (stats == &rq->walt_stats)
		walt_fixup_cum_window_demand(rq,
			-tcfs_rq->walt_stats.cumulative_runnable_avg);
			-tcfs_rq->walt_stats.cumulative_runnable_avg_scaled);
}

static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p,
				       u32 new_task_load, u32 new_pred_demand)
					u16 updated_demand_scaled,
					u16 updated_pred_demand_scaled)
{
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &p->se;
	s64 task_load_delta = (s64)new_task_load - task_load(p);
	s64 pred_demand_delta = PRED_DEMAND_DELTA;
	s64 task_load_delta = (s64)updated_demand_scaled -
			      p->ravg.demand_scaled;
	s64 pred_demand_delta = (s64)updated_pred_demand_scaled -
				p->ravg.pred_demand_scaled;

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
@@ -12637,9 +12643,11 @@ static int task_will_be_throttled(struct task_struct *p)
#else /* CONFIG_CFS_BANDWIDTH */

static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p,
				       u32 new_task_load, u32 new_pred_demand)
					u16 updated_demand_scaled,
					u16 updated_pred_demand_scaled)
{
	fixup_walt_sched_stats_common(rq, p, new_task_load, new_pred_demand);
	fixup_walt_sched_stats_common(rq, p, updated_demand_scaled,
				      updated_pred_demand_scaled);
}

static void walt_fixup_nr_big_tasks(struct rq *rq, struct task_struct *p,
+17 −27
@@ -58,8 +58,8 @@ extern unsigned int walt_cpu_util_freq_divisor;

struct walt_sched_stats {
	int nr_big_tasks;
	u64 cumulative_runnable_avg;
	u64 pred_demands_sum;
	u64 cumulative_runnable_avg_scaled;
	u64 pred_demands_sum_scaled;
};

struct cpu_cycle {
@@ -882,7 +882,7 @@ struct rq {
	u64 prev_runnable_sum;
	u64 nt_curr_runnable_sum;
	u64 nt_prev_runnable_sum;
	u64 cum_window_demand;
	u64 cum_window_demand_scaled;
	struct group_cpu_time grp_time;
	struct load_subtractions load_subs[NUM_TRACKED_WINDOWS];
	DECLARE_BITMAP_ARRAY(top_tasks_bitmap,
@@ -1615,7 +1615,8 @@ struct sched_class {

#ifdef CONFIG_SCHED_WALT
	void (*fixup_walt_sched_stats)(struct rq *rq, struct task_struct *p,
				      u32 new_task_load, u32 new_pred_demand);
				       u16 updated_demand_scaled,
				       u16 updated_pred_demand_scaled);
#endif
};

@@ -1940,10 +1941,7 @@ static inline unsigned long cpu_util(int cpu)
#ifdef CONFIG_SCHED_WALT
	if (likely(!walt_disabled && sysctl_sched_use_walt_cpu_util)) {
		u64 walt_cpu_util =
				cpu_rq(cpu)->walt_stats.cumulative_runnable_avg;

		walt_cpu_util <<= SCHED_CAPACITY_SHIFT;
		do_div(walt_cpu_util, sched_ravg_window);
			cpu_rq(cpu)->walt_stats.cumulative_runnable_avg_scaled;

		return min_t(unsigned long, walt_cpu_util,
				capacity_orig_of(cpu));
@@ -1972,11 +1970,8 @@ static inline unsigned long cpu_util_cum(int cpu, int delta)
	unsigned long capacity = capacity_orig_of(cpu);

#ifdef CONFIG_SCHED_WALT
	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
		util = cpu_rq(cpu)->cum_window_demand;
		util = div64_u64(util,
				 sched_ravg_window >> SCHED_CAPACITY_SHIFT);
	}
	if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
		util = cpu_rq(cpu)->cum_window_demand_scaled;
#endif
	delta += util;
	if (delta < 0)
@@ -2010,19 +2005,17 @@ cpu_util_freq_walt(int cpu, struct sched_walt_cpu_load *walt_load)
	if (walt_load) {
		u64 nl = cpu_rq(cpu)->nt_prev_runnable_sum +
				rq->grp_time.nt_prev_runnable_sum;
		u64 pl = rq->walt_stats.pred_demands_sum;
		u64 pl = rq->walt_stats.pred_demands_sum_scaled;

		/* do_pl_notif() needs unboosted signals */
		rq->old_busy_time = div64_u64(util_unboosted,
						sched_ravg_window >>
						SCHED_CAPACITY_SHIFT);
		rq->old_estimated_time = div64_u64(pl, sched_ravg_window >>
						SCHED_CAPACITY_SHIFT);
		rq->old_estimated_time = pl;

		nl = div64_u64(nl * (100 + boost),
		walt_cpu_util_freq_divisor);
		pl = div64_u64(pl * (100 + boost),
		walt_cpu_util_freq_divisor);
		pl = div64_u64(pl * (100 + boost), 100);

		walt_load->prev_window_util = util;
		walt_load->nl = nl;
@@ -2809,8 +2802,6 @@ struct related_thread_group *task_related_thread_group(struct task_struct *p)
	return rcu_dereference(p->grp);
}

#define PRED_DEMAND_DELTA ((s64)new_pred_demand - p->ravg.pred_demand)

/* Is frequency of two cpus synchronized with each other? */
static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
@@ -2889,11 +2880,11 @@ task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
							 rq->window_start);
}

static inline void walt_fixup_cum_window_demand(struct rq *rq, s64 delta)
static inline void walt_fixup_cum_window_demand(struct rq *rq, s64 scaled_delta)
{
	rq->cum_window_demand += delta;
	if (unlikely((s64)rq->cum_window_demand < 0))
		rq->cum_window_demand = 0;
	rq->cum_window_demand_scaled += scaled_delta;
	if (unlikely((s64)rq->cum_window_demand_scaled < 0))
		rq->cum_window_demand_scaled = 0;
}

extern void update_cpu_cluster_capacity(const cpumask_t *cpus);
@@ -3036,8 +3027,6 @@ static inline int update_preferred_cluster(struct related_thread_group *grp,

static inline void add_new_task_to_grp(struct task_struct *new) {}

#define PRED_DEMAND_DELTA (0)

static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
	return 1;
@@ -3051,7 +3040,8 @@ static inline int alloc_related_thread_groups(void) { return 0; }
#define trace_sched_cpu_load_cgroup(...)
#define trace_sched_cpu_load_wakeup(...)

static inline void walt_fixup_cum_window_demand(struct rq *rq, s64 delta) { }
static inline void walt_fixup_cum_window_demand(struct rq *rq,
						s64 scaled_delta) { }

static inline void update_cpu_cluster_capacity(const cpumask_t *cpus) { }
