Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9d219776 authored by Srivatsa Vaddagiri's avatar Srivatsa Vaddagiri Committed by Joonwoo Park
Browse files

sched: Aggregate for frequency



Related threads in a group could execute on different CPUs and hence
present a split-demand picture to the cpufreq governor. IOW the governor
fails to see the net cpu demand of all related threads in a given
window if the threads' execution were to be split across CPUs. That
could result in a sub-optimal frequency being chosen in comparison to the
ideal frequency at which the aggregate work (taken up by related
threads) needs to be run.

This patch aggregates cpu execution stats in a window for all related
threads in a group. This helps present cpu busy time to the governor as if
all related threads were part of the same thread and thus helps select
the right frequency required by related threads. This aggregation
is done per-cluster.

Change-Id: I71e6047620066323721c6d542034ddd4b2950e7f
Signed-off-by: default avatarSrivatsa Vaddagiri <vatsa@codeaurora.org>
Signed-off-by: default avatarSyed Rameez Mustafa <rameezmustafa@codeaurora.org>
parent 4b378cac
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -281,6 +281,16 @@ enum task_event {
	IRQ_UPDATE	= 5,
};

/* Note: this needs to be in sync with the migrate_type_names array */
enum migrate_types {
	GROUP_TO_RQ,		/* stats move from group accounting to a runqueue */
	RQ_TO_GROUP,		/* stats move from a runqueue to group accounting */
	RQ_TO_RQ,		/* stats move between two runqueues */
	GROUP_TO_GROUP,		/* stats move between two groups' accounting */
};
/*
 * NOTE(review): direction semantics above inferred from the src_rq /
 * src_cpu_time selection in the sched_migration_update_sum tracepoint —
 * confirm against the array definition and its users.
 */

extern const char *migrate_type_names[];

#include <linux/spinlock.h>

/*
+1 −0
Original line number Diff line number Diff line
@@ -83,6 +83,7 @@ extern unsigned int sysctl_sched_enable_thread_grouping;
#if defined(CONFIG_SCHED_FREQ_INPUT)
extern unsigned int sysctl_sched_new_task_windows;
extern unsigned int sysctl_sched_pred_alert_freq;
extern unsigned int sysctl_sched_freq_aggregate;
#endif
#endif

+81 −34
Original line number Diff line number Diff line
@@ -12,6 +12,8 @@
#include <linux/binfmts.h>

struct rq;
struct group_cpu_time;
struct migration_sum_data;
extern const char *task_event_names[];

/*
@@ -272,9 +274,10 @@ TRACE_EVENT(sched_set_boost,
TRACE_EVENT(sched_update_task_ravg,

	TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt,
		 u64 wallclock, u64 irqtime, u32 cycles, u32 exec_time),
		 u64 wallclock, u64 irqtime, u32 cycles, u32 exec_time,
		 struct group_cpu_time *cpu_time),

	TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time),
	TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time, cpu_time),

	TP_STRUCT__entry(
		__array(	char,	comm,   TASK_COMM_LEN	)
@@ -293,8 +296,12 @@ TRACE_EVENT(sched_update_task_ravg,
		__field(	 int,	cpu			)
#ifdef CONFIG_SCHED_FREQ_INPUT
		__field(unsigned int,	pred_demand		)
		__field(	u64,	cs			)
		__field(	u64,	ps			)
		__field(	u64,	rq_cs			)
		__field(	u64,	rq_ps			)
		__field(	u64,	grp_cs			)
		__field(	u64,	grp_ps			)
		__field(	u64,	grp_nt_cs			)
		__field(	u64,	grp_nt_ps			)
		__field(	u32,	curr_window		)
		__field(	u32,	prev_window		)
		__field(	u64,	nt_cs			)
@@ -321,8 +328,12 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->irqtime        = irqtime;
#ifdef CONFIG_SCHED_FREQ_INPUT
		__entry->pred_demand     = p->ravg.pred_demand;
		__entry->cs             = rq->curr_runnable_sum;
		__entry->ps             = rq->prev_runnable_sum;
		__entry->rq_cs          = rq->curr_runnable_sum;
		__entry->rq_ps          = rq->prev_runnable_sum;
		__entry->grp_cs = cpu_time ? cpu_time->curr_runnable_sum : 0;
		__entry->grp_ps = cpu_time ? cpu_time->prev_runnable_sum : 0;
		__entry->grp_nt_cs = cpu_time ? cpu_time->nt_curr_runnable_sum : 0;
		__entry->grp_nt_ps = cpu_time ? cpu_time->nt_prev_runnable_sum : 0;
		__entry->curr_window	= p->ravg.curr_window;
		__entry->prev_window	= p->ravg.prev_window;
		__entry->nt_cs		= rq->nt_curr_runnable_sum;
@@ -333,7 +344,7 @@ TRACE_EVENT(sched_update_task_ravg,

	TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
#ifdef CONFIG_SCHED_FREQ_INPUT
		" pred_demand %u cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
		" pred_demand %u rq_cs %llu rq_ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu"
#endif
		, __entry->wallclock, __entry->win_start, __entry->delta,
		task_event_names[__entry->evt], __entry->cpu,
@@ -342,10 +353,12 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->delta_m, __entry->demand,
		__entry->sum, __entry->irqtime
#ifdef CONFIG_SCHED_FREQ_INPUT
		, __entry->pred_demand, __entry->cs, __entry->ps,
		, __entry->pred_demand, __entry->rq_cs, __entry->rq_ps,
		__entry->curr_window, __entry->prev_window,
		  __entry->nt_cs, __entry->nt_ps,
		  __entry->active_windows
		  __entry->active_windows,
		__entry->grp_cs, __entry->grp_ps,
		__entry->grp_nt_cs, __entry->grp_nt_ps
#endif
		)
);
@@ -509,31 +522,62 @@ TRACE_EVENT(sched_update_pred_demand,

TRACE_EVENT(sched_migration_update_sum,

	TP_PROTO(struct rq *rq, struct task_struct *p),
	TP_PROTO(struct task_struct *p, enum migrate_types migrate_type, struct migration_sum_data *d),

	TP_ARGS(rq, p),
	TP_ARGS(p, migrate_type, d),

	TP_STRUCT__entry(
		__field(int,		cpu			)
		__field(int,		tcpu			)
		__field(int,		pid			)
		__field(	u64,	cs			)
		__field(	u64,	ps			)
		__field(	s64,	nt_cs			)
		__field(	s64,	nt_ps			)
		__field(enum migrate_types,	migrate_type	)
		__field(	s64,	src_cs			)
		__field(	s64,	src_ps			)
		__field(	s64,	dst_cs			)
		__field(	s64,	dst_ps			)
		__field(	s64,	src_nt_cs		)
		__field(	s64,	src_nt_ps		)
		__field(	s64,	dst_nt_cs		)
		__field(	s64,	dst_nt_ps		)
	),

	TP_fast_assign(
		__entry->cpu		= cpu_of(rq);
		__entry->cs		= rq->curr_runnable_sum;
		__entry->ps		= rq->prev_runnable_sum;
		__entry->nt_cs		= (s64)rq->nt_curr_runnable_sum;
		__entry->nt_ps		= (s64)rq->nt_prev_runnable_sum;
		__entry->tcpu		= task_cpu(p);
		__entry->pid		= p->pid;
	),

	TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d",
		  __entry->cpu, __entry->cs, __entry->ps,
		  __entry->nt_cs, __entry->nt_ps, __entry->pid)
		__entry->migrate_type	= migrate_type;
		__entry->src_cs		= d->src_rq ?
						d->src_rq->curr_runnable_sum :
						d->src_cpu_time->curr_runnable_sum;
		__entry->src_ps		= d->src_rq ?
						d->src_rq->prev_runnable_sum :
						d->src_cpu_time->prev_runnable_sum;
		__entry->dst_cs		= d->dst_rq ?
						d->dst_rq->curr_runnable_sum :
						d->dst_cpu_time->curr_runnable_sum;
		__entry->dst_ps		= d->dst_rq ?
						d->dst_rq->prev_runnable_sum :
						d->dst_cpu_time->prev_runnable_sum;
		__entry->src_nt_cs		= d->src_rq ?
						d->src_rq->nt_curr_runnable_sum :
						d->src_cpu_time->nt_curr_runnable_sum;
		__entry->src_nt_ps		= d->src_rq ?
						d->src_rq->nt_prev_runnable_sum :
						d->src_cpu_time->nt_prev_runnable_sum;
		__entry->dst_nt_cs		= d->dst_rq ?
						d->dst_rq->nt_curr_runnable_sum :
						d->dst_cpu_time->nt_curr_runnable_sum;
		__entry->dst_nt_ps		= d->dst_rq ?
						d->dst_rq->nt_prev_runnable_sum :
						d->dst_cpu_time->nt_prev_runnable_sum;
	),

	TP_printk("pid %d task_cpu %d migrate_type %s src_cs %llu src_ps %llu dst_cs %lld dst_ps %lld src_nt_cs %llu src_nt_ps %llu dst_nt_cs %lld dst_nt_ps %lld",
		__entry->pid, __entry->tcpu, migrate_type_names[__entry->migrate_type],
		__entry->src_cs, __entry->src_ps, __entry->dst_cs, __entry->dst_ps,
		__entry->src_nt_cs, __entry->src_nt_ps, __entry->dst_nt_cs, __entry->dst_nt_ps)
);

TRACE_EVENT(sched_get_busy,
@@ -565,15 +609,17 @@ TRACE_EVENT(sched_get_busy,

TRACE_EVENT(sched_freq_alert,

	TP_PROTO(int cpu, int pd_notif, u64 old_load, u64 new_load,
		u64 old_pred, u64 new_pred),
	TP_PROTO(int cpu, int pd_notif, int check_groups, struct rq *rq,
		u64 new_load),

	TP_ARGS(cpu, pd_notif, old_load, new_load, old_pred, new_pred),
	TP_ARGS(cpu, pd_notif, check_groups, rq, new_load),

	TP_STRUCT__entry(
		__field(	int,	cpu			)
		__field(	int,	pd_notif		)
		__field(	u64,	old_load		)
		__field(	int,	check_groups		)
		__field(	u64,	old_busy_time		)
		__field(	u64,	ps			)
		__field(	u64,	new_load		)
		__field(	u64,	old_pred		)
		__field(	u64,	new_pred		)
@@ -582,17 +628,18 @@ TRACE_EVENT(sched_freq_alert,
	TP_fast_assign(
		__entry->cpu		= cpu;
		__entry->pd_notif	= pd_notif;
		__entry->old_load	= old_load;
		__entry->check_groups	= check_groups;
		__entry->old_busy_time	= rq->old_busy_time;
		__entry->ps		= rq->prev_runnable_sum;
		__entry->new_load	= new_load;
		__entry->old_pred	= old_pred;
		__entry->new_pred	= new_pred;
		__entry->old_pred	= rq->old_estimated_time;
		__entry->new_pred	= rq->hmp_stats.pred_demands_sum;
	),

	TP_printk("cpu %d pd_notif=%d old_load=%llu new_load=%llu "
		"old_pred=%llu new_pred=%llu",
		__entry->cpu, __entry->pd_notif, __entry->old_load,
		__entry->new_load, __entry->old_pred,
		 __entry->new_pred)
	TP_printk("cpu %d pd_notif=%d check_groups %d old_busy_time=%llu prev_sum=%lld new_load=%llu old_pred=%llu new_pred=%llu",
		__entry->cpu, __entry->pd_notif, __entry->check_groups,
		__entry->old_busy_time, __entry->ps, __entry->new_load,
		__entry->old_pred, __entry->new_pred)
);

#endif	/* CONFIG_SCHED_FREQ_INPUT */
+621 −112

File changed.

Preview size limit exceeded, changes collapsed.

+18 −8
Original line number Diff line number Diff line
@@ -32,9 +32,8 @@
#include <linux/task_work.h>
#include <linux/ratelimit.h>

#include <trace/events/sched.h>

#include "sched.h"
#include <trace/events/sched.h>

/*
 * Targeted preemption latency for CPU-bound tasks:
@@ -3822,6 +3821,9 @@ static inline int invalid_value_freq_input(unsigned int *data)
	if (data == &sysctl_sched_freq_account_wait_time)
		return !(*data == 0 || *data == 1);

	if (data == &sysctl_sched_freq_aggregate)
		return !(*data == 0 || *data == 1);

	return 0;
}
#else
@@ -7262,6 +7264,7 @@ enum fbq_type { regular, remote, all };
				LBF_BIG_TASK_ACTIVE_BALANCE)
#define LBF_IGNORE_BIG_TASKS 0x100
#define LBF_IGNORE_PREFERRED_CLUSTER_TASKS 0x200
#define LBF_MOVED_RELATED_THREAD_GROUP_TASK 0x400

struct lb_env {
	struct sched_domain	*sd;
@@ -7538,6 +7541,8 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
	p->on_rq = TASK_ON_RQ_MIGRATING;
	double_lock_balance(env->src_rq, env->dst_rq);
	set_task_cpu(p, env->dst_cpu);
	if (rcu_access_pointer(p->grp))
		env->flags |= LBF_MOVED_RELATED_THREAD_GROUP_TASK;
	double_unlock_balance(env->src_rq, env->dst_rq);
}

@@ -9242,10 +9247,13 @@ no_move:

		/* Assumes one 'busiest' cpu that we pulled tasks from */
		if (!same_freq_domain(this_cpu, cpu_of(busiest))) {
			check_for_freq_change(this_rq, false);
			check_for_freq_change(busiest, false);
			int check_groups = !!(env.flags &
					 LBF_MOVED_RELATED_THREAD_GROUP_TASK);

			check_for_freq_change(this_rq, false, check_groups);
			check_for_freq_change(busiest, false, check_groups);
		} else {
			check_for_freq_change(this_rq, true);
			check_for_freq_change(this_rq, true, false);
		}
	}
	if (likely(!active_balance)) {
@@ -9546,10 +9554,12 @@ out_unlock:
	local_irq_enable();

	if (moved && !same_freq_domain(busiest_cpu, target_cpu)) {
		check_for_freq_change(busiest_rq, false);
		check_for_freq_change(target_rq, false);
		int check_groups = !!(env.flags &
					 LBF_MOVED_RELATED_THREAD_GROUP_TASK);
		check_for_freq_change(busiest_rq, false, check_groups);
		check_for_freq_change(target_rq, false, check_groups);
	} else if (moved) {
		check_for_freq_change(target_rq, true);
		check_for_freq_change(target_rq, true, false);
	}

	if (per_cpu(dbs_boost_needed, target_cpu)) {
Loading