Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 26c21548 authored by Srivatsa Vaddagiri's avatar Srivatsa Vaddagiri Committed by Dmitry Shmidt
Browse files

ANDROID: sched: Introduce Window Assisted Load Tracking (WALT)



use a window based view of time in order to track task
demand and CPU utilization in the scheduler.

Window Assisted Load Tracking (WALT) implementation credits:
 Srivatsa Vaddagiri, Steve Muckle, Syed Rameez Mustafa, Joonwoo Park,
 Pavan Kumar Kondeti, Olav Haugan

2016-03-06: Integration with EAS/refactoring by Vikram Mulukutla
            and Todd Kjos

Change-Id: I21408236836625d4e7d7de1843d20ed5ff36c708

Includes fixes for issues:

eas/walt: Use walt_ktime_clock() instead of ktime_get_ns() to avoid a
race resulting in watchdog resets
BUG: 29353986
Change-Id: Ic1820e22a136f7c7ebd6f42e15f14d470f6bbbdb

Handle walt accounting anomoly during resume

During resume, there is a corner case where on wakeup, a task's
prev_runnable_sum can go negative. This is a workaround that
fixes the condition and warns (instead of crashing).

BUG: 29464099
Change-Id: I173e7874324b31a3584435530281708145773508

Signed-off-by: default avatarTodd Kjos <tkjos@google.com>
Signed-off-by: default avatarSrinath Sridharan <srinathsr@google.com>
Signed-off-by: default avatarJuri Lelli <juri.lelli@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: default avatarJohn Stultz <john.stultz@linaro.org>
Signed-off-by: default avatarAndres Oportus <andresoportus@google.com>
parent d5563d3a
Loading
Loading
Loading
Loading
+53 −0
Original line number Diff line number Diff line
@@ -318,6 +318,15 @@ extern char ___assert_task_state[1 - 2*!!(
/* Task command name length */
#define TASK_COMM_LEN 16

enum task_event {
	PUT_PREV_TASK   = 0,
	PICK_NEXT_TASK  = 1,
	TASK_WAKE       = 2,
	TASK_MIGRATE    = 3,
	TASK_UPDATE     = 4,
	IRQ_UPDATE	= 5,
};

#include <linux/spinlock.h>

/*
@@ -1368,6 +1377,41 @@ struct sched_statistics {
};
#endif

#ifdef CONFIG_SCHED_WALT
#define RAVG_HIST_SIZE_MAX  5

/* ravg represents frequency scaled cpu-demand of tasks */
struct ravg {
	/*
	 * 'mark_start' marks the beginning of an event (task waking up, task
	 * starting to execute, task being preempted) within a window
	 *
	 * 'sum' represents how runnable a task has been within current
	 * window. It incorporates both running time and wait time and is
	 * frequency scaled.
	 *
	 * 'sum_history' keeps track of history of 'sum' seen over previous
	 * RAVG_HIST_SIZE windows. Windows where task was entirely sleeping are
	 * ignored.
	 *
	 * 'demand' represents maximum sum seen over previous
	 * sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency
	 * demand for tasks.
	 *
	 * 'curr_window' represents task's contribution to cpu busy time
	 * statistics (rq->curr_runnable_sum) in current window
	 *
	 * 'prev_window' represents task's contribution to cpu busy time
	 * statistics (rq->prev_runnable_sum) in previous window
	 */
	u64 mark_start;
	u32 sum, demand;
	u32 sum_history[RAVG_HIST_SIZE_MAX];
	u32 curr_window, prev_window;
	u16 active_windows;
};
#endif

struct sched_entity {
	struct load_weight	load;		/* for load-balancing */
	struct rb_node		run_node;
@@ -1538,6 +1582,15 @@ struct task_struct {
	const struct sched_class *sched_class;
	struct sched_entity se;
	struct sched_rt_entity rt;
#ifdef CONFIG_SCHED_WALT
	struct ravg ravg;
	/*
	 * 'init_load_pct' represents the initial task load assigned to children
	 * of this task
	 */
	u32 init_load_pct;
#endif

#ifdef CONFIG_CGROUP_SCHED
	struct task_group *sched_task_group;
#endif
+5 −0
Original line number Diff line number Diff line
@@ -22,6 +22,11 @@ extern unsigned int sysctl_sched_is_big_little;
extern unsigned int sysctl_sched_sync_hint_enable;
extern unsigned int sysctl_sched_initial_task_util;
extern unsigned int sysctl_sched_cstate_aware;
#ifdef CONFIG_SCHED_WALT
extern unsigned int sysctl_sched_use_walt_cpu_util;
extern unsigned int sysctl_sched_use_walt_task_util;
extern unsigned int sysctl_sched_walt_init_task_load_pct;
#endif

enum sched_tunable_scaling {
	SCHED_TUNABLESCALING_NONE,
+149 −0
Original line number Diff line number Diff line
@@ -912,6 +912,155 @@ TRACE_EVENT(sched_tune_filter,
		__entry->payoff, __entry->region)
);

#ifdef CONFIG_SCHED_WALT
struct rq;

TRACE_EVENT(walt_update_task_ravg,

	TP_PROTO(struct task_struct *p, struct rq *rq, int evt,
						u64 wallclock, u64 irqtime),

	TP_ARGS(p, rq, evt, wallclock, irqtime),

	TP_STRUCT__entry(
		__array(	char,	comm,   TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	pid_t,	cur_pid			)
		__field(unsigned int,	cur_freq		)
		__field(	u64,	wallclock		)
		__field(	u64,	mark_start		)
		__field(	u64,	delta_m			)
		__field(	u64,	win_start		)
		__field(	u64,	delta			)
		__field(	u64,	irqtime			)
		__field(        int,    evt			)
		__field(unsigned int,	demand			)
		__field(unsigned int,	sum			)
		__field(	 int,	cpu			)
		__field(	u64,	cs			)
		__field(	u64,	ps			)
		__field(	u32,	curr_window		)
		__field(	u32,	prev_window		)
		__field(	u64,	nt_cs			)
		__field(	u64,	nt_ps			)
		__field(	u32,	active_windows		)
	),

	TP_fast_assign(
		__entry->wallclock      = wallclock;
		__entry->win_start      = rq->window_start;
		__entry->delta          = (wallclock - rq->window_start);
		__entry->evt            = evt;
		__entry->cpu            = rq->cpu;
		__entry->cur_pid        = rq->curr->pid;
		__entry->cur_freq       = rq->cur_freq;
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid            = p->pid;
		__entry->mark_start     = p->ravg.mark_start;
		__entry->delta_m        = (wallclock - p->ravg.mark_start);
		__entry->demand         = p->ravg.demand;
		__entry->sum            = p->ravg.sum;
		__entry->irqtime        = irqtime;
		__entry->cs             = rq->curr_runnable_sum;
		__entry->ps             = rq->prev_runnable_sum;
		__entry->curr_window	= p->ravg.curr_window;
		__entry->prev_window	= p->ravg.prev_window;
		__entry->nt_cs		= rq->nt_curr_runnable_sum;
		__entry->nt_ps		= rq->nt_prev_runnable_sum;
		__entry->active_windows	= p->ravg.active_windows;
	),

	TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
		" cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
		, __entry->wallclock, __entry->win_start, __entry->delta,
		__entry->evt, __entry->cpu,
		__entry->cur_freq, __entry->cur_pid,
		__entry->pid, __entry->comm, __entry->mark_start,
		__entry->delta_m, __entry->demand,
		__entry->sum, __entry->irqtime,
		__entry->cs, __entry->ps,
		__entry->curr_window, __entry->prev_window,
		  __entry->nt_cs, __entry->nt_ps,
		  __entry->active_windows
		)
);

TRACE_EVENT(walt_update_history,

	TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples,
			int evt),

	TP_ARGS(rq, p, runtime, samples, evt),

	TP_STRUCT__entry(
		__array(	char,	comm,   TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(unsigned int,	runtime			)
		__field(	 int,	samples			)
		__field(	 int,	evt			)
		__field(	 u64,	demand			)
		__field(unsigned int,	walt_avg		)
		__field(unsigned int,	pelt_avg		)
		__array(	 u32,	hist, RAVG_HIST_SIZE_MAX)
		__field(	 int,	cpu			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid            = p->pid;
		__entry->runtime        = runtime;
		__entry->samples        = samples;
		__entry->evt            = evt;
		__entry->demand         = p->ravg.demand;
		__entry->walt_avg = (__entry->demand << 10) / walt_ravg_window,
		__entry->pelt_avg	= p->se.avg.util_avg;
		memcpy(__entry->hist, p->ravg.sum_history,
					RAVG_HIST_SIZE_MAX * sizeof(u32));
		__entry->cpu            = rq->cpu;
	),

	TP_printk("%d (%s): runtime %u samples %d event %d demand %llu"
		" walt %u pelt %u (hist: %u %u %u %u %u) cpu %d",
		__entry->pid, __entry->comm,
		__entry->runtime, __entry->samples, __entry->evt,
		__entry->demand,
		__entry->walt_avg,
		__entry->pelt_avg,
		__entry->hist[0], __entry->hist[1],
		__entry->hist[2], __entry->hist[3],
		__entry->hist[4], __entry->cpu)
);

TRACE_EVENT(walt_migration_update_sum,

	TP_PROTO(struct rq *rq, struct task_struct *p),

	TP_ARGS(rq, p),

	TP_STRUCT__entry(
		__field(int,		cpu			)
		__field(int,		pid			)
		__field(	u64,	cs			)
		__field(	u64,	ps			)
		__field(	s64,	nt_cs			)
		__field(	s64,	nt_ps			)
	),

	TP_fast_assign(
		__entry->cpu		= cpu_of(rq);
		__entry->cs		= rq->curr_runnable_sum;
		__entry->ps		= rq->prev_runnable_sum;
		__entry->nt_cs		= (s64)rq->nt_curr_runnable_sum;
		__entry->nt_ps		= (s64)rq->nt_prev_runnable_sum;
		__entry->pid		= p->pid;
	),

	TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d",
		  __entry->cpu, __entry->cs, __entry->ps,
		  __entry->nt_cs, __entry->nt_ps, __entry->pid)
);
#endif /* CONFIG_SCHED_WALT */

#endif /* CONFIG_SMP */

#endif /* _TRACE_SCHED_H */
+9 −0
Original line number Diff line number Diff line
@@ -400,6 +400,15 @@ config IRQ_TIME_ACCOUNTING

	  If in doubt, say N here.

config SCHED_WALT
        bool "Support window based load tracking"
        depends on SMP
        help
        This feature will allow the scheduler to maintain a tunable window
	based set of metrics for tasks and runqueues. These metrics can be
	used to guide task placement as well as task frequency requirements
	for cpufreq governors.

config BSD_PROCESS_ACCT
	bool "BSD Process Accounting"
	depends on MULTIUSER
+1 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
obj-y += wait.o swait.o completion.o idle.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o
obj-$(CONFIG_SCHED_WALT) += walt.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
Loading