
Commit 67ddb178 authored by Joonwoo Park

sched: account new task load so that governor can apply different policy



Account the amount of load contributed by new tasks within the CPU load, so
that the governor can apply a different policy when the CPU is loaded by new
tasks.

To be able to distinguish new-task load, a new tunable,
sched_new_task_windows, is also introduced. The tunable defines a task as
new when it has been active for fewer than the configured number of windows.

Change-Id: I2e2e62e4103882f7362154b792ab978b181b9f59
Suggested-by: Saravana Kannan <skannan@codeaurora.org>
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
parent d06e0f7b
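
For context, a minimal governor-side sketch of what this split enables. The
choose_target_freq() helper below is hypothetical and not part of this commit;
the diff that follows only plumbs struct sched_load through the interactive
governor's busy-time bookkeeping (it consumes sl->prev_load and does not yet
act on new_task_load):

	/*
	 * Hypothetical consumer of sched_get_cpus_busy() (illustration only):
	 * treat load from tasks younger than sched_new_task_windows windows
	 * more conservatively, since their demand history is not yet
	 * trustworthy.
	 */
	static unsigned int choose_target_freq(struct cpufreq_policy *policy,
					       struct sched_load *sl)
	{
		/*
		 * new_task_load is the portion of prev_load contributed by
		 * new tasks, so this subtraction cannot underflow.
		 */
		unsigned long established = sl->prev_load - sl->new_task_load;

		if (sl->new_task_load > established)
			return policy->cur;	/* hold: load is mostly new tasks */

		return policy->max;		/* normal ramp-up path */
	}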
+12 −11
@@ -52,7 +52,7 @@ struct cpufreq_interactive_policyinfo {
	bool reject_notification;
	int governor_enabled;
	struct cpufreq_interactive_tunables *cached_tunables;
-	unsigned long *cpu_busy_times;
+	struct sched_load *sl;
};

/* Protected by per-policy load_lock */
@@ -447,6 +447,7 @@ static void __cpufreq_interactive_timer(unsigned long data, bool is_notif)
	unsigned long flags;
	unsigned long max_cpu;
	int i, fcpu;
+	struct sched_load *sl;
	struct cpufreq_govinfo govinfo;
	bool skip_hispeed_logic, skip_min_sample_time;
	bool policy_max_fast_restore = false;
@@ -462,14 +463,14 @@ static void __cpufreq_interactive_timer(unsigned long data, bool is_notif)
	ppol->last_evaluated_jiffy = get_jiffies_64();

	if (tunables->use_sched_load)
-		sched_get_cpus_busy(ppol->cpu_busy_times,
-				    ppol->policy->related_cpus);
+		sched_get_cpus_busy(ppol->sl, ppol->policy->related_cpus);
	max_cpu = cpumask_first(ppol->policy->cpus);
	for_each_cpu(i, ppol->policy->cpus) {
		pcpu = &per_cpu(cpuinfo, i);
+		sl = &ppol->sl[i - fcpu];
		if (tunables->use_sched_load) {
-			cputime_speedadj = (u64)ppol->cpu_busy_times[i - fcpu]
-					* ppol->policy->cpuinfo.max_freq;
+			cputime_speedadj = (u64)sl->prev_load *
+					   ppol->policy->cpuinfo.max_freq;
			do_div(cputime_speedadj, tunables->timer_rate);
		} else {
			now = update_load(i);
@@ -1503,7 +1504,7 @@ static struct cpufreq_interactive_policyinfo *get_policyinfo(
	struct cpufreq_interactive_policyinfo *ppol =
				per_cpu(polinfo, policy->cpu);
	int i;
-	unsigned long *busy;
+	struct sched_load *sl;

	/* polinfo already allocated for policy, return */
	if (ppol)
@@ -1513,13 +1514,13 @@ static struct cpufreq_interactive_policyinfo *get_policyinfo(
	if (!ppol)
		return ERR_PTR(-ENOMEM);

-	busy = kcalloc(cpumask_weight(policy->related_cpus), sizeof(*busy),
+	sl = kcalloc(cpumask_weight(policy->related_cpus), sizeof(*sl),
		     GFP_KERNEL);
-	if (!busy) {
+	if (!sl) {
		kfree(ppol);
		return ERR_PTR(-ENOMEM);
	}
-	ppol->cpu_busy_times = busy;
+	ppol->sl = sl;

	init_timer_deferrable(&ppol->policy_timer);
	ppol->policy_timer.function = cpufreq_interactive_timer;
@@ -1547,7 +1548,7 @@ static void free_policyinfo(int cpu)
		if (per_cpu(polinfo, j) == ppol)
			per_cpu(polinfo, cpu) = NULL;
	kfree(ppol->cached_tunables);
-	kfree(ppol->cpu_busy_times);
+	kfree(ppol->sl);
	kfree(ppol);
}

+10 −3
@@ -1167,6 +1167,7 @@ struct ravg {
	u32 sum_history[RAVG_HIST_SIZE_MAX];
#ifdef CONFIG_SCHED_FREQ_INPUT
	u32 curr_window, prev_window;
+	u16 active_windows;
#endif
};

@@ -1963,10 +1964,16 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,

extern int task_free_register(struct notifier_block *n);
extern int task_free_unregister(struct notifier_block *n);

+struct sched_load {
+	unsigned long prev_load;
+	unsigned long new_task_load;
+};

#if defined(CONFIG_SCHED_FREQ_INPUT)
extern int sched_set_window(u64 window_start, unsigned int window_size);
extern unsigned long sched_get_busy(int cpu);
-extern void sched_get_cpus_busy(unsigned long *busy,
+extern void sched_get_cpus_busy(struct sched_load *busy,
				const struct cpumask *query_cpus);
extern void sched_set_io_is_busy(int val);
#ifdef CONFIG_SCHED_QHMP
@@ -1986,7 +1993,7 @@ static inline unsigned long sched_get_busy(int cpu)
{
	return 0;
}
-static inline void sched_get_cpus_busy(unsigned long *busy,
+static inline void sched_get_cpus_busy(struct sched_load *busy,
				       const struct cpumask *query_cpus) {};
static inline void sched_set_io_is_busy(int val) {};

+3 −0
@@ -74,6 +74,9 @@ extern unsigned int sysctl_sched_small_task_pct;
#else
extern unsigned int sysctl_sched_lowspill_freq;
extern unsigned int sysctl_sched_pack_freq;
+#if defined(CONFIG_SCHED_FREQ_INPUT)
+extern unsigned int sysctl_sched_new_task_windows;
+#endif
#endif

#else /* CONFIG_SCHED_HMP */
+23 −8
@@ -247,6 +247,9 @@ TRACE_EVENT(sched_update_task_ravg,
		__field(	u64,	ps			)
		__field(	u32,	curr_window		)
		__field(	u32,	prev_window		)
+		__field(	u64,	nt_cs			)
+		__field(	u64,	nt_ps			)
+		__field(	u32,	active_windows		)
#endif
	),

@@ -270,12 +273,15 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->ps             = rq->prev_runnable_sum;
		__entry->curr_window	= p->ravg.curr_window;
		__entry->prev_window	= p->ravg.prev_window;
+		__entry->nt_cs		= rq->nt_curr_runnable_sum;
+		__entry->nt_ps		= rq->nt_prev_runnable_sum;
+		__entry->active_windows	= p->ravg.active_windows;
#endif
	),

	TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
#ifdef CONFIG_SCHED_FREQ_INPUT
		" cs %llu ps %llu cur_window %u prev_window %u"
		" cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
#endif
		, __entry->wallclock, __entry->win_start, __entry->delta,
		task_event_names[__entry->evt], __entry->cpu,
@@ -285,7 +291,9 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->sum, __entry->irqtime
#ifdef CONFIG_SCHED_FREQ_INPUT
		, __entry->cs, __entry->ps, __entry->curr_window,
-		  __entry->prev_window
+		  __entry->prev_window,
+		  __entry->nt_cs, __entry->nt_ps,
+		  __entry->active_windows
#endif
		)
);
@@ -377,37 +385,44 @@ TRACE_EVENT(sched_migration_update_sum,
		__field(int,		pid			)
		__field(	u64,	cs			)
		__field(	u64,	ps			)
+		__field(	s64,	nt_cs			)
+		__field(	s64,	nt_ps			)
	),

	TP_fast_assign(
		__entry->cpu		= cpu_of(rq);
		__entry->cs		= rq->curr_runnable_sum;
		__entry->ps		= rq->prev_runnable_sum;
+		__entry->nt_cs		= (s64)rq->nt_curr_runnable_sum;
+		__entry->nt_ps		= (s64)rq->nt_prev_runnable_sum;
		__entry->pid		= p->pid;
	),

	TP_printk("cpu %d: cs %llu ps %llu pid %d", __entry->cpu,
		      __entry->cs, __entry->ps, __entry->pid)
	TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d",
		  __entry->cpu, __entry->cs, __entry->ps,
		  __entry->nt_cs, __entry->nt_ps, __entry->pid)
);

TRACE_EVENT(sched_get_busy,

-	TP_PROTO(int cpu, u64 load),
+	TP_PROTO(int cpu, u64 load, u64 nload),

-	TP_ARGS(cpu, load),
+	TP_ARGS(cpu, load, nload),

	TP_STRUCT__entry(
		__field(	int,	cpu			)
		__field(	u64,	load			)
+		__field(	u64,	nload			)
	),

	TP_fast_assign(
		__entry->cpu		= cpu;
		__entry->load		= load;
+		__entry->nload		= nload;
	),

	TP_printk("cpu %d load %lld",
		__entry->cpu, __entry->load)
	TP_printk("cpu %d load %lld new_task_load %lld",
		__entry->cpu, __entry->load, __entry->nload)
);

TRACE_EVENT(sched_freq_alert,
+86 −11
@@ -1229,6 +1229,8 @@ static __read_mostly unsigned int sched_window_stats_policy =
__read_mostly unsigned int sysctl_sched_window_stats_policy =
	WINDOW_STATS_MAX_RECENT_AVG;

+__read_mostly unsigned int sysctl_sched_new_task_windows = 5;

static __read_mostly unsigned int sched_account_wait_time = 1;
__read_mostly unsigned int sysctl_sched_account_wait_time = 1;

@@ -1472,6 +1474,11 @@ heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event)
	return (rq->window_start - p->ravg.mark_start > sched_ravg_window);
}

+static inline bool is_new_task(struct task_struct *p)
+{
+	return p->ravg.active_windows < sysctl_sched_new_task_windows;
+}

/*
 * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
 */
@@ -1484,11 +1491,17 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
	u64 window_start = rq->window_start;
	u32 window_size = sched_ravg_window;
	u64 delta;
+	bool new_task;

	new_window = mark_start < window_start;
-	if (new_window)
+	if (new_window) {
		nr_full_windows = div64_u64((window_start - mark_start),
						window_size);
+		if (p->ravg.active_windows < USHRT_MAX)
+			p->ravg.active_windows++;
+	}
+
+	new_task = is_new_task(p);

	/* Handle per-task window rollover. We don't care about the idle
	 * task or exiting tasks. */
@@ -1519,14 +1532,18 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
		/* A new window has started. The RQ demand must be rolled
		 * over if p is the current task. */
		if (p_is_curr_task) {
-			u64 prev_sum = 0;
+			u64 prev_sum = 0, nt_prev_sum = 0;

			/* p is either idle task or an exiting task */
-			if (!nr_full_windows)
+			if (!nr_full_windows) {
				prev_sum = rq->curr_runnable_sum;
+				nt_prev_sum = rq->nt_curr_runnable_sum;
+			}

			rq->prev_runnable_sum = prev_sum;
			rq->curr_runnable_sum = 0;
+			rq->nt_prev_runnable_sum = nt_prev_sum;
+			rq->nt_curr_runnable_sum = 0;

		} else if (heavy_task_wakeup(p, rq, event)) {
			/* A new window has started. If p is a waking
@@ -1538,6 +1555,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
			 * tunable. */
			p->ravg.prev_window = p->ravg.demand;
			rq->prev_runnable_sum += p->ravg.demand;
+			if (new_task)
+				rq->nt_prev_runnable_sum += p->ravg.demand;
		}

		return;
@@ -1556,6 +1575,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
			delta = irqtime;
		delta = scale_exec_time(delta, rq);
		rq->curr_runnable_sum += delta;
+		if (new_task)
+			rq->nt_curr_runnable_sum += delta;
		if (!is_idle_task(p) && !exiting_task(p))
			p->ravg.curr_window += delta;

@@ -1589,10 +1610,14 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
				p->ravg.prev_window = delta;
		}
		rq->prev_runnable_sum += delta;
+		if (new_task)
+			rq->nt_prev_runnable_sum += delta;

		/* Account piece of busy time in the current window. */
		delta = scale_exec_time(wallclock - window_start, rq);
		rq->curr_runnable_sum += delta;
+		if (new_task)
+			rq->nt_curr_runnable_sum += delta;
		if (!exiting_task(p))
			p->ravg.curr_window = delta;

@@ -1618,6 +1643,11 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
			delta = scale_exec_time(window_start - mark_start, rq);
			if (!is_idle_task(p) && !exiting_task(p))
				p->ravg.prev_window += delta;

+			rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum;
+			if (new_task)
+				rq->nt_prev_runnable_sum += delta;

			delta += rq->curr_runnable_sum;
		} else {
			/* Since at least one full window has elapsed,
@@ -1626,14 +1656,27 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
			delta = scale_exec_time(window_size, rq);
			if (!is_idle_task(p) && !exiting_task(p))
				p->ravg.prev_window = delta;

+			if (new_task)
+				rq->nt_prev_runnable_sum = delta;
+			else
+				rq->nt_prev_runnable_sum = 0;
		}
-		/* Rollover is done here by overwriting the values in
-		 * prev_runnable_sum and curr_runnable_sum. */
+		/*
+		 * Rollover for normal runnable sum is done here by overwriting
+		 * the values in prev_runnable_sum and curr_runnable_sum.
+		 * Rollover for new task runnable sum has completed by previous
+		 * if-else statement.
+		 */
		rq->prev_runnable_sum = delta;

		/* Account piece of busy time in the current window. */
		delta = scale_exec_time(wallclock - window_start, rq);
		rq->curr_runnable_sum = delta;
+		if (new_task)
+			rq->nt_curr_runnable_sum = delta;
+		else
+			rq->nt_curr_runnable_sum = 0;
		if (!is_idle_task(p) && !exiting_task(p))
			p->ravg.curr_window = delta;

@@ -1657,6 +1700,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
		/* Roll window over. If IRQ busy time was just in the current
		 * window then that is all that need be accounted. */
		rq->prev_runnable_sum = rq->curr_runnable_sum;
+		rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum;
+		rq->nt_curr_runnable_sum = 0;
		if (mark_start > window_start) {
			rq->curr_runnable_sum = scale_exec_time(irqtime, rq);
			return;
@@ -2083,6 +2128,7 @@ static inline void set_window_start(struct rq *rq)
		rq->window_start = cpu_rq(sync_cpu)->window_start;
#ifdef CONFIG_SCHED_FREQ_INPUT
		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
+		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
#endif
		raw_spin_unlock(&sync_rq->lock);
	}
@@ -2215,6 +2261,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
			rq->window_start = window_start;
#ifdef CONFIG_SCHED_FREQ_INPUT
		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
+		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
#endif
		reset_cpu_hmp_stats(cpu, 1);

@@ -2272,12 +2319,13 @@ scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
	return div64_u64(load * (u64)src_freq, (u64)dst_freq);
}

-void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
+void sched_get_cpus_busy(struct sched_load *busy,
+			 const struct cpumask *query_cpus)
{
	unsigned long flags;
	struct rq *rq;
	const int cpus = cpumask_weight(query_cpus);
-	u64 load[cpus];
+	u64 load[cpus], nload[cpus];
	unsigned int cur_freq[cpus], max_freq[cpus];
	int notifier_sent[cpus];
	int cpu, i = 0;
@@ -2302,6 +2350,7 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)

		update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
		load[i] = rq->old_busy_time = rq->prev_runnable_sum;
+		nload[i] = rq->nt_prev_runnable_sum;
		/*
		 * Scale load in reference to rq->max_possible_freq.
		 *
@@ -2309,6 +2358,7 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
		 * rq->max_freq.
		 */
		load[i] = scale_load_to_cpu(load[i], cpu);
+		nload[i] = scale_load_to_cpu(nload[i], cpu);

		notifier_sent[i] = rq->notifier_sent;
		rq->notifier_sent = 0;
@@ -2328,18 +2378,29 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
		if (!notifier_sent[i]) {
			load[i] = scale_load_to_freq(load[i], max_freq[i],
						     cur_freq[i]);
+			nload[i] = scale_load_to_freq(nload[i], max_freq[i],
+						      cur_freq[i]);
			if (load[i] > window_size)
				load[i] = window_size;
+			if (nload[i] > window_size)
+				nload[i] = window_size;

			load[i] = scale_load_to_freq(load[i], cur_freq[i],
						     rq->max_possible_freq);
+			nload[i] = scale_load_to_freq(nload[i], cur_freq[i],
+						      rq->max_possible_freq);
		} else {
			load[i] = scale_load_to_freq(load[i], max_freq[i],
						     rq->max_possible_freq);
+			nload[i] = scale_load_to_freq(nload[i], max_freq[i],
+						     rq->max_possible_freq);
		}

-		busy[i] = div64_u64(load[i], NSEC_PER_USEC);
+		busy[i].prev_load = div64_u64(load[i], NSEC_PER_USEC);
+		busy[i].new_task_load = div64_u64(nload[i], NSEC_PER_USEC);

-		trace_sched_get_busy(cpu, busy[i]);
+		trace_sched_get_busy(cpu, busy[i].prev_load,
+				     busy[i].new_task_load);
		i++;
	}
}
@@ -2347,12 +2408,12 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
unsigned long sched_get_busy(int cpu)
{
	struct cpumask query_cpu = CPU_MASK_NONE;
-	unsigned long busy;
+	struct sched_load busy;

	cpumask_set_cpu(cpu, &query_cpu);
	sched_get_cpus_busy(&busy, &query_cpu);

-	return busy;
+	return busy.prev_load;
}

void sched_set_io_is_busy(int val)
@@ -2402,6 +2463,7 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
	struct rq *src_rq = task_rq(p);
	struct rq *dest_rq = cpu_rq(new_cpu);
	u64 wallclock;
+	bool new_task;

	if (!sched_enable_hmp || !sched_migration_fixup ||
		 exiting_task(p) || (!p->on_rq && p->state != TASK_WAKING))
@@ -2424,18 +2486,30 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
	update_task_ravg(p, task_rq(p), TASK_MIGRATE,
			 wallclock, 0);

+	new_task = is_new_task(p);

	if (p->ravg.curr_window) {
		src_rq->curr_runnable_sum -= p->ravg.curr_window;
		dest_rq->curr_runnable_sum += p->ravg.curr_window;
+		if (new_task) {
+			src_rq->nt_curr_runnable_sum -= p->ravg.curr_window;
+			dest_rq->nt_curr_runnable_sum += p->ravg.curr_window;
+		}
	}

	if (p->ravg.prev_window) {
		src_rq->prev_runnable_sum -= p->ravg.prev_window;
		dest_rq->prev_runnable_sum += p->ravg.prev_window;
+		if (new_task) {
+			src_rq->nt_prev_runnable_sum -= p->ravg.prev_window;
+			dest_rq->nt_prev_runnable_sum += p->ravg.prev_window;
+		}
	}

	BUG_ON((s64)src_rq->prev_runnable_sum < 0);
	BUG_ON((s64)src_rq->curr_runnable_sum < 0);
+	BUG_ON((s64)src_rq->nt_prev_runnable_sum < 0);
+	BUG_ON((s64)src_rq->nt_curr_runnable_sum < 0);

	trace_sched_migration_update_sum(src_rq, p);
	trace_sched_migration_update_sum(dest_rq, p);
@@ -9064,6 +9138,7 @@ void __init sched_init(void)

#ifdef CONFIG_SCHED_FREQ_INPUT
		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
+		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
		rq->old_busy_time = 0;
		rq->notifier_sent = 0;
#endif
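
To make the tunable's semantics concrete, here is a standalone user-space
sketch (plain C, not kernel code) of the active_windows bookkeeping added
above: the counter is bumped once per window rollover and saturates at
USHRT_MAX, and a task contributes to the nt_* runnable sums only while
is_new_task() holds, i.e. for its first sysctl_sched_new_task_windows
(default 5) windows:

	#include <stdio.h>

	#define ACTIVE_WINDOWS_MAX 65535	/* USHRT_MAX: counter saturates */

	static unsigned int sysctl_sched_new_task_windows = 5;

	struct task { unsigned short active_windows; };

	static int is_new_task(const struct task *p)
	{
		return p->active_windows < sysctl_sched_new_task_windows;
	}

	int main(void)
	{
		struct task p = { 0 };

		/* Windows 0-4 count as new-task load; window 5 onward is regular. */
		for (int w = 0; w < 8; w++) {
			printf("window %d: active_windows=%u -> %s load\n",
			       w, (unsigned)p.active_windows,
			       is_new_task(&p) ? "new-task" : "regular");
			if (p.active_windows < ACTIVE_WINDOWS_MAX)
				p.active_windows++;	/* window rollover */
		}
		return 0;
	}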