Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0e2092e4 authored by Syed Rameez Mustafa's avatar Syed Rameez Mustafa Committed by Matt Wagantall
Browse files

sched: Use only partial wait time as task demand



The scheduler currently either considers a task's entire wait time as
task demand or completely ignores wait time, based on the tunable
sched_account_wait_time. Both approaches have their limitations,
however. The former artificially boosts a task's demand when it may not
actually be justified. With the latter, the scheduler runs the risk
of never being able to recognize true load (consider two CPU hogs on
a single little CPU). To achieve a compromise between these two
extremes, change the load tracking algorithm to only consider part of
a task's wait time as its demand. The portion of wait time accounted
as demand is determined by each task's percent load; i.e. for a task
that waits for 10 ms and has 60% task load, only 6 ms of the wait will
contribute to task demand. This approach is more fair, as the scheduler
now tries to determine how much of its wait time a task would actually
have been using the CPU if it had been executing. It ensures that tasks
with high demand continue to see most of the benefits of accounting
wait time as busy time; however, lower-demand tasks don't experience a
disproportionately high boost to demand triggering unjustified big-CPU
usage. Note that this new approach is only applicable to wait time
being considered as task demand, and not wait time considered as CPU
busy time.

To achieve the above effect, ensure that any time a task is waiting, its
runtime in every relevant window segment is appropriately adjusted using
its percent load.

Change-Id: I6a698d6cb1adeca49113c3499029b422daf7871f
Signed-off-by: default avatarSyed Rameez Mustafa <rameezmustafa@codeaurora.org>
parent 0fe2d4b0
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -1332,8 +1332,17 @@ Appears at: /proc/sys/kernel/sched_account_wait_time

Default value: 1

This controls whether a task's wait time is accounted as its demand for cpu
This controls whether a tasks wait time is accounted as its demand for cpu
and thus the values found in its sum, sum_history[] and demand attributes.
The load tracking algorithm only considers part of a task's wait time as
its demand. The portion of wait time accounted as demand is determined
by each task's percent load; i.e. for a task that waits for 10 ms and
has 60% task load, only 6 ms of the wait will contribute to task demand.
This approach is fair, as the scheduler tries to determine how much of
its wait time a task would actually have been using the CPU if it had
been executing. It ensures that tasks with high demand continue to see
most of the benefits of accounting wait time as busy time; however,
lower-demand tasks don't experience a disproportionately high boost to
demand.

*** 7.16 sched_freq_account_wait_time

+26 −10
Original line number Diff line number Diff line
@@ -1633,19 +1633,23 @@ static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq,

static int account_busy_for_task_demand(struct task_struct *p, int event)
{
	/*
	 * No need to bother updating task demand for exiting tasks
	 * or the idle task.
	 */
	if (exiting_task(p) || is_idle_task(p))
		return 0;

	/*
	 * When a task is waking up it is completing a segment of non-busy
	 * time. Likewise, if wait time is not treated as busy time, then
	 * when a task begins to run or is migrated, it is not running and
	 * is completing a segment of non-busy time.
	 */
	if (event == TASK_WAKE || (!sched_account_wait_time &&
		(event == PICK_NEXT_TASK || event == TASK_MIGRATE)))
		return 0;

	/*
	 * We are left with TASK_UPDATE, IRQ_UPDATE, PUT_PREV_TASK and
	 * wait time being accounted as busy time.
	 */
	return 1;
}

@@ -1717,6 +1721,15 @@ static void add_to_task_demand(struct rq *rq, struct task_struct *p,
		p->ravg.sum = sched_ravg_window;
}

static u64 wait_adjust(struct task_struct *p, u64 delta, int event)
{
	/*
	 * PICK_NEXT_TASK and TASK_MIGRATE close out a segment spent
	 * waiting while wait time counts as busy time; scale that wait
	 * by the task's percent load so only a proportional share of it
	 * is accounted as demand. All other events pass through as-is.
	 */
	if (event != PICK_NEXT_TASK && event != TASK_MIGRATE)
		return delta;

	return div64_u64(delta * task_load(p), max_task_load());
}

/*
 * Account cpu demand of task and/or update task's cpu demand history
 *
@@ -1791,7 +1804,8 @@ static void update_task_demand(struct task_struct *p, struct rq *rq,
	if (!new_window) {
		/* The simple case - busy time contained within the existing
		 * window. */
		add_to_task_demand(rq, p, wallclock - mark_start);
		add_to_task_demand(rq, p, wait_adjust(p,
				wallclock - mark_start, event));
		return;
	}

@@ -1802,13 +1816,14 @@ static void update_task_demand(struct task_struct *p, struct rq *rq,
	window_start -= (u64)nr_full_windows * (u64)window_size;

	/* Process (window_start - mark_start) first */
	add_to_task_demand(rq, p, window_start - mark_start);
	add_to_task_demand(rq, p,
		wait_adjust(p, window_start - mark_start, event));

	/* Push new sample(s) into task's demand history */
	update_history(rq, p, p->ravg.sum, 1, event);
	if (nr_full_windows)
		update_history(rq, p, scale_exec_time(window_size, rq),
			       nr_full_windows, event);
		update_history(rq, p, scale_exec_time(wait_adjust(p,
		window_size, event), rq), nr_full_windows, event);

	/* Roll window_start back to current to process any remainder
	 * in current window. */
@@ -1816,7 +1831,8 @@ static void update_task_demand(struct task_struct *p, struct rq *rq,

	/* Process (wallclock - window_start) next */
	mark_start = window_start;
	add_to_task_demand(rq, p, wallclock - mark_start);
	add_to_task_demand(rq, p,
		wait_adjust(p, wallclock - mark_start, event));
}

/* Reflect task activity on its demand and cpu's busy time statistics */
+1 −1
Original line number Diff line number Diff line
@@ -2449,7 +2449,7 @@ unsigned int __read_mostly sysctl_sched_init_task_load_pct = 15;
unsigned int __read_mostly sysctl_sched_min_runtime = 0; /* 0 ms */
u64 __read_mostly sched_min_runtime = 0; /* 0 ms */

static inline unsigned int task_load(struct task_struct *p)
unsigned int task_load(struct task_struct *p)
{
	if (sched_use_pelt)
		return p->se.avg.runnable_avg_sum_scaled;
+1 −0
Original line number Diff line number Diff line
@@ -912,6 +912,7 @@ extern u64 scale_load_to_cpu(u64 load, int cpu);
extern unsigned int sched_heavy_task;
extern void reset_cpu_hmp_stats(int cpu, int reset_cra);
extern void fixup_nr_big_small_task(int cpu, int reset_stats);
extern unsigned int task_load(struct task_struct *p);
extern unsigned int max_task_load(void);
extern void sched_account_irqtime(int cpu, struct task_struct *curr,
				 u64 delta, u64 wallclock);