Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d0f48d93 authored by Linux Build Service Account, committed by Gerrit - the friendly Code Review server
Browse files

Merge "sched: Window-based load stat improvements"

parents f8fbd20d e407865c
Loading
Loading
Loading
Loading
+1 −5
Original line number Diff line number Diff line
@@ -1001,11 +1001,6 @@ struct ravg {

struct sched_entity {
	struct load_weight	load;		/* for load-balancing */
	/*
	 * Todo : Move ravg to 'struct task_struct', as this is common for both
	 * real-time and non-realtime tasks
	 */
	struct ravg		ravg;
	struct rb_node		run_node;
	struct list_head	group_node;
	unsigned int		on_rq;
@@ -1084,6 +1079,7 @@ struct task_struct {
	const struct sched_class *sched_class;
	struct sched_entity se;
	struct sched_rt_entity rt;
	struct ravg ravg;
#ifdef CONFIG_CGROUP_SCHED
	struct task_group *sched_task_group;
#endif
+0 −1
Original line number Diff line number Diff line
@@ -36,7 +36,6 @@ extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_wake_to_idle;
extern unsigned int sysctl_sched_ravg_window;
extern unsigned int sysctl_sched_wakeup_load_threshold;

enum sched_tunable_scaling {
+59 −57
Original line number Diff line number Diff line
@@ -784,7 +784,7 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
	sched_info_queued(p);
	p->sched_class->enqueue_task(rq, p, flags);
	trace_sched_enq_deq_task(p, 1);
	rq->cumulative_runnable_avg += p->se.ravg.demand;
	inc_cumulative_runnable_avg(rq, p);
}

static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -793,8 +793,7 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
	sched_info_dequeued(p);
	p->sched_class->dequeue_task(rq, p, flags);
	trace_sched_enq_deq_task(p, 0);
	rq->cumulative_runnable_avg -= p->se.ravg.demand;
	BUG_ON((s64)rq->cumulative_runnable_avg < 0);
	dec_cumulative_runnable_avg(rq, p);
}

void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1346,6 +1345,15 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
		wq_worker_waking_up(p, cpu_of(rq));
}

/* Window size (in ns) */
__read_mostly unsigned int sched_ravg_window = 10000000;

/* Min window size (in ns) = 10ms */
__read_mostly unsigned int min_sched_ravg_window = 10000000;

/* Max window size (in ns) = 1s */
__read_mostly unsigned int max_sched_ravg_window = 1000000000;

/*
 * Called when new window is starting for a task, to record cpu usage over
 * recently concluded window(s). Normally 'samples' should be 1. It can be > 1
@@ -1355,9 +1363,9 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
static inline void
update_history(struct rq *rq, struct task_struct *p, u32 runtime, int samples)
{
	u32 *hist = &p->se.ravg.sum_history[0];
	u32 *hist = &p->ravg.sum_history[0];
	int ridx, widx;
	u32 max = 0;
	u32 sum = 0, avg;

	/* Ignore windows where task had no activity */
	if (!runtime)
@@ -1368,86 +1376,96 @@ update_history(struct rq *rq, struct task_struct *p, u32 runtime, int samples)
	ridx = widx - samples;
	for (; ridx >= 0; --widx, --ridx) {
		hist[widx] = hist[ridx];
		if  (hist[widx] > max)
			max = hist[widx];
		sum += hist[widx];
	}

	for (widx = 0; widx < samples && widx < RAVG_HIST_SIZE; widx++) {
		hist[widx] = runtime;
		if  (hist[widx] > max)
			max = hist[widx];
		sum += hist[widx];
	}

	p->se.ravg.sum = 0;
	p->ravg.sum = 0;
	if (p->on_rq) {
		rq->cumulative_runnable_avg -= p->se.ravg.demand;
		rq->cumulative_runnable_avg -= p->ravg.demand;
		BUG_ON((s64)rq->cumulative_runnable_avg < 0);
	}
	/*
	 * Maximum demand seen over previous RAVG_HIST_SIZE windows drives
	 * frequency demand for a task. Record maximum in 'demand' attribute.
	 */
	p->se.ravg.demand = max;

	avg = sum / RAVG_HIST_SIZE;

	p->ravg.demand = max(avg, runtime);

	if (p->on_rq)
		rq->cumulative_runnable_avg += p->se.ravg.demand;
		rq->cumulative_runnable_avg += p->ravg.demand;
}

/* Window size (in ns) */
__read_mostly unsigned int sysctl_sched_ravg_window = 50000000;
/*
 * Parse the "sched_ravg_window=" early boot parameter into
 * sched_ravg_window (window size, in ns).
 *
 * NOTE(review): the parsed value is not range-checked here.
 * update_task_ravg() bails out when the window is below
 * min_sched_ravg_window, so an undersized boot value effectively
 * disables window-based load stats rather than corrupting them --
 * confirm that silently disabling (instead of clamping) is intended.
 */
static int __init set_sched_ravg_window(char *str)
{
	get_option(&str, &sched_ravg_window);

	return 0;
}

early_param("sched_ravg_window", set_sched_ravg_window);

void update_task_ravg(struct task_struct *p, struct rq *rq, int update_sum)
{
	u32 window_size = sysctl_sched_ravg_window;
	u32 window_size = sched_ravg_window;
	int new_window;
	u64 wallclock = sched_clock();

	if (sched_ravg_window < min_sched_ravg_window)
		return;

	do {
		s64 delta = 0;
		int n;
		u64 now = wallclock;

		new_window = 0;
		delta = now - p->se.ravg.window_start;
		delta = now - p->ravg.window_start;
		BUG_ON(delta < 0);
		if (delta > window_size) {
			p->se.ravg.window_start += window_size;
			now = p->se.ravg.window_start;
			p->ravg.window_start += window_size;
			now = p->ravg.window_start;
			new_window = 1;
		}

		if (update_sum) {
			delta = now - p->se.ravg.mark_start;
			unsigned int cur_freq = rq->cur_freq;

			delta = now - p->ravg.mark_start;
			BUG_ON(delta < 0);

			if (likely(rq->cur_freq &&
					rq->cur_freq <= max_possible_freq))
				delta = div64_u64(delta  * rq->cur_freq,
			if (unlikely(cur_freq > max_possible_freq))
				cur_freq = max_possible_freq;

			delta = div64_u64(delta  * cur_freq,
							max_possible_freq);
			p->se.ravg.sum += delta;
			WARN_ON(p->se.ravg.sum > window_size);
			p->ravg.sum += delta;
			WARN_ON(p->ravg.sum > window_size);
		}

		if (!new_window)
			break;

		update_history(rq, p, p->se.ravg.sum, 1);
		update_history(rq, p, p->ravg.sum, 1);

		delta = wallclock - p->se.ravg.window_start;
		delta = wallclock - p->ravg.window_start;
		BUG_ON(delta < 0);
		n = div64_u64(delta, window_size);
		if (n) {
			if (!update_sum)
				p->se.ravg.window_start = wallclock;
				p->ravg.window_start = wallclock;
			else
				p->se.ravg.window_start += n * window_size;
			BUG_ON(p->se.ravg.window_start > wallclock);
				p->ravg.window_start += n * window_size;
			BUG_ON(p->ravg.window_start > wallclock);
			if (update_sum)
				update_history(rq, p, window_size, n);
		}
		p->se.ravg.mark_start =  p->se.ravg.window_start;
		p->ravg.mark_start =  p->ravg.window_start;
	} while (new_window);

	p->se.ravg.mark_start = wallclock;
	p->ravg.mark_start = wallclock;
}

/*
@@ -1670,11 +1688,8 @@ out:

		mnd.src_cpu = src_cpu;
		mnd.dest_cpu = cpu;
		if (sysctl_sched_ravg_window)
			mnd.load = div64_u64((u64)p->se.ravg.demand * 100,
				(u64)(sysctl_sched_ravg_window));
		else
			mnd.load = 0;
		mnd.load = pct_task_load(p);

		/*
		 * Call the migration notifier with mnd for foreground task
		 * migrations as well as for wakeups if their load is above
@@ -1760,8 +1775,6 @@ int wake_up_state(struct task_struct *p, unsigned int state)
 */
static void __sched_fork(struct task_struct *p)
{
	int i;

	p->on_rq			= 0;

	p->se.on_rq			= 0;
@@ -1770,12 +1783,7 @@ static void __sched_fork(struct task_struct *p)
	p->se.prev_sum_exec_runtime	= 0;
	p->se.nr_migrations		= 0;
	p->se.vruntime			= 0;
	p->se.ravg.sum			= 0;
	p->se.ravg.demand		= 0;
	p->se.ravg.window_start		= 0;
	p->se.ravg.mark_start		= 0;
	for (i = 0; i < RAVG_HIST_SIZE; ++i)
		p->se.ravg.sum_history[i] = 0;
	init_new_task_load(p);

	INIT_LIST_HEAD(&p->se.group_node);

@@ -1920,7 +1928,6 @@ void wake_up_new_task(struct task_struct *p)
{
	unsigned long flags;
	struct rq *rq;
	u64 wallclock = sched_clock();

	raw_spin_lock_irqsave(&p->pi_lock, flags);
#ifdef CONFIG_SMP
@@ -1934,8 +1941,6 @@ void wake_up_new_task(struct task_struct *p)

	rq = __task_rq_lock(p);
	activate_task(rq, p, 0);
	p->se.ravg.window_start	= wallclock;
	p->se.ravg.mark_start	= wallclock;
	p->on_rq = 1;
	trace_sched_wakeup_new(p, true);
	check_preempt_curr(rq, p, WF_FORK);
@@ -5074,11 +5079,7 @@ fail:

		mnd.src_cpu = src_cpu;
		mnd.dest_cpu = dest_cpu;
		if (sysctl_sched_ravg_window)
			mnd.load = div64_u64((u64)p->se.ravg.demand * 100,
				(u64)(sysctl_sched_ravg_window));
		else
			mnd.load = 0;
		mnd.load = pct_task_load(p);
		atomic_notifier_call_chain(&migration_notifier_head,
					   0, (void *)&mnd);
	}
@@ -7127,6 +7128,7 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,
	if (val != CPUFREQ_POSTCHANGE)
		return 0;

	BUG_ON(!new_freq);
	cpu_rq(cpu)->cur_freq = new_freq;

	return 0;
+55 −18
Original line number Diff line number Diff line
@@ -1125,6 +1125,39 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
}
#endif /* CONFIG_FAIR_GROUP_SCHED */

/*
 * Per-task demand as computed by the window-based load tracker
 * (maintained in update_history() from recent per-window run sums).
 */
static inline unsigned int task_load(struct task_struct *p)
{
	return p->ravg.demand;
}

/*
 * Upper bound on task_load(): demand is derived from per-window run
 * sums, each of which is bounded by one sched_ravg_window (the
 * tracker WARNs if a window sum ever exceeds the window size).
 */
static inline unsigned int max_task_load(void)
{
	return sched_ravg_window;
}

/*
 * Return task demand on a percentage scale, i.e. demand relative to
 * the maximum possible demand of one full sched_ravg_window.
 *
 * Guard against a zero window size: sched_ravg_window is settable via
 * the "sched_ravg_window=" boot parameter without validation, and the
 * open-coded callers this helper replaced all special-cased a zero
 * window by reporting a load of 0 instead of dividing by zero.
 */
unsigned int pct_task_load(struct task_struct *p)
{
	unsigned int max = max_task_load();

	if (unlikely(!max))
		return 0;

	return div64_u64((u64)task_load(p) * 100, (u64)max);
}

/*
 * Initialize the window-based load tracking state of a newly forked
 * task: zero all accumulated demand and history, and anchor the first
 * window at the current sched_clock() timestamp.
 */
void init_new_task_load(struct task_struct *p)
{
	u64 now = sched_clock();
	int idx;

	p->ravg.window_start	= now;
	p->ravg.mark_start	= now;
	p->ravg.sum		= 0;
	p->ravg.demand		= 0;

	for (idx = 0; idx < RAVG_HIST_SIZE; idx++)
		p->ravg.sum_history[idx] = 0;
}

/* Only depends on SMP, FAIR_GROUP_SCHED may be removed when useful in lb */
#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
/*
@@ -3882,6 +3915,7 @@ struct lb_env {
};

static DEFINE_PER_CPU(bool, dbs_boost_needed);
static DEFINE_PER_CPU(int, dbs_boost_load_moved);

/*
 * move_task - move a task from one runqueue to another runqueue.
@@ -4011,7 +4045,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 *
 * Called with both runqueues locked.
 */
static int move_one_task(struct lb_env *env, int *total_run_moved)
static int move_one_task(struct lb_env *env)
{
	struct task_struct *p, *n;

@@ -4026,9 +4060,7 @@ static int move_one_task(struct lb_env *env, int *total_run_moved)
		 * stats here rather than inside move_task().
		 */
		schedstat_inc(env->sd, lb_gained[env->idle]);
		if (sysctl_sched_ravg_window)
			*total_run_moved += div64_u64((u64)p->se.ravg.demand *
					100, (u64)(sysctl_sched_ravg_window));
		per_cpu(dbs_boost_load_moved, env->dst_cpu) += pct_task_load(p);

		return 1;
	}
@@ -4046,7 +4078,7 @@ static const unsigned int sched_nr_migrate_break = 32;
 *
 * Called with both runqueues locked.
 */
static int move_tasks(struct lb_env *env, int *total_run_moved)
static int move_tasks(struct lb_env *env)
{
	struct list_head *tasks = &env->src_rq->cfs_tasks;
	struct task_struct *p;
@@ -4085,9 +4117,7 @@ static int move_tasks(struct lb_env *env, int *total_run_moved)
		move_task(p, env);
		pulled++;
		env->imbalance -= load;
		if (sysctl_sched_ravg_window)
			*total_run_moved += div64_u64((u64)p->se.ravg.demand *
					100, (u64)(sysctl_sched_ravg_window));
		per_cpu(dbs_boost_load_moved, env->dst_cpu) += pct_task_load(p);

#ifdef CONFIG_PREEMPT
		/*
@@ -5060,7 +5090,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
			int *balance)
{
	int ld_moved, cur_ld_moved, active_balance = 0;
	int total_run_moved = 0;
	struct sched_group *group;
	struct rq *busiest = NULL;
	unsigned long flags;
@@ -5085,6 +5114,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,

	cpumask_copy(cpus, cpu_active_mask);

	per_cpu(dbs_boost_load_moved, this_cpu) = 0;
	schedstat_inc(sd, lb_count[idle]);

redo:
@@ -5130,7 +5160,7 @@ more_balance:
		 * cur_ld_moved - load moved in current iteration
		 * ld_moved     - cumulative load moved across iterations
		 */
		cur_ld_moved = move_tasks(&env, &total_run_moved);
		cur_ld_moved = move_tasks(&env);
		ld_moved += cur_ld_moved;
		double_rq_unlock(env.dst_rq, busiest);
		local_irq_restore(flags);
@@ -5250,13 +5280,16 @@ more_balance:
		if (per_cpu(dbs_boost_needed, this_cpu)) {
			struct migration_notify_data mnd;

			per_cpu(dbs_boost_needed, this_cpu) = false;

			mnd.src_cpu = cpu_of(busiest);
			mnd.dest_cpu = this_cpu;
			mnd.load = total_run_moved;
			mnd.load = per_cpu(dbs_boost_load_moved, this_cpu);
			if (mnd.load > 100)
				mnd.load = 100;
			atomic_notifier_call_chain(&migration_notifier_head,
						   0, (void *)&mnd);
			per_cpu(dbs_boost_needed, this_cpu) = false;
			per_cpu(dbs_boost_load_moved, this_cpu) = 0;

		}
	}
	if (likely(!active_balance)) {
@@ -5364,12 +5397,13 @@ static int active_load_balance_cpu_stop(void *data)
	struct rq *busiest_rq = data;
	int busiest_cpu = cpu_of(busiest_rq);
	int target_cpu = busiest_rq->push_cpu;
	int total_run_moved = 0;
	struct rq *target_rq = cpu_rq(target_cpu);
	struct sched_domain *sd;

	raw_spin_lock_irq(&busiest_rq->lock);

	per_cpu(dbs_boost_load_moved, target_cpu) = 0;

	/* make sure the requested cpu hasn't gone down in the meantime */
	if (unlikely(busiest_cpu != smp_processor_id() ||
		     !busiest_rq->active_balance))
@@ -5409,7 +5443,7 @@ static int active_load_balance_cpu_stop(void *data)

		schedstat_inc(sd, alb_count);

		if (move_one_task(&env, &total_run_moved))
		if (move_one_task(&env))
			schedstat_inc(sd, alb_pushed);
		else
			schedstat_inc(sd, alb_failed);
@@ -5422,13 +5456,16 @@ out_unlock:
	if (per_cpu(dbs_boost_needed, target_cpu)) {
		struct migration_notify_data mnd;

		per_cpu(dbs_boost_needed, target_cpu) = false;

		mnd.src_cpu = cpu_of(busiest_rq);
		mnd.dest_cpu = target_cpu;
		mnd.load = total_run_moved;
		mnd.load = per_cpu(dbs_boost_load_moved, target_cpu);
		if (mnd.load > 100)
			mnd.load = 100;
		atomic_notifier_call_chain(&migration_notifier_head,
					   0, (void *)&mnd);

		per_cpu(dbs_boost_needed, target_cpu) = false;
		per_cpu(dbs_boost_load_moved, target_cpu) = 0;
	}
	return 0;
}
+17 −1
Original line number Diff line number Diff line
@@ -12,7 +12,6 @@

extern __read_mostly int scheduler_running;

extern unsigned int sysctl_sched_ravg_window;
/*
 * Convert user-nice values [ -20 ... 0 ... 19 ]
 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -653,6 +652,23 @@ extern int group_balance_cpu(struct sched_group *sg);
#include "stats.h"
#include "auto_group.h"

extern unsigned int sched_ravg_window;
extern unsigned int pct_task_load(struct task_struct *p);
extern void init_new_task_load(struct task_struct *p);

/*
 * Account a task's windowed demand into the runqueue-wide runnable
 * average.  Called from enqueue_task(); must be paired with
 * dec_cumulative_runnable_avg() on dequeue.
 */
static inline void
inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
{
	rq->cumulative_runnable_avg += p->ravg.demand;
}

/*
 * Remove a task's windowed demand from the runqueue-wide runnable
 * average.  Called from dequeue_task(); pairs with
 * inc_cumulative_runnable_avg().  The BUG_ON catches accounting
 * imbalance: the aggregate must never go negative.
 */
static inline void
dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
{
	rq->cumulative_runnable_avg -= p->ravg.demand;
	BUG_ON((s64)rq->cumulative_runnable_avg < 0);
}

#ifdef CONFIG_CGROUP_SCHED

/*
Loading