Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d0f48d93 authored by Linux Build Service Account, committed by Gerrit - the friendly Code Review server
Browse files

Merge "sched: Window-based load stat improvements"

parents f8fbd20d e407865c
Loading
Loading
Loading
Loading
+1 −5
Original line number Diff line number Diff line
@@ -1001,11 +1001,6 @@ struct ravg {

struct sched_entity {
	struct load_weight	load;		/* for load-balancing */
	/*
	 * Todo : Move ravg to 'struct task_struct', as this is common for both
	 * real-time and non-realtime tasks
	 */
	struct ravg		ravg;
	struct rb_node		run_node;
	struct list_head	group_node;
	unsigned int		on_rq;
@@ -1084,6 +1079,7 @@ struct task_struct {
	const struct sched_class *sched_class;
	struct sched_entity se;
	struct sched_rt_entity rt;
	struct ravg ravg;
#ifdef CONFIG_CGROUP_SCHED
	struct task_group *sched_task_group;
#endif
+0 −1
Original line number Diff line number Diff line
@@ -36,7 +36,6 @@ extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_wake_to_idle;
extern unsigned int sysctl_sched_ravg_window;
extern unsigned int sysctl_sched_wakeup_load_threshold;

enum sched_tunable_scaling {
+59 −57
Original line number Diff line number Diff line
@@ -784,7 +784,7 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
	sched_info_queued(p);
	p->sched_class->enqueue_task(rq, p, flags);
	trace_sched_enq_deq_task(p, 1);
	rq->cumulative_runnable_avg += p->se.ravg.demand;
	inc_cumulative_runnable_avg(rq, p);
}

static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -793,8 +793,7 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
	sched_info_dequeued(p);
	p->sched_class->dequeue_task(rq, p, flags);
	trace_sched_enq_deq_task(p, 0);
	rq->cumulative_runnable_avg -= p->se.ravg.demand;
	BUG_ON((s64)rq->cumulative_runnable_avg < 0);
	dec_cumulative_runnable_avg(rq, p);
}

void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1346,6 +1345,15 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
		wq_worker_waking_up(p, cpu_of(rq));
}

/* Window size (in ns) */
__read_mostly unsigned int sched_ravg_window = 10000000;

/* Min window size (in ns) = 10ms */
__read_mostly unsigned int min_sched_ravg_window = 10000000;

/* Max window size (in ns) = 1s */
__read_mostly unsigned int max_sched_ravg_window = 1000000000;

/*
 * Called when new window is starting for a task, to record cpu usage over
 * recently concluded window(s). Normally 'samples' should be 1. It can be > 1
@@ -1355,9 +1363,9 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
static inline void
update_history(struct rq *rq, struct task_struct *p, u32 runtime, int samples)
{
	u32 *hist = &p->se.ravg.sum_history[0];
	u32 *hist = &p->ravg.sum_history[0];
	int ridx, widx;
	u32 max = 0;
	u32 sum = 0, avg;

	/* Ignore windows where task had no activity */
	if (!runtime)
@@ -1368,86 +1376,96 @@ update_history(struct rq *rq, struct task_struct *p, u32 runtime, int samples)
	ridx = widx - samples;
	for (; ridx >= 0; --widx, --ridx) {
		hist[widx] = hist[ridx];
		if  (hist[widx] > max)
			max = hist[widx];
		sum += hist[widx];
	}

	for (widx = 0; widx < samples && widx < RAVG_HIST_SIZE; widx++) {
		hist[widx] = runtime;
		if  (hist[widx] > max)
			max = hist[widx];
		sum += hist[widx];
	}

	p->se.ravg.sum = 0;
	p->ravg.sum = 0;
	if (p->on_rq) {
		rq->cumulative_runnable_avg -= p->se.ravg.demand;
		rq->cumulative_runnable_avg -= p->ravg.demand;
		BUG_ON((s64)rq->cumulative_runnable_avg < 0);
	}
	/*
	 * Maximum demand seen over previous RAVG_HIST_SIZE windows drives
	 * frequency demand for a task. Record maximum in 'demand' attribute.
	 */
	p->se.ravg.demand = max;

	avg = sum / RAVG_HIST_SIZE;

	p->ravg.demand = max(avg, runtime);

	if (p->on_rq)
		rq->cumulative_runnable_avg += p->se.ravg.demand;
		rq->cumulative_runnable_avg += p->ravg.demand;
}

/* Window size (in ns) */
__read_mostly unsigned int sysctl_sched_ravg_window = 50000000;
/*
 * Parse the "sched_ravg_window=" early boot parameter into
 * sched_ravg_window (window size, in ns).
 *
 * NOTE(review): the parsed value is not range-checked here.
 * update_task_ravg() bails out when the window is below
 * min_sched_ravg_window, so an undersized boot value effectively
 * disables window-based load stats rather than corrupting them --
 * confirm that silently disabling (instead of clamping) is intended.
 */
static int __init set_sched_ravg_window(char *str)
{
	get_option(&str, &sched_ravg_window);

	return 0;
}

early_param("sched_ravg_window", set_sched_ravg_window);

void update_task_ravg(struct task_struct *p, struct rq *rq, int update_sum)
{
	u32 window_size = sysctl_sched_ravg_window;
	u32 window_size = sched_ravg_window;
	int new_window;
	u64 wallclock = sched_clock();

	if (sched_ravg_window < min_sched_ravg_window)
		return;

	do {
		s64 delta = 0;
		int n;
		u64 now = wallclock;

		new_window = 0;
		delta = now - p->se.ravg.window_start;
		delta = now - p->ravg.window_start;
		BUG_ON(delta < 0);
		if (delta > window_size) {
			p->se.ravg.window_start += window_size;
			now = p->se.ravg.window_start;
			p->ravg.window_start += window_size;
			now = p->ravg.window_start;
			new_window = 1;
		}

		if (update_sum) {
			delta = now - p->se.ravg.mark_start;
			unsigned int cur_freq = rq->cur_freq;

			delta = now - p->ravg.mark_start;
			BUG_ON(delta < 0);

			if (likely(rq->cur_freq &&
					rq->cur_freq <= max_possible_freq))
				delta = div64_u64(delta  * rq->cur_freq,
			if (unlikely(cur_freq > max_possible_freq))
				cur_freq = max_possible_freq;

			delta = div64_u64(delta  * cur_freq,
							max_possible_freq);
			p->se.ravg.sum += delta;
			WARN_ON(p->se.ravg.sum > window_size);
			p->ravg.sum += delta;
			WARN_ON(p->ravg.sum > window_size);
		}

		if (!new_window)
			break;

		update_history(rq, p, p->se.ravg.sum, 1);
		update_history(rq, p, p->ravg.sum, 1);

		delta = wallclock - p->se.ravg.window_start;
		delta = wallclock - p->ravg.window_start;
		BUG_ON(delta < 0);
		n = div64_u64(delta, window_size);
		if (n) {
			if (!update_sum)
				p->se.ravg.window_start = wallclock;
				p->ravg.window_start = wallclock;
			else
				p->se.ravg.window_start += n * window_size;
			BUG_ON(p->se.ravg.window_start > wallclock);
				p->ravg.window_start += n * window_size;
			BUG_ON(p->ravg.window_start > wallclock);
			if (update_sum)
				update_history(rq, p, window_size, n);
		}
		p->se.ravg.mark_start =  p->se.ravg.window_start;
		p->ravg.mark_start =  p->ravg.window_start;
	} while (new_window);

	p->se.ravg.mark_start = wallclock;
	p->ravg.mark_start = wallclock;
}

/*
@@ -1670,11 +1688,8 @@ out:

		mnd.src_cpu = src_cpu;
		mnd.dest_cpu = cpu;
		if (sysctl_sched_ravg_window)
			mnd.load = div64_u64((u64)p->se.ravg.demand * 100,
				(u64)(sysctl_sched_ravg_window));
		else
			mnd.load = 0;
		mnd.load = pct_task_load(p);

		/*
		 * Call the migration notifier with mnd for foreground task
		 * migrations as well as for wakeups if their load is above
@@ -1760,8 +1775,6 @@ int wake_up_state(struct task_struct *p, unsigned int state)
 */
static void __sched_fork(struct task_struct *p)
{
	int i;

	p->on_rq			= 0;

	p->se.on_rq			= 0;
@@ -1770,12 +1783,7 @@ static void __sched_fork(struct task_struct *p)
	p->se.prev_sum_exec_runtime	= 0;
	p->se.nr_migrations		= 0;
	p->se.vruntime			= 0;
	p->se.ravg.sum			= 0;
	p->se.ravg.demand		= 0;
	p->se.ravg.window_start		= 0;
	p->se.ravg.mark_start		= 0;
	for (i = 0; i < RAVG_HIST_SIZE; ++i)
		p->se.ravg.sum_history[i] = 0;
	init_new_task_load(p);

	INIT_LIST_HEAD(&p->se.group_node);

@@ -1920,7 +1928,6 @@ void wake_up_new_task(struct task_struct *p)
{
	unsigned long flags;
	struct rq *rq;
	u64 wallclock = sched_clock();

	raw_spin_lock_irqsave(&p->pi_lock, flags);
#ifdef CONFIG_SMP
@@ -1934,8 +1941,6 @@ void wake_up_new_task(struct task_struct *p)

	rq = __task_rq_lock(p);
	activate_task(rq, p, 0);
	p->se.ravg.window_start	= wallclock;
	p->se.ravg.mark_start	= wallclock;
	p->on_rq = 1;
	trace_sched_wakeup_new(p, true);
	check_preempt_curr(rq, p, WF_FORK);
@@ -5074,11 +5079,7 @@ fail:

		mnd.src_cpu = src_cpu;
		mnd.dest_cpu = dest_cpu;
		if (sysctl_sched_ravg_window)
			mnd.load = div64_u64((u64)p->se.ravg.demand * 100,
				(u64)(sysctl_sched_ravg_window));
		else
			mnd.load = 0;
		mnd.load = pct_task_load(p);
		atomic_notifier_call_chain(&migration_notifier_head,
					   0, (void *)&mnd);
	}
@@ -7127,6 +7128,7 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,
	if (val != CPUFREQ_POSTCHANGE)
		return 0;

	BUG_ON(!new_freq);
	cpu_rq(cpu)->cur_freq = new_freq;

	return 0;
+55 −18
Original line number Diff line number Diff line
@@ -1125,6 +1125,39 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
}
#endif /* CONFIG_FAIR_GROUP_SCHED */

/*
 * Per-task demand as computed by the window-based load tracker
 * (maintained in update_history() from recent per-window run sums).
 */
static inline unsigned int task_load(struct task_struct *p)
{
	return p->ravg.demand;
}

/*
 * Upper bound on task_load(): demand is derived from per-window run
 * sums, each of which is bounded by one sched_ravg_window (the
 * tracker WARNs if a window sum ever exceeds the window size).
 */
static inline unsigned int max_task_load(void)
{
	return sched_ravg_window;
}

/*
 * Return task demand on a percentage scale, i.e. demand relative to
 * the maximum possible demand of one full sched_ravg_window.
 *
 * Guard against a zero window size: sched_ravg_window is settable via
 * the "sched_ravg_window=" boot parameter without validation, and the
 * open-coded callers this helper replaced all special-cased a zero
 * window by reporting a load of 0 instead of dividing by zero.
 */
unsigned int pct_task_load(struct task_struct *p)
{
	unsigned int max = max_task_load();

	if (unlikely(!max))
		return 0;

	return div64_u64((u64)task_load(p) * 100, (u64)max);
}

/*
 * Initialize the window-based load tracking state of a newly forked
 * task: zero all accumulated demand and history, and anchor the first
 * window at the current sched_clock() timestamp.
 */
void init_new_task_load(struct task_struct *p)
{
	u64 now = sched_clock();
	int idx;

	p->ravg.window_start	= now;
	p->ravg.mark_start	= now;
	p->ravg.sum		= 0;
	p->ravg.demand		= 0;

	for (idx = 0; idx < RAVG_HIST_SIZE; idx++)
		p->ravg.sum_history[idx] = 0;
}

/* Only depends on SMP, FAIR_GROUP_SCHED may be removed when useful in lb */
#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
/*
@@ -3882,6 +3915,7 @@ struct lb_env {
};

static DEFINE_PER_CPU(bool, dbs_boost_needed);
static DEFINE_PER_CPU(int, dbs_boost_load_moved);

/*
 * move_task - move a task from one runqueue to another runqueue.
@@ -4011,7 +4045,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 *
 * Called with both runqueues locked.
 */
static int move_one_task(struct lb_env *env, int *total_run_moved)
static int move_one_task(struct lb_env *env)
{
	struct task_struct *p, *n;

@@ -4026,9 +4060,7 @@ static int move_one_task(struct lb_env *env, int *total_run_moved)
		 * stats here rather than inside move_task().
		 */
		schedstat_inc(env->sd, lb_gained[env->idle]);
		if (sysctl_sched_ravg_window)
			*total_run_moved += div64_u64((u64)p->se.ravg.demand *
					100, (u64)(sysctl_sched_ravg_window));
		per_cpu(dbs_boost_load_moved, env->dst_cpu) += pct_task_load(p);

		return 1;
	}
@@ -4046,7 +4078,7 @@ static const unsigned int sched_nr_migrate_break = 32;
 *
 * Called with both runqueues locked.
 */
static int move_tasks(struct lb_env *env, int *total_run_moved)
static int move_tasks(struct lb_env *env)
{
	struct list_head *tasks = &env->src_rq->cfs_tasks;
	struct task_struct *p;
@@ -4085,9 +4117,7 @@ static int move_tasks(struct lb_env *env, int *total_run_moved)
		move_task(p, env);
		pulled++;
		env->imbalance -= load;
		if (sysctl_sched_ravg_window)
			*total_run_moved += div64_u64((u64)p->se.ravg.demand *
					100, (u64)(sysctl_sched_ravg_window));
		per_cpu(dbs_boost_load_moved, env->dst_cpu) += pct_task_load(p);

#ifdef CONFIG_PREEMPT
		/*
@@ -5060,7 +5090,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
			int *balance)
{
	int ld_moved, cur_ld_moved, active_balance = 0;
	int total_run_moved = 0;
	struct sched_group *group;
	struct rq *busiest = NULL;
	unsigned long flags;
@@ -5085,6 +5114,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,

	cpumask_copy(cpus, cpu_active_mask);

	per_cpu(dbs_boost_load_moved, this_cpu) = 0;
	schedstat_inc(sd, lb_count[idle]);

redo:
@@ -5130,7 +5160,7 @@ more_balance:
		 * cur_ld_moved - load moved in current iteration
		 * ld_moved     - cumulative load moved across iterations
		 */
		cur_ld_moved = move_tasks(&env, &total_run_moved);
		cur_ld_moved = move_tasks(&env);
		ld_moved += cur_ld_moved;
		double_rq_unlock(env.dst_rq, busiest);
		local_irq_restore(flags);
@@ -5250,13 +5280,16 @@ more_balance:
		if (per_cpu(dbs_boost_needed, this_cpu)) {
			struct migration_notify_data mnd;

			per_cpu(dbs_boost_needed, this_cpu) = false;

			mnd.src_cpu = cpu_of(busiest);
			mnd.dest_cpu = this_cpu;
			mnd.load = total_run_moved;
			mnd.load = per_cpu(dbs_boost_load_moved, this_cpu);
			if (mnd.load > 100)
				mnd.load = 100;
			atomic_notifier_call_chain(&migration_notifier_head,
						   0, (void *)&mnd);
			per_cpu(dbs_boost_needed, this_cpu) = false;
			per_cpu(dbs_boost_load_moved, this_cpu) = 0;

		}
	}
	if (likely(!active_balance)) {
@@ -5364,12 +5397,13 @@ static int active_load_balance_cpu_stop(void *data)
	struct rq *busiest_rq = data;
	int busiest_cpu = cpu_of(busiest_rq);
	int target_cpu = busiest_rq->push_cpu;
	int total_run_moved = 0;
	struct rq *target_rq = cpu_rq(target_cpu);
	struct sched_domain *sd;

	raw_spin_lock_irq(&busiest_rq->lock);

	per_cpu(dbs_boost_load_moved, target_cpu) = 0;

	/* make sure the requested cpu hasn't gone down in the meantime */
	if (unlikely(busiest_cpu != smp_processor_id() ||
		     !busiest_rq->active_balance))
@@ -5409,7 +5443,7 @@ static int active_load_balance_cpu_stop(void *data)

		schedstat_inc(sd, alb_count);

		if (move_one_task(&env, &total_run_moved))
		if (move_one_task(&env))
			schedstat_inc(sd, alb_pushed);
		else
			schedstat_inc(sd, alb_failed);
@@ -5422,13 +5456,16 @@ out_unlock:
	if (per_cpu(dbs_boost_needed, target_cpu)) {
		struct migration_notify_data mnd;

		per_cpu(dbs_boost_needed, target_cpu) = false;

		mnd.src_cpu = cpu_of(busiest_rq);
		mnd.dest_cpu = target_cpu;
		mnd.load = total_run_moved;
		mnd.load = per_cpu(dbs_boost_load_moved, target_cpu);
		if (mnd.load > 100)
			mnd.load = 100;
		atomic_notifier_call_chain(&migration_notifier_head,
					   0, (void *)&mnd);

		per_cpu(dbs_boost_needed, target_cpu) = false;
		per_cpu(dbs_boost_load_moved, target_cpu) = 0;
	}
	return 0;
}
+17 −1
Original line number Diff line number Diff line
@@ -12,7 +12,6 @@

extern __read_mostly int scheduler_running;

extern unsigned int sysctl_sched_ravg_window;
/*
 * Convert user-nice values [ -20 ... 0 ... 19 ]
 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -653,6 +652,23 @@ extern int group_balance_cpu(struct sched_group *sg);
#include "stats.h"
#include "auto_group.h"

extern unsigned int sched_ravg_window;
extern unsigned int pct_task_load(struct task_struct *p);
extern void init_new_task_load(struct task_struct *p);

/*
 * Account a task's windowed demand into the runqueue-wide runnable
 * average.  Called from enqueue_task(); must be paired with
 * dec_cumulative_runnable_avg() on dequeue.
 */
static inline void
inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
{
	rq->cumulative_runnable_avg += p->ravg.demand;
}

/*
 * Remove a task's windowed demand from the runqueue-wide runnable
 * average.  Called from dequeue_task(); pairs with
 * inc_cumulative_runnable_avg().  The BUG_ON catches accounting
 * imbalance: the aggregate must never go negative.
 */
static inline void
dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
{
	rq->cumulative_runnable_avg -= p->ravg.demand;
	BUG_ON((s64)rq->cumulative_runnable_avg < 0);
}

#ifdef CONFIG_CGROUP_SCHED

/*
Loading