
Commit 67ddb178 authored by Joonwoo Park

sched: account new task load so that governor can apply different policy



Account the amount of load contributed by new tasks within the CPU load, so
that the governor can apply a different policy when the CPU is loaded by new
tasks.

To be able to distinguish new-task load, a new tunable,
sched_new_task_windows, is also introduced. The tunable defines a task as
new when it has been active for fewer than the configured number of windows.

Change-Id: I2e2e62e4103882f7362154b792ab978b181b9f59
Suggested-by: Saravana Kannan <skannan@codeaurora.org>
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
parent d06e0f7b
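
For context, a minimal governor-side sketch of what this split enables. The
choose_target_freq() helper below is hypothetical and not part of this commit;
the diff that follows only plumbs struct sched_load through the interactive
governor's busy-time bookkeeping (it consumes sl->prev_load and does not yet
act on new_task_load):

	/*
	 * Hypothetical consumer of sched_get_cpus_busy() (illustration only):
	 * treat load from tasks younger than sched_new_task_windows windows
	 * more conservatively, since their demand history is not yet
	 * trustworthy.
	 */
	static unsigned int choose_target_freq(struct cpufreq_policy *policy,
					       struct sched_load *sl)
	{
		/*
		 * new_task_load is the portion of prev_load contributed by
		 * new tasks, so this subtraction cannot underflow.
		 */
		unsigned long established = sl->prev_load - sl->new_task_load;

		if (sl->new_task_load > established)
			return policy->cur;	/* hold: load is mostly new tasks */

		return policy->max;		/* normal ramp-up path */
	}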
+12 −11
@@ -52,7 +52,7 @@ struct cpufreq_interactive_policyinfo {
	bool reject_notification;
	int governor_enabled;
	struct cpufreq_interactive_tunables *cached_tunables;
-	unsigned long *cpu_busy_times;
+	struct sched_load *sl;
};

/* Protected by per-policy load_lock */
@@ -447,6 +447,7 @@ static void __cpufreq_interactive_timer(unsigned long data, bool is_notif)
	unsigned long flags;
	unsigned long max_cpu;
	int i, fcpu;
+	struct sched_load *sl;
	struct cpufreq_govinfo govinfo;
	bool skip_hispeed_logic, skip_min_sample_time;
	bool policy_max_fast_restore = false;
@@ -462,14 +463,14 @@ static void __cpufreq_interactive_timer(unsigned long data, bool is_notif)
	ppol->last_evaluated_jiffy = get_jiffies_64();

	if (tunables->use_sched_load)
-		sched_get_cpus_busy(ppol->cpu_busy_times,
-				    ppol->policy->related_cpus);
+		sched_get_cpus_busy(ppol->sl, ppol->policy->related_cpus);
	max_cpu = cpumask_first(ppol->policy->cpus);
	for_each_cpu(i, ppol->policy->cpus) {
		pcpu = &per_cpu(cpuinfo, i);
+		sl = &ppol->sl[i - fcpu];
		if (tunables->use_sched_load) {
-			cputime_speedadj = (u64)ppol->cpu_busy_times[i - fcpu]
-					* ppol->policy->cpuinfo.max_freq;
+			cputime_speedadj = (u64)sl->prev_load *
+					   ppol->policy->cpuinfo.max_freq;
			do_div(cputime_speedadj, tunables->timer_rate);
		} else {
			now = update_load(i);
@@ -1503,7 +1504,7 @@ static struct cpufreq_interactive_policyinfo *get_policyinfo(
	struct cpufreq_interactive_policyinfo *ppol =
				per_cpu(polinfo, policy->cpu);
	int i;
-	unsigned long *busy;
+	struct sched_load *sl;

	/* polinfo already allocated for policy, return */
	if (ppol)
@@ -1513,13 +1514,13 @@ static struct cpufreq_interactive_policyinfo *get_policyinfo(
	if (!ppol)
		return ERR_PTR(-ENOMEM);

-	busy = kcalloc(cpumask_weight(policy->related_cpus), sizeof(*busy),
+	sl = kcalloc(cpumask_weight(policy->related_cpus), sizeof(*sl),
		     GFP_KERNEL);
-	if (!busy) {
+	if (!sl) {
		kfree(ppol);
		return ERR_PTR(-ENOMEM);
	}
-	ppol->cpu_busy_times = busy;
+	ppol->sl = sl;

	init_timer_deferrable(&ppol->policy_timer);
	ppol->policy_timer.function = cpufreq_interactive_timer;
@@ -1547,7 +1548,7 @@ static void free_policyinfo(int cpu)
		if (per_cpu(polinfo, j) == ppol)
			per_cpu(polinfo, cpu) = NULL;
	kfree(ppol->cached_tunables);
-	kfree(ppol->cpu_busy_times);
+	kfree(ppol->sl);
	kfree(ppol);
}

+10 −3
@@ -1167,6 +1167,7 @@ struct ravg {
	u32 sum_history[RAVG_HIST_SIZE_MAX];
#ifdef CONFIG_SCHED_FREQ_INPUT
	u32 curr_window, prev_window;
+	u16 active_windows;
#endif
};

@@ -1963,10 +1964,16 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,

extern int task_free_register(struct notifier_block *n);
extern int task_free_unregister(struct notifier_block *n);

+struct sched_load {
+	unsigned long prev_load;
+	unsigned long new_task_load;
+};

#if defined(CONFIG_SCHED_FREQ_INPUT)
extern int sched_set_window(u64 window_start, unsigned int window_size);
extern unsigned long sched_get_busy(int cpu);
-extern void sched_get_cpus_busy(unsigned long *busy,
+extern void sched_get_cpus_busy(struct sched_load *busy,
				const struct cpumask *query_cpus);
extern void sched_set_io_is_busy(int val);
#ifdef CONFIG_SCHED_QHMP
@@ -1986,7 +1993,7 @@ static inline unsigned long sched_get_busy(int cpu)
{
	return 0;
}
-static inline void sched_get_cpus_busy(unsigned long *busy,
+static inline void sched_get_cpus_busy(struct sched_load *busy,
				       const struct cpumask *query_cpus) {};
static inline void sched_set_io_is_busy(int val) {};

+3 −0
@@ -74,6 +74,9 @@ extern unsigned int sysctl_sched_small_task_pct;
#else
extern unsigned int sysctl_sched_lowspill_freq;
extern unsigned int sysctl_sched_pack_freq;
+#if defined(CONFIG_SCHED_FREQ_INPUT)
+extern unsigned int sysctl_sched_new_task_windows;
+#endif
#endif

#else /* CONFIG_SCHED_HMP */
+23 −8
@@ -247,6 +247,9 @@ TRACE_EVENT(sched_update_task_ravg,
		__field(	u64,	ps			)
		__field(	u32,	curr_window		)
		__field(	u32,	prev_window		)
+		__field(	u64,	nt_cs			)
+		__field(	u64,	nt_ps			)
+		__field(	u32,	active_windows		)
#endif
	),

@@ -270,12 +273,15 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->ps             = rq->prev_runnable_sum;
		__entry->curr_window	= p->ravg.curr_window;
		__entry->prev_window	= p->ravg.prev_window;
+		__entry->nt_cs		= rq->nt_curr_runnable_sum;
+		__entry->nt_ps		= rq->nt_prev_runnable_sum;
+		__entry->active_windows	= p->ravg.active_windows;
#endif
	),

	TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
#ifdef CONFIG_SCHED_FREQ_INPUT
		" cs %llu ps %llu cur_window %u prev_window %u"
		" cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u"
#endif
		, __entry->wallclock, __entry->win_start, __entry->delta,
		task_event_names[__entry->evt], __entry->cpu,
@@ -285,7 +291,9 @@ TRACE_EVENT(sched_update_task_ravg,
		__entry->sum, __entry->irqtime
#ifdef CONFIG_SCHED_FREQ_INPUT
		, __entry->cs, __entry->ps, __entry->curr_window,
-		  __entry->prev_window
+		  __entry->prev_window,
+		  __entry->nt_cs, __entry->nt_ps,
+		  __entry->active_windows
#endif
		)
);
@@ -377,37 +385,44 @@ TRACE_EVENT(sched_migration_update_sum,
		__field(int,		pid			)
		__field(	u64,	cs			)
		__field(	u64,	ps			)
+		__field(	s64,	nt_cs			)
+		__field(	s64,	nt_ps			)
	),

	TP_fast_assign(
		__entry->cpu		= cpu_of(rq);
		__entry->cs		= rq->curr_runnable_sum;
		__entry->ps		= rq->prev_runnable_sum;
+		__entry->nt_cs		= (s64)rq->nt_curr_runnable_sum;
+		__entry->nt_ps		= (s64)rq->nt_prev_runnable_sum;
		__entry->pid		= p->pid;
	),

	TP_printk("cpu %d: cs %llu ps %llu pid %d", __entry->cpu,
		      __entry->cs, __entry->ps, __entry->pid)
	TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d",
		  __entry->cpu, __entry->cs, __entry->ps,
		  __entry->nt_cs, __entry->nt_ps, __entry->pid)
);

TRACE_EVENT(sched_get_busy,

-	TP_PROTO(int cpu, u64 load),
+	TP_PROTO(int cpu, u64 load, u64 nload),

-	TP_ARGS(cpu, load),
+	TP_ARGS(cpu, load, nload),

	TP_STRUCT__entry(
		__field(	int,	cpu			)
		__field(	u64,	load			)
+		__field(	u64,	nload			)
	),

	TP_fast_assign(
		__entry->cpu		= cpu;
		__entry->load		= load;
+		__entry->nload		= nload;
	),

	TP_printk("cpu %d load %lld",
		__entry->cpu, __entry->load)
	TP_printk("cpu %d load %lld new_task_load %lld",
		__entry->cpu, __entry->load, __entry->nload)
);

TRACE_EVENT(sched_freq_alert,
+86 −11
@@ -1229,6 +1229,8 @@ static __read_mostly unsigned int sched_window_stats_policy =
__read_mostly unsigned int sysctl_sched_window_stats_policy =
	WINDOW_STATS_MAX_RECENT_AVG;

+__read_mostly unsigned int sysctl_sched_new_task_windows = 5;

static __read_mostly unsigned int sched_account_wait_time = 1;
__read_mostly unsigned int sysctl_sched_account_wait_time = 1;

@@ -1472,6 +1474,11 @@ heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event)
	return (rq->window_start - p->ravg.mark_start > sched_ravg_window);
}

+static inline bool is_new_task(struct task_struct *p)
+{
+	return p->ravg.active_windows < sysctl_sched_new_task_windows;
+}

/*
 * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
 */
@@ -1484,11 +1491,17 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
	u64 window_start = rq->window_start;
	u32 window_size = sched_ravg_window;
	u64 delta;
+	bool new_task;

	new_window = mark_start < window_start;
-	if (new_window)
+	if (new_window) {
		nr_full_windows = div64_u64((window_start - mark_start),
						window_size);
+		if (p->ravg.active_windows < USHRT_MAX)
+			p->ravg.active_windows++;
+	}
+
+	new_task = is_new_task(p);

	/* Handle per-task window rollover. We don't care about the idle
	 * task or exiting tasks. */
@@ -1519,14 +1532,18 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
		/* A new window has started. The RQ demand must be rolled
		 * over if p is the current task. */
		if (p_is_curr_task) {
-			u64 prev_sum = 0;
+			u64 prev_sum = 0, nt_prev_sum = 0;

			/* p is either idle task or an exiting task */
-			if (!nr_full_windows)
+			if (!nr_full_windows) {
				prev_sum = rq->curr_runnable_sum;
+				nt_prev_sum = rq->nt_curr_runnable_sum;
+			}

			rq->prev_runnable_sum = prev_sum;
			rq->curr_runnable_sum = 0;
+			rq->nt_prev_runnable_sum = nt_prev_sum;
+			rq->nt_curr_runnable_sum = 0;

		} else if (heavy_task_wakeup(p, rq, event)) {
			/* A new window has started. If p is a waking
@@ -1538,6 +1555,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
			 * tunable. */
			p->ravg.prev_window = p->ravg.demand;
			rq->prev_runnable_sum += p->ravg.demand;
+			if (new_task)
+				rq->nt_prev_runnable_sum += p->ravg.demand;
		}

		return;
@@ -1556,6 +1575,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
			delta = irqtime;
		delta = scale_exec_time(delta, rq);
		rq->curr_runnable_sum += delta;
+		if (new_task)
+			rq->nt_curr_runnable_sum += delta;
		if (!is_idle_task(p) && !exiting_task(p))
			p->ravg.curr_window += delta;

@@ -1589,10 +1610,14 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
				p->ravg.prev_window = delta;
		}
		rq->prev_runnable_sum += delta;
+		if (new_task)
+			rq->nt_prev_runnable_sum += delta;

		/* Account piece of busy time in the current window. */
		delta = scale_exec_time(wallclock - window_start, rq);
		rq->curr_runnable_sum += delta;
+		if (new_task)
+			rq->nt_curr_runnable_sum += delta;
		if (!exiting_task(p))
			p->ravg.curr_window = delta;

@@ -1618,6 +1643,11 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
			delta = scale_exec_time(window_start - mark_start, rq);
			if (!is_idle_task(p) && !exiting_task(p))
				p->ravg.prev_window += delta;

+			rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum;
+			if (new_task)
+				rq->nt_prev_runnable_sum += delta;

			delta += rq->curr_runnable_sum;
		} else {
			/* Since at least one full window has elapsed,
@@ -1626,14 +1656,27 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
			delta = scale_exec_time(window_size, rq);
			if (!is_idle_task(p) && !exiting_task(p))
				p->ravg.prev_window = delta;

+			if (new_task)
+				rq->nt_prev_runnable_sum = delta;
+			else
+				rq->nt_prev_runnable_sum = 0;
		}
-		/* Rollover is done here by overwriting the values in
-		 * prev_runnable_sum and curr_runnable_sum. */
+		/*
+		 * Rollover for normal runnable sum is done here by overwriting
+		 * the values in prev_runnable_sum and curr_runnable_sum.
+		 * Rollover for new task runnable sum has completed by previous
+		 * if-else statement.
+		 */
		rq->prev_runnable_sum = delta;

		/* Account piece of busy time in the current window. */
		delta = scale_exec_time(wallclock - window_start, rq);
		rq->curr_runnable_sum = delta;
+		if (new_task)
+			rq->nt_curr_runnable_sum = delta;
+		else
+			rq->nt_curr_runnable_sum = 0;
		if (!is_idle_task(p) && !exiting_task(p))
			p->ravg.curr_window = delta;

@@ -1657,6 +1700,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
		/* Roll window over. If IRQ busy time was just in the current
		 * window then that is all that need be accounted. */
		rq->prev_runnable_sum = rq->curr_runnable_sum;
+		rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum;
+		rq->nt_curr_runnable_sum = 0;
		if (mark_start > window_start) {
			rq->curr_runnable_sum = scale_exec_time(irqtime, rq);
			return;
@@ -2083,6 +2128,7 @@ static inline void set_window_start(struct rq *rq)
		rq->window_start = cpu_rq(sync_cpu)->window_start;
#ifdef CONFIG_SCHED_FREQ_INPUT
		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
+		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
#endif
		raw_spin_unlock(&sync_rq->lock);
	}
@@ -2215,6 +2261,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
			rq->window_start = window_start;
#ifdef CONFIG_SCHED_FREQ_INPUT
		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
+		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
#endif
		reset_cpu_hmp_stats(cpu, 1);

@@ -2272,12 +2319,13 @@ scale_load_to_freq(u64 load, unsigned int src_freq, unsigned int dst_freq)
	return div64_u64(load * (u64)src_freq, (u64)dst_freq);
}

-void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
+void sched_get_cpus_busy(struct sched_load *busy,
+			 const struct cpumask *query_cpus)
{
	unsigned long flags;
	struct rq *rq;
	const int cpus = cpumask_weight(query_cpus);
-	u64 load[cpus];
+	u64 load[cpus], nload[cpus];
	unsigned int cur_freq[cpus], max_freq[cpus];
	int notifier_sent[cpus];
	int cpu, i = 0;
@@ -2302,6 +2350,7 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)

		update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
		load[i] = rq->old_busy_time = rq->prev_runnable_sum;
+		nload[i] = rq->nt_prev_runnable_sum;
		/*
		 * Scale load in reference to rq->max_possible_freq.
		 *
@@ -2309,6 +2358,7 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
		 * rq->max_freq.
		 */
		load[i] = scale_load_to_cpu(load[i], cpu);
+		nload[i] = scale_load_to_cpu(nload[i], cpu);

		notifier_sent[i] = rq->notifier_sent;
		rq->notifier_sent = 0;
@@ -2328,18 +2378,29 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
		if (!notifier_sent[i]) {
			load[i] = scale_load_to_freq(load[i], max_freq[i],
						     cur_freq[i]);
+			nload[i] = scale_load_to_freq(nload[i], max_freq[i],
+						      cur_freq[i]);
			if (load[i] > window_size)
				load[i] = window_size;
+			if (nload[i] > window_size)
+				nload[i] = window_size;

			load[i] = scale_load_to_freq(load[i], cur_freq[i],
						     rq->max_possible_freq);
+			nload[i] = scale_load_to_freq(nload[i], cur_freq[i],
+						      rq->max_possible_freq);
		} else {
			load[i] = scale_load_to_freq(load[i], max_freq[i],
						     rq->max_possible_freq);
+			nload[i] = scale_load_to_freq(nload[i], max_freq[i],
+						     rq->max_possible_freq);
		}

-		busy[i] = div64_u64(load[i], NSEC_PER_USEC);
+		busy[i].prev_load = div64_u64(load[i], NSEC_PER_USEC);
+		busy[i].new_task_load = div64_u64(nload[i], NSEC_PER_USEC);

-		trace_sched_get_busy(cpu, busy[i]);
+		trace_sched_get_busy(cpu, busy[i].prev_load,
+				     busy[i].new_task_load);
		i++;
	}
}
@@ -2347,12 +2408,12 @@ void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus)
unsigned long sched_get_busy(int cpu)
{
	struct cpumask query_cpu = CPU_MASK_NONE;
-	unsigned long busy;
+	struct sched_load busy;

	cpumask_set_cpu(cpu, &query_cpu);
	sched_get_cpus_busy(&busy, &query_cpu);

-	return busy;
+	return busy.prev_load;
}

void sched_set_io_is_busy(int val)
@@ -2402,6 +2463,7 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
	struct rq *src_rq = task_rq(p);
	struct rq *dest_rq = cpu_rq(new_cpu);
	u64 wallclock;
+	bool new_task;

	if (!sched_enable_hmp || !sched_migration_fixup ||
		 exiting_task(p) || (!p->on_rq && p->state != TASK_WAKING))
@@ -2424,18 +2486,30 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
	update_task_ravg(p, task_rq(p), TASK_MIGRATE,
			 wallclock, 0);

+	new_task = is_new_task(p);

	if (p->ravg.curr_window) {
		src_rq->curr_runnable_sum -= p->ravg.curr_window;
		dest_rq->curr_runnable_sum += p->ravg.curr_window;
+		if (new_task) {
+			src_rq->nt_curr_runnable_sum -= p->ravg.curr_window;
+			dest_rq->nt_curr_runnable_sum += p->ravg.curr_window;
+		}
	}

	if (p->ravg.prev_window) {
		src_rq->prev_runnable_sum -= p->ravg.prev_window;
		dest_rq->prev_runnable_sum += p->ravg.prev_window;
+		if (new_task) {
+			src_rq->nt_prev_runnable_sum -= p->ravg.prev_window;
+			dest_rq->nt_prev_runnable_sum += p->ravg.prev_window;
+		}
	}

	BUG_ON((s64)src_rq->prev_runnable_sum < 0);
	BUG_ON((s64)src_rq->curr_runnable_sum < 0);
+	BUG_ON((s64)src_rq->nt_prev_runnable_sum < 0);
+	BUG_ON((s64)src_rq->nt_curr_runnable_sum < 0);

	trace_sched_migration_update_sum(src_rq, p);
	trace_sched_migration_update_sum(dest_rq, p);
@@ -9064,6 +9138,7 @@ void __init sched_init(void)

#ifdef CONFIG_SCHED_FREQ_INPUT
		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
+		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
		rq->old_busy_time = 0;
		rq->notifier_sent = 0;
#endif
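
To make the tunable's semantics concrete, here is a standalone user-space
sketch (plain C, not kernel code) of the active_windows bookkeeping added
above: the counter is bumped once per window rollover and saturates at
USHRT_MAX, and a task contributes to the nt_* runnable sums only while
is_new_task() holds, i.e. for its first sysctl_sched_new_task_windows
(default 5) windows:

	#include <stdio.h>

	#define ACTIVE_WINDOWS_MAX 65535	/* USHRT_MAX: counter saturates */

	static unsigned int sysctl_sched_new_task_windows = 5;

	struct task { unsigned short active_windows; };

	static int is_new_task(const struct task *p)
	{
		return p->active_windows < sysctl_sched_new_task_windows;
	}

	int main(void)
	{
		struct task p = { 0 };

		/* Windows 0-4 count as new-task load; window 5 onward is regular. */
		for (int w = 0; w < 8; w++) {
			printf("window %d: active_windows=%u -> %s load\n",
			       w, (unsigned)p.active_windows,
			       is_new_task(&p) ? "new-task" : "regular");
			if (p.active_windows < ACTIVE_WINDOWS_MAX)
				p.active_windows++;	/* window rollover */
		}
		return 0;
	}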