
Commit 86df7337 authored by Srivatsa Vaddagiri, committed by Steve Muckle

sched: improve logic for alerting governor



Currently we send notifications to the governor without taking note of cpus
that are synchronized with regard to their frequency. As a result, the
scheduler could send pointless notifications (notification spam!).

Avoid this by considering synchronized cpus and alerting the governor only
when the highest demand of any cpu within the cluster far exceeds or falls
behind the current frequency.
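
The gate is a percentage margin between a cpu's current frequency and the
frequency its demand implies (see nearly_same_freq() below). As a rough
stand-alone illustration of that check, here is a user-space sketch; the
slack values are made-up placeholders for the
sysctl_sched_freq_{inc,dec}_notify_slack_pct tunables, not the kernel
defaults:

#include <stdio.h>

/* Illustrative slack thresholds (percent); NOT the kernel defaults. */
static int inc_notify_slack_pct = -10;	/* alert if >10% overcommitted */
static int dec_notify_slack_pct = 25;	/* alert if >25% undercommitted */

/* Mirrors nearly_same_freq(): nonzero means "close enough, don't alert". */
static int nearly_same_freq(unsigned int cur_freq, unsigned int freq_required)
{
	/* +margin: cur_freq above required; -margin: below required */
	int margin = ((int)cur_freq - (int)freq_required) * 100 / (int)cur_freq;

	return margin > inc_notify_slack_pct && margin < dec_notify_slack_pct;
}

int main(void)
{
	unsigned int cur = 1200000;	/* current frequency, kHz */
	unsigned int reqs[] = { 1190000, 1500000, 600000 };

	for (int i = 0; i < 3; i++)
		printf("required=%u kHz -> %s\n", reqs[i],
		       nearly_same_freq(cur, reqs[i]) ?
				"skip (nearly same)" : "alert governor");
	return 0;
}

With these placeholder thresholds, a demand that maps no more than ~10%
above or ~25% below the current frequency produces no alert.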

Change-Id: I74908b5a212404ca56b38eb94548f9b1fbcca33d
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
parent 8a68fd00
include/linux/sched/sysctl.h  +1 −0
@@ -41,6 +41,7 @@ extern unsigned int sysctl_sched_window_stats_policy;
extern unsigned int sysctl_sched_account_wait_time;
extern unsigned int sysctl_sched_ravg_hist_size;
extern unsigned int sysctl_sched_freq_legacy_mode;
extern unsigned int sysctl_sched_gov_response_time;

#if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
extern unsigned int sysctl_sched_init_task_load_pct;
kernel/sched/core.c  +162 −53
@@ -1167,6 +1167,12 @@ __read_mostly int sysctl_sched_freq_dec_notify_slack_pct = INT_MAX;

static __read_mostly unsigned int sched_io_is_busy;

/*
 * Force-issue notification to governor if we waited long enough since sending
 * last notification and did not see any freq change.
 */
__read_mostly unsigned int sysctl_sched_gov_response_time = 10000000; /* ns (10 ms) */

/*
 * Maximum possible frequency across all cpus. Task demand and cpu
 * capacity (cpu_power) metrics are scaled in reference to it.
@@ -1193,33 +1199,119 @@ static u64 sched_clock_at_init_jiffy;
#define PREV_WINDOW_CONTRIB	2
#define DONT_ACCOUNT		4

-/* Returns how undercommitted a CPU is given its current frequency and
- * task load (as measured in the previous window).  Returns this value
- * as a percentage of the CPU's maximum frequency.  A negative value
- * means the CPU is overcommitted at its current frequency. */
#ifdef CONFIG_SCHED_FREQ_INPUT

/* Is freq_required within the notify slack band around cur_freq? */
static inline int
nearly_same_freq(unsigned int cur_freq, unsigned int freq_required)
{
	int margin;

	margin = cur_freq - freq_required;
	margin *= 100;
	margin /= (int)cur_freq;

	/*
	 * + margin implies cur_freq > req_freq
	 * - margin implies cur_freq < req_freq
	 */
-int rq_freq_margin(struct rq *rq)

	return (margin > sysctl_sched_freq_inc_notify_slack_pct &&
		margin < sysctl_sched_freq_dec_notify_slack_pct);
}

/* Is governor late in responding? */
static inline int freq_request_timeout(struct rq *rq)
{
	u64 now = sched_clock();

	return ((now - rq->freq_requested_ts) > sysctl_sched_gov_response_time);
}

/* Should scheduler alert governor for changing frequency? */
static int send_notification(struct rq *rq, unsigned int freq_required)
{
	int cpu, rc = 0;
	unsigned int freq_requested = rq->freq_requested;
	struct rq *domain_rq;
	unsigned long flags;

	if (freq_required > rq->max_freq)
		freq_required = rq->max_freq;
	else if (freq_required < rq->min_freq)
		freq_required = rq->min_freq;

	if (nearly_same_freq(rq->cur_freq, freq_required))
		return 0;

	if (freq_requested && nearly_same_freq(freq_requested, freq_required) &&
	    !freq_request_timeout(rq))
		return 0;

	cpu = cpumask_first(&rq->freq_domain_cpumask);
	if (cpu >= nr_cpu_ids)
		return 0;

	domain_rq = cpu_rq(cpu);
	raw_spin_lock_irqsave(&domain_rq->lock, flags);
	freq_requested = domain_rq->freq_requested;
	if (!freq_requested ||
	    !nearly_same_freq(freq_requested, freq_required) ||
	    freq_request_timeout(domain_rq)) {

		u64 now = sched_clock();

		/*
		 * Cache the new frequency requested in rq of all cpus that are
		 * in same freq domain. This saves frequent grabbing of
		 * domain_rq->lock
		 */
		for_each_cpu(cpu, &rq->freq_domain_cpumask) {
			cpu_rq(cpu)->freq_requested = freq_required;
			cpu_rq(cpu)->freq_requested_ts = now;
		}
		rc = 1;
	}
	raw_spin_unlock_irqrestore(&domain_rq->lock, flags);

	return rc;
}

/* Alert governor if there is a need to change frequency */
void check_for_freq_change(struct rq *rq)
{
	unsigned int freq_required;
	int margin;
	u64 demand;
	int i, max_demand_cpu = 0;
	u64 max_demand = 0;

	if (!sched_enable_hmp)
-		return INT_MAX;
		return;

	/* Find out max demand across cpus in same frequency domain */
	for_each_cpu(i, &rq->freq_domain_cpumask) {
		if (cpu_rq(i)->prev_runnable_sum > max_demand) {
			max_demand = cpu_rq(i)->prev_runnable_sum;
			max_demand_cpu = i;
		}
	}

-	demand = scale_load_to_cpu(rq->prev_runnable_sum, rq->cpu);
-	demand *= 128;
-	demand = div64_u64(demand, max_task_load());
	max_demand = scale_load_to_cpu(max_demand, rq->cpu);
	max_demand *= 128;
	max_demand = div64_u64(max_demand, max_task_load());

-	freq_required = demand * rq->max_possible_freq;
	freq_required = max_demand * rq->max_possible_freq;
	freq_required /= 128;

-	margin = rq->cur_freq - freq_required;
-	margin *= 100;
-	margin /= (int)rq->max_possible_freq;
-	return margin;
	if (!send_notification(rq, freq_required))
		return;

	atomic_notifier_call_chain(
		&load_alert_notifier_head, 0,
		(void *)(long)max_demand_cpu);
}

#endif	/* CONFIG_SCHED_FREQ_INPUT */

/*
 * Called when new window is starting for a task, to record cpu usage over
 * recently concluded window(s). Normally 'samples' should be 1. It can be > 1
@@ -1798,6 +1890,7 @@ unsigned long sched_get_busy(int cpu)
{
	unsigned long flags;
	struct rq *rq = cpu_rq(cpu);
	u64 load;

	/*
	 * This function could be called in timer context, and the
@@ -1808,8 +1901,17 @@ unsigned long sched_get_busy(int cpu)
	update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
	raw_spin_unlock_irqrestore(&rq->lock, flags);

-	return div64_u64(scale_load_to_cpu(rq->prev_runnable_sum, cpu),
-			  NSEC_PER_USEC);
	/*
	 * Scale load in reference to rq->max_possible_freq.
	 *
	 * Note that scale_load_to_cpu() scales load in reference to
	 * rq->max_freq
	 */
	load = scale_load_to_cpu(rq->prev_runnable_sum, cpu);
	load = div64_u64(load * (u64)rq->max_freq, (u64)rq->max_possible_freq);
	load = div64_u64(load, NSEC_PER_USEC);

	return load;
}

void sched_set_io_is_busy(int val)
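
For the rescaling in sched_get_busy() above: scale_load_to_cpu() reports
load relative to the cpu's own max_freq, so it is multiplied by
max_freq / max_possible_freq before the ns-to-us conversion. A toy
calculation with invented numbers (scale_load_to_cpu() itself elided;
prev_runnable_sum assumed already scaled to max_freq):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_USEC 1000ULL

int main(void)
{
	uint64_t prev_runnable_sum = 8000000;	/* busy ns, max_freq reference */
	uint64_t max_freq = 1000000;		/* this cpu's max, kHz */
	uint64_t max_possible_freq = 2000000;	/* fastest cpu's max, kHz */

	/* Re-express against max_possible_freq, then convert ns -> us. */
	uint64_t load = prev_runnable_sum * max_freq / max_possible_freq;
	load /= NSEC_PER_USEC;

	printf("busy = %llu us\n", (unsigned long long)load);	/* 4000 us */
	return 0;
}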
@@ -2038,6 +2140,23 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,
	cpu_rq(cpu)->cur_freq = new_freq;
	raw_spin_unlock_irqrestore(&rq->lock, flags);

	/* clear freq request for CPUs in the same freq domain */
	if (!rq->freq_requested)
		return 0;

	/* The first CPU (and its rq lock) in a freq domain is used to
	 * serialize all freq change tests and notifications for CPUs
	 * in that domain. */
	cpu = cpumask_first(&rq->freq_domain_cpumask);
	if (cpu >= nr_cpu_ids)
		return 0;

	rq = cpu_rq(cpu);
	raw_spin_lock_irqsave(&rq->lock, flags);
	for_each_cpu(cpu, &rq->freq_domain_cpumask)
		cpu_rq(cpu)->freq_requested = 0;
	raw_spin_unlock_irqrestore(&rq->lock, flags);

	return 0;
}

@@ -2079,7 +2198,6 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
	struct rq *src_rq = task_rq(p);
	struct rq *dest_rq = cpu_rq(new_cpu);
	u64 wallclock;
-	int freq_notify = 0;

	if (p->state == TASK_WAKING)
		double_rq_lock(src_rq, dest_rq);
@@ -2087,7 +2205,6 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
	if (sched_disable_window_stats)
		goto done;

-	freq_notify = 1;
	wallclock = sched_clock();

	update_task_ravg(task_rq(p)->curr, task_rq(p),
@@ -2142,29 +2259,25 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
done:
	if (p->state == TASK_WAKING)
		double_rq_unlock(src_rq, dest_rq);
}

-	if (!freq_notify && cpumask_test_cpu(new_cpu,
-			     &src_rq->freq_domain_cpumask))
-		return;
-
-	/* Evaluate possible frequency notifications for
-	 * source and destination CPUs in different frequency
-	 * domains. */
-	if (rq_freq_margin(dest_rq) <
-	    sysctl_sched_freq_inc_notify_slack_pct)
-		atomic_notifier_call_chain(
-			&load_alert_notifier_head, 0,
-			(void *)(long)new_cpu);
/* A long sleep is defined as sleeping at least one full window prior
 * to the current window start. */
static inline int is_long_sleep(struct rq *rq, struct task_struct *p)
{
	if (p->ravg.mark_start > rq->window_start)
		return 0;

-	if (rq_freq_margin(src_rq) >
-	    sysctl_sched_freq_dec_notify_slack_pct)
-		atomic_notifier_call_chain(
-			&load_alert_notifier_head, 0,
-			(void *)(long)task_cpu(p));
	return ((rq->window_start - p->ravg.mark_start) > sched_ravg_window);
}

#else	/* CONFIG_SCHED_FREQ_INPUT || CONFIG_SCHED_HMP */

static inline int is_long_sleep(struct rq *rq, struct task_struct *p)
{
	return 0;
}

static inline void
update_task_ravg(struct task_struct *p, struct rq *rq,
			 int event, u64 wallclock, u64 irqtime)
@@ -2714,6 +2827,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	unsigned long src_cpu;
	int notify = 0;
	struct migration_notify_data mnd;
	int long_sleep = 0;
#ifdef CONFIG_SMP
	struct rq *rq;
	u64 wallclock;
@@ -2754,6 +2868,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	raw_spin_lock(&rq->lock);
	wallclock = sched_clock();
	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
	long_sleep = is_long_sleep(rq, p);
	update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
	raw_spin_unlock(&rq->lock);

@@ -2770,14 +2885,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	if (src_cpu != cpu) {
		wake_flags |= WF_MIGRATED;
		set_task_cpu(p, cpu);
-	} else {
-#ifdef CONFIG_SCHED_FREQ_INPUT
-		if (rq_freq_margin(cpu_rq(cpu)) <
-		    sysctl_sched_freq_inc_notify_slack_pct)
-			atomic_notifier_call_chain(
-				&load_alert_notifier_head, 0,
-				(void *)(long)cpu);
-#endif
	}
#endif /* CONFIG_SMP */

@@ -2809,6 +2916,11 @@ out:
		atomic_notifier_call_chain(&migration_notifier_head,
					   0, (void *)&mnd);

	if (long_sleep || !same_freq_domain(src_cpu, cpu))
		check_for_freq_change(cpu_rq(cpu));
	if (!long_sleep && !same_freq_domain(src_cpu, cpu))
		check_for_freq_change(cpu_rq(src_cpu));

	return success;
}

@@ -3052,13 +3164,6 @@ void wake_up_new_task(struct task_struct *p)

	rq = __task_rq_lock(p);
	mark_task_starting(p);
-#ifdef CONFIG_SCHED_FREQ_INPUT
-	if (rq_freq_margin(task_rq(p)) <
-	    sysctl_sched_freq_inc_notify_slack_pct)
-		atomic_notifier_call_chain(
-			&load_alert_notifier_head, 0,
-			(void *)(long)task_cpu(p));
-#endif
	activate_task(rq, p, 0);
	p->on_rq = 1;
	trace_sched_wakeup_new(p, true);
@@ -3068,6 +3173,8 @@ void wake_up_new_task(struct task_struct *p)
		p->sched_class->task_woken(rq, p);
#endif
	task_rq_unlock(rq, p, &flags);
	if (init_task_load)
		check_for_freq_change(rq);
}

#ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -8617,6 +8724,8 @@ void __init sched_init(void)
		rq->capacity = 1024;
		rq->load_scale_factor = 1024;
		rq->window_start = 0;
		rq->freq_requested = 0;
		rq->freq_requested_ts = 0;
#endif
#ifdef CONFIG_SCHED_HMP
		rq->nr_small_tasks = rq->nr_big_tasks = 0;
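
Tying the core.c pieces together: send_notification() keeps one outstanding
(freq_requested, timestamp) pair per frequency domain and suppresses repeat
alerts until the governor either acts (the cpufreq transition notifier above
clears freq_requested) or sysctl_sched_gov_response_time elapses. A
condensed, single-threaded model of that decision, with illustrative slack
values and the per-domain locking collapsed into one struct:

#include <stdio.h>
#include <stdint.h>

#define GOV_RESPONSE_TIME_NS 10000000ULL	/* 10 ms, as in the patch */

struct freq_domain {
	unsigned int freq_requested;	/* 0 = no request outstanding */
	uint64_t freq_requested_ts;
};

/* Illustrative slack band; not the kernel defaults. */
static int nearly_same_freq(unsigned int cur, unsigned int req)
{
	int margin = ((int)cur - (int)req) * 100 / (int)cur;

	return margin > -10 && margin < 25;
}

/* Returns 1 when the governor should be alerted about freq_required. */
static int send_notification(struct freq_domain *d, unsigned int cur_freq,
			     unsigned int freq_required, uint64_t now_ns)
{
	if (nearly_same_freq(cur_freq, freq_required))
		return 0;	/* current frequency is close enough already */

	if (d->freq_requested &&
	    nearly_same_freq(d->freq_requested, freq_required) &&
	    now_ns - d->freq_requested_ts <= GOV_RESPONSE_TIME_NS)
		return 0;	/* similar request pending; governor not late yet */

	d->freq_requested = freq_required;	/* cached for the whole domain */
	d->freq_requested_ts = now_ns;
	return 1;
}

int main(void)
{
	struct freq_domain d = { 0, 0 };

	printf("%d\n", send_notification(&d, 600000, 1200000, 0));	/* 1: first alert */
	printf("%d\n", send_notification(&d, 600000, 1210000, 1000));	/* 0: duplicate */
	printf("%d\n", send_notification(&d, 600000, 1210000,
					 GOV_RESPONSE_TIME_NS + 1));	/* 1: governor late */
	return 0;
}

In the kernel the cached pair lives on every rq in the domain, so the common
path can test it without taking the domain lock; updates are serialized on
the first cpu's rq lock.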
kernel/sched/fair.c  +6 −0
@@ -6553,6 +6553,12 @@ more_balance:
			per_cpu(dbs_boost_load_moved, this_cpu) = 0;

		}

		/* Assumes one 'busiest' cpu that we pulled tasks from */
		if (!same_freq_domain(this_cpu, cpu_of(busiest))) {
			check_for_freq_change(this_rq);
			check_for_freq_change(busiest);
		}
	}
	if (likely(!active_balance)) {
		/* We were unbalanced, so reset the balancing interval */
kernel/sched/sched.h  +35 −0
@@ -490,6 +490,8 @@ struct rq {
	 */
	unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
	struct cpumask freq_domain_cpumask;
	unsigned int freq_requested;
	u64 freq_requested_ts;

	u64 cumulative_runnable_avg;
	int efficiency; /* Differentiate cpus with different IPC capability */
@@ -773,6 +775,39 @@ static inline void sched_account_irqtime(int cpu, struct task_struct *curr,

#endif	/* CONFIG_SCHED_FREQ_INPUT || CONFIG_SCHED_HMP */

#ifdef CONFIG_SCHED_FREQ_INPUT
extern void check_for_freq_change(struct rq *rq);

/* Are the frequencies of two cpus synchronized (same frequency domain)? */
static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
	struct rq *rq = cpu_rq(src_cpu);

	if (src_cpu == dst_cpu)
		return 1;

	return cpumask_test_cpu(dst_cpu, &rq->freq_domain_cpumask);
}

#ifdef CONFIG_SCHED_HMP
#define init_task_load	sysctl_sched_init_task_load_pct
#else
#define init_task_load	0
#endif

#else	/* CONFIG_SCHED_FREQ_INPUT */

#define init_task_load	0

static inline void check_for_freq_change(struct rq *rq) { }

static inline int same_freq_domain(int src_cpu, int dst_cpu)
{
	return 1;
}

#endif	/* CONFIG_SCHED_FREQ_INPUT */

#ifdef CONFIG_SCHED_HMP

#define	BOOST_KICK	0
kernel/sysctl.c  +7 −0
@@ -339,6 +339,13 @@ static struct ctl_table kern_table[] = {
		.mode           = 0644,
		.proc_handler   = sched_window_update_handler,
	},
	{
		.procname	= "sched_gov_response_time",
		.data		= &sysctl_sched_gov_response_time,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sched_wakeup_load_threshold",
		.data		= &sysctl_sched_wakeup_load_threshold,