Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e0907557 authored by Viresh Kumar
Browse files

cpufreq: Drop schedfreq governor



We all should be using (and improving) the schedutil governor now. Get
rid of the non-upstream governor.

Tested on Hikey.

Change-Id: I2104558b03118b0a9c5f099c23c42cd9a6c2a963
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
parent dfe0a9bc
Loading
Loading
Loading
Loading
+0 −21
Original line number Original line Diff line number Diff line
@@ -102,14 +102,6 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
	  governor. If unsure have a look at the help section of the
	  governor. If unsure have a look at the help section of the
	  driver. Fallback governor will be the performance governor.
	  driver. Fallback governor will be the performance governor.


config CPU_FREQ_DEFAULT_GOV_SCHED
	bool "sched"
	select CPU_FREQ_GOV_SCHED
	help
	  Use the CPUfreq governor 'sched' as default. This scales
	  cpu frequency using CPU utilization estimates from the
	  scheduler.

config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
	bool "interactive"
	bool "interactive"
	select CPU_FREQ_GOV_INTERACTIVE
	select CPU_FREQ_GOV_INTERACTIVE
@@ -211,19 +203,6 @@ config CPU_FREQ_GOV_CONSERVATIVE


	  If in doubt, say N.
	  If in doubt, say N.


config CPU_FREQ_GOV_SCHED
	bool "'sched' cpufreq governor"
	depends on CPU_FREQ
	depends on SMP
	select CPU_FREQ_GOV_COMMON
	help
	  'sched' - this governor scales cpu frequency from the
	  scheduler as a function of cpu capacity utilization. It does
	  not evaluate utilization on a periodic basis (as ondemand
	  does) but instead is event-driven by the scheduler.

	  If in doubt, say N.

config CPU_FREQ_GOV_INTERACTIVE
config CPU_FREQ_GOV_INTERACTIVE
	tristate "'interactive' cpufreq policy governor"
	tristate "'interactive' cpufreq policy governor"
	depends on CPU_FREQ
	depends on CPU_FREQ
+0 −1
Original line number Original line Diff line number Diff line
@@ -19,7 +19,6 @@ extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_sync_hint_enable;
extern unsigned int sysctl_sched_sync_hint_enable;
extern unsigned int sysctl_sched_initial_task_util;
extern unsigned int sysctl_sched_cstate_aware;
extern unsigned int sysctl_sched_cstate_aware;
#ifdef CONFIG_SCHED_WALT
#ifdef CONFIG_SCHED_WALT
extern unsigned int sysctl_sched_use_walt_cpu_util;
extern unsigned int sysctl_sched_use_walt_cpu_util;
+0 −1
Original line number Original line Diff line number Diff line
@@ -26,5 +26,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o
obj-$(CONFIG_SCHED_TUNE) += tune.o
obj-$(CONFIG_SCHED_TUNE) += tune.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
+0 −93
Original line number Original line Diff line number Diff line
@@ -3142,98 +3142,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
	return ns;
	return ns;
}
}


#ifdef CONFIG_CPU_FREQ_GOV_SCHED

static inline
unsigned long add_capacity_margin(unsigned long cpu_capacity)
{
	cpu_capacity  = cpu_capacity * capacity_margin;
	cpu_capacity /= SCHED_CAPACITY_SCALE;
	return cpu_capacity;
}

static inline
unsigned long sum_capacity_reqs(unsigned long cfs_cap,
				struct sched_capacity_reqs *scr)
{
	unsigned long total = add_capacity_margin(cfs_cap + scr->rt);
	return total += scr->dl;
}

unsigned long boosted_cpu_util(int cpu);
static void sched_freq_tick_pelt(int cpu)
{
	unsigned long cpu_utilization = boosted_cpu_util(cpu);
	unsigned long capacity_curr = capacity_curr_of(cpu);
	struct sched_capacity_reqs *scr;

	scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
	if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr)
		return;

	/*
	 * To make free room for a task that is building up its "real"
	 * utilization and to harm its performance the least, request
	 * a jump to a higher OPP as soon as the margin of free capacity
	 * is impacted (specified by capacity_margin).
	 * Remember CPU utilization in sched_capacity_reqs should be normalised.
	 */
	cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
	set_cfs_cpu_capacity(cpu, true, cpu_utilization);
}

#ifdef CONFIG_SCHED_WALT
static void sched_freq_tick_walt(int cpu)
{
	unsigned long cpu_utilization = cpu_util(cpu);
	unsigned long capacity_curr = capacity_curr_of(cpu);

	if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
		return sched_freq_tick_pelt(cpu);

	/*
	 * Add a margin to the WALT utilization.
	 * NOTE: WALT tracks a single CPU signal for all the scheduling
	 * classes, thus this margin is going to be added to the DL class as
	 * well, which is something we do not do in sched_freq_tick_pelt case.
	 */
	cpu_utilization = add_capacity_margin(cpu_utilization);
	if (cpu_utilization <= capacity_curr)
		return;

	/*
	 * It is likely that the load is growing so we
	 * keep the added margin in our request as an
	 * extra boost.
	 * Remember CPU utilization in sched_capacity_reqs should be normalised.
	 */
	cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu);
	set_cfs_cpu_capacity(cpu, true, cpu_utilization);

}
#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
#else
#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
#endif /* CONFIG_SCHED_WALT */

static void sched_freq_tick(int cpu)
{
	unsigned long capacity_orig, capacity_curr;

	if (!sched_freq())
		return;

	capacity_orig = capacity_orig_of(cpu);
	capacity_curr = capacity_curr_of(cpu);
	if (capacity_curr == capacity_orig)
		return;

	_sched_freq_tick(cpu);
}
#else
static inline void sched_freq_tick(int cpu) { }
#endif /* CONFIG_CPU_FREQ_GOV_SCHED */

/*
/*
 * This function gets called by the timer code, with HZ frequency.
 * This function gets called by the timer code, with HZ frequency.
 * We call it with interrupts disabled.
 * We call it with interrupts disabled.
@@ -3254,7 +3162,6 @@ void scheduler_tick(void)
	walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
	walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
			walt_ktime_clock(), 0);
			walt_ktime_clock(), 0);
	calc_global_load_tick(rq);
	calc_global_load_tick(rq);
	sched_freq_tick(cpu);
	raw_spin_unlock(&rq->lock);
	raw_spin_unlock(&rq->lock);


	perf_event_task_tick();
	perf_event_task_tick();

kernel/sched/cpufreq_sched.c

deleted file mode 100644 → 0
+0 −499
Original line number Original line Diff line number Diff line
/*
 *  Copyright (C)  2015 Michael Turquette <mturquette@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/percpu.h>
#include <linux/irq_work.h>
#include <linux/delay.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cpufreq_sched.h>

#include "sched.h"

/* Default throttle periods, used when the driver reports no transition latency. */
#define THROTTLE_DOWN_NSEC	50000000 /* 50ms default */
#define THROTTLE_UP_NSEC	500000 /* 500us default */

/* Enabled while at least one policy runs this governor (scheduler fast path). */
struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
/* Set once at init if the cpufreq driver must sleep to change frequency. */
static bool __read_mostly cpufreq_driver_slow;

/* Forward declaration (needed only when 'sched' is not the default governor). */
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static struct cpufreq_governor cpufreq_gov_sched;
#endif

/* Per-CPU governor-enabled flag and per-CPU capacity requests from the scheduler. */
static DEFINE_PER_CPU(unsigned long, enabled);
DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);

/*
 * gov_tunables - sysfs-exposed throttle-period tunables; shared across
 * policies unless the driver supports per-policy governors.
 */
struct gov_tunables {
	struct gov_attr_set attr_set;
	unsigned int up_throttle_nsec;
	unsigned int down_throttle_nsec;
};

/**
 * gov_data - per-policy data internal to the governor
 * @up_throttle: next throttling period expiry if increasing OPP
 * @down_throttle: next throttling period expiry if decreasing OPP
 * @tunables: throttle-period tunables (possibly shared, see global_tunables)
 * @tunables_hook: list hook into the tunables' attr_set user list
 * @task: worker thread for dvfs transition that may block/sleep
 * @irq_work: callback used to wake up worker thread
 * @requested_freq: last frequency requested by the sched governor
 *
 * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
 * per-policy instance of it is created when the cpufreq_sched governor receives
 * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data
 * member of struct cpufreq_policy.
 *
 * Readers of this data must call down_read(policy->rwsem). Writers must
 * call down_write(policy->rwsem).
 */
struct gov_data {
	ktime_t up_throttle;
	ktime_t down_throttle;
	struct gov_tunables *tunables;
	struct list_head tunables_hook;
	struct task_struct *task;
	struct irq_work irq_work;
	unsigned int requested_freq;
};

/*
 * cpufreq_sched_try_driver_target() - apply @freq through the cpufreq driver
 * and restart both throttle windows.
 *
 * Uses a write trylock on policy->rwsem so a concurrent governor stop cannot
 * deadlock against us; the request is simply dropped if the lock is held.
 */
static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
					    unsigned int freq)
{
	struct gov_data *gd = policy->governor_data;

	/* avoid race with cpufreq_sched_stop */
	if (!down_write_trylock(&policy->rwsem))
		return;

	__cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);

	/* Arm both throttle windows from "now". */
	gd->up_throttle = ktime_add_ns(ktime_get(),
				       gd->tunables->up_throttle_nsec);
	gd->down_throttle = ktime_add_ns(ktime_get(),
					 gd->tunables->down_throttle_nsec);
	up_write(&policy->rwsem);
}

/*
 * finish_last_request() - sleep until the active throttle window expires.
 * @gd:       per-policy governor data
 * @cur_freq: current frequency; selects the down- vs up-throttle expiry
 *
 * Returns false if no throttling was in effect, true after having slept
 * through the remainder of the throttle period.
 */
static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
{
	ktime_t now = ktime_get();

	/* A lower request is bound by down_throttle, a higher one by up_throttle. */
	ktime_t throttle = gd->requested_freq < cur_freq ?
		gd->down_throttle : gd->up_throttle;

	if (ktime_after(now, throttle))
		return false;

	while (1) {
		/* NOTE(review): usec_left first holds *nanoseconds*; an int
		 * overflows for periods beyond ~2.1s — confirm tunable range. */
		int usec_left = ktime_to_ns(ktime_sub(throttle, now));

		usec_left /= NSEC_PER_USEC;
		trace_cpufreq_sched_throttled(usec_left);
		usleep_range(usec_left, usec_left + 100);
		now = ktime_get();
		if (ktime_after(now, throttle))
			return true;
	}
}

/*
 * We pass in struct cpufreq_policy. This is safe because changing out the
 * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
 * which tears down all of the data structures and __cpufreq_governor(policy,
 * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
 * new policy pointer.
 */
static int cpufreq_sched_thread(void *data)
{
	struct sched_param param;
	struct cpufreq_policy *policy;
	struct gov_data *gd;
	unsigned int new_request = 0;
	unsigned int last_request = 0;
	int ret;

	policy = (struct cpufreq_policy *) data;
	gd = policy->governor_data;

	/* Run at RT priority so DVFS requests are not starved by CFS load. */
	param.sched_priority = 50;
	ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
	if (ret) {
		/* NOTE(review): do_exit() from a kthread bypasses the
		 * kthread_stop() handshake — confirm this is intended. */
		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
		do_exit(-EINVAL);
	} else {
		pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
				__func__, gd->task->pid);
	}

	do {
		/* Snapshot the latest request published by the update path. */
		new_request = gd->requested_freq;
		if (new_request == last_request) {
			/* Nothing new: sleep until woken by the irq_work. */
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop())
				break;
			schedule();
		} else {
			/*
			 * if the frequency thread sleeps while waiting to be
			 * unthrottled, start over to check for a newer request
			 */
			if (finish_last_request(gd, policy->cur))
				continue;
			last_request = new_request;
			cpufreq_sched_try_driver_target(policy, new_request);
		}
	} while (!kthread_should_stop());

	return 0;
}

/* irq_work callback: kick the per-policy DVFS worker kthread. */
static void cpufreq_sched_irq_work(struct irq_work *irq_work)
{
	struct gov_data *gd;

	gd = container_of(irq_work, struct gov_data, irq_work);
	/* NOTE(review): container_of() cannot yield NULL here; this check
	 * looks like dead code — confirm before relying on it. */
	if (!gd)
		return;

	wake_up_process(gd->task);
}

/*
 * update_fdomain_capacity_request() - recompute and apply the frequency
 * request for the frequency domain (policy) containing @cpu.
 *
 * Takes the maximum of all per-CPU capacity requests in the policy,
 * converts it to a driver frequency, clamps it to policy limits and hands
 * it to the driver — directly for fast drivers, via irq_work/kthread for
 * slow ones.
 */
static void update_fdomain_capacity_request(int cpu)
{
	unsigned int freq_new, index_new, cpu_tmp;
	struct cpufreq_policy *policy;
	struct gov_data *gd;
	unsigned long capacity = 0;

	/*
	 * Avoid grabbing the policy if possible. A test is still
	 * required after locking the CPU's policy to avoid racing
	 * with the governor changing.
	 */
	if (!per_cpu(enabled, cpu))
		return;

	policy = cpufreq_cpu_get(cpu);
	if (IS_ERR_OR_NULL(policy))
		return;

	if (policy->governor != &cpufreq_gov_sched ||
	    !policy->governor_data)
		goto out;

	gd = policy->governor_data;

	/* find max capacity requested by cpus in this policy */
	for_each_cpu(cpu_tmp, policy->cpus) {
		struct sched_capacity_reqs *scr;

		scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
		capacity = max(capacity, scr->total);
	}

	/* Convert the new maximum capacity request into a cpu frequency */
	freq_new = capacity * policy->cpuinfo.max_freq >> SCHED_CAPACITY_SHIFT;
	index_new = cpufreq_frequency_table_target(policy, freq_new, CPUFREQ_RELATION_L);
	freq_new = policy->freq_table[index_new].frequency;

	/* Respect the current policy limits. */
	if (freq_new > policy->max)
		freq_new = policy->max;

	if (freq_new < policy->min)
		freq_new = policy->min;

	trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
					gd->requested_freq);
	/* Nothing to do if the request did not change. */
	if (freq_new == gd->requested_freq)
		goto out;

	gd->requested_freq = freq_new;

	/*
	 * Throttling is not yet supported on platforms with fast cpufreq
	 * drivers.
	 */
	if (cpufreq_driver_slow)
		irq_work_queue_on(&gd->irq_work, cpu);
	else
		cpufreq_sched_try_driver_target(policy, freq_new);

out:
	cpufreq_cpu_put(policy);
}

/*
 * update_cpu_capacity_request() - recompute @cpu's total capacity request.
 * @cpu:     CPU whose request is being updated
 * @request: when true, propagate a changed total to the frequency domain
 *
 * Applies the capacity_margin headroom to the CFS+RT utilization, then
 * adds the DL bandwidth on top (no margin for DL). Called from the
 * scheduler with the runqueue lock held.
 */
void update_cpu_capacity_request(int cpu, bool request)
{
	unsigned long new_capacity;
	struct sched_capacity_reqs *scr;

	/* The rq lock serializes access to the CPU's sched_capacity_reqs. */
	lockdep_assert_held(&cpu_rq(cpu)->lock);

	scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

	new_capacity = scr->cfs + scr->rt;
	new_capacity = new_capacity * capacity_margin
		/ SCHED_CAPACITY_SCALE;
	new_capacity += scr->dl;

	/* Skip the (expensive) domain update when nothing changed. */
	if (new_capacity == scr->total)
		return;

	trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);

	scr->total = new_capacity;
	if (request)
		update_fdomain_capacity_request(cpu);
}

/* Enable the sched-freq static key (activates the scheduler-side hooks). */
static inline void set_sched_freq(void)
{
	static_key_slow_inc(&__sched_freq);
}

/* Disable the sched-freq static key. */
static inline void clear_sched_freq(void)
{
	static_key_slow_dec(&__sched_freq);
}

/* Tunables */
/* Shared tunables instance when the driver has no per-policy governors. */
static struct gov_tunables *global_tunables;

/* Map an embedded gov_attr_set back to its containing gov_tunables. */
static inline struct gov_tunables *to_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct gov_tunables, attr_set);
}

/* sysfs: show the up-throttle period in nanoseconds. */
static ssize_t up_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf)
{
	struct gov_tunables *tunables = to_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->up_throttle_nsec);
}

/* sysfs: store the up-throttle period in nanoseconds. */
static ssize_t up_throttle_nsec_store(struct gov_attr_set *attr_set,
				      const char *buf, size_t count)
{
	struct gov_tunables *tunables = to_tunables(attr_set);
	int ret;
	long unsigned int val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;
	/* NOTE(review): silently truncates values > UINT_MAX — confirm ok. */
	tunables->up_throttle_nsec = val;
	return count;
}

/* sysfs: show the down-throttle period in nanoseconds. */
static ssize_t down_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf)
{
	struct gov_tunables *tunables = to_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->down_throttle_nsec);
}

/* sysfs: store the down-throttle period in nanoseconds. */
static ssize_t down_throttle_nsec_store(struct gov_attr_set *attr_set,
					const char *buf, size_t count)
{
	struct gov_tunables *tunables = to_tunables(attr_set);
	int ret;
	long unsigned int val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;
	/* NOTE(review): silently truncates values > UINT_MAX — confirm ok. */
	tunables->down_throttle_nsec = val;
	return count;
}

/* Read-write sysfs attributes for the two throttle periods. */
static struct governor_attr up_throttle_nsec = __ATTR_RW(up_throttle_nsec);
static struct governor_attr down_throttle_nsec = __ATTR_RW(down_throttle_nsec);

static struct attribute *schedfreq_attributes[] = {
	&up_throttle_nsec.attr,
	&down_throttle_nsec.attr,
	NULL
};

/* kobject type backing the governor's sysfs directory. */
static struct kobj_type tunables_ktype = {
	.default_attrs = schedfreq_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};

/*
 * cpufreq_sched_policy_init() - governor ->init callback.
 *
 * Allocates the per-policy gov_data, creates or references the sysfs
 * tunables, spawns the DVFS worker kthread when the driver must sleep to
 * change frequency, and enables the sched-freq static key.
 *
 * Returns 0 on success, -ENOMEM on any setup failure.
 */
static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
{
	struct gov_data *gd;
	int cpu;
	int rc;

	/* Drop any stale capacity requests left by a previous governor. */
	for_each_cpu(cpu, policy->cpus)
		memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
		       sizeof(struct sched_capacity_reqs));

	gd = kzalloc(sizeof(*gd), GFP_KERNEL);
	if (!gd)
		return -ENOMEM;

	policy->governor_data = gd;

	if (!global_tunables) {
		/* First user: allocate and expose a fresh tunables set. */
		gd->tunables = kzalloc(sizeof(*gd->tunables), GFP_KERNEL);
		if (!gd->tunables)
			goto free_gd;

		/* Default the up-throttle to the driver's transition latency. */
		gd->tunables->up_throttle_nsec =
			policy->cpuinfo.transition_latency ?
			policy->cpuinfo.transition_latency :
			THROTTLE_UP_NSEC;
		gd->tunables->down_throttle_nsec =
			THROTTLE_DOWN_NSEC;

		rc = kobject_init_and_add(&gd->tunables->attr_set.kobj,
					  &tunables_ktype,
					  get_governor_parent_kobj(policy),
					  "%s", cpufreq_gov_sched.name);
		if (rc)
			goto free_tunables;

		gov_attr_set_init(&gd->tunables->attr_set,
				  &gd->tunables_hook);

		pr_debug("%s: throttle_threshold = %u [ns]\n",
			 __func__, gd->tunables->up_throttle_nsec);

		if (!have_governor_per_policy())
			global_tunables = gd->tunables;
	} else {
		/* Reuse the shared tunables and take a reference. */
		gd->tunables = global_tunables;
		gov_attr_set_get(&global_tunables->attr_set,
				 &gd->tunables_hook);
	}

	/* Slow drivers get a dedicated kthread so frequency changes may sleep. */
	if (cpufreq_driver_is_slow()) {
		cpufreq_driver_slow = true;
		gd->task = kthread_create(cpufreq_sched_thread, policy,
					  "kschedfreq:%d",
					  cpumask_first(policy->related_cpus));
		if (IS_ERR_OR_NULL(gd->task)) {
			pr_err("%s: failed to create kschedfreq thread\n",
			       __func__);
			goto put_tunables;
		}
		get_task_struct(gd->task);
		kthread_bind_mask(gd->task, policy->related_cpus);
		wake_up_process(gd->task);
		init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
	}

	set_sched_freq();

	return 0;

put_tunables:
	/*
	 * Drop our tunables reference; free them only on the last put and
	 * keep global_tunables consistent. (A plain kfree() here would free
	 * tunables still referenced by other policies.)
	 */
	if (!gov_attr_set_put(&gd->tunables->attr_set, &gd->tunables_hook)) {
		if (!have_governor_per_policy())
			global_tunables = NULL;
		kfree(gd->tunables);
	}
	goto free_gd;

free_tunables:
	kfree(gd->tunables);
free_gd:
	policy->governor_data = NULL;
	kfree(gd);
	return -ENOMEM;
}

/*
 * cpufreq_sched_policy_exit() - governor ->exit callback.
 *
 * Tears down what cpufreq_sched_policy_init() set up: disables the static
 * key, stops the worker kthread (slow drivers only), drops the tunables
 * reference (freeing on last put) and frees the per-policy gov_data.
 */
static void cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
{
	unsigned int count;
	struct gov_data *gd = policy->governor_data;

	clear_sched_freq();
	/* NOTE(review): cpufreq_driver_slow is global; with mixed fast/slow
	 * drivers this could stop a kthread that was never created — confirm. */
	if (cpufreq_driver_slow) {
		kthread_stop(gd->task);
		put_task_struct(gd->task);
	}

	count = gov_attr_set_put(&gd->tunables->attr_set, &gd->tunables_hook);
	if (!count) {
		/* Last user: retire the shared pointer and free the tunables. */
		if (!have_governor_per_policy())
			global_tunables = NULL;
		kfree(gd->tunables);
	}

	policy->governor_data = NULL;

	kfree(gd);
}

/*
 * cpufreq_sched_start() - governor ->start callback: mark every CPU in
 * the policy as accepting capacity requests from the scheduler.
 */
static int cpufreq_sched_start(struct cpufreq_policy *policy)
{
	int cpu;

	for_each_cpu(cpu, policy->cpus)
		per_cpu(enabled, cpu) = 1;

	return 0;
}

/*
 * cpufreq_sched_limits() - governor ->limits callback.
 *
 * Clamps the last requested frequency into the (possibly new)
 * [policy->min, policy->max] range and applies it if it differs from the
 * currently programmed frequency.
 */
static void cpufreq_sched_limits(struct cpufreq_policy *policy)
{
	unsigned int clamp_freq;
	struct gov_data *gd = policy->governor_data;

	pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
		policy->cpu, policy->min, policy->max,
		policy->cur);

	clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);

	if (policy->cur != clamp_freq)
		__cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
}

/*
 * cpufreq_sched_stop() - governor ->stop callback: stop accepting
 * capacity requests on every CPU in the policy.
 */
static void cpufreq_sched_stop(struct cpufreq_policy *policy)
{
	int cpu;

	for_each_cpu(cpu, policy->cpus)
		per_cpu(enabled, cpu) = 0;
}


/* Exported (non-static) only when built as the default governor. */
#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
static
#endif
struct cpufreq_governor cpufreq_gov_sched = {
	.name			= "sched",
	.init                   = cpufreq_sched_policy_init,
	.exit                   = cpufreq_sched_policy_exit,
	.start                  = cpufreq_sched_start,
	.stop                   = cpufreq_sched_stop,
	.limits                 = cpufreq_sched_limits,
	.owner			= THIS_MODULE,
};

/*
 * cpufreq_sched_init() - register the 'sched' governor at boot.
 *
 * The per-CPU 'enabled' flags are cleared explicitly even though static
 * per-CPU storage is zero-initialized, keeping the intent obvious.
 */
static int __init cpufreq_sched_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		per_cpu(enabled, cpu) = 0;
	return cpufreq_register_governor(&cpufreq_gov_sched);
}

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
/* Called by the cpufreq core to pick the compile-time default governor. */
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &cpufreq_gov_sched;
}
#endif

/* Register early (fs_initcall) so the governor is available during boot. */
fs_initcall(cpufreq_sched_init);
Loading