Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 34e2c555 authored by Rafael J. Wysocki
Browse files

cpufreq: Add mechanism for registering utilization update callbacks



Introduce a mechanism by which parts of the cpufreq subsystem
("setpolicy" drivers or the core) can register callbacks to be
executed from cpufreq_update_util() which is invoked by the
scheduler's update_load_avg() on CPU utilization changes.

This allows the "setpolicy" drivers to dispense with their timers
and do all of the computations they need and frequency/voltage
adjustments in the update_load_avg() code path, among other things.

The update_load_avg() changes were suggested by Peter Zijlstra.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
parent de1df26b
Loading
Loading
Loading
Loading
+45 −0
Original line number Original line Diff line number Diff line
@@ -102,6 +102,51 @@ static LIST_HEAD(cpufreq_governor_list);
static struct cpufreq_driver *cpufreq_driver;
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
static DEFINE_RWLOCK(cpufreq_driver_lock);
static DEFINE_RWLOCK(cpufreq_driver_lock);

/*
 * Per-CPU pointer to the utilization update hook.  It is written with
 * rcu_assign_pointer() by cpufreq_set_update_util_data() and read with
 * rcu_dereference() under rcu_read_lock() by cpufreq_update_util(), which
 * tolerates the pointer being NULL.
 */
static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);

/**
 * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
 * @cpu: The CPU to set the pointer for.
 * @data: New pointer value; it is stored as given, without any checks.
 *
 * Set and publish the update_util_data pointer for the given CPU.  That pointer
 * points to a struct update_util_data object containing a callback function
 * to call from cpufreq_update_util().  That function will be called from an RCU
 * read-side critical section, so it must not sleep.
 *
 * This function neither frees nor waits for anything itself.  Callers must use
 * RCU callbacks to free any memory that might be accessed via the old
 * update_util_data pointer or invoke synchronize_rcu() right after this
 * function to avoid use-after-free.
 */
void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
{
	/* Pairs with the rcu_dereference() in cpufreq_update_util(). */
	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
}
EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);

/**
 * cpufreq_update_util - Pass a CPU utilization update to the registered hook.
 * @time: Current time.
 * @util: Current utilization.
 * @max: Utilization ceiling.
 *
 * Look up the update_util_data pointer of the CPU this code is running on and,
 * provided that both the pointer and its callback are set, invoke the callback
 * with the arguments given.  The lookup and the callback both take place inside
 * an RCU read-side critical section.
 *
 * The scheduler calls this from update_load_avg() for the CPU whose
 * utilization is being updated.
 */
void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
{
	struct update_util_data *hook;

	rcu_read_lock();

	hook = rcu_dereference(*this_cpu_ptr(&cpufreq_update_util_data));
	/* Nothing registered for this CPU, or no callback to run. */
	if (!hook || !hook->func)
		goto unlock;

	hook->func(hook, time, util, max);

unlock:
	rcu_read_unlock();
}

DEFINE_MUTEX(cpufreq_governor_lock);
DEFINE_MUTEX(cpufreq_governor_lock);


/* Flag to suspend/resume CPUFreq governors */
/* Flag to suspend/resume CPUFreq governors */
+34 −0
Original line number Original line Diff line number Diff line
@@ -151,6 +151,36 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy)
extern struct kobject *cpufreq_global_kobject;
extern struct kobject *cpufreq_global_kobject;


#ifdef CONFIG_CPU_FREQ
#ifdef CONFIG_CPU_FREQ
void cpufreq_update_util(u64 time, unsigned long util, unsigned long max);

/**
 * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
 * @time: Current time.
 *
 * The way cpufreq is currently arranged requires it to evaluate the CPU
 * performance state (frequency/voltage) on a regular basis to prevent it from
 * being stuck in a completely inadequate performance level for too long.
 * That is not guaranteed to happen if the updates are only triggered from CFS,
 * though, because they may not be coming in if RT or deadline tasks are active
 * all the time (or there are RT and DL tasks only).
 *
 * As a workaround for that issue, this function is called by the RT and DL
 * sched classes to trigger extra cpufreq updates to prevent it from stalling,
 * but that really is a band-aid.  Going forward it should be replaced with
 * solutions targeted more specifically at RT and DL tasks.
 */
static inline void cpufreq_trigger_update(u64 time)
{
	/*
	 * No utilization figure is supplied here: util = ULONG_MAX together
	 * with max = 0 is handed to cpufreq_update_util() unchanged.
	 */
	cpufreq_update_util(time, ULONG_MAX, 0);
}

/**
 * struct update_util_data - Holder of a utilization update callback.
 * @func: Callback taking a pointer to a struct update_util_data followed by
 *	  the time, utilization and utilization ceiling values.
 */
struct update_util_data {
	void (*func)(struct update_util_data *data,
		     u64 time, unsigned long util, unsigned long max);
};

void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);

unsigned int cpufreq_get(unsigned int cpu);
unsigned int cpufreq_get(unsigned int cpu);
unsigned int cpufreq_quick_get(unsigned int cpu);
unsigned int cpufreq_quick_get(unsigned int cpu);
unsigned int cpufreq_quick_get_max(unsigned int cpu);
unsigned int cpufreq_quick_get_max(unsigned int cpu);
@@ -162,6 +192,10 @@ int cpufreq_update_policy(unsigned int cpu);
bool have_governor_per_policy(void);
bool have_governor_per_policy(void);
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
#else
#else
/*
 * Empty stubs for this #else branch, so that callers need no #ifdef of their
 * own.  NOTE(review): presumably the !CONFIG_CPU_FREQ case -- confirm against
 * the matching #ifdef.
 */
static inline void cpufreq_update_util(u64 time, unsigned long util,
				       unsigned long max) {}
static inline void cpufreq_trigger_update(u64 time) {}

static inline unsigned int cpufreq_get(unsigned int cpu)
static inline unsigned int cpufreq_get(unsigned int cpu)
{
{
	return 0;
	return 0;
+4 −0
Original line number Original line Diff line number Diff line
@@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq)
	if (!dl_task(curr) || !on_dl_rq(dl_se))
	if (!dl_task(curr) || !on_dl_rq(dl_se))
		return;
		return;


	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
	if (cpu_of(rq) == smp_processor_id())
		cpufreq_trigger_update(rq_clock(rq));

	/*
	/*
	 * Consumed budget is computed considering the time as
	 * Consumed budget is computed considering the time as
	 * observed by schedulable tasks (excluding time spent
	 * observed by schedulable tasks (excluding time spent
+25 −1
Original line number Original line Diff line number Diff line
@@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
{
{
	struct cfs_rq *cfs_rq = cfs_rq_of(se);
	struct cfs_rq *cfs_rq = cfs_rq_of(se);
	u64 now = cfs_rq_clock_task(cfs_rq);
	u64 now = cfs_rq_clock_task(cfs_rq);
	int cpu = cpu_of(rq_of(cfs_rq));
	struct rq *rq = rq_of(cfs_rq);
	int cpu = cpu_of(rq);


	/*
	/*
	 * Track task load average for carrying it to new CPU after migrated, and
	 * Track task load average for carrying it to new CPU after migrated, and
@@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)


	if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
	if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
		update_tg_load_avg(cfs_rq, 0);
		update_tg_load_avg(cfs_rq, 0);

	if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
		unsigned long max = rq->cpu_capacity_orig;

		/*
		 * There are a few boundary cases this might miss but it should
		 * get called often enough that that should (hopefully) not be
		 * a real problem -- added to that it only calls on the local
		 * CPU, so if we enqueue remotely we'll miss an update, but
		 * the next tick/schedule should update.
		 *
		 * It will not get called when we go idle, because the idle
		 * thread is a different class (!fair), nor will the utilization
		 * number include things like RT tasks.
		 *
		 * As is, the util number is not freq-invariant (we'd have to
		 * implement arch_scale_freq_capacity() for that).
		 *
		 * See cpu_util().
		 */
		cpufreq_update_util(rq_clock(rq),
				    min(cfs_rq->avg.util_avg, max), max);
	}
}
}


static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
+4 −0
Original line number Original line Diff line number Diff line
@@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq)
	if (curr->sched_class != &rt_sched_class)
	if (curr->sched_class != &rt_sched_class)
		return;
		return;


	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
	if (cpu_of(rq) == smp_processor_id())
		cpufreq_trigger_update(rq_clock(rq));

	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
	if (unlikely((s64)delta_exec <= 0))
	if (unlikely((s64)delta_exec <= 0))
		return;
		return;
Loading