Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c54df071 authored by Viresh Kumar, committed by Rafael J. Wysocki
Browse files

cpufreq: governor: Create and traverse list of policy_dbs to avoid deadlock



The dbs_data_mutex lock is currently used in two places.  First,
cpufreq_governor_dbs() uses it to guarantee mutual exclusion between
invocations of governor operations from the core.  Second, it is used by
the ondemand governor's update_sampling_rate() to ensure the stability of
the data structures walked by it.

The second usage is quite problematic, because update_sampling_rate() is
called from a governor sysfs attribute's ->store callback and that leads
to a deadlock scenario involving cpufreq_governor_exit() which runs
under dbs_data_mutex.  Thus it is better to rework the code so
update_sampling_rate() doesn't need to acquire dbs_data_mutex.

To that end, rework update_sampling_rate() to walk a list of policy_dbs
objects supported by the dbs_data one it has been called for (instead of
walking cpu_dbs_info object for all CPUs).  The list manipulation is
protected with dbs_data->mutex, which is also held around the execution
of update_sampling_rate(), so it is no longer necessary to hold
dbs_data_mutex in that function.

Reported-by: Juri Lelli <juri.lelli@arm.com>
Reported-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Subject & changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
parent 68e80dae
Loading
Loading
Loading
Loading
+18 −4
Original line number Diff line number Diff line
@@ -385,9 +385,14 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
			ret = -EINVAL;
			goto free_policy_dbs_info;
		}
		dbs_data->usage_count++;
		policy_dbs->dbs_data = dbs_data;
		policy->governor_data = policy_dbs;

		mutex_lock(&dbs_data->mutex);
		dbs_data->usage_count++;
		list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
		mutex_unlock(&dbs_data->mutex);

		return 0;
	}

@@ -397,7 +402,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
		goto free_policy_dbs_info;
	}

	dbs_data->usage_count = 1;
	INIT_LIST_HEAD(&dbs_data->policy_dbs_list);
	mutex_init(&dbs_data->mutex);

	ret = gov->init(dbs_data, !policy->governor->initialized);
@@ -418,9 +423,12 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
	if (!have_governor_per_policy())
		gov->gdbs_data = dbs_data;

	policy_dbs->dbs_data = dbs_data;
	policy->governor_data = policy_dbs;

	policy_dbs->dbs_data = dbs_data;
	dbs_data->usage_count = 1;
	list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);

	gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
	ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type,
				   get_governor_parent_kobj(policy),
@@ -448,12 +456,18 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy)
	struct dbs_governor *gov = dbs_governor_of(policy);
	struct policy_dbs_info *policy_dbs = policy->governor_data;
	struct dbs_data *dbs_data = policy_dbs->dbs_data;
	int count;

	/* State should be equivalent to INIT */
	if (policy_dbs->policy)
		return -EBUSY;

	if (!--dbs_data->usage_count) {
	mutex_lock(&dbs_data->mutex);
	list_del(&policy_dbs->list);
	count = --dbs_data->usage_count;
	mutex_unlock(&dbs_data->mutex);

	if (!count) {
		kobject_put(&dbs_data->kobj);

		policy->governor_data = NULL;
+6 −1
Original line number Diff line number Diff line
@@ -73,7 +73,11 @@ struct dbs_data {
	unsigned int up_threshold;

	struct kobject kobj;
	/* Protect concurrent updates to governor tunables from sysfs */
	struct list_head policy_dbs_list;
	/*
	 * Protect concurrent updates to governor tunables from sysfs,
	 * policy_dbs_list and usage_count.
	 */
	struct mutex mutex;
};

@@ -125,6 +129,7 @@ struct policy_dbs_info {
	struct work_struct work;
	/* dbs_data may be shared between multiple policy objects */
	struct dbs_data *dbs_data;
	struct list_head list;
};

static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs,
+30 −59
Original line number Diff line number Diff line
@@ -226,86 +226,57 @@ static struct dbs_governor od_dbs_gov;
 * @new_rate: new sampling rate
 *
 * If new rate is smaller than the old, simply updating
 * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the
 * dbs.sampling_rate might not be appropriate. For example, if the
 * original sampling_rate was 1 second and the requested new sampling rate is 10
 * ms because the user needs immediate reaction from ondemand governor, but not
 * sure if higher frequency will be required or not, then, the governor may
 * change the sampling rate too late; up to 1 second later. Thus, if we are
 * reducing the sampling rate, we need to make the new value effective
 * immediately.
 *
 * On the other hand, if new rate is larger than the old, then we may evaluate
 * the load too soon, and it might be worth updating sample_delay_ns then as
 * well.
 *
 * This must be called with dbs_data->mutex held, otherwise traversing
 * policy_dbs_list isn't safe.
 */
static void update_sampling_rate(struct dbs_data *dbs_data,
		unsigned int new_rate)
{
	struct cpumask cpumask;
	int cpu;
	struct policy_dbs_info *policy_dbs;

	dbs_data->sampling_rate = new_rate = max(new_rate,
			dbs_data->min_sampling_rate);

	/*
	 * Lock governor so that governor start/stop can't execute in parallel.
	 */
	mutex_lock(&dbs_data_mutex);

	cpumask_copy(&cpumask, cpu_online_mask);

	for_each_cpu(cpu, &cpumask) {
		struct cpufreq_policy *policy;
		struct od_cpu_dbs_info_s *dbs_info;
		struct cpu_dbs_info *cdbs;
		struct policy_dbs_info *policy_dbs;

		dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
		cdbs = &dbs_info->cdbs;
		policy_dbs = cdbs->policy_dbs;

		/*
		 * A valid policy_dbs and policy_dbs->policy means governor
		 * hasn't stopped or exited yet.
	 * We are operating under dbs_data->mutex and so the list and its
	 * entries can't be freed concurrently.
	 */
		if (!policy_dbs || !policy_dbs->policy)
			continue;

		policy = policy_dbs->policy;

		/* clear all CPUs of this policy */
		cpumask_andnot(&cpumask, &cpumask, policy->cpus);

		/*
		 * Update sampling rate for CPUs whose policy is governed by
		 * dbs_data. In case of governor_per_policy, only a single
		 * policy will be governed by dbs_data, otherwise there can be
		 * multiple policies that are governed by the same dbs_data.
		 */
		if (dbs_data == policy_dbs->dbs_data) {
	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
		mutex_lock(&policy_dbs->timer_mutex);
		/*
		 * On 32-bit architectures this may race with the
			 * sample_delay_ns read in dbs_update_util_handler(),
			 * but that really doesn't matter.  If the read returns
			 * a value that's too big, the sample will be skipped,
			 * but the next invocation of dbs_update_util_handler()
			 * (when the update has been completed) will take a
			 * sample.  If the returned value is too small, the
			 * sample will be taken immediately, but that isn't a
			 * problem, as we want the new rate to take effect
			 * immediately anyway.
		 * sample_delay_ns read in dbs_update_util_handler(), but that
		 * really doesn't matter.  If the read returns a value that's
		 * too big, the sample will be skipped, but the next invocation
		 * of dbs_update_util_handler() (when the update has been
		 * completed) will take a sample.  If the returned value is too
		 * small, the sample will be taken immediately, but that isn't a
		 * problem, as we want the new rate to take effect immediately
		 * anyway.
		 *
			 * If this runs in parallel with dbs_work_handler(), we
			 * may end up overwriting the sample_delay_ns value that
			 * it has just written, but the difference should not be
			 * too big and it will be corrected next time a sample
			 * is taken, so it shouldn't be significant.
		 * If this runs in parallel with dbs_work_handler(), we may end
		 * up overwriting the sample_delay_ns value that it has just
		 * written, but the difference should not be too big and it will
		 * be corrected next time a sample is taken, so it shouldn't be
		 * significant.
		 */
		gov_update_sample_delay(policy_dbs, new_rate);
		mutex_unlock(&policy_dbs->timer_mutex);
	}
}

	mutex_unlock(&dbs_data_mutex);
}

static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
		size_t count)
{