Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5a01f2e8 authored by Venkatesh Pallipadi's avatar Venkatesh Pallipadi Committed by Dave Jones
Browse files

[CPUFREQ] Rewrite lock in cpufreq to eliminate cpufreq/hotplug related issues



Yet another attempt to resolve cpufreq and hotplug locking issues.

Patchset has 3 patches:
* Rewrite the lock infrastructure of cpufreq using a per cpu rwsem.
* Minor restructuring of work callback in ondemand driver.
* Use the new cpufreq rwsem infrastructure in ondemand work.

This patch:

Convert policy->lock to rwsem and move it to per_cpu area.
This rwsem will protect against both changing/accessing policy
related parameters and CPU hot plug/unplug.

[malattia@linux.it: fix oops in kref_put()]
Cc: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: default avatarVenkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: default avatarMattia Dongili <malattia@linux.it>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarDave Jones <davej@redhat.com>
parent c1200697
Loading
Loading
Loading
Loading
+181 −63
Original line number Diff line number Diff line
@@ -41,8 +41,67 @@ static struct cpufreq_driver *cpufreq_driver;
static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
static DEFINE_SPINLOCK(cpufreq_driver_lock);

/*
 * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
 * all cpufreq/hotplug/workqueue/etc related lock issues.
 *
 * The rules for this semaphore:
 * - Any routine that wants to read from the policy structure will
 *   do a down_read on this semaphore.
 * - Any routine that will write to the policy structure and/or may take away
 *   the policy altogether (eg. CPU hotplug), will hold this lock in write
 *   mode before doing so.
 *
 * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with are online after they get the lock.
 * - Governor routines that can be called in cpufreq hotplug path should not
 *   take this sem as top level hotplug notifier handler takes this.
 */
static DEFINE_PER_CPU(int, policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);

#define lock_policy_rwsem(mode, cpu)					\
int lock_policy_rwsem_##mode						\
(int cpu)								\
{									\
	int policy_cpu = per_cpu(policy_cpu, cpu);			\
	BUG_ON(policy_cpu == -1);					\
	down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));		\
	if (unlikely(!cpu_online(cpu))) {				\
		up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));	\
		return -1;						\
	}								\
									\
	return 0;							\
}

lock_policy_rwsem(read, cpu);
EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);

lock_policy_rwsem(write, cpu);
EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);

void unlock_policy_rwsem_read(int cpu)
{
	int policy_cpu = per_cpu(policy_cpu, cpu);
	BUG_ON(policy_cpu == -1);
	up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);

void unlock_policy_rwsem_write(int cpu)
{
	int policy_cpu = per_cpu(policy_cpu, cpu);
	BUG_ON(policy_cpu == -1);
	up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);


/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);

/**
@@ -415,10 +474,8 @@ static ssize_t store_##file_name \
	if (ret != 1)							\
		return -EINVAL;						\
									\
	mutex_lock(&policy->lock);					\
	ret = __cpufreq_set_policy(policy, &new_policy);		\
	policy->user_policy.object = policy->object;			\
	mutex_unlock(&policy->lock);					\
									\
	return ret ? ret : count;					\
}
@@ -432,7 +489,7 @@ store_one(scaling_max_freq,max);
static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy,
							char *buf)
{
	unsigned int cur_freq = cpufreq_get(policy->cpu);
	unsigned int cur_freq = __cpufreq_get(policy->cpu);
	if (!cur_freq)
		return sprintf(buf, "<unknown>");
	return sprintf(buf, "%u\n", cur_freq);
@@ -479,12 +536,10 @@ static ssize_t store_scaling_governor (struct cpufreq_policy * policy,

	/* Do not use cpufreq_set_policy here or the user_policy.max
	   will be wrongly overridden */
	mutex_lock(&policy->lock);
	ret = __cpufreq_set_policy(policy, &new_policy);

	policy->user_policy.policy = policy->policy;
	policy->user_policy.governor = policy->governor;
	mutex_unlock(&policy->lock);

	if (ret)
		return ret;
@@ -589,11 +644,17 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf)
	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		return -EINVAL;

	if (lock_policy_rwsem_read(policy->cpu) < 0)
		return -EINVAL;

	if (fattr->show)
		ret = fattr->show(policy, buf);
	else
		ret = -EIO;

	unlock_policy_rwsem_read(policy->cpu);

	cpufreq_cpu_put(policy);
	return ret;
}
@@ -607,11 +668,17 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr,
	policy = cpufreq_cpu_get(policy->cpu);
	if (!policy)
		return -EINVAL;

	if (lock_policy_rwsem_write(policy->cpu) < 0)
		return -EINVAL;

	if (fattr->store)
		ret = fattr->store(policy, buf, count);
	else
		ret = -EIO;

	unlock_policy_rwsem_write(policy->cpu);

	cpufreq_cpu_put(policy);
	return ret;
}
@@ -685,8 +752,10 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
	policy->cpu = cpu;
	policy->cpus = cpumask_of_cpu(cpu);

	mutex_init(&policy->lock);
	mutex_lock(&policy->lock);
	/* Initially set CPU itself as the policy_cpu */
	per_cpu(policy_cpu, cpu) = cpu;
	lock_policy_rwsem_write(cpu);

	init_completion(&policy->kobj_unregister);
	INIT_WORK(&policy->update, handle_update);

@@ -696,7 +765,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
	ret = cpufreq_driver->init(policy);
	if (ret) {
		dprintk("initialization failed\n");
		mutex_unlock(&policy->lock);
		unlock_policy_rwsem_write(cpu);
		goto err_out;
	}

@@ -710,6 +779,14 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
		 */
		managed_policy = cpufreq_cpu_get(j);
		if (unlikely(managed_policy)) {

			/* Set proper policy_cpu */
			unlock_policy_rwsem_write(cpu);
			per_cpu(policy_cpu, cpu) = managed_policy->cpu;

			if (lock_policy_rwsem_write(cpu) < 0)
				goto err_out_driver_exit;

			spin_lock_irqsave(&cpufreq_driver_lock, flags);
			managed_policy->cpus = policy->cpus;
			cpufreq_cpu_data[cpu] = managed_policy;
@@ -720,13 +797,13 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
						&managed_policy->kobj,
						"cpufreq");
			if (ret) {
				mutex_unlock(&policy->lock);
				unlock_policy_rwsem_write(cpu);
				goto err_out_driver_exit;
			}

			cpufreq_debug_enable_ratelimit();
			mutex_unlock(&policy->lock);
			ret = 0;
			unlock_policy_rwsem_write(cpu);
			goto err_out_driver_exit; /* call driver->exit() */
		}
	}
@@ -740,7 +817,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)

	ret = kobject_register(&policy->kobj);
	if (ret) {
		mutex_unlock(&policy->lock);
		unlock_policy_rwsem_write(cpu);
		goto err_out_driver_exit;
	}
	/* set up files for this cpu device */
@@ -755,8 +832,10 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
		sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);

	spin_lock_irqsave(&cpufreq_driver_lock, flags);
	for_each_cpu_mask(j, policy->cpus)
	for_each_cpu_mask(j, policy->cpus) {
		cpufreq_cpu_data[j] = policy;
		per_cpu(policy_cpu, j) = policy->cpu;
	}
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);

	/* symlink affected CPUs */
@@ -772,14 +851,14 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
		ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
					"cpufreq");
		if (ret) {
			mutex_unlock(&policy->lock);
			unlock_policy_rwsem_write(cpu);
			goto err_out_unregister;
		}
	}

	policy->governor = NULL; /* to assure that the starting sequence is
				  * run in cpufreq_set_policy */
	mutex_unlock(&policy->lock);
	unlock_policy_rwsem_write(cpu);

	/* set default policy */
	ret = cpufreq_set_policy(&new_policy);
@@ -820,11 +899,13 @@ module_out:


/**
 * cpufreq_remove_dev - remove a CPU device
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 */
static int cpufreq_remove_dev (struct sys_device * sys_dev)
static int __cpufreq_remove_dev (struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long flags;
@@ -843,6 +924,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
	if (!data) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		cpufreq_debug_enable_ratelimit();
		unlock_policy_rwsem_write(cpu);
		return -EINVAL;
	}
	cpufreq_cpu_data[cpu] = NULL;
@@ -859,6 +941,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
		sysfs_remove_link(&sys_dev->kobj, "cpufreq");
		cpufreq_cpu_put(data);
		cpufreq_debug_enable_ratelimit();
		unlock_policy_rwsem_write(cpu);
		return 0;
	}
#endif
@@ -867,6 +950,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
	if (!kobject_get(&data->kobj)) {
		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
		cpufreq_debug_enable_ratelimit();
		unlock_policy_rwsem_write(cpu);
		return -EFAULT;
	}

@@ -900,10 +984,10 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif

	mutex_lock(&data->lock);
	if (cpufreq_driver->target)
		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
	mutex_unlock(&data->lock);

	unlock_policy_rwsem_write(cpu);

	kobject_unregister(&data->kobj);

@@ -927,6 +1011,18 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
}


static int cpufreq_remove_dev (struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	int retval;
	if (unlikely(lock_policy_rwsem_write(cpu)))
		BUG();

	retval = __cpufreq_remove_dev(sys_dev);
	return retval;
}


static void handle_update(struct work_struct *work)
{
	struct cpufreq_policy *policy =
@@ -974,9 +1070,12 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
	unsigned int ret_freq = 0;

	if (policy) {
		mutex_lock(&policy->lock);
		if (unlikely(lock_policy_rwsem_read(cpu)))
			return ret_freq;

		ret_freq = policy->cur;
		mutex_unlock(&policy->lock);

		unlock_policy_rwsem_read(cpu);
		cpufreq_cpu_put(policy);
	}

@@ -985,24 +1084,13 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
EXPORT_SYMBOL(cpufreq_quick_get);


/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the CPU current (static) CPU frequency
 */
unsigned int cpufreq_get(unsigned int cpu)
static unsigned int __cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
	unsigned int ret_freq = 0;

	if (!policy)
		return 0;

	if (!cpufreq_driver->get)
		goto out;

	mutex_lock(&policy->lock);
		return (ret_freq);

	ret_freq = cpufreq_driver->get(cpu);

@@ -1016,11 +1104,33 @@ unsigned int cpufreq_get(unsigned int cpu)
		}
	}

	mutex_unlock(&policy->lock);
	return (ret_freq);
}

out:
	cpufreq_cpu_put(policy);
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the CPU current (static) CPU frequency
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	unsigned int ret_freq = 0;
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

	if (!policy)
		goto out;

	if (unlikely(lock_policy_rwsem_read(cpu)))
		goto out_policy;

	ret_freq = __cpufreq_get(cpu);

	unlock_policy_rwsem_read(cpu);

out_policy:
	cpufreq_cpu_put(policy);
out:
	return (ret_freq);
}
EXPORT_SYMBOL(cpufreq_get);
@@ -1297,18 +1407,19 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
	if (!policy)
		return -EINVAL;

	mutex_lock(&policy->lock);
	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
		return -EINVAL;

	ret = __cpufreq_driver_target(policy, target_freq, relation);

	mutex_unlock(&policy->lock);
	unlock_policy_rwsem_write(policy->cpu);

	cpufreq_cpu_put(policy);
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);

int cpufreq_driver_getavg(struct cpufreq_policy *policy)
int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
{
	int ret = 0;

@@ -1316,17 +1427,13 @@ int cpufreq_driver_getavg(struct cpufreq_policy *policy)
	if (!policy)
		return -EINVAL;

	mutex_lock(&policy->lock);

	if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
		ret = cpufreq_driver->getavg(policy->cpu);

	mutex_unlock(&policy->lock);

	cpufreq_cpu_put(policy);
	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_getavg);
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * when "event" is CPUFREQ_GOV_LIMITS
@@ -1410,9 +1517,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
	if (!cpu_policy)
		return -EINVAL;

	mutex_lock(&cpu_policy->lock);
	memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
	mutex_unlock(&cpu_policy->lock);

	cpufreq_cpu_put(cpu_policy);
	return 0;
@@ -1528,8 +1633,9 @@ int cpufreq_set_policy(struct cpufreq_policy *policy)
	if (!data)
		return -EINVAL;

	/* lock this CPU */
	mutex_lock(&data->lock);
	if (unlikely(lock_policy_rwsem_write(policy->cpu)))
		return -EINVAL;


	ret = __cpufreq_set_policy(data, policy);
	data->user_policy.min = data->min;
@@ -1537,7 +1643,7 @@ int cpufreq_set_policy(struct cpufreq_policy *policy)
	data->user_policy.policy = data->policy;
	data->user_policy.governor = data->governor;

	mutex_unlock(&data->lock);
	unlock_policy_rwsem_write(policy->cpu);

	cpufreq_cpu_put(data);

@@ -1562,7 +1668,8 @@ int cpufreq_update_policy(unsigned int cpu)
	if (!data)
		return -ENODEV;

	mutex_lock(&data->lock);
	if (unlikely(lock_policy_rwsem_write(cpu)))
		return -EINVAL;

	dprintk("updating policy for CPU %u\n", cpu);
	memcpy(&policy, data, sizeof(struct cpufreq_policy));
@@ -1587,7 +1694,8 @@ int cpufreq_update_policy(unsigned int cpu)

	ret = __cpufreq_set_policy(data, &policy);

	mutex_unlock(&data->lock);
	unlock_policy_rwsem_write(cpu);

	cpufreq_cpu_put(data);
	return ret;
}
@@ -1597,31 +1705,28 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct cpufreq_policy *policy;
	struct sys_device *sys_dev;
	struct cpufreq_policy *policy;

	sys_dev = get_cpu_sysdev(cpu);

	if (sys_dev) {
		switch (action) {
		case CPU_ONLINE:
			cpufreq_add_dev(sys_dev);
			break;
		case CPU_DOWN_PREPARE:
			/*
			 * We attempt to put this cpu in lowest frequency
			 * possible before going down. This will permit
			 * hardware-managed P-State to switch other related
			 * threads to min or higher speeds if possible.
			 */
			if (unlikely(lock_policy_rwsem_write(cpu)))
				BUG();

			policy = cpufreq_cpu_data[cpu];
			if (policy) {
				cpufreq_driver_target(policy, policy->min,
				__cpufreq_driver_target(policy, policy->min,
						CPUFREQ_RELATION_H);
			}
			__cpufreq_remove_dev(sys_dev);
			break;
		case CPU_DEAD:
			cpufreq_remove_dev(sys_dev);
		case CPU_DOWN_FAILED:
			cpufreq_add_dev(sys_dev);
			break;
		}
	}
@@ -1735,3 +1840,16 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

static int __init cpufreq_core_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(policy_cpu, cpu) = -1;
		init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
	}
	return 0;
}

core_initcall(cpufreq_core_init);
+6 −4
Original line number Diff line number Diff line
@@ -84,9 +84,6 @@ struct cpufreq_policy {
        unsigned int		policy; /* see above */
	struct cpufreq_governor	*governor; /* see below */

 	struct mutex		lock;   /* CPU ->setpolicy or ->target may
					   only be called once a time */

	struct work_struct	update; /* if update_policy() needs to be
					 * called, but you're in IRQ context */

@@ -172,11 +169,16 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
				   unsigned int relation);


extern int cpufreq_driver_getavg(struct cpufreq_policy *policy);
extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy);

int cpufreq_register_governor(struct cpufreq_governor *governor);
void cpufreq_unregister_governor(struct cpufreq_governor *governor);

int lock_policy_rwsem_read(int cpu);
int lock_policy_rwsem_write(int cpu);
void unlock_policy_rwsem_read(int cpu);
void unlock_policy_rwsem_write(int cpu);


/*********************************************************************
 *                      CPUFREQ DRIVER INTERFACE                     *