Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit eadba2a3 authored by Greg Kroah-Hartman, committed by Alistair Delva
Browse files

Revert "FROMLIST: thermal: cpu_cooling: Migrate to using the EM framework"



This reverts commit f3e98119.

We want to back-out the eas-dev merge that happened in the tree after
5.3-rc1 as those patches "should" all be in Linus's tree now.

This is done to handle the merge conflicts with 5.4-rc1.

Cc: Todd Kjos <tkjos@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I5afe7d731264b20090a3c8a2237ba5c45ba01716
parent a02c37bc
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -144,7 +144,6 @@ config THERMAL_GOV_USER_SPACE

config THERMAL_GOV_POWER_ALLOCATOR
	bool "Power allocator thermal governor"
	depends on ENERGY_MODEL
	help
	  Enable this to manage platform thermals by dynamically
	  allocating and limiting power to devices.
+160 −90
Original line number Diff line number Diff line
@@ -19,7 +19,6 @@
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpu_cooling.h>
#include <linux/energy_model.h>

#include <trace/events/thermal.h>

@@ -37,6 +36,21 @@
 *	...
 */

/**
 * struct freq_table - frequency table along with power entries
 * @frequency:	frequency in KHz
 * @power:	power in mW; this member is only compiled in when
 *		CONFIG_THERMAL_GOV_POWER_ALLOCATOR is enabled
 *
 * This structure is built when the cooling device registers and helps
 * in translating frequency to power and vice versa.
 */
struct freq_table {
	u32 frequency;
#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
	/* Only needed by the power allocator governor callbacks */
	u32 power;
#endif
};

/**
 * struct time_in_idle - Idle time stats
 * @time: previous reading of the absolute time that this cpu was idle
@@ -58,7 +72,7 @@ struct time_in_idle {
 *	frequency.
 * @max_level: maximum cooling level. One less than total number of valid
 *	cpufreq frequencies.
 * @em: Reference on the Energy Model of the device
 * @freq_table: Freq table in descending order of frequencies
 * @cdev: thermal_cooling_device pointer to keep track of the
 *	registered cooling device.
 * @policy: cpufreq policy.
@@ -74,7 +88,7 @@ struct cpufreq_cooling_device {
	unsigned int cpufreq_state;
	unsigned int clipped_freq;
	unsigned int max_level;
	struct em_perf_domain *em;
	struct freq_table *freq_table;	/* In descending order */
	struct cpufreq_policy *policy;
	struct list_head node;
	struct time_in_idle *idle_time;
@@ -148,40 +162,114 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
			       unsigned int freq)
{
	/*
	 * NOTE(review): this listing is a rendered diff with the +/- markers
	 * stripped, so two versions of the body are interleaved below. The
	 * lines that index cpufreq_cdev->em->table with 'i' presumably belong
	 * to the pre-revert code, and the lines that index freq_table with
	 * 'level' to the restored code - confirm against the actual patch
	 * before treating this as compilable C.
	 *
	 * In the freq_table variant the scan starts at level 1 and stops at
	 * the first entry whose frequency is lower than @freq; the level just
	 * before that entry is returned. If no entry is lower, max_level is
	 * returned.
	 */
	int i;
	struct freq_table *freq_table = cpufreq_cdev->freq_table;
	unsigned long level;

	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
		if (freq > cpufreq_cdev->em->table[i].frequency)
	for (level = 1; level <= cpufreq_cdev->max_level; level++)
		if (freq > freq_table[level].frequency)
			break;

	return level - 1;
}

/**
 * update_freq_table() - Update the freq table with power numbers
 * @cpufreq_cdev:	the cpufreq cooling device in which to update the table
 * @capacitance: dynamic power coefficient for these cpus
 *
 * Update the freq table with power numbers.  This table will be used in
 * cpu_power_to_freq() and cpu_freq_to_power() to convert between power and
 * frequency efficiently.  Power is stored in mW, frequency in KHz.  The
 * table is expected to already hold the frequencies (in descending order);
 * only the power member of each entry is written here.
 *
 * Return: 0 on success, -ENODEV if there is no device for the policy's CPU,
 * the negative value returned by dev_pm_opp_get_opp_count() if the OPP count
 * cannot be read, or -EINVAL if the OPP count does not equal max_level + 1
 * or an OPP lookup for one of the table frequencies fails.
 */
static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 capacitance)
{
	struct freq_table *freq_table = cpufreq_cdev->freq_table;
	struct dev_pm_opp *opp;
	struct device *dev = NULL;
	int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i;

	dev = get_cpu_device(cpu);
	if (unlikely(!dev)) {
		pr_warn("No cpu device for cpu %d\n", cpu);
		return -ENODEV;
	}

	num_opps = dev_pm_opp_get_opp_count(dev);
	if (num_opps < 0)
		return num_opps;

	/*
	 * The cpufreq table is also built from the OPP table and so the count
	 * should match.
	 */
	if (num_opps != cpufreq_cdev->max_level + 1) {
		dev_warn(dev, "Number of OPPs not matching with max_levels\n");
		return -EINVAL;
	}

	/*
	 * NOTE(review): this listing is a rendered diff with the +/- markers
	 * stripped. The return below reads 'i' before it is assigned and
	 * would make the loop that follows unreachable; it presumably is a
	 * removed line of the pre-revert get_level() - confirm against the
	 * actual patch.
	 */
	return cpufreq_cdev->max_level - i - 1;
	for (i = 0; i <= cpufreq_cdev->max_level; i++) {
		/* Table frequency is in KHz: scale up for the OPP lookup ... */
		unsigned long freq = freq_table[i].frequency * 1000;
		/* ... and down to MHz for the power computation. */
		u32 freq_mhz = freq_table[i].frequency / 1000;
		u64 power;
		u32 voltage_mv;

		/*
		 * Find ceil frequency as 'freq' may be slightly lower than OPP
		 * freq due to truncation while converting to kHz.
		 */
		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
		if (IS_ERR(opp)) {
			dev_err(dev, "failed to get opp for %lu frequency\n",
				freq);
			return -EINVAL;
		}

		/* presumably the OPP voltage is in uV; scaled to mV here */
		voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
		/* Drop the OPP reference as soon as the voltage is read */
		dev_pm_opp_put(opp);

		/*
		 * Do the multiplication with MHz and millivolt so as
		 * to not overflow.
		 */
		power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
		do_div(power, 1000000000);

		/* power is stored in mW */
		freq_table[i].power = power;
	}

	return 0;
}

/*
 * cpu_freq_to_power() - look up the table power for a frequency
 *
 * NOTE(review): this listing is a rendered diff with the +/- markers
 * stripped, so two versions of the body are interleaved. The lines that
 * index cpufreq_cdev->em->table presumably belong to the pre-revert code and
 * the lines that index freq_table to the restored code - confirm against
 * the actual patch.
 *
 * In the freq_table variant the scan starts at index 1 and stops at the
 * first entry whose frequency is lower than @freq; the power of the entry
 * just before it is returned.
 */
static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 freq)
{
	int i;
	struct freq_table *freq_table = cpufreq_cdev->freq_table;

	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
		if (freq > cpufreq_cdev->em->table[i].frequency)
	for (i = 1; i <= cpufreq_cdev->max_level; i++)
		if (freq > freq_table[i].frequency)
			break;
	}

	return cpufreq_cdev->em->table[i + 1].power;
	return freq_table[i - 1].power;
}

/*
 * cpu_power_to_freq() - look up the table frequency for a power value
 *
 * NOTE(review): this listing is a rendered diff with the +/- markers
 * stripped, so two versions of the body are interleaved. The lines that
 * index cpufreq_cdev->em->table presumably belong to the pre-revert code and
 * the lines that index freq_table to the restored code - confirm against
 * the actual patch.
 *
 * In the freq_table variant the scan starts at index 1 and stops at the
 * first entry whose power is lower than @power; the frequency of the entry
 * just before it is returned.
 */
static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 power)
{
	int i;
	struct freq_table *freq_table = cpufreq_cdev->freq_table;

	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
		if (power > cpufreq_cdev->em->table[i].power)
	for (i = 1; i <= cpufreq_cdev->max_level; i++)
		if (power > freq_table[i].power)
			break;
	}

	return cpufreq_cdev->em->table[i + 1].frequency;
	return freq_table[i - 1].frequency;
}

/**
@@ -322,7 +410,7 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
			       struct thermal_zone_device *tz,
			       unsigned long state, u32 *power)
{
	unsigned int freq, num_cpus, idx;
	unsigned int freq, num_cpus;
	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;

	/* Request state should be less than max_level */
@@ -331,8 +419,7 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,

	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);

	idx = cpufreq_cdev->max_level - state;
	freq = cpufreq_cdev->em->table[idx].frequency;
	freq = cpufreq_cdev->freq_table[state].frequency;
	*power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;

	return 0;
@@ -376,60 +463,8 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
				      power);
	return 0;
}

/**
 * em_is_sane() - check that an Energy Model fits a cpufreq cooling device
 * @cpufreq_cdev: cooling device whose policy and max_level are compared
 * @em: Energy Model performance domain to validate, may be NULL
 *
 * The model is accepted only when it is non-NULL, when its CPU span equals
 * the policy's related_cpus, and when its number of capacity states equals
 * max_level + 1.  A span or state-count mismatch is reported with pr_err().
 *
 * Return: true if @em can be used for @cpufreq_cdev, false otherwise.
 */
static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
			      struct em_perf_domain *em)
{
	struct cpufreq_policy *pol;
	unsigned int wanted_states;

	/* No model at all: nothing to validate, and nothing to report */
	if (!em)
		return false;

	pol = cpufreq_cdev->policy;
	wanted_states = cpufreq_cdev->max_level + 1;

	if (cpumask_equal(pol->related_cpus, to_cpumask(em->cpus))) {
		/* Spans agree; the state count is the last thing to check */
		if (em->nr_cap_states == wanted_states)
			return true;

		pr_err("The number of cap states in pd %*pbl (%u) doesn't match the number of cooling levels (%u)\n",
		       cpumask_pr_args(to_cpumask(em->cpus)),
		       em->nr_cap_states, wanted_states);
		return false;
	}

	pr_err("The span of pd %*pbl is misaligned with cpufreq policy %*pbl\n",
	       cpumask_pr_args(to_cpumask(em->cpus)),
	       cpumask_pr_args(pol->related_cpus));
	return false;
}
#endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */

/*
 * get_state_freq() - frequency that corresponds to a cooling state
 * @cpufreq_cdev: cooling device to look the state up for
 * @state: cooling state to translate
 *
 * With CONFIG_THERMAL_GOV_POWER_ALLOCATOR and an Energy Model attached, the
 * frequency comes from the model's table at index max_level - state.
 * Otherwise it comes from the policy's cpufreq table, indexed by
 * max_level - state when that table is sorted ascending and by state
 * otherwise.
 */
static unsigned int get_state_freq(struct cpufreq_cooling_device *cpufreq_cdev,
				   unsigned long state)
{
	struct cpufreq_policy *pol = cpufreq_cdev->policy;
	/* Index to use with tables sorted by ascending frequency */
	unsigned long mirrored = cpufreq_cdev->max_level - state;

#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
	/* Use the Energy Model table if available */
	if (cpufreq_cdev->em)
		return cpufreq_cdev->em->table[mirrored].frequency;
#endif

	/* Otherwise, fallback on the CPUFreq table */
	if (pol->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
		return pol->freq_table[mirrored].frequency;

	return pol->freq_table[state].frequency;
}


/* cpufreq cooling device callback functions are defined below */

/**
@@ -495,7 +530,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
	if (cpufreq_cdev->cpufreq_state == state)
		return 0;

	clip_freq = get_state_freq(cpufreq_cdev, state);
	clip_freq = cpufreq_cdev->freq_table[state].frequency;
	cpufreq_cdev->cpufreq_state = state;
	cpufreq_cdev->clipped_freq = clip_freq;

@@ -517,12 +552,26 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
	.notifier_call = cpufreq_thermal_notifier,
};

/**
 * find_next_max() - largest valid table frequency strictly below a bound
 * @table: cpufreq frequency table to scan
 * @prev_max: exclusive upper bound for the frequency being looked for
 *
 * Return: the highest frequency among the valid entries of @table that is
 * lower than @prev_max, or 0 if no valid entry is lower than @prev_max.
 */
static unsigned int find_next_max(struct cpufreq_frequency_table *table,
				  unsigned int prev_max)
{
	struct cpufreq_frequency_table *entry;
	unsigned int best = 0;

	cpufreq_for_each_valid_entry(entry, table) {
		unsigned int candidate = entry->frequency;

		/* Skip anything at/above the bound or not better than best */
		if (candidate >= prev_max || candidate <= best)
			continue;

		best = candidate;
	}

	return best;
}

/**
 * __cpufreq_cooling_register - helper function to create cpufreq cooling device
 * @np: a valid struct device_node to the cooling device device tree node
 * @policy: cpufreq policy
 * Normally this should be same as cpufreq policy->related_cpus.
 * @em: Energy Model of the cpufreq policy
 * @capacitance: dynamic power coefficient for these cpus
 *
 * This interface function registers the cpufreq cooling device with the name
 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -534,13 +583,12 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
 */
static struct thermal_cooling_device *
__cpufreq_cooling_register(struct device_node *np,
			struct cpufreq_policy *policy,
			struct em_perf_domain *em)
			struct cpufreq_policy *policy, u32 capacitance)
{
	struct thermal_cooling_device *cdev;
	struct cpufreq_cooling_device *cpufreq_cdev;
	char dev_name[THERMAL_NAME_LENGTH];
	unsigned int i, num_cpus;
	unsigned int freq, i, num_cpus;
	int ret;
	struct thermal_cooling_device_ops *cooling_ops;
	bool first;
@@ -574,38 +622,55 @@ __cpufreq_cooling_register(struct device_node *np,
	/* max_level is an index, not a counter */
	cpufreq_cdev->max_level = i - 1;

	cpufreq_cdev->freq_table = kmalloc_array(i,
					sizeof(*cpufreq_cdev->freq_table),
					GFP_KERNEL);
	if (!cpufreq_cdev->freq_table) {
		cdev = ERR_PTR(-ENOMEM);
		goto free_idle_time;
	}

	ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
	if (ret < 0) {
		cdev = ERR_PTR(ret);
		goto free_idle_time;
		goto free_table;
	}
	cpufreq_cdev->id = ret;

	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
		 cpufreq_cdev->id);

	/* Fill freq-table in descending order of frequencies */
	for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
		freq = find_next_max(policy->freq_table, freq);
		cpufreq_cdev->freq_table[i].frequency = freq;

		/* Warn for duplicate entries */
		if (!freq)
			pr_warn("%s: table has duplicate entries\n", __func__);
		else
			pr_debug("%s: freq:%u KHz\n", __func__, freq);
	}

	cooling_ops = &cpufreq_cooling_ops;
#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
	if (em_is_sane(cpufreq_cdev, em)) {
		cpufreq_cdev->em = em;
	if (capacitance) {
		ret = update_freq_table(cpufreq_cdev, capacitance);
		if (ret) {
			cdev = ERR_PTR(ret);
			goto remove_ida;
		}
		cooling_ops->get_requested_power = cpufreq_get_requested_power;
		cooling_ops->state2power = cpufreq_state2power;
		cooling_ops->power2state = cpufreq_power2state;
	} else
#endif
	if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED) {
		pr_err("%s: unsorted frequency tables are not supported\n",
				__func__);
		cdev = ERR_PTR(-EINVAL);
		goto remove_ida;
	}

#endif
	cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
						  cooling_ops);
	if (IS_ERR(cdev))
		goto remove_ida;

	cpufreq_cdev->clipped_freq = get_state_freq(cpufreq_cdev, 0);
	cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency;

	mutex_lock(&cooling_list_lock);
	/* Register the notifier for first cpufreq cooling device */
@@ -621,6 +686,8 @@ __cpufreq_cooling_register(struct device_node *np,

remove_ida:
	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
free_table:
	kfree(cpufreq_cdev->freq_table);
free_idle_time:
	kfree(cpufreq_cdev->idle_time);
free_cdev:
@@ -642,7 +709,7 @@ __cpufreq_cooling_register(struct device_node *np,
struct thermal_cooling_device *
cpufreq_cooling_register(struct cpufreq_policy *policy)
{
	/*
	 * NOTE(review): two return statements appear because this listing is
	 * a rendered diff with the +/- markers stripped. Presumably the call
	 * passing NULL (no Energy Model) is the pre-revert line and the call
	 * passing 0 (no capacitance) is the restored one - confirm against
	 * the actual patch. Either way no device tree node is passed.
	 */
	return __cpufreq_cooling_register(NULL, policy, NULL);
	return __cpufreq_cooling_register(NULL, policy, 0);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);

@@ -670,6 +737,7 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy)
{
	struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
	struct thermal_cooling_device *cdev = NULL;
	u32 capacitance = 0;

	if (!np) {
		pr_err("cpu_cooling: OF node not available for cpu%d\n",
@@ -678,9 +746,10 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy)
	}

	if (of_find_property(np, "#cooling-cells", NULL)) {
		struct em_perf_domain *em = em_cpu_get(policy->cpu);
		of_property_read_u32(np, "dynamic-power-coefficient",
				     &capacitance);

		cdev = __cpufreq_cooling_register(np, policy, em);
		cdev = __cpufreq_cooling_register(np, policy, capacitance);
		if (IS_ERR(cdev)) {
			pr_err("cpu_cooling: cpu%d failed to register as cooling device: %ld\n",
			       policy->cpu, PTR_ERR(cdev));
@@ -722,6 +791,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
	thermal_cooling_device_unregister(cdev);
	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
	kfree(cpufreq_cdev->idle_time);
	kfree(cpufreq_cdev->freq_table);
	kfree(cpufreq_cdev);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);