Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5e4358c5 authored by Rohit Gupta's avatar Rohit Gupta
Browse files

PM / devfreq: Introduce a memory-latency governor



Use performance counters to detect the memory latency sensitivity
of CPU workloads and vote for higher DDR frequency if required.

Change-Id: Ie77a3523bc5713fc0315bd0abc3913f485a96e0e
Signed-off-by: default avatarRohit Gupta <rohgup@codeaurora.org>
Suggested-by: default avatarSaravana Kannan <skannan@codeaurora.org>
[junjiew@codeaurora.org: dropped changes in arch/arm64/Kconfig]
Signed-off-by: default avatarJunjie Wu <junjiew@codeaurora.org>
parent 4d1f4f49
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
ARM CPU memory latency monitor device

arm-memlat-mon is a device that represents the use of the PMU in ARM cores
to measure the parameters for latency driven memory access patterns.

Required properties:
- compatible:			Must be "qcom,arm-memlat-mon"
- qcom,cpulist:			List of CPU phandles to be monitored in a cluster
- qcom,target-dev:		The DT device that corresponds to this master port

Example:
	qcom,arm-memlat-mon {
		compatible = "qcom,arm-memlat-mon";
		qcom,cpulist = <&CPU0 &CPU1>;
		qcom,target-dev = <&memlat0>;
	};
+19 −0
Original line number Diff line number Diff line
@@ -89,6 +89,15 @@ config QCOM_BIMC_BWMON
	  has the capability to raise an IRQ when the count exceeds a
	  programmable limit.

config ARM_MEMLAT_MON
	tristate "ARM CPU Memory Latency monitor hardware"
	depends on ARCH_QCOM
	help
	  The PMU present on these ARM cores allow for the use of counters to
	  monitor the memory latency characteristics of an ARM CPU workload.
	  This driver uses these counters to implement the APIs needed by
	  the mem_latency devfreq governor.

config QCOMCCI_HWMON
	tristate "QCOM CCI Cache monitor hardware"
	depends on ARCH_QCOM
@@ -126,6 +135,16 @@ config DEVFREQ_GOV_QCOM_CACHE_HWMON
	  it can conflict with existing profiling tools. This governor is
	  unlikely to be useful for other devices.

config DEVFREQ_GOV_MEMLAT
	tristate "HW monitor based governor for device BW"
	depends on ARM_MEMLAT_MON
	help
	  HW monitor based governor for device to DDR bandwidth voting.
	  This governor sets the CPU BW vote based on stats obtained from memalat
	  monitor if it determines that a workload is memory latency bound. Since
	  this uses target specific counters it can conflict with existing profiling
	  tools.

comment "DEVFREQ Drivers"

config DEVFREQ_GOV_QCOM_ADRENO_TZ
+2 −0
Original line number Diff line number Diff line
@@ -9,10 +9,12 @@ obj-$(CONFIG_DEVFREQ_GOV_CPUFREQ) += governor_cpufreq.o
obj-$(CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ) += governor_msm_adreno_tz.o
obj-$(CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON) += governor_bw_vbif.o
obj-$(CONFIG_QCOM_BIMC_BWMON)		+= bimc-bwmon.o
obj-$(CONFIG_ARM_MEMLAT_MON)		+= arm-memlat-mon.o
obj-$(CONFIG_QCOMCCI_HWMON)		+= msmcci-hwmon.o
obj-$(CONFIG_QCOM_M4M_HWMON)		+= m4m-hwmon.o
obj-$(CONFIG_DEVFREQ_GOV_QCOM_BW_HWMON)	+= governor_bw_hwmon.o
obj-$(CONFIG_DEVFREQ_GOV_QCOM_CACHE_HWMON)	+= governor_cache_hwmon.o
obj-$(CONFIG_DEVFREQ_GOV_MEMLAT)       += governor_memlat.o

# DEVFREQ Drivers
obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ)	+= exynos-bus.o
+357 −0
Original line number Diff line number Diff line
/*
 * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#define pr_fmt(fmt) "arm-memlat-mon: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/cpu_pm.h>
#include <linux/cpu.h>
#include "governor.h"
#include "governor_memlat.h"
#include <linux/perf_event.h>

enum ev_index {
	INST_IDX,
	L2DM_IDX,
	CYC_IDX,
	NUM_EVENTS
};
#define INST_EV		0x08
#define L2DM_EV		0x17
#define CYC_EV		0x11

struct event_data {
	struct perf_event *pevent;
	unsigned long prev_count;
};

struct memlat_hwmon_data {
	struct event_data events[NUM_EVENTS];
	ktime_t prev_ts;
	bool init_pending;
};
static DEFINE_PER_CPU(struct memlat_hwmon_data, pm_data);

struct cpu_grp_info {
	cpumask_t cpus;
	struct memlat_hwmon hw;
	struct notifier_block arm_memlat_cpu_notif;
};

static unsigned long compute_freq(struct memlat_hwmon_data *hw_data,
						unsigned long cyc_cnt)
{
	ktime_t ts;
	unsigned int diff;
	unsigned long freq = 0;

	ts = ktime_get();
	diff = ktime_to_us(ktime_sub(ts, hw_data->prev_ts));
	if (!diff)
		diff = 1;
	hw_data->prev_ts = ts;
	freq = cyc_cnt;
	do_div(freq, diff);

	return freq;
}

#define MAX_COUNT_LIM 0xFFFFFFFFFFFFFFFF
static inline unsigned long read_event(struct event_data *event)
{
	unsigned long ev_count;
	u64 total, enabled, running;

	total = perf_event_read_value(event->pevent, &enabled, &running);
	if (total >= event->prev_count)
		ev_count = total - event->prev_count;
	else
		ev_count = (MAX_COUNT_LIM - event->prev_count) + total;

	event->prev_count = total;

	return ev_count;
}

static void read_perf_counters(int cpu, struct cpu_grp_info *cpu_grp)
{
	int cpu_idx;
	struct memlat_hwmon_data *hw_data = &per_cpu(pm_data, cpu);
	struct memlat_hwmon *hw = &cpu_grp->hw;
	unsigned long cyc_cnt;

	if (hw_data->init_pending)
		return;

	cpu_idx = cpu - cpumask_first(&cpu_grp->cpus);

	hw->core_stats[cpu_idx].inst_count =
			read_event(&hw_data->events[INST_IDX]);

	hw->core_stats[cpu_idx].mem_count =
			read_event(&hw_data->events[L2DM_IDX]);

	cyc_cnt = read_event(&hw_data->events[CYC_IDX]);
	hw->core_stats[cpu_idx].freq = compute_freq(hw_data, cyc_cnt);
}

static unsigned long get_cnt(struct memlat_hwmon *hw)
{
	int cpu;
	struct cpu_grp_info *cpu_grp = container_of(hw,
					struct cpu_grp_info, hw);

	for_each_cpu(cpu, &cpu_grp->cpus)
		read_perf_counters(cpu, cpu_grp);

	return 0;
}

static void delete_events(struct memlat_hwmon_data *hw_data)
{
	int i;

	for (i = 0; i < NUM_EVENTS; i++) {
		hw_data->events[i].prev_count = 0;
		perf_event_release_kernel(hw_data->events[i].pevent);
	}
}

static void stop_hwmon(struct memlat_hwmon *hw)
{
	int cpu, idx;
	struct memlat_hwmon_data *hw_data;
	struct cpu_grp_info *cpu_grp = container_of(hw,
					struct cpu_grp_info, hw);

	get_online_cpus();
	for_each_cpu(cpu, &cpu_grp->cpus) {
		hw_data = &per_cpu(pm_data, cpu);
		if (hw_data->init_pending)
			hw_data->init_pending = false;
		else
			delete_events(hw_data);

		/* Clear governor data */
		idx = cpu - cpumask_first(&cpu_grp->cpus);
		hw->core_stats[idx].inst_count = 0;
		hw->core_stats[idx].mem_count = 0;
		hw->core_stats[idx].freq = 0;
	}
	put_online_cpus();

	unregister_cpu_notifier(&cpu_grp->arm_memlat_cpu_notif);
}

static struct perf_event_attr *alloc_attr(void)
{
	struct perf_event_attr *attr;

	attr = kzalloc(sizeof(struct perf_event_attr), GFP_KERNEL);
	if (!attr)
		return ERR_PTR(-ENOMEM);

	attr->type = PERF_TYPE_RAW;
	attr->size = sizeof(struct perf_event_attr);
	attr->pinned = 1;
	attr->exclude_idle = 1;

	return attr;
}

static int set_events(struct memlat_hwmon_data *hw_data, int cpu)
{
	struct perf_event *pevent;
	struct perf_event_attr *attr;
	int err;

	/* Allocate an attribute for event initialization */
	attr = alloc_attr();
	if (IS_ERR(attr))
		return PTR_ERR(attr);

	attr->config = INST_EV;
	pevent = perf_event_create_kernel_counter(attr, cpu, NULL, NULL, NULL);
	if (IS_ERR(pevent))
		goto err_out;
	hw_data->events[INST_IDX].pevent = pevent;
	perf_event_enable(hw_data->events[INST_IDX].pevent);

	attr->config = L2DM_EV;
	pevent = perf_event_create_kernel_counter(attr, cpu, NULL, NULL, NULL);
	if (IS_ERR(pevent))
		goto err_out;
	hw_data->events[L2DM_IDX].pevent = pevent;
	perf_event_enable(hw_data->events[L2DM_IDX].pevent);

	attr->config = CYC_EV;
	pevent = perf_event_create_kernel_counter(attr, cpu, NULL, NULL, NULL);
	if (IS_ERR(pevent))
		goto err_out;
	hw_data->events[CYC_IDX].pevent = pevent;
	perf_event_enable(hw_data->events[CYC_IDX].pevent);

	kfree(attr);
	return 0;

err_out:
	err = PTR_ERR(pevent);
	kfree(attr);
	return err;
}

static int arm_memlat_cpu_callback(struct notifier_block *nb,
		unsigned long action, void *hcpu)
{
	unsigned long cpu = (unsigned long)hcpu;
	struct memlat_hwmon_data *hw_data = &per_cpu(pm_data, cpu);

	if ((action != CPU_ONLINE) || !hw_data->init_pending)
		return NOTIFY_OK;

	if (set_events(hw_data, cpu))
		pr_warn("Failed to create perf event for CPU%lu\n", cpu);

	hw_data->init_pending = false;

	return NOTIFY_OK;
}

static int start_hwmon(struct memlat_hwmon *hw)
{
	int cpu, ret = 0;
	struct memlat_hwmon_data *hw_data;
	struct cpu_grp_info *cpu_grp = container_of(hw,
					struct cpu_grp_info, hw);

	register_cpu_notifier(&cpu_grp->arm_memlat_cpu_notif);

	get_online_cpus();
	for_each_cpu(cpu, &cpu_grp->cpus) {
		hw_data = &per_cpu(pm_data, cpu);
		ret = set_events(hw_data, cpu);
		if (ret) {
			if (!cpu_online(cpu)) {
				hw_data->init_pending = true;
				ret = 0;
			} else {
				pr_warn("Perf event init failed on CPU%d\n",
					cpu);
				break;
			}
		}
	}

	put_online_cpus();
	return ret;
}

static int get_mask_from_dev_handle(struct platform_device *pdev,
					cpumask_t *mask)
{
	struct device *dev = &pdev->dev;
	struct device_node *dev_phandle;
	struct device *cpu_dev;
	int cpu, i = 0;
	int ret = -ENOENT;

	dev_phandle = of_parse_phandle(dev->of_node, "qcom,cpulist", i++);
	while (dev_phandle) {
		for_each_possible_cpu(cpu) {
			cpu_dev = get_cpu_device(cpu);
			if (cpu_dev && cpu_dev->of_node == dev_phandle) {
				cpumask_set_cpu(cpu, mask);
				ret = 0;
				break;
			}
		}
		dev_phandle = of_parse_phandle(dev->of_node,
						"qcom,cpulist", i++);
	}

	return ret;
}

static int arm_memlat_mon_driver_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct memlat_hwmon *hw;
	struct cpu_grp_info *cpu_grp;
	int cpu, ret;

	cpu_grp = devm_kzalloc(dev, sizeof(*cpu_grp), GFP_KERNEL);
	if (!cpu_grp)
		return -ENOMEM;
	cpu_grp->arm_memlat_cpu_notif.notifier_call = arm_memlat_cpu_callback;
	hw = &cpu_grp->hw;

	hw->dev = dev;
	hw->of_node = of_parse_phandle(dev->of_node, "qcom,target-dev", 0);
	if (!hw->of_node) {
		dev_err(dev, "Couldn't find a target device\n");
		return -ENODEV;
	}

	if (get_mask_from_dev_handle(pdev, &cpu_grp->cpus)) {
		dev_err(dev, "CPU list is empty\n");
		return -ENODEV;
	}

	hw->num_cores = cpumask_weight(&cpu_grp->cpus);
	hw->core_stats = devm_kzalloc(dev, hw->num_cores *
				sizeof(*(hw->core_stats)), GFP_KERNEL);
	if (!hw->core_stats)
		return -ENOMEM;

	for_each_cpu(cpu, &cpu_grp->cpus)
		hw->core_stats[cpu - cpumask_first(&cpu_grp->cpus)].id = cpu;

	hw->start_hwmon = &start_hwmon;
	hw->stop_hwmon = &stop_hwmon;
	hw->get_cnt = &get_cnt;

	ret = register_memlat(dev, hw);
	if (ret) {
		pr_err("Mem Latency Gov registration failed\n");
		return ret;
	}

	return 0;
}

static const struct of_device_id memlat_match_table[] = {
	{ .compatible = "qcom,arm-memlat-mon" },
	{}
};

static struct platform_driver arm_memlat_mon_driver = {
	.probe = arm_memlat_mon_driver_probe,
	.driver = {
		.name = "arm-memlat-mon",
		.of_match_table = memlat_match_table,
	},
};

module_platform_driver(arm_memlat_mon_driver);
+337 −0
Original line number Diff line number Diff line
/*
 * Copyright (c) 2015, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#define pr_fmt(fmt) "mem_lat: " fmt

#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/time.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/devfreq.h>
#include "governor.h"
#include "governor_memlat.h"

#include <trace/events/power.h>

struct memlat_node {
	unsigned int ratio_ceil;
	unsigned int freq_thresh_mhz;
	unsigned int mult_factor;
	bool mon_started;
	struct list_head list;
	void *orig_data;
	struct memlat_hwmon *hw;
	struct devfreq_governor *gov;
	struct attribute_group *attr_grp;
};

static LIST_HEAD(memlat_list);
static DEFINE_MUTEX(list_lock);

static int use_cnt;
static DEFINE_MUTEX(state_lock);

#define show_attr(name) \
static ssize_t show_##name(struct device *dev,				\
			struct device_attribute *attr, char *buf)	\
{									\
	struct devfreq *df = to_devfreq(dev);				\
	struct memlat_node *hw = df->data;				\
	return snprintf(buf, PAGE_SIZE, "%u\n", hw->name);		\
}

#define store_attr(name, _min, _max) \
static ssize_t store_##name(struct device *dev,				\
			struct device_attribute *attr, const char *buf,	\
			size_t count)					\
{									\
	struct devfreq *df = to_devfreq(dev);				\
	struct memlat_node *hw = df->data;				\
	int ret;							\
	unsigned int val;						\
	ret = kstrtouint(buf, 10, &val);				\
	if (ret)							\
		return ret;						\
	val = max(val, _min);						\
	val = min(val, _max);						\
	hw->name = val;							\
	return count;							\
}

#define gov_attr(__attr, min, max)	\
show_attr(__attr)			\
store_attr(__attr, min, max)		\
static DEVICE_ATTR(__attr, 0644, show_##__attr, store_##__attr)

static unsigned long compute_dev_vote(struct devfreq *df)
{
	int i, lat_dev;
	struct memlat_node *node = df->data;
	struct memlat_hwmon *hw = node->hw;
	unsigned long max_freq = 0;
	unsigned int ratio;

	hw->get_cnt(hw);

	for (i = 0; i < hw->num_cores; i++) {
		ratio = hw->core_stats[i].inst_count;

		if (hw->core_stats[i].mem_count)
			ratio /= hw->core_stats[i].mem_count;

		trace_memlat_dev_meas(dev_name(df->dev.parent),
					hw->core_stats[i].id,
					hw->core_stats[i].inst_count,
					hw->core_stats[i].mem_count,
					hw->core_stats[i].freq, ratio);

		if (ratio && ratio <= node->ratio_ceil
		    && hw->core_stats[i].freq >= node->freq_thresh_mhz
		    && hw->core_stats[i].freq > max_freq) {
			lat_dev = i;
			max_freq = hw->core_stats[i].freq;
		}
	}

	if (max_freq)
		trace_memlat_dev_update(dev_name(df->dev.parent),
					hw->core_stats[lat_dev].id,
					hw->core_stats[lat_dev].inst_count,
					hw->core_stats[lat_dev].mem_count,
					hw->core_stats[lat_dev].freq,
					max_freq * node->mult_factor);

	return max_freq;
}

static struct memlat_node *find_memlat_node(struct devfreq *df)
{
	struct memlat_node *node, *found = NULL;

	mutex_lock(&list_lock);
	list_for_each_entry(node, &memlat_list, list)
		if (node->hw->dev == df->dev.parent ||
		    node->hw->of_node == df->dev.parent->of_node) {
			found = node;
			break;
		}
	mutex_unlock(&list_lock);

	return found;
}

static int start_monitor(struct devfreq *df)
{
	struct memlat_node *node = df->data;
	struct memlat_hwmon *hw = node->hw;
	struct device *dev = df->dev.parent;
	int ret;

	ret = hw->start_hwmon(hw);

	if (ret) {
		dev_err(dev, "Unable to start HW monitor! (%d)\n", ret);
		return ret;
	}

	devfreq_monitor_start(df);

	node->mon_started = true;

	return 0;
}

static void stop_monitor(struct devfreq *df)
{
	struct memlat_node *node = df->data;
	struct memlat_hwmon *hw = node->hw;

	node->mon_started = false;

	devfreq_monitor_stop(df);
	hw->stop_hwmon(hw);
}

static int gov_start(struct devfreq *df)
{
	int ret = 0;
	struct device *dev = df->dev.parent;
	struct memlat_node *node;
	struct memlat_hwmon *hw;

	node = find_memlat_node(df);
	if (!node) {
		dev_err(dev, "Unable to find HW monitor!\n");
		return -ENODEV;
	}
	hw = node->hw;

	hw->df = df;
	node->orig_data = df->data;
	df->data = node;

	if (start_monitor(df))
		goto err_start;

	ret = sysfs_create_group(&df->dev.kobj, node->attr_grp);
	if (ret)
		goto err_sysfs;

	return 0;

err_sysfs:
	stop_monitor(df);
err_start:
	df->data = node->orig_data;
	node->orig_data = NULL;
	hw->df = NULL;
	return ret;
}

static void gov_stop(struct devfreq *df)
{
	struct memlat_node *node = df->data;
	struct memlat_hwmon *hw = node->hw;

	sysfs_remove_group(&df->dev.kobj, node->attr_grp);
	stop_monitor(df);
	df->data = node->orig_data;
	node->orig_data = NULL;
	hw->df = NULL;
}

static int devfreq_memlat_get_freq(struct devfreq *df,
					unsigned long *freq)
{
	unsigned long mhz;
	struct memlat_node *node = df->data;

	mhz = compute_dev_vote(df);
	*freq = mhz ? (mhz * node->mult_factor) : 0;

	return 0;
}

gov_attr(ratio_ceil, 1U, 1000U);
gov_attr(freq_thresh_mhz, 300U, 5000U);
gov_attr(mult_factor, 1U, 10U);

static struct attribute *dev_attr[] = {
	&dev_attr_ratio_ceil.attr,
	&dev_attr_freq_thresh_mhz.attr,
	&dev_attr_mult_factor.attr,
	NULL,
};

static struct attribute_group dev_attr_group = {
	.name = "mem_latency",
	.attrs = dev_attr,
};

#define MIN_MS	10U
#define MAX_MS	500U
static int devfreq_memlat_ev_handler(struct devfreq *df,
					unsigned int event, void *data)
{
	int ret;
	unsigned int sample_ms;

	switch (event) {
	case DEVFREQ_GOV_START:
		sample_ms = df->profile->polling_ms;
		sample_ms = max(MIN_MS, sample_ms);
		sample_ms = min(MAX_MS, sample_ms);
		df->profile->polling_ms = sample_ms;

		ret = gov_start(df);
		if (ret)
			return ret;

		dev_dbg(df->dev.parent,
			"Enabled Memory Latency governor\n");
		break;

	case DEVFREQ_GOV_STOP:
		gov_stop(df);
		dev_dbg(df->dev.parent,
			"Disabled Memory Latency governor\n");
		break;

	case DEVFREQ_GOV_INTERVAL:
		sample_ms = *(unsigned int *)data;
		sample_ms = max(MIN_MS, sample_ms);
		sample_ms = min(MAX_MS, sample_ms);
		devfreq_interval_update(df, &sample_ms);
		break;
	}

	return 0;
}

static struct devfreq_governor devfreq_gov_memlat = {
	.name = "mem_latency",
	.get_target_freq = devfreq_memlat_get_freq,
	.event_handler = devfreq_memlat_ev_handler,
};

int register_memlat(struct device *dev, struct memlat_hwmon *hw)
{
	int ret = 0;
	struct memlat_node *node;

	if (!hw->dev && !hw->of_node)
		return -EINVAL;

	node = devm_kzalloc(dev, sizeof(*node), GFP_KERNEL);
	if (!node)
		return -ENOMEM;

	node->gov = &devfreq_gov_memlat;
	node->attr_grp = &dev_attr_group;

	node->ratio_ceil = 10;
	node->freq_thresh_mhz = 900;
	node->mult_factor = 8;
	node->hw = hw;

	mutex_lock(&list_lock);
	list_add_tail(&node->list, &memlat_list);
	mutex_unlock(&list_lock);

	mutex_lock(&state_lock);
	if (!use_cnt)
		ret = devfreq_add_governor(&devfreq_gov_memlat);
	if (!ret)
		use_cnt++;
	mutex_unlock(&state_lock);

	if (!ret)
		dev_info(dev, "Memory Latency governor registered.\n");
	else
		dev_err(dev, "Memory Latency governor registration failed!\n");

	return ret;
}

MODULE_DESCRIPTION("HW monitor based dev DDR bandwidth voting driver");
MODULE_LICENSE("GPL v2");
Loading