Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 76e6ac30 authored by Jonathan Avila's avatar Jonathan Avila
Browse files

perf: Introduce a LLCC PMU



Some chips have hardware that can count misses for LLCC at a per-CPU level.
This PMU serves as an intermediary that allows us to retrieve these values
for use in other drivers.

Change-Id: I1dc3090a64ec7d5b12a36b0395c42930128287fe
Signed-off-by: Jonathan Avila <avilaj@codeaurora.org>
parent 8c72aa73
Loading
Loading
Loading
Loading
+25 −0
Original line number Diff line number Diff line
* QCOM LLCC PMU Bindings

This represents the per-CPU miss counters located in the LLCC hardware.
Only one event is supported:

 0x1000      	  - LLCC misses

The following section describes the LLCC PMU DT node binding.

Required properties:
- compatible		: Shall be "qcom,qcom-llcc-pmu"
- reg			: There shall be two resources, each a pair of the form
			  < base_address total_size >.  One will represent the
			  DDR LAGG region, the other will represent the LLCC
			  BEAC region.
- reg-names		: There shall be two values, one 'lagg-base', one
			  'beac-base', corresponding to the order of the two
			  'reg' values.

Example:
	llcc_pmu: llcc-pmu {
		compatible = "qcom,qcom-llcc-pmu";
		reg = < 0x090CC000 0x300 >, < 0x09648000 0x110 >;
		reg-names = "lagg-base", "beac-base";
	};
+9 −0
Original line number Diff line number Diff line
@@ -36,6 +36,15 @@ config QCOM_L3_PMU
	   Adds the L3 cache PMU into the perf events subsystem for
	   monitoring L3 cache events.

# Per-CPU LLCC miss-counter PMU driver (drivers/perf/qcom_llcc_pmu.c).
config QCOM_LLCC_PMU
	bool "Qualcomm Technologies LLCC PMU"
	depends on ARCH_QCOM && ARM64
	help
	   Provides support for the LLCC performance monitor unit (PMU) in
	   Qualcomm Technologies processors.
	   Adds the LLCC PMU into the perf events subsystem for monitoring
	   LLCC miss events.

config XGENE_PMU
        depends on ARCH_XGENE
        bool "APM X-Gene SoC PMU"
+1 −0
Original line number Diff line number Diff line
@@ -3,4 +3,5 @@ obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
# LLCC miss-counter PMU (see qcom_llcc_pmu.c)
obj-$(CONFIG_QCOM_LLCC_PMU) += qcom_llcc_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
+212 −0
Original line number Diff line number Diff line
/*
 * Copyright (c) 2017, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/of.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/ktime.h>

/*
 * Driver state for one LLCC PMU instance, allocated at probe time.
 */
struct llcc_pmu {
	struct pmu pmu;			/* perf core registration handle */
	struct hlist_node node;
	void __iomem *lagg_base;	/* DDR LAGG region: monitor cfg + per-CPU counters */
	void __iomem *beac_base;	/* LLCC BEAC region: enable + scale registers */
	struct perf_event event;	/* NOTE(review): never referenced in this file — confirm it is needed */
};

/* LAGG region: monitor configuration register. */
#define MON_CFG(m) ((m)->lagg_base + 0x200)
/*
 * One 32-bit miss counter per CPU, starting at offset 0x220.
 * The cpu argument is parenthesized so expression arguments
 * (e.g. MON_CNT(m, i + 1)) expand correctly.
 */
#define MON_CNT(m, cpu) ((m)->lagg_base + 0x220 + 0x4 * (cpu))
/* BEAC region: global enable and counter-scale registers. */
#define BEAC_ENABLE(m) ((m)->beac_base + 0x100)
#define BEAC_SCALE(m) ((m)->beac_base + 0x110)
#define to_llcc_pmu(ptr) (container_of(ptr, struct llcc_pmu, pmu))

#define LLCC_RD_EV 0x1000	/* the only supported event: LLCC misses */
#define ENABLE 0x01
#define CLEAR 0x10
#define DISABLE 0x00
/* Raw counts are shifted left by this on read — presumably matches the
 * value programmed into BEAC_SCALE at probe; confirm against HW docs. */
#define SCALING_FACTOR 0x4
#define NUM_COUNTERS NR_CPUS
#define VALUE_MASK 0xFFFFFF	/* hardware counters are 24 bits wide */

/* Accumulated miss counts, one slot per possible CPU. */
static u64 llcc_stats[NUM_COUNTERS];
/* Active event count; hardware is enabled while users > 0. */
static unsigned int users;
/* Serializes hardware counter reads and llcc_stats updates. */
static raw_spinlock_t counter_lock;
/* Protects the users refcount and the enable/disable writes. */
static raw_spinlock_t users_lock;
/* Time of the last hardware read; reads are rate limited (see event_read). */
static ktime_t last_read;

/*
 * perf callback: validate an event at creation time.
 *
 * Only the LLCC miss event (config 0x1000) is supported; any other
 * config is rejected with -ENOENT so the core tries other PMUs.
 * The original declared unused locals (config was redundant, type was
 * never read); both are dropped.
 */
static int qcom_llcc_event_init(struct perf_event *event)
{
	if (event->attr.config != LLCC_RD_EV)
		return -ENOENT;

	event->hw.config_base = event->attr.config;
	return 0;
}

/*
 * perf callback: refresh the cached per-CPU miss totals from hardware
 * and publish the total for this event's CPU into event->count.
 *
 * Hardware access is rate limited: the counters are only re-read when
 * more than 1 ms has passed since the last read; otherwise the cached
 * llcc_stats[] values are used.  counter_lock serializes both the
 * hardware sequence and the llcc_stats[] update.
 */
static void qcom_llcc_event_read(struct perf_event *event)
{
	int i = 0, cpu = event->cpu;
	unsigned long raw, irq_flags;
	struct llcc_pmu *llccpmu = to_llcc_pmu(event->pmu);
	ktime_t cur;

	raw_spin_lock_irqsave(&counter_lock, irq_flags);
	cur = ktime_get();
	if (ktime_ms_delta(cur, last_read) > 1) {
		/* Stop the monitor so counters are stable while we read. */
		writel_relaxed(DISABLE, MON_CFG(llccpmu));
		for (i = 0; i < NUM_COUNTERS; i++) {
			raw = readl_relaxed(MON_CNT(llccpmu, i));
			/* Counter payload is the low 24 bits. */
			raw &= VALUE_MASK;
			/* Undo the hardware's 2^SCALING_FACTOR pre-scaling —
			 * presumably paired with the BEAC_SCALE write in
			 * probe; confirm against hardware docs. */
			llcc_stats[i] += (u64) raw << SCALING_FACTOR;
		}
		last_read = cur;
		/* Zero the hardware counters, then resume counting. */
		writel_relaxed(CLEAR, MON_CFG(llccpmu));
		writel_relaxed(ENABLE, MON_CFG(llccpmu));
	}

	if (!(event->hw.state & PERF_HES_STOPPED))
		local64_set(&event->count, llcc_stats[cpu]);
	raw_spin_unlock_irqrestore(&counter_lock, irq_flags);
}

/*
 * perf callback: start counting for this event.
 *
 * The hardware is shared and already enabled from event_add, so this
 * only clears the stopped/up-to-date state flags.  (An unused llccpmu
 * local has been removed.)
 */
static void qcom_llcc_event_start(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_RELOAD)
		WARN_ON(!(event->hw.state & PERF_HES_UPTODATE));
	event->hw.state = 0;
}

/*
 * perf callback: stop counting for this event.
 *
 * Folds the latest hardware counts into event->count before marking the
 * event stopped and up to date.  (An unused llccpmu local has been
 * removed.)
 */
static void qcom_llcc_event_stop(struct perf_event *event, int flags)
{
	qcom_llcc_event_read(event);
	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}

/*
 * perf callback: add an event to the PMU.
 *
 * The hardware counters are shared by all events, so a refcount under
 * users_lock powers the monitor and BEAC hardware on for the first
 * user only.  (Unused locals i, cpu and irq_flags have been removed.)
 */
static int qcom_llcc_event_add(struct perf_event *event, int flags)
{
	struct llcc_pmu *llccpmu = to_llcc_pmu(event->pmu);

	raw_spin_lock(&users_lock);
	if (!users) {
		writel_relaxed(ENABLE, MON_CFG(llccpmu));
		writel_relaxed(ENABLE, BEAC_ENABLE(llccpmu));
	}
	users++;
	raw_spin_unlock(&users_lock);

	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	if (flags & PERF_EF_START)
		qcom_llcc_event_start(event, PERF_EF_RELOAD);

	return 0;
}

/*
 * perf callback: remove an event from the PMU.
 *
 * Drops the shared-hardware refcount and disables the monitor and BEAC
 * hardware when the last user goes away.  (Unused locals i, cpu and
 * irq_flags have been removed.)
 */
static void qcom_llcc_event_del(struct perf_event *event, int flags)
{
	struct llcc_pmu *llccpmu = to_llcc_pmu(event->pmu);

	raw_spin_lock(&users_lock);
	users--;
	if (!users) {
		writel_relaxed(DISABLE, MON_CFG(llccpmu));
		writel_relaxed(DISABLE, BEAC_ENABLE(llccpmu));
	}
	raw_spin_unlock(&users_lock);
}

/*
 * Probe: map the LAGG and BEAC register regions named in DT, program
 * the counter scale, and register the PMU with the perf core.
 *
 * Fixes over the original:
 *  - perf_pmu_register() failure is now propagated (the original logged
 *    the error but still returned 0 and printed "Registered").
 *  - the error prints no longer dereference res->start, which could be
 *    a NULL dereference when the named resource is missing entirely.
 *  - unused local i removed; sizeof(*llccpmu) idiom.
 */
static int qcom_llcc_pmu_probe(struct platform_device *pdev)
{
	struct llcc_pmu *llccpmu;
	struct resource *res;
	int ret;

	llccpmu = devm_kzalloc(&pdev->dev, sizeof(*llccpmu), GFP_KERNEL);
	if (!llccpmu)
		return -ENOMEM;

	llccpmu->pmu = (struct pmu) {
		.task_ctx_nr = perf_invalid_context,

		.event_init	= qcom_llcc_event_init,
		.add		= qcom_llcc_event_add,
		.del		= qcom_llcc_event_del,
		.start		= qcom_llcc_event_start,
		.stop		= qcom_llcc_event_stop,
		.read		= qcom_llcc_event_read,
	};

	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lagg-base");
	llccpmu->lagg_base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(llccpmu->lagg_base)) {
		dev_err(&pdev->dev, "Can't map PMU lagg base\n");
		return PTR_ERR(llccpmu->lagg_base);
	}

	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "beac-base");
	llccpmu->beac_base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(llccpmu->beac_base)) {
		dev_err(&pdev->dev, "Can't map PMU beac base\n");
		return PTR_ERR(llccpmu->beac_base);
	}

	/* Program the hardware counter scaling before anyone can count. */
	writel_relaxed(SCALING_FACTOR, BEAC_SCALE(llccpmu));

	raw_spin_lock_init(&counter_lock);
	raw_spin_lock_init(&users_lock);

	ret = perf_pmu_register(&llccpmu->pmu, "llcc-pmu", -1);
	if (ret < 0) {
		dev_err(&pdev->dev, "Failed to register LLCC PMU (%d)\n", ret);
		return ret;
	}

	dev_info(&pdev->dev, "Registered llcc_pmu, type: %d\n",
		 llccpmu->pmu.type);

	return 0;
}

/* DT match table; compatible string mirrors the binding document. */
static const struct of_device_id qcom_llcc_pmu_match_table[] = {
	{ .compatible = "qcom,qcom-llcc-pmu" },
	{}
};

/* Platform driver glue; no remove callback, so the PMU stays registered
 * for the life of the kernel (Kconfig makes this a bool/built-in). */
static struct platform_driver qcom_llcc_pmu_driver = {
	.driver = {
		.name = "qcom-llcc-pmu",
		.of_match_table = qcom_llcc_pmu_match_table,
	},
	.probe = qcom_llcc_pmu_probe,
};

module_platform_driver(qcom_llcc_pmu_driver);