Loading Documentation/devicetree/bindings/perf/qcom-llcc-pmu.txt 0 → 100644 +25 −0 Original line number Diff line number Diff line * QCOM LLCC PMU Bindings This represents the miss counters located in the LLCC hardware counters. Only one event is supported: 0x1000 - LLCC misses The following section describes the LLCC PMU DT node binding. Required properties: - compatible : Shall be "qcom,qcom-llcc-pmu" - reg : There shall be two resources, each a pair of the form < base_address total_size >. One will represent the DDR LAGG region, the other will represent the LLCC BEAC region. - reg-names : There shall be two values, one 'lagg-base', one 'beac-base', corresponding to the order of the two 'reg' values. Example: llcc_pmu: llcc-pmu { compatible = "qcom,qcom-llcc-pmu"; reg = < 0x090CC000 0x300 >, < 0x09648000 0x110 >; reg-names = "lagg-base", "beac-base"; };
drivers/perf/Kconfig +9 −0 Original line number Diff line number Diff line Loading @@ -36,6 +36,15 @@ config QCOM_L3_PMU Adds the L3 cache PMU into the perf events subsystem for monitoring L3 cache events. config QCOM_LLCC_PMU bool "Qualcomm Technologies LLCC PMU" depends on ARCH_QCOM && ARM64 help Provides support for the LLCC performance monitor unit (PMU) in Qualcomm Technologies processors. Adds the LLCC PMU into the perf events subsystem for monitoring LLCC miss events. config XGENE_PMU depends on ARCH_XGENE bool "APM X-Gene SoC PMU" Loading
drivers/perf/Makefile +1 −0 Original line number Diff line number Diff line Loading @@ -3,4 +3,5 @@ obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o obj-$(CONFIG_QCOM_LLCC_PMU) += qcom_llcc_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
/* drivers/perf/qcom_llcc_pmu.c */
/*
 * Copyright (c) 2017, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/of.h>
#include <linux/bitops.h>
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/ktime.h>

/*
 * Driver state for one LLCC PMU device: the perf PMU descriptor plus the
 * two MMIO windows ("lagg-base" and "beac-base") mapped in probe.
 */
struct llcc_pmu {
	struct pmu		pmu;
	struct hlist_node	node;
	void __iomem		*lagg_base;
	void __iomem		*beac_base;
	struct perf_event	event;
};

/* Monitor configuration register and per-index 32-bit counter registers. */
#define MON_CFG(m)		((m)->lagg_base + 0x200)
#define MON_CNT(m, cpu)		((m)->lagg_base + 0x220 + 0x4 * (cpu))
#define BEAC_ENABLE(m)		((m)->beac_base + 0x100)
/*
 * NOTE(review): the binding example sizes the BEAC region as 0x110, which
 * does not cover a register at offset 0x110 - confirm the region size.
 */
#define BEAC_SCALE(m)		((m)->beac_base + 0x110)

#define to_llcc_pmu(ptr)	(container_of(ptr, struct llcc_pmu, pmu))

#define LLCC_RD_EV		0x1000	/* the only supported event config */
#define ENABLE			0x01
#define CLEAR			0x10
#define DISABLE			0x00
/* Written to BEAC_SCALE and used as the left shift applied to raw counts. */
#define SCALING_FACTOR		0x4
#define NUM_COUNTERS		NR_CPUS
#define VALUE_MASK		0xFFFFFF

/* Accumulated (scaled) counts, indexed by CPU number. */
static u64 llcc_stats[NUM_COUNTERS];
/* Number of events currently added; guarded by users_lock. */
static unsigned int users;
/* counter_lock guards llcc_stats[], last_read and the harvest sequence. */
static DEFINE_RAW_SPINLOCK(counter_lock);
static DEFINE_RAW_SPINLOCK(users_lock);
/* Time of the last hardware harvest; used to rate-limit register access. */
static ktime_t last_read;

/*
 * qcom_llcc_event_init() - validate a new perf event for this PMU.
 *
 * Returns 0 on success, -ENOENT when the event targets a different PMU
 * type, and -EINVAL when the event is for this PMU but is a sampling
 * event, is not bound to a CPU, or requests a config other than
 * LLCC_RD_EV.
 */
static int qcom_llcc_event_init(struct perf_event *event)
{
	/* Never claim events that were opened against another PMU. */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* No overflow handling exists in this driver, so sampling cannot work. */
	if (is_sampling_event(event))
		return -EINVAL;

	/* event->cpu indexes llcc_stats[]; a negative value must be refused. */
	if (event->cpu < 0)
		return -EINVAL;

	if (event->attr.config != LLCC_RD_EV)
		return -EINVAL;

	event->hw.config_base = event->attr.config;
	return 0;
}

/*
 * qcom_llcc_event_read() - refresh the accumulated counts and publish the
 * value for @event's CPU.
 *
 * At most once per millisecond window the monitors are disabled, every
 * possible CPU's counter is masked, scaled and added to llcc_stats[], and
 * the monitors are cleared and re-enabled.  The event count is then set to
 * the accumulated value for event->cpu unless the event is stopped.
 *
 * NOTE(review): the published value is the running total in llcc_stats[],
 * not a delta since the event was started - confirm that consumers expect
 * this.
 */
static void qcom_llcc_event_read(struct perf_event *event)
{
	struct llcc_pmu *llccpmu = to_llcc_pmu(event->pmu);
	unsigned long irq_flags;
	ktime_t now;
	int cpu;
	u32 raw;

	raw_spin_lock_irqsave(&counter_lock, irq_flags);

	now = ktime_get();
	if (ktime_ms_delta(now, last_read) > 1) {
		writel_relaxed(DISABLE, MON_CFG(llccpmu));

		/*
		 * Walk possible CPUs only: iterating up to NR_CPUS can run
		 * past the counters that exist (and past the mapped region).
		 */
		for_each_possible_cpu(cpu) {
			raw = readl_relaxed(MON_CNT(llccpmu, cpu)) & VALUE_MASK;
			llcc_stats[cpu] += (u64)raw << SCALING_FACTOR;
		}

		last_read = now;
		writel_relaxed(CLEAR, MON_CFG(llccpmu));
		writel_relaxed(ENABLE, MON_CFG(llccpmu));
	}

	if (!(event->hw.state & PERF_HES_STOPPED))
		local64_set(&event->count, llcc_stats[event->cpu]);

	raw_spin_unlock_irqrestore(&counter_lock, irq_flags);
}

/*
 * qcom_llcc_event_start() - mark @event as running.
 *
 * Warns if asked to reload an event whose count is not up to date.
 */
static void qcom_llcc_event_start(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_RELOAD)
		WARN_ON(!(event->hw.state & PERF_HES_UPTODATE));

	event->hw.state = 0;
}

/*
 * qcom_llcc_event_stop() - take a final reading and mark @event stopped.
 */
static void qcom_llcc_event_stop(struct perf_event *event, int flags)
{
	qcom_llcc_event_read(event);
	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}

/*
 * qcom_llcc_event_add() - attach @event; the first user enables the
 * monitor and the BEAC block.  Starts the event when PERF_EF_START is set.
 *
 * Always returns 0.
 */
static int qcom_llcc_event_add(struct perf_event *event, int flags)
{
	struct llcc_pmu *llccpmu = to_llcc_pmu(event->pmu);

	raw_spin_lock(&users_lock);
	if (!users) {
		writel_relaxed(ENABLE, MON_CFG(llccpmu));
		writel_relaxed(ENABLE, BEAC_ENABLE(llccpmu));
	}
	users++;
	raw_spin_unlock(&users_lock);

	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	if (flags & PERF_EF_START)
		qcom_llcc_event_start(event, PERF_EF_RELOAD);

	return 0;
}

/*
 * qcom_llcc_event_del() - detach @event; the last user disables the
 * monitor and the BEAC block.
 */
static void qcom_llcc_event_del(struct perf_event *event, int flags)
{
	struct llcc_pmu *llccpmu = to_llcc_pmu(event->pmu);

	/* Fold in the final count before the hardware may be switched off. */
	qcom_llcc_event_stop(event, flags | PERF_EF_UPDATE);

	raw_spin_lock(&users_lock);
	users--;
	if (!users) {
		writel_relaxed(DISABLE, MON_CFG(llccpmu));
		writel_relaxed(DISABLE, BEAC_ENABLE(llccpmu));
	}
	raw_spin_unlock(&users_lock);
}

/*
 * qcom_llcc_pmu_probe() - map both register regions, program the scaling
 * factor and register the PMU as "llcc-pmu".
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL when a
 * named resource is missing, or the error from the mapping/registration
 * call that failed.
 */
static int qcom_llcc_pmu_probe(struct platform_device *pdev)
{
	struct llcc_pmu *llccpmu;
	struct resource *res;
	int ret;

	llccpmu = devm_kzalloc(&pdev->dev, sizeof(*llccpmu), GFP_KERNEL);
	if (!llccpmu)
		return -ENOMEM;

	llccpmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.event_init	= qcom_llcc_event_init,
		.add		= qcom_llcc_event_add,
		.del		= qcom_llcc_event_del,
		.start		= qcom_llcc_event_start,
		.stop		= qcom_llcc_event_stop,
		.read		= qcom_llcc_event_read,
	};

	/* res may be NULL; check before it is dereferenced for logging. */
	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lagg-base");
	if (!res) {
		dev_err(&pdev->dev, "Missing PMU lagg base resource\n");
		return -EINVAL;
	}
	llccpmu->lagg_base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(llccpmu->lagg_base)) {
		dev_err(&pdev->dev, "Can't map PMU lagg base: @%pa\n",
			&res->start);
		return PTR_ERR(llccpmu->lagg_base);
	}

	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "beac-base");
	if (!res) {
		dev_err(&pdev->dev, "Missing PMU beac base resource\n");
		return -EINVAL;
	}
	llccpmu->beac_base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(llccpmu->beac_base)) {
		dev_err(&pdev->dev, "Can't map PMU beac base @%pa\n",
			&res->start);
		return PTR_ERR(llccpmu->beac_base);
	}

	writel_relaxed(SCALING_FACTOR, BEAC_SCALE(llccpmu));

	ret = perf_pmu_register(&llccpmu->pmu, "llcc-pmu", -1);
	if (ret < 0) {
		dev_err(&pdev->dev, "Failed to register LLCC PMU (%d)\n", ret);
		return ret;
	}

	dev_info(&pdev->dev, "Registered llcc_pmu, type: %d\n",
		 llccpmu->pmu.type);

	return 0;
}

static const struct of_device_id qcom_llcc_pmu_match_table[] = {
	{ .compatible = "qcom,qcom-llcc-pmu" },
	{}
};

static struct platform_driver qcom_llcc_pmu_driver = {
	.driver = {
		.name = "qcom-llcc-pmu",
		.of_match_table = qcom_llcc_pmu_match_table,
	},
	.probe = qcom_llcc_pmu_probe,
};

module_platform_driver(qcom_llcc_pmu_driver);
Documentation/devicetree/bindings/perf/qcom-llcc-pmu.txt 0 → 100644 +25 −0 Original line number Diff line number Diff line * QCOM LLCC PMU Bindings This represents the miss counters located in the LLCC hardware counters. Only one event is supported: 0x1000 - LLCC misses The following section describes the LLCC PMU DT node binding. Required properties: - compatible : Shall be "qcom,qcom-llcc-pmu" - reg : There shall be two resources, each a pair of the form < base_address total_size >. One will represent the DDR LAGG region, the other will represent the LLCC BEAC region. - reg-names : There shall be two values, one 'lagg-base', one 'beac-base', corresponding to the order of the two 'reg' values. Example: llcc_pmu: llcc-pmu { compatible = "qcom,qcom-llcc-pmu"; reg = < 0x090CC000 0x300 >, < 0x09648000 0x110 >; reg-names = "lagg-base", "beac-base"; };
drivers/perf/Kconfig +9 −0 Original line number Diff line number Diff line Loading @@ -36,6 +36,15 @@ config QCOM_L3_PMU Adds the L3 cache PMU into the perf events subsystem for monitoring L3 cache events. config QCOM_LLCC_PMU bool "Qualcomm Technologies LLCC PMU" depends on ARCH_QCOM && ARM64 help Provides support for the LLCC performance monitor unit (PMU) in Qualcomm Technologies processors. Adds the LLCC PMU into the perf events subsystem for monitoring LLCC miss events. config XGENE_PMU depends on ARCH_XGENE bool "APM X-Gene SoC PMU" Loading
drivers/perf/Makefile +1 −0 Original line number Diff line number Diff line Loading @@ -3,4 +3,5 @@ obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o obj-$(CONFIG_QCOM_LLCC_PMU) += qcom_llcc_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
drivers/perf/qcom_llcc_pmu.c 0 → 100644 +212 −0 Original line number Diff line number Diff line /* * Copyright (c) 2017, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and * only version 2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #include <linux/of.h> #include <linux/bitops.h> #include <linux/interrupt.h> #include <linux/io.h> #include <linux/list.h> #include <linux/module.h> #include <linux/perf_event.h> #include <linux/platform_device.h> #include <linux/spinlock.h> #include <linux/ktime.h> struct llcc_pmu { struct pmu pmu; struct hlist_node node; void __iomem *lagg_base; void __iomem *beac_base; struct perf_event event; }; #define MON_CFG(m) ((m)->lagg_base + 0x200) #define MON_CNT(m, cpu) ((m)->lagg_base + 0x220 + 0x4 * cpu) #define BEAC_ENABLE(m) ((m)->beac_base + 0x100) #define BEAC_SCALE(m) ((m)->beac_base + 0x110) #define to_llcc_pmu(ptr) (container_of(ptr, struct llcc_pmu, pmu)) #define LLCC_RD_EV 0x1000 #define ENABLE 0x01 #define CLEAR 0x10 #define DISABLE 0x00 #define SCALING_FACTOR 0x4 #define NUM_COUNTERS NR_CPUS #define VALUE_MASK 0xFFFFFF static u64 llcc_stats[NUM_COUNTERS]; static unsigned int users; static raw_spinlock_t counter_lock; static raw_spinlock_t users_lock; static ktime_t last_read; static int qcom_llcc_event_init(struct perf_event *event) { u64 config = event->attr.config; u64 type = event->attr.type; if (config == LLCC_RD_EV) { event->hw.config_base = event->attr.config; return 0; } else return -ENOENT; } static void qcom_llcc_event_read(struct perf_event *event) { int i = 0, cpu = event->cpu; unsigned long raw, irq_flags; struct llcc_pmu *llccpmu = 
to_llcc_pmu(event->pmu); ktime_t cur; raw_spin_lock_irqsave(&counter_lock, irq_flags); cur = ktime_get(); if (ktime_ms_delta(cur, last_read) > 1) { writel_relaxed(DISABLE, MON_CFG(llccpmu)); for (i = 0; i < NUM_COUNTERS; i++) { raw = readl_relaxed(MON_CNT(llccpmu, i)); raw &= VALUE_MASK; llcc_stats[i] += (u64) raw << SCALING_FACTOR; } last_read = cur; writel_relaxed(CLEAR, MON_CFG(llccpmu)); writel_relaxed(ENABLE, MON_CFG(llccpmu)); } if (!(event->hw.state & PERF_HES_STOPPED)) local64_set(&event->count, llcc_stats[cpu]); raw_spin_unlock_irqrestore(&counter_lock, irq_flags); } static void qcom_llcc_event_start(struct perf_event *event, int flags) { struct llcc_pmu *llccpmu = to_llcc_pmu(event->pmu); if (flags & PERF_EF_RELOAD) WARN_ON(!(event->hw.state & PERF_HES_UPTODATE)); event->hw.state = 0; } static void qcom_llcc_event_stop(struct perf_event *event, int flags) { struct llcc_pmu *llccpmu = to_llcc_pmu(event->pmu); qcom_llcc_event_read(event); event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; } static int qcom_llcc_event_add(struct perf_event *event, int flags) { int i; unsigned int cpu = event->cpu; unsigned long irq_flags; struct llcc_pmu *llccpmu = to_llcc_pmu(event->pmu); raw_spin_lock(&users_lock); if (!users) { writel_relaxed(ENABLE, MON_CFG(llccpmu)); writel_relaxed(ENABLE, BEAC_ENABLE(llccpmu)); } users++; raw_spin_unlock(&users_lock); event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; if (flags & PERF_EF_START) qcom_llcc_event_start(event, PERF_EF_RELOAD); return 0; } static void qcom_llcc_event_del(struct perf_event *event, int flags) { int i; unsigned int cpu = event->cpu; unsigned long irq_flags; struct llcc_pmu *llccpmu = to_llcc_pmu(event->pmu); raw_spin_lock(&users_lock); users--; if (!users) { writel_relaxed(DISABLE, MON_CFG(llccpmu)); writel_relaxed(DISABLE, BEAC_ENABLE(llccpmu)); } raw_spin_unlock(&users_lock); } static int qcom_llcc_pmu_probe(struct platform_device *pdev) { struct llcc_pmu *llccpmu; struct resource *res; int ret, 
i; llccpmu = devm_kzalloc(&pdev->dev, sizeof(struct llcc_pmu), GFP_KERNEL); if (!llccpmu) return -ENOMEM; llccpmu->pmu = (struct pmu) { .task_ctx_nr = perf_invalid_context, .event_init = qcom_llcc_event_init, .add = qcom_llcc_event_add, .del = qcom_llcc_event_del, .start = qcom_llcc_event_start, .stop = qcom_llcc_event_stop, .read = qcom_llcc_event_read, }; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lagg-base"); llccpmu->lagg_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(llccpmu->lagg_base)) { dev_err(&pdev->dev, "Can't map PMU lagg base: @%pa\n", &res->start); return PTR_ERR(llccpmu->lagg_base); } res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "beac-base"); llccpmu->beac_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(llccpmu->beac_base)) { dev_err(&pdev->dev, "Can't map PMU beac base @%pa\n", &res->start); return PTR_ERR(llccpmu->beac_base); } writel_relaxed(SCALING_FACTOR, BEAC_SCALE(llccpmu)); raw_spin_lock_init(&counter_lock); raw_spin_lock_init(&users_lock); ret = perf_pmu_register(&llccpmu->pmu, "llcc-pmu", -1); if (ret < 0) dev_err(&pdev->dev, "Failed to register LLCC PMU (%d)\n", ret); dev_info(&pdev->dev, "Registered llcc_pmu, type: %d\n", llccpmu->pmu.type); return 0; } static const struct of_device_id qcom_llcc_pmu_match_table[] = { { .compatible = "qcom,qcom-llcc-pmu" }, {} }; static struct platform_driver qcom_llcc_pmu_driver = { .driver = { .name = "qcom-llcc-pmu", .of_match_table = qcom_llcc_pmu_match_table, }, .probe = qcom_llcc_pmu_probe, }; module_platform_driver(qcom_llcc_pmu_driver);