Commit 9993f9f9 authored by Neil Leeder

soc: qcom: add l2 cache perf events driver



Add perf events support for the L2 cache PMU.

The L2 cache PMU driver is named 'l2cache' and can be used
with perf events to profile L2 events such as cache hits
and misses.
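
For example, once the driver is registered, a raw L2 event can be
counted system-wide with perf, using the l2_reg/l2_code/l2_grp format
fields exposed by the driver (the event encoding below is illustrative;
real event codes are hardware-specific):

  perf stat -a -e l2cache/l2_reg=0x3,l2_code=0x08,l2_grp=0x2/ sleep 1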

Change-Id: Ic17087dfb48781fec8daf0063e5d556d377dbc7e
Signed-off-by: Neil Leeder <nleeder@codeaurora.org>
parent 0b8b6a94
+20 −0
L2 cache performance monitor unit

L2 cache controllers have a performance monitor unit to measure
events such as cache hits and misses. There is one L2 cache PMU
for each cluster of CPUs.

Required properties:

- compatible : should be "qcom,qcom-l2cache-pmu"
- interrupts : 1 interrupt for each cluster.
- qcom,cpu-affinity: specifies the id of the first CPU in the cluster.

Example:

	l2cache-pmu {
		compatible = "qcom,qcom-l2cache-pmu";
		interrupts = <0 0 1>, <0 8 1>;
		qcom,cpu-affinity = <0>, <2>;
	};
+1 −1
@@ -50,7 +50,7 @@ obj-$(CONFIG_MSM_RPM_RBCPR_STATS_V2_LOG) += rpm_rbcpr_stats_v2.o
 obj-$(CONFIG_MEM_SHARE_QMI_SERVICE)		+= memshare/
 obj-$(CONFIG_CP_ACCESS64) += cpaccess64.o
 obj-$(CONFIG_MSM_RPM_STATS_LOG) += rpm_stats.o rpm_master_stat.o rpm_rail_stats.o
-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_kryo.o
+obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_kryo.o perf_event_l2.o
 obj-$(CONFIG_MSM_RPM_LOG) += rpm_log.o
 obj-$(CONFIG_MSM_JTAG) += jtag-fuse.o jtag.o
 obj-$(CONFIG_MSM_JTAG_MM) +=  jtag-fuse.o jtag-mm.o
+964 −0
/* Copyright (c) 2015, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#define pr_fmt(fmt) "l2 perfevents: " fmt

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/list.h>
#include <linux/of.h>
#include <linux/acpi.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <soc/qcom/perf_event_l2.h>
#include <soc/qcom/kryo-l2-accessors.h>

/*
 * The cache is made up of one or more slices; each slice has its own PMU.
 * This structure represents one of the hardware PMUs.
 */
struct hml2_pmu {
	struct list_head entry;
	u32 cluster;
	struct perf_event *events[MAX_L2_CTRS];
	unsigned long used_mask[BITS_TO_LONGS(MAX_L2_CTRS)];
	atomic64_t prev_count[MAX_L2_CTRS];
	spinlock_t pmu_lock;
};

/*
 * Aggregate PMU. Implements the core pmu functions and manages
 * the hardware PMUs.
 */

struct l2cache_pmu {
	u32 num_pmus;
	struct list_head pmus;
	struct pmu pmu;
	int num_counters;
};

#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))

static struct l2cache_pmu l2cache_pmu = { 0 };

static u32 l2_cycle_ctr_idx;
static u32 l2_reset_mask;

static inline u32 idx_to_reg(u32 idx)
{
	u32 bit;

	if (idx == l2_cycle_ctr_idx)
		bit = 1 << L2CYCLE_CTR_BIT;
	else
		bit = 1 << idx;
	return bit;
}

static struct hml2_pmu *get_hml2_pmu(struct l2cache_pmu *system, int cpu)
{
	u32 cluster;
	struct hml2_pmu *slice;

	if (cpu < 0)
		cpu = smp_processor_id();

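	/*
	 * Clusters are assumed to hold exactly two CPUs throughout this
	 * driver, so the cluster id is simply cpu / 2.
	 */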
	cluster = cpu >> 1;
	list_for_each_entry(slice, &system->pmus, entry) {
		if (slice->cluster == cluster)
			return slice;
	}

	pr_err("L2 cluster not found for CPU %d\n", cpu);
	return NULL;
}

static
void hml2_pmu__reset_on_slice(void *x)
{
	/* Reset all ctrs */
	set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
	set_l2_indirect_reg(L2PMCNTENCLR, l2_reset_mask);
	set_l2_indirect_reg(L2PMINTENCLR, l2_reset_mask);
	set_l2_indirect_reg(L2PMOVSCLR, l2_reset_mask);
}

static inline
void hml2_pmu__reset(struct hml2_pmu *slice)
{
	int cpu;
	int i;

	if ((smp_processor_id() >> 1) == slice->cluster) {
		hml2_pmu__reset_on_slice(NULL);
		return;
	}
	cpu = slice->cluster << 1;
	/* Call each cpu in the cluster until one works */
	for (i = 0; i <= 1; i++) {
		if (!smp_call_function_single(cpu | i, hml2_pmu__reset_on_slice,
					      NULL, 1))
			return;
	}
	pr_err("Failed to reset on cluster %d\n", slice->cluster);
}

static inline
void hml2_pmu__init(struct hml2_pmu *slice)
{
	hml2_pmu__reset(slice);
}

static inline
void hml2_pmu__enable(void)
{
	isb();
	set_l2_indirect_reg(L2PMCR, L2PMCR_GLOBAL_ENABLE);
}

static inline
void hml2_pmu__disable(void)
{
	set_l2_indirect_reg(L2PMCR, L2PMCR_GLOBAL_DISABLE);
	isb();
}

static inline
void hml2_pmu__counter_set_value(u32 idx, u64 value)
{
	u32 counter_reg;

	if (idx == l2_cycle_ctr_idx) {
		set_l2_indirect_reg(L2PMCCNTR1, (u32)(value >> 32));
		set_l2_indirect_reg(L2PMCCNTR0, (u32)(value & 0xFFFFFFFF));
	} else {
		counter_reg = (idx * 16) + IA_L2PMXEVCNTR_BASE;
		set_l2_indirect_reg(counter_reg, (u32)(value & 0xFFFFFFFF));
	}
}

static inline
u64 hml2_pmu__counter_get_value(u32 idx)
{
	u64 value;
	u32 counter_reg;
	u32 hi, lo;

	if (idx == l2_cycle_ctr_idx) {
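		/*
		 * The 64-bit cycle counter is read as two 32-bit halves.
		 * Re-read the high word until it is stable, so a carry out
		 * of the low word between the two reads cannot be missed.
		 */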
		do {
			hi = get_l2_indirect_reg(L2PMCCNTR1);
			lo = get_l2_indirect_reg(L2PMCCNTR0);
		} while (hi != get_l2_indirect_reg(L2PMCCNTR1));
		value = ((u64)hi << 32) | lo;
	} else {
		counter_reg = (idx * 16) + IA_L2PMXEVCNTR_BASE;
		value = get_l2_indirect_reg(counter_reg);
	}

	return value;
}

static inline
void hml2_pmu__counter_enable(u32 idx)
{
	u32 reg;

	reg = get_l2_indirect_reg(L2PMCNTENSET);
	reg |= idx_to_reg(idx);
	set_l2_indirect_reg(L2PMCNTENSET, reg);
}

static inline
void hml2_pmu__counter_disable(u32 idx)
{
	set_l2_indirect_reg(L2PMCNTENCLR, idx_to_reg(idx));
}

static inline
void hml2_pmu__counter_enable_interrupt(u32 idx)
{
	u32 reg;

	reg = get_l2_indirect_reg(L2PMINTENSET);
	reg |= idx_to_reg(idx);
	set_l2_indirect_reg(L2PMINTENSET, reg);
}

static inline
void hml2_pmu__counter_disable_interrupt(u32 idx)
{
	set_l2_indirect_reg(L2PMINTENCLR, idx_to_reg(idx));
}

static inline
void hml2_pmu__set_evcntcr(u32 ctr, u32 val)
{
	u32 evtcr_reg = (ctr * IA_L2_REG_OFFSET) + IA_L2PMXEVCNTCR_BASE;

	set_l2_indirect_reg(evtcr_reg, val);
}

static inline
void hml2_pmu__set_ccntcr(u32 val)
{
	set_l2_indirect_reg(L2PMCCNTCR, val);
}

static inline
void hml2_pmu__set_evtyper(u32 val, u32 ctr)
{
	u32 evtype_reg = (ctr * IA_L2_REG_OFFSET) + IA_L2PMXEVTYPER_BASE;

	set_l2_indirect_reg(evtype_reg, val);
}

static
void hml2_pmu__set_evres(struct hml2_pmu *slice,
			 u32 event_group, u32 event_reg, u32 event_cc)
{
	u32 group_reg;
	u32 group_val;
	u32 group_mask;
	u32 resr_val;
	u32 shift;
	unsigned long iflags;

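	/*
	 * Each event group owns an 8-bit column-code field in L2PMRESRL
	 * (groups 0-3) or L2PMRESRH (groups 4-7).
	 */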
	shift = 8 * (event_group & 3);
	group_val = (event_cc & 0xff) << shift;
	group_mask = ~(0xff << shift);

	if (event_group <= 3) {
		group_reg = L2PMRESRL;
	} else {
		group_reg = L2PMRESRH;
		group_val |= L2PMRESRH_EN;
	}

	spin_lock_irqsave(&slice->pmu_lock, iflags);

	resr_val = get_l2_indirect_reg(group_reg);
	resr_val &= group_mask;
	resr_val |= group_val;
	set_l2_indirect_reg(group_reg, resr_val);

	/* The enable bit has to be set in RESRH, if it's not set already */
	if (group_reg != L2PMRESRH) {
		resr_val = get_l2_indirect_reg(L2PMRESRH);
		if (!(resr_val & L2PMRESRH_EN)) {
			resr_val |= L2PMRESRH_EN;
			set_l2_indirect_reg(L2PMRESRH, resr_val);
		}
	}
	spin_unlock_irqrestore(&slice->pmu_lock, iflags);
}

static void
hml2_pmu__set_evfilter_task_mode(int ctr)
{
	u32 filter_reg = (ctr * 16) + IA_L2PMXEVFILTER_BASE;
	u32 l2_orig_filter = L2PMXEVFILTER_SUFILTER_ALL |
			     L2PMXEVFILTER_ORGFILTER_IDINDEP;
	u32 filter_val = l2_orig_filter | 1 << (smp_processor_id() % 2);

	set_l2_indirect_reg(filter_reg, filter_val);
}

static void
hml2_pmu__set_evfilter_sys_mode(int ctr, int cpu, unsigned int is_tracectr)
{
	u32 filter_reg = (ctr * IA_L2_REG_OFFSET) + IA_L2PMXEVFILTER_BASE;
	u32 filter_val;
	u32 l2_orig_filter = L2PMXEVFILTER_SUFILTER_ALL |
			     L2PMXEVFILTER_ORGFILTER_IDINDEP;

	if (is_tracectr == 1)
		filter_val = l2_orig_filter | 1 << (cpu % 2);
	else
		filter_val = l2_orig_filter | L2PMXEVFILTER_ORGFILTER_ALL;

	set_l2_indirect_reg(filter_reg, filter_val);
}

static inline
void hml2_pmu__reset_ovsr(u32 idx)
{
	set_l2_indirect_reg(L2PMOVSCLR, idx_to_reg(idx));
}

static inline
u32 hml2_pmu__getreset_ovsr(void)
{
	u32 result = get_l2_indirect_reg(L2PMOVSSET);

	set_l2_indirect_reg(L2PMOVSCLR, result);
	return result;
}

static inline
int hml2_pmu__has_overflowed(u32 ovsr)
{
	return (ovsr & l2_reset_mask) != 0;
}

static inline
int hml2_pmu__counter_has_overflowed(u32 ovsr, u32 idx)
{
	return (ovsr & idx_to_reg(idx)) != 0;
}

static
void l2_cache__event_update_from_slice(struct perf_event *event,
				       struct hml2_pmu *slice)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 delta64, prev, now;
	u32 delta;
	u32 idx = hwc->idx;

again:
	prev = atomic64_read(&slice->prev_count[idx]);
	now = hml2_pmu__counter_get_value(idx);

	if (atomic64_cmpxchg(&slice->prev_count[idx], prev, now) != prev)
		goto again;

	if (idx == l2_cycle_ctr_idx) {
		/*
		 * The cycle counter is 64-bit so needs separate handling
		 * of 64-bit delta.
		 */
		delta64 = now - prev;
		local64_add(delta64, &event->count);
		local64_sub(delta64, &hwc->period_left);
	} else {
		/*
		 * 32-bit counters rely on unsigned 32-bit arithmetic so the
		 * delta is still correct when the counter wraps (now < prev).
		 */
		delta = now - prev;
		local64_add(delta, &event->count);
		local64_sub(delta, &hwc->period_left);
	}
}

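/*
 * For counting (non-sampling) events, program the counter near its
 * maximum so it overflows after roughly hwc->sample_period counts
 * (L2_CNT_PERIOD by default, set in l2_cache__event_init); the overflow
 * interrupt then folds each accumulated delta into event->count.
 */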
static
void l2_cache__slice_set_period(struct hml2_pmu *slice,
				struct hw_perf_event *hwc)
{
	u64 value = L2_MAX_PERIOD - (hwc->sample_period - 1);
	u32 idx = hwc->idx;
	u64 prev = atomic64_read(&slice->prev_count[idx]);

	if (prev < value) {
		value += prev;
		atomic64_set(&slice->prev_count[idx], value);
	} else {
		value = prev;
	}

	hml2_pmu__reset_ovsr(idx);
	hml2_pmu__counter_set_value(idx, value);
}

static
int l2_cache__event_set_period(struct perf_event *event,
			       struct hw_perf_event *hwc)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hml2_pmu *slice = get_hml2_pmu(system, event->cpu);
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;
	u32 idx;

	if (unlikely(!slice))
		return ret;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)L2_MAX_PERIOD)
		left = L2_MAX_PERIOD;

	idx = hwc->idx;

	atomic64_set(&slice->prev_count[idx], (u64)-left);
	hml2_pmu__reset_ovsr(idx);
	hml2_pmu__counter_set_value(idx, (u64)-left);
	perf_event_update_userpage(event);

	return ret;
}

static
int l2_cache__get_event_idx(struct hml2_pmu *slice,
			    struct hw_perf_event *hwc)
{
	int idx;

	if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
		if (test_and_set_bit(l2_cycle_ctr_idx, slice->used_mask))
			return -EAGAIN;

		return l2_cycle_ctr_idx;
	}

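	/*
	 * General-purpose counters occupy indices 0..num_counters-2; the
	 * last index is reserved for the dedicated cycle counter.
	 */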
	for (idx = 0; idx < l2cache_pmu.num_counters - 1; idx++) {
		if (!test_and_set_bit(idx, slice->used_mask))
			return idx;
	}

	/* The counters are all in use. */
	return -EAGAIN;
}

static
void l2_cache__event_disable(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		hml2_pmu__counter_disable_interrupt(hwc->idx);
		hml2_pmu__counter_disable(hwc->idx);
	}
}

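/*
 * l2_cache__event_init() gives counting events a synthetic sample_period,
 * so sampling mode is detected via sample_type rather than the usual
 * sample_period != 0 check.
 */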
static inline
int is_sampling(struct perf_event *event)
{
	return event->attr.sample_type != 0;
}

static
irqreturn_t l2_cache__handle_irq(int irq_num, void *data)
{
	struct hml2_pmu *slice = data;
	u32 ovsr;
	int idx;
	struct pt_regs *regs;

	ovsr = hml2_pmu__getreset_ovsr();
	if (!hml2_pmu__has_overflowed(ovsr))
		return IRQ_NONE;

	regs = get_irq_regs();

	for (idx = 0; idx < l2cache_pmu.num_counters; idx++) {
		struct perf_event *event = slice->events[idx];
		struct hw_perf_event *hwc;
		struct perf_sample_data data;

		if (!event)
			continue;

		if (!hml2_pmu__counter_has_overflowed(ovsr, idx))
			continue;

		l2_cache__event_update_from_slice(event, slice);
		hwc = &event->hw;

		if (is_sampling(event)) {
			perf_sample_data_init(&data, 0, hwc->last_period);
			if (!l2_cache__event_set_period(event, hwc))
				continue;
			if (perf_event_overflow(event, &data, regs))
				l2_cache__event_disable(event);
		} else {
			l2_cache__slice_set_period(slice, hwc);
		}
	}

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}

/*
 * Implementation of abstract pmu functionality required by
 * the core perf events code.
 */

static
void l2_cache__pmu_enable(struct pmu *pmu)
{
	/* Ensure all programming commands are done before proceeding */
	wmb();
	hml2_pmu__enable();
}

static
void l2_cache__pmu_disable(struct pmu *pmu)
{
	hml2_pmu__disable();
	/* Ensure the basic counter unit is stopped before proceeding */
	wmb();
}

static
int l2_cache__event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != l2cache_pmu.pmu.type)
		return -ENOENT;

	/* We cannot filter accurately so we just don't allow it. */
	if (event->attr.exclude_user || event->attr.exclude_kernel ||
			event->attr.exclude_hv || event->attr.exclude_idle)
		return -EINVAL;

	hwc->idx = -1;
	hwc->config_base = event->attr.config;

	/*
	 * For counting events use L2_CNT_PERIOD which allows for simplified
	 * math and proper handling of overflows in the presence of IRQs and
	 * SMP.
	 */
	if (hwc->sample_period == 0) {
		hwc->sample_period = L2_CNT_PERIOD;
		hwc->last_period   = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}

static
void l2_cache__event_update(struct perf_event *event)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hml2_pmu *slice;
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->idx == -1)
		return;

	slice = get_hml2_pmu(system, event->cpu);
	if (unlikely(!slice))
		return;
	l2_cache__event_update_from_slice(event, slice);
}

static
void l2_cache__event_start(struct perf_event *event, int flags)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hml2_pmu *slice;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	u32 config;
	u32 evt_prefix, event_reg, event_cc, event_group;
	int is_tracectr = 0;

	if (idx < 0)
		return;

	hwc->state = 0;

	slice = get_hml2_pmu(system, event->cpu);
	if (unlikely(!slice))
		return;
	if (is_sampling(event))
		l2_cache__event_set_period(event, hwc);
	else
		l2_cache__slice_set_period(slice, hwc);

	if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
		hml2_pmu__set_ccntcr(0x0);
		goto out;
	}

	config = hwc->config_base;
	evt_prefix  = (config & EVENT_PREFIX_MASK) >> EVENT_PREFIX_SHIFT;
	event_reg   = (config & EVENT_REG_MASK)    >> EVENT_REG_SHIFT;
	event_cc    = (config & EVENT_CC_MASK)     >> EVENT_CC_SHIFT;
	event_group = (config & EVENT_GROUP_MASK);

	/* Check if user requested any special origin filtering. */
	if (evt_prefix == L2_TRACECTR_PREFIX)
		is_tracectr = 1;

	hml2_pmu__set_evcntcr(idx, 0x0);
	hml2_pmu__set_evtyper(event_group, idx);
	hml2_pmu__set_evres(slice, event_group, event_reg, event_cc);
	if (event->cpu < 0)
		hml2_pmu__set_evfilter_task_mode(idx);
	else
		hml2_pmu__set_evfilter_sys_mode(idx, event->cpu, is_tracectr);
out:
	hml2_pmu__counter_enable_interrupt(idx);
	hml2_pmu__counter_enable(idx);
}

static
void l2_cache__event_stop(struct perf_event *event, int flags)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hml2_pmu *slice;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (idx < 0)
		return;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		slice = get_hml2_pmu(system, event->cpu);
		if (unlikely(!slice))
			return;
		hml2_pmu__counter_disable_interrupt(idx);
		hml2_pmu__counter_disable(idx);

		if (flags & PERF_EF_UPDATE)
			l2_cache__event_update(event);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

/* Look for a duplicate event already configured on this cluster */
static
int config_is_dup(struct hml2_pmu *slice, struct hw_perf_event *hwc)
{
	int i;
	struct hw_perf_event *hwc_i;

	for (i = 0; i < MAX_L2_CTRS; i++) {
		if (slice->events[i] == NULL)
			continue;
		hwc_i = &slice->events[i]->hw;
		if (hwc->config_base == hwc_i->config_base)
			return 1;
	}
	return 0;
}

/* Look for event with same R, G values already configured on this cluster */
static
int event_violates_column_exclusion(struct hml2_pmu *slice,
				    struct hw_perf_event *hwc)
{
	int i;
	struct hw_perf_event *hwc_i;
	u32 r_g_mask = EVENT_REG_MASK | EVENT_GROUP_MASK;
	u32 r_g_value = hwc->config_base & r_g_mask;

	for (i = 0; i < MAX_L2_CTRS; i++) {
		if (slice->events[i] == NULL)
			continue;
		hwc_i = &slice->events[i]->hw;
		/*
		 * An identical event does not violate column exclusion,
		 * e.g. the same sampling event opened on every CPU.
		 */
		if (hwc->config_base == hwc_i->config_base)
			continue;
		if (r_g_value == (hwc_i->config_base & r_g_mask)) {
			pr_err("column exclusion violation, events %lx, %lx\n",
			       hwc_i->config_base & L2_EVT_MASK,
			       hwc->config_base & L2_EVT_MASK);
			return 1;
		}
	}
	return 0;
}

static
int l2_cache__event_add(struct perf_event *event, int flags)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	int err = 0;
	struct hml2_pmu *slice;

	/*
	 * We need to disable the pmu while adding the event, otherwise
	 * the perf tick might kick in and re-add this event.
	 */
	perf_pmu_disable(event->pmu);

	slice = get_hml2_pmu(system, event->cpu);
	if (!slice) {
		event->state = PERF_EVENT_STATE_OFF;
		hwc->idx = -1;
		goto out;
	}

	/*
	 * This checks for a duplicate event on the same cluster, which
	 * typically occurs in non-sampling mode when using perf -a,
	 * which generates events on each CPU. In this case, we don't
	 * want to permanently disable the event by setting its state to
	 * OFF, because if the other CPU is subsequently hotplugged, etc,
	 * we want the opportunity to start collecting on this event.
	 */
	if (config_is_dup(slice, hwc)) {
		hwc->idx = -1;
		goto out;
	}

	if (event_violates_column_exclusion(slice, hwc)) {
		event->state = PERF_EVENT_STATE_OFF;
		hwc->idx = -1;
		goto out;
	}

	idx = l2_cache__get_event_idx(slice, hwc);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	hwc->idx = idx;
	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	slice->events[idx] = event;
	atomic64_set(&slice->prev_count[idx], 0ULL);

	if (flags & PERF_EF_START)
		l2_cache__event_start(event, flags);

	/* Propagate changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	perf_pmu_enable(event->pmu);
	return err;
}

static
void l2_cache__event_del(struct perf_event *event, int flags)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	struct hml2_pmu *slice;
	int idx = hwc->idx;

	if (idx < 0)
		return;

	slice = get_hml2_pmu(system, event->cpu);
	if (unlikely(!slice))
		return;
	l2_cache__event_stop(event, flags | PERF_EF_UPDATE);
	slice->events[idx] = NULL;
	clear_bit(idx, slice->used_mask);

	perf_event_update_userpage(event);
}

static
void l2_cache__event_read(struct perf_event *event)
{
	l2_cache__event_update(event);
}

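/*
 * Returning 0 from event_idx tells perf there is no counter index that
 * userspace could use to read the counter directly via the mmap'd page.
 */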
static
int dummy_event_idx(struct perf_event *event)
{
	return 0;
}

/* NRCCG format for perf RAW codes: N = prefix, R = reg, CC = code, G = group. */
PMU_FORMAT_ATTR(l2_prefix, "config:16-19");
PMU_FORMAT_ATTR(l2_reg,    "config:12-15");
PMU_FORMAT_ATTR(l2_code,   "config:4-11");
PMU_FORMAT_ATTR(l2_grp,    "config:0-3");
static struct attribute *l2_cache_pmu_formats[] = {
	&format_attr_l2_prefix.attr,
	&format_attr_l2_reg.attr,
	&format_attr_l2_code.attr,
	&format_attr_l2_grp.attr,
	NULL,
};

static struct attribute_group l2_cache_pmu_format_group = {
	.name = "format",
	.attrs = l2_cache_pmu_formats,
};

static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
	&l2_cache_pmu_format_group,
	NULL,
};

/*
 * Generic device handlers
 */

static const struct of_device_id l2_cache_pmu_of_match[] = {
	{ .compatible = "qcom,qcom-l2cache-pmu", },
	{}
};
MODULE_DEVICE_TABLE(of, l2_cache_pmu_of_match);

static int get_num_counters(void)
{
	int val;

	val = get_l2_indirect_reg(L2PMCR);

	/*
	 * Read bits 15:11 of the L2PMCR and add 1
	 * for the cycle counter.
	 */
	return ((val >> PMCR_NUM_EV_SHIFT) & PMCR_NUM_EV_MASK) + 1;
}

static int l2_cache_pmu_probe(struct platform_device *pdev)
{
	int result, irq, err;
	struct device_node *of_node;
	struct hml2_pmu *slice;
	u32 res_idx;
	u32 affinity_cpu;
	const u32 *affinity_arr;
	int len = 0;	/* of_get_property() leaves len untouched on failure */
	struct cpumask affinity_mask;

	INIT_LIST_HEAD(&l2cache_pmu.pmus);

	l2cache_pmu.pmu = (struct pmu) {
		.task_ctx_nr	= perf_hw_context,

		.name		= "l2cache",
		.pmu_enable	= l2_cache__pmu_enable,
		.pmu_disable	= l2_cache__pmu_disable,
		.event_init	= l2_cache__event_init,
		.add		= l2_cache__event_add,
		.del		= l2_cache__event_del,
		.start		= l2_cache__event_start,
		.stop		= l2_cache__event_stop,
		.read		= l2_cache__event_read,
		.event_idx	= dummy_event_idx,
		.attr_groups	= l2_cache_pmu_attr_grps,
		.events_across_hotplug = 1,
	};

	l2cache_pmu.num_counters = get_num_counters();
	l2_cycle_ctr_idx = l2cache_pmu.num_counters - 1;
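	/* All event counter bits, plus the cycle counter's bit (L2PM_CC_ENABLE) */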
	l2_reset_mask = ((1 << (l2cache_pmu.num_counters - 1)) - 1) |
		L2PM_CC_ENABLE;

	of_node = pdev->dev.of_node;
	affinity_arr = of_get_property(of_node, "qcom,cpu-affinity", &len);
	if (!affinity_arr || len <= 0) {
		dev_err(&pdev->dev,
			"Error reading qcom,cpu-affinity property (%d)\n", len);
		return -ENODEV;
	}
	len = len / sizeof(u32);

	/* Read slice info and initialize each slice */
	for (res_idx = 0; res_idx < len; res_idx++) {
		slice = devm_kzalloc(&pdev->dev, sizeof(*slice), GFP_KERNEL);
		if (!slice)
			return -ENOMEM;

		irq = platform_get_irq(pdev, res_idx);
		if (irq <= 0) {
			dev_err(&pdev->dev,
				"Failed to get valid irq for slice %d\n",
				res_idx);
			return -ENODEV;
		}

		affinity_cpu = be32_to_cpup(&affinity_arr[res_idx]);
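		/*
		 * Allow the slice IRQ to be taken on either CPU of this
		 * two-CPU cluster.
		 */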
		cpumask_clear(&affinity_mask);
		cpumask_set_cpu(affinity_cpu, &affinity_mask);
		cpumask_set_cpu(affinity_cpu + 1, &affinity_mask);

		if (irq_set_affinity(irq, &affinity_mask)) {
			dev_err(&pdev->dev,
				"Unable to set irq affinity (irq=%d, cpu=%d)\n",
				irq, affinity_cpu);
			return -ENODEV;
		}

		err = devm_request_irq(
			&pdev->dev, irq, l2_cache__handle_irq,
			IRQF_NOBALANCING, "l2-cache-pmu", slice);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to request IRQ%d for L2 PMU counters\n",
				irq);
			return err;
		}

		slice->cluster = affinity_cpu >> 1;
		slice->pmu_lock = __SPIN_LOCK_UNLOCKED(slice->pmu_lock);

		hml2_pmu__init(slice);
		list_add(&slice->entry, &l2cache_pmu.pmus);
		l2cache_pmu.num_pmus++;
	}

	if (l2cache_pmu.num_pmus == 0) {
		dev_err(&pdev->dev, "No hardware L2 PMUs found\n");
		return -ENODEV;
	}

	result = perf_pmu_register(&l2cache_pmu.pmu,
				   l2cache_pmu.pmu.name, -1);

	if (result < 0)
		dev_err(&pdev->dev,
			"Failed to register L2 cache PMU (%d)\n",
			result);
	else
		dev_info(&pdev->dev,
			 "Registered L2 cache PMU using %d HW PMUs\n",
			 l2cache_pmu.num_pmus);

	return result;
}

static int l2_cache_pmu_remove(struct platform_device *pdev)
{
	perf_pmu_unregister(&l2cache_pmu.pmu);
	return 0;
}

static struct platform_driver l2_cache_pmu_driver = {
	.driver = {
		.name = "l2cache-pmu",
		.owner = THIS_MODULE,
		.of_match_table = l2_cache_pmu_of_match,
	},
	.probe = l2_cache_pmu_probe,
	.remove = l2_cache_pmu_remove,
};

static int __init register_l2_cache_pmu_driver(void)
{
	return platform_driver_register(&l2_cache_pmu_driver);
}
device_initcall(register_l2_cache_pmu_driver);