Commit 9993f9f9 authored by Neil Leeder

soc: qcom: add l2 cache perf events driver



Add perf events support for the L2 cache PMU.

The L2 cache PMU driver is named 'l2cache' and can be used
with perf events to profile L2 events such as cache hits
and misses.
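
For example, once the driver is registered, a raw L2 event can be
counted system-wide with perf, using the l2_reg/l2_code/l2_grp format
fields exposed by the driver (the event encoding below is illustrative;
real event codes are hardware-specific):

  perf stat -a -e l2cache/l2_reg=0x3,l2_code=0x08,l2_grp=0x2/ sleep 1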

Change-Id: Ic17087dfb48781fec8daf0063e5d556d377dbc7e
Signed-off-by: Neil Leeder <nleeder@codeaurora.org>
parent 0b8b6a94
+20 −0
L2 cache performance monitor unit

L2 cache controllers have a performance monitor unit to measure
events such as cache hits and misses. There is one L2 cache PMU
for each cluster of CPUs.

Required properties:

- compatible : should be "qcom,qcom-l2cache-pmu"
- interrupts : 1 interrupt for each cluster.
- qcom,cpu-affinity: specifies the id of the first CPU in the cluster.

Example:

	l2cache-pmu {
		compatible = "qcom,qcom-l2cache-pmu";
		interrupts = <0 0 1>, <0 8 1>;
		qcom,cpu-affinity = <0>, <2>;
	};
+1 −1
@@ -50,7 +50,7 @@ obj-$(CONFIG_MSM_RPM_RBCPR_STATS_V2_LOG) += rpm_rbcpr_stats_v2.o
 obj-$(CONFIG_MEM_SHARE_QMI_SERVICE)		+= memshare/
 obj-$(CONFIG_CP_ACCESS64) += cpaccess64.o
 obj-$(CONFIG_MSM_RPM_STATS_LOG) += rpm_stats.o rpm_master_stat.o rpm_rail_stats.o
-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_kryo.o
+obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_kryo.o perf_event_l2.o
 obj-$(CONFIG_MSM_RPM_LOG) += rpm_log.o
 obj-$(CONFIG_MSM_JTAG) += jtag-fuse.o jtag.o
 obj-$(CONFIG_MSM_JTAG_MM) +=  jtag-fuse.o jtag-mm.o
+964 −0
/* Copyright (c) 2015, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#define pr_fmt(fmt) "l2 perfevents: " fmt

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/list.h>
#include <linux/of.h>
#include <linux/acpi.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <soc/qcom/perf_event_l2.h>
#include <soc/qcom/kryo-l2-accessors.h>

/*
 * The cache is made up of one or more slices; each slice has its own PMU.
 * This structure represents one of the hardware PMUs.
 */
struct hml2_pmu {
	struct list_head entry;
	u32 cluster;
	struct perf_event *events[MAX_L2_CTRS];
	unsigned long used_mask[BITS_TO_LONGS(MAX_L2_CTRS)];
	atomic64_t prev_count[MAX_L2_CTRS];
	spinlock_t pmu_lock;
};

/*
 * Aggregate PMU. Implements the core pmu functions and manages
 * the hardware PMUs.
 */

struct l2cache_pmu {
	u32 num_pmus;
	struct list_head pmus;
	struct pmu pmu;
	int num_counters;
};

#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))

static struct l2cache_pmu l2cache_pmu = { 0 };

static u32 l2_cycle_ctr_idx;
static u32 l2_reset_mask;

static inline u32 idx_to_reg(u32 idx)
{
	u32 bit;

	if (idx == l2_cycle_ctr_idx)
		bit = 1 << L2CYCLE_CTR_BIT;
	else
		bit = 1 << idx;
	return bit;
}

static struct hml2_pmu *get_hml2_pmu(struct l2cache_pmu *system, int cpu)
{
	u32 cluster;
	struct hml2_pmu *slice;

	if (cpu < 0)
		cpu = smp_processor_id();

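	/*
	 * Clusters are assumed to hold exactly two CPUs throughout this
	 * driver, so the cluster id is simply cpu / 2.
	 */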
	cluster = cpu >> 1;
	list_for_each_entry(slice, &system->pmus, entry) {
		if (slice->cluster == cluster)
			return slice;
	}

	pr_err("L2 cluster not found for CPU %d\n", cpu);
	return NULL;
}

static
void hml2_pmu__reset_on_slice(void *x)
{
	/* Reset all ctrs */
	set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
	set_l2_indirect_reg(L2PMCNTENCLR, l2_reset_mask);
	set_l2_indirect_reg(L2PMINTENCLR, l2_reset_mask);
	set_l2_indirect_reg(L2PMOVSCLR, l2_reset_mask);
}

static inline
void hml2_pmu__reset(struct hml2_pmu *slice)
{
	int cpu;
	int i;

	if ((smp_processor_id() >> 1) == slice->cluster) {
		hml2_pmu__reset_on_slice(NULL);
		return;
	}
	cpu = slice->cluster << 1;
	/* Call each cpu in the cluster until one works */
	for (i = 0; i <= 1; i++) {
		if (!smp_call_function_single(cpu | i, hml2_pmu__reset_on_slice,
					      NULL, 1))
			return;
	}
	pr_err("Failed to reset on cluster %d\n", slice->cluster);
}

static inline
void hml2_pmu__init(struct hml2_pmu *slice)
{
	hml2_pmu__reset(slice);
}

static inline
void hml2_pmu__enable(void)
{
	isb();
	set_l2_indirect_reg(L2PMCR, L2PMCR_GLOBAL_ENABLE);
}

static inline
void hml2_pmu__disable(void)
{
	set_l2_indirect_reg(L2PMCR, L2PMCR_GLOBAL_DISABLE);
	isb();
}

static inline
void hml2_pmu__counter_set_value(u32 idx, u64 value)
{
	u32 counter_reg;

	if (idx == l2_cycle_ctr_idx) {
		set_l2_indirect_reg(L2PMCCNTR1, (u32)(value >> 32));
		set_l2_indirect_reg(L2PMCCNTR0, (u32)(value & 0xFFFFFFFF));
	} else {
		counter_reg = (idx * 16) + IA_L2PMXEVCNTR_BASE;
		set_l2_indirect_reg(counter_reg, (u32)(value & 0xFFFFFFFF));
	}
}

static inline
u64 hml2_pmu__counter_get_value(u32 idx)
{
	u64 value;
	u32 counter_reg;
	u32 hi, lo;

	if (idx == l2_cycle_ctr_idx) {
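		/*
		 * The 64-bit cycle counter is read as two 32-bit halves.
		 * Re-read the high word until it is stable, so a carry out
		 * of the low word between the two reads cannot be missed.
		 */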
		do {
			hi = get_l2_indirect_reg(L2PMCCNTR1);
			lo = get_l2_indirect_reg(L2PMCCNTR0);
		} while (hi != get_l2_indirect_reg(L2PMCCNTR1));
		value = ((u64)hi << 32) | lo;
	} else {
		counter_reg = (idx * 16) + IA_L2PMXEVCNTR_BASE;
		value = get_l2_indirect_reg(counter_reg);
	}

	return value;
}

static inline
void hml2_pmu__counter_enable(u32 idx)
{
	u32 reg;

	reg = get_l2_indirect_reg(L2PMCNTENSET);
	reg |= idx_to_reg(idx);
	set_l2_indirect_reg(L2PMCNTENSET, reg);
}

static inline
void hml2_pmu__counter_disable(u32 idx)
{
	set_l2_indirect_reg(L2PMCNTENCLR, idx_to_reg(idx));
}

static inline
void hml2_pmu__counter_enable_interrupt(u32 idx)
{
	u32 reg;

	reg = get_l2_indirect_reg(L2PMINTENSET);
	reg |= idx_to_reg(idx);
	set_l2_indirect_reg(L2PMINTENSET, reg);
}

static inline
void hml2_pmu__counter_disable_interrupt(u32 idx)
{
	set_l2_indirect_reg(L2PMINTENCLR, idx_to_reg(idx));
}

static inline
void hml2_pmu__set_evcntcr(u32 ctr, u32 val)
{
	u32 evtcr_reg = (ctr * IA_L2_REG_OFFSET) + IA_L2PMXEVCNTCR_BASE;

	set_l2_indirect_reg(evtcr_reg, val);
}

static inline
void hml2_pmu__set_ccntcr(u32 val)
{
	set_l2_indirect_reg(L2PMCCNTCR, val);
}

static inline
void hml2_pmu__set_evtyper(u32 val, u32 ctr)
{
	u32 evtype_reg = (ctr * IA_L2_REG_OFFSET) + IA_L2PMXEVTYPER_BASE;

	set_l2_indirect_reg(evtype_reg, val);
}

static
void hml2_pmu__set_evres(struct hml2_pmu *slice,
			 u32 event_group, u32 event_reg, u32 event_cc)
{
	u32 group_reg;
	u32 group_val;
	u32 group_mask;
	u32 resr_val;
	u32 shift;
	unsigned long iflags;

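	/*
	 * Each event group owns an 8-bit column-code field in L2PMRESRL
	 * (groups 0-3) or L2PMRESRH (groups 4-7).
	 */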
	shift = 8 * (event_group & 3);
	group_val = (event_cc & 0xff) << shift;
	group_mask = ~(0xff << shift);

	if (event_group <= 3) {
		group_reg = L2PMRESRL;
	} else {
		group_reg = L2PMRESRH;
		group_val |= L2PMRESRH_EN;
	}

	spin_lock_irqsave(&slice->pmu_lock, iflags);

	resr_val = get_l2_indirect_reg(group_reg);
	resr_val &= group_mask;
	resr_val |= group_val;
	set_l2_indirect_reg(group_reg, resr_val);

	/* The enable bit has to be set in RESRH, if it's not set already */
	if (group_reg != L2PMRESRH) {
		resr_val = get_l2_indirect_reg(L2PMRESRH);
		if (!(resr_val & L2PMRESRH_EN)) {
			resr_val |= L2PMRESRH_EN;
			set_l2_indirect_reg(L2PMRESRH, resr_val);
		}
	}
	spin_unlock_irqrestore(&slice->pmu_lock, iflags);
}

static void
hml2_pmu__set_evfilter_task_mode(int ctr)
{
	u32 filter_reg = (ctr * 16) + IA_L2PMXEVFILTER_BASE;
	u32 l2_orig_filter = L2PMXEVFILTER_SUFILTER_ALL |
			     L2PMXEVFILTER_ORGFILTER_IDINDEP;
	u32 filter_val = l2_orig_filter | 1 << (smp_processor_id() % 2);

	set_l2_indirect_reg(filter_reg, filter_val);
}

static void
hml2_pmu__set_evfilter_sys_mode(int ctr, int cpu, unsigned int is_tracectr)
{
	u32 filter_reg = (ctr * IA_L2_REG_OFFSET) + IA_L2PMXEVFILTER_BASE;
	u32 filter_val;
	u32 l2_orig_filter = L2PMXEVFILTER_SUFILTER_ALL |
			     L2PMXEVFILTER_ORGFILTER_IDINDEP;

	if (is_tracectr == 1)
		filter_val = l2_orig_filter | 1 << (cpu % 2);
	else
		filter_val = l2_orig_filter | L2PMXEVFILTER_ORGFILTER_ALL;

	set_l2_indirect_reg(filter_reg, filter_val);
}

static inline
void hml2_pmu__reset_ovsr(u32 idx)
{
	set_l2_indirect_reg(L2PMOVSCLR, idx_to_reg(idx));
}

static inline
u32 hml2_pmu__getreset_ovsr(void)
{
	u32 result = get_l2_indirect_reg(L2PMOVSSET);

	set_l2_indirect_reg(L2PMOVSCLR, result);
	return result;
}

static inline
int hml2_pmu__has_overflowed(u32 ovsr)
{
	return (ovsr & l2_reset_mask) != 0;
}

static inline
int hml2_pmu__counter_has_overflowed(u32 ovsr, u32 idx)
{
	return (ovsr & idx_to_reg(idx)) != 0;
}

static
void l2_cache__event_update_from_slice(struct perf_event *event,
				       struct hml2_pmu *slice)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 delta64, prev, now;
	u32 delta;
	u32 idx = hwc->idx;

again:
	prev = atomic64_read(&slice->prev_count[idx]);
	now = hml2_pmu__counter_get_value(idx);

	if (atomic64_cmpxchg(&slice->prev_count[idx], prev, now) != prev)
		goto again;

	if (idx == l2_cycle_ctr_idx) {
		/*
		 * The cycle counter is 64-bit so needs separate handling
		 * of 64-bit delta.
		 */
		delta64 = now - prev;
		local64_add(delta64, &event->count);
		local64_sub(delta64, &hwc->period_left);
	} else {
		/*
		 * 32-bit counters rely on unsigned 32-bit arithmetic so the
		 * delta is still correct when the counter wraps (now < prev).
		 */
		delta = now - prev;
		local64_add(delta, &event->count);
		local64_sub(delta, &hwc->period_left);
	}
}

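/*
 * For counting (non-sampling) events, program the counter near its
 * maximum so it overflows after roughly hwc->sample_period counts
 * (L2_CNT_PERIOD by default, set in l2_cache__event_init); the overflow
 * interrupt then folds each accumulated delta into event->count.
 */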
static
void l2_cache__slice_set_period(struct hml2_pmu *slice,
				struct hw_perf_event *hwc)
{
	u64 value = L2_MAX_PERIOD - (hwc->sample_period - 1);
	u32 idx = hwc->idx;
	u64 prev = atomic64_read(&slice->prev_count[idx]);

	if (prev < value) {
		value += prev;
		atomic64_set(&slice->prev_count[idx], value);
	} else {
		value = prev;
	}

	hml2_pmu__reset_ovsr(idx);
	hml2_pmu__counter_set_value(idx, value);
}

static
int l2_cache__event_set_period(struct perf_event *event,
			       struct hw_perf_event *hwc)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hml2_pmu *slice = get_hml2_pmu(system, event->cpu);
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;
	u32 idx;

	if (unlikely(!slice))
		return ret;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)L2_MAX_PERIOD)
		left = L2_MAX_PERIOD;

	idx = hwc->idx;

	atomic64_set(&slice->prev_count[idx], (u64)-left);
	hml2_pmu__reset_ovsr(idx);
	hml2_pmu__counter_set_value(idx, (u64)-left);
	perf_event_update_userpage(event);

	return ret;
}

static
int l2_cache__get_event_idx(struct hml2_pmu *slice,
			    struct hw_perf_event *hwc)
{
	int idx;

	if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
		if (test_and_set_bit(l2_cycle_ctr_idx, slice->used_mask))
			return -EAGAIN;

		return l2_cycle_ctr_idx;
	}

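	/*
	 * General-purpose counters occupy indices 0..num_counters-2; the
	 * last index is reserved for the dedicated cycle counter.
	 */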
	for (idx = 0; idx < l2cache_pmu.num_counters - 1; idx++) {
		if (!test_and_set_bit(idx, slice->used_mask))
			return idx;
	}

	/* The counters are all in use. */
	return -EAGAIN;
}

static
void l2_cache__event_disable(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		hml2_pmu__counter_disable_interrupt(hwc->idx);
		hml2_pmu__counter_disable(hwc->idx);
	}
}

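/*
 * l2_cache__event_init() gives counting events a synthetic sample_period,
 * so sampling mode is detected via sample_type rather than the usual
 * sample_period != 0 check.
 */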
static inline
int is_sampling(struct perf_event *event)
{
	return event->attr.sample_type != 0;
}

static
irqreturn_t l2_cache__handle_irq(int irq_num, void *data)
{
	struct hml2_pmu *slice = data;
	u32 ovsr;
	int idx;
	struct pt_regs *regs;

	ovsr = hml2_pmu__getreset_ovsr();
	if (!hml2_pmu__has_overflowed(ovsr))
		return IRQ_NONE;

	regs = get_irq_regs();

	for (idx = 0; idx < l2cache_pmu.num_counters; idx++) {
		struct perf_event *event = slice->events[idx];
		struct hw_perf_event *hwc;
		struct perf_sample_data data;

		if (!event)
			continue;

		if (!hml2_pmu__counter_has_overflowed(ovsr, idx))
			continue;

		l2_cache__event_update_from_slice(event, slice);
		hwc = &event->hw;

		if (is_sampling(event)) {
			perf_sample_data_init(&data, 0, hwc->last_period);
			if (!l2_cache__event_set_period(event, hwc))
				continue;
			if (perf_event_overflow(event, &data, regs))
				l2_cache__event_disable(event);
		} else {
			l2_cache__slice_set_period(slice, hwc);
		}
	}

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}

/*
 * Implementation of abstract pmu functionality required by
 * the core perf events code.
 */

static
void l2_cache__pmu_enable(struct pmu *pmu)
{
	/* Ensure all programming commands are done before proceeding */
	wmb();
	hml2_pmu__enable();
}

static
void l2_cache__pmu_disable(struct pmu *pmu)
{
	hml2_pmu__disable();
	/* Ensure the basic counter unit is stopped before proceeding */
	wmb();
}

static
int l2_cache__event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != l2cache_pmu.pmu.type)
		return -ENOENT;

	/* We cannot filter accurately so we just don't allow it. */
	if (event->attr.exclude_user || event->attr.exclude_kernel ||
			event->attr.exclude_hv || event->attr.exclude_idle)
		return -EINVAL;

	hwc->idx = -1;
	hwc->config_base = event->attr.config;

	/*
	 * For counting events use L2_CNT_PERIOD which allows for simplified
	 * math and proper handling of overflows in the presence of IRQs and
	 * SMP.
	 */
	if (hwc->sample_period == 0) {
		hwc->sample_period = L2_CNT_PERIOD;
		hwc->last_period   = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}

static
void l2_cache__event_update(struct perf_event *event)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hml2_pmu *slice;
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->idx == -1)
		return;

	slice = get_hml2_pmu(system, event->cpu);
	if (unlikely(!slice))
		return;
	l2_cache__event_update_from_slice(event, slice);
}

static
void l2_cache__event_start(struct perf_event *event, int flags)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hml2_pmu *slice;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	u32 config;
	u32 evt_prefix, event_reg, event_cc, event_group;
	int is_tracectr = 0;

	if (idx < 0)
		return;

	hwc->state = 0;

	slice = get_hml2_pmu(system, event->cpu);
	if (unlikely(!slice))
		return;
	if (is_sampling(event))
		l2_cache__event_set_period(event, hwc);
	else
		l2_cache__slice_set_period(slice, hwc);

	if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
		hml2_pmu__set_ccntcr(0x0);
		goto out;
	}

	config = hwc->config_base;
	evt_prefix  = (config & EVENT_PREFIX_MASK) >> EVENT_PREFIX_SHIFT;
	event_reg   = (config & EVENT_REG_MASK)    >> EVENT_REG_SHIFT;
	event_cc    = (config & EVENT_CC_MASK)     >> EVENT_CC_SHIFT;
	event_group = (config & EVENT_GROUP_MASK);

	/* Check if user requested any special origin filtering. */
	if (evt_prefix == L2_TRACECTR_PREFIX)
		is_tracectr = 1;

	hml2_pmu__set_evcntcr(idx, 0x0);
	hml2_pmu__set_evtyper(event_group, idx);
	hml2_pmu__set_evres(slice, event_group, event_reg, event_cc);
	if (event->cpu < 0)
		hml2_pmu__set_evfilter_task_mode(idx);
	else
		hml2_pmu__set_evfilter_sys_mode(idx, event->cpu, is_tracectr);
out:
	hml2_pmu__counter_enable_interrupt(idx);
	hml2_pmu__counter_enable(idx);
}

static
void l2_cache__event_stop(struct perf_event *event, int flags)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hml2_pmu *slice;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (idx < 0)
		return;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		slice = get_hml2_pmu(system, event->cpu);
		if (unlikely(!slice))
			return;
		hml2_pmu__counter_disable_interrupt(idx);
		hml2_pmu__counter_disable(idx);

		if (flags & PERF_EF_UPDATE)
			l2_cache__event_update(event);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

/* Look for a duplicate event already configured on this cluster */
static
int config_is_dup(struct hml2_pmu *slice, struct hw_perf_event *hwc)
{
	int i;
	struct hw_perf_event *hwc_i;

	for (i = 0; i < MAX_L2_CTRS; i++) {
		if (slice->events[i] == NULL)
			continue;
		hwc_i = &slice->events[i]->hw;
		if (hwc->config_base == hwc_i->config_base)
			return 1;
	}
	return 0;
}

/* Look for event with same R, G values already configured on this cluster */
static
int event_violates_column_exclusion(struct hml2_pmu *slice,
				    struct hw_perf_event *hwc)
{
	int i;
	struct hw_perf_event *hwc_i;
	u32 r_g_mask = EVENT_REG_MASK | EVENT_GROUP_MASK;
	u32 r_g_value = hwc->config_base & r_g_mask;

	for (i = 0; i < MAX_L2_CTRS; i++) {
		if (slice->events[i] == NULL)
			continue;
		hwc_i = &slice->events[i]->hw;
		/*
		 * An identical event does not violate column exclusion,
		 * e.g. the same sampling event opened on every CPU.
		 */
		if (hwc->config_base == hwc_i->config_base)
			continue;
		if (r_g_value == (hwc_i->config_base & r_g_mask)) {
			pr_err("column exclusion violation, events %lx, %lx\n",
			       hwc_i->config_base & L2_EVT_MASK,
			       hwc->config_base & L2_EVT_MASK);
			return 1;
		}
	}
	return 0;
}

static
int l2_cache__event_add(struct perf_event *event, int flags)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	int err = 0;
	struct hml2_pmu *slice;

	/*
	 * We need to disable the pmu while adding the event, otherwise
	 * the perf tick might kick in and re-add this event.
	 */
	perf_pmu_disable(event->pmu);

	slice = get_hml2_pmu(system, event->cpu);
	if (!slice) {
		event->state = PERF_EVENT_STATE_OFF;
		hwc->idx = -1;
		goto out;
	}

	/*
	 * This checks for a duplicate event on the same cluster, which
	 * typically occurs in non-sampling mode when using perf -a,
	 * which generates events on each CPU. In this case, we don't
	 * want to permanently disable the event by setting its state to
	 * OFF, because if the other CPU is subsequently hotplugged, etc,
	 * we want the opportunity to start collecting on this event.
	 */
	if (config_is_dup(slice, hwc)) {
		hwc->idx = -1;
		goto out;
	}

	if (event_violates_column_exclusion(slice, hwc)) {
		event->state = PERF_EVENT_STATE_OFF;
		hwc->idx = -1;
		goto out;
	}

	idx = l2_cache__get_event_idx(slice, hwc);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	hwc->idx = idx;
	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	slice->events[idx] = event;
	atomic64_set(&slice->prev_count[idx], 0ULL);

	if (flags & PERF_EF_START)
		l2_cache__event_start(event, flags);

	/* Propagate changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	perf_pmu_enable(event->pmu);
	return err;
}

static
void l2_cache__event_del(struct perf_event *event, int flags)
{
	struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	struct hml2_pmu *slice;
	int idx = hwc->idx;

	if (idx < 0)
		return;

	slice = get_hml2_pmu(system, event->cpu);
	if (unlikely(!slice))
		return;
	l2_cache__event_stop(event, flags | PERF_EF_UPDATE);
	slice->events[idx] = NULL;
	clear_bit(idx, slice->used_mask);

	perf_event_update_userpage(event);
}

static
void l2_cache__event_read(struct perf_event *event)
{
	l2_cache__event_update(event);
}

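/*
 * Returning 0 from event_idx tells perf there is no counter index that
 * userspace could use to read the counter directly via the mmap'd page.
 */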
static
int dummy_event_idx(struct perf_event *event)
{
	return 0;
}

/* NRCCG format for perf RAW codes: N = prefix, R = reg, CC = code, G = group. */
PMU_FORMAT_ATTR(l2_prefix, "config:16-19");
PMU_FORMAT_ATTR(l2_reg,    "config:12-15");
PMU_FORMAT_ATTR(l2_code,   "config:4-11");
PMU_FORMAT_ATTR(l2_grp,    "config:0-3");
static struct attribute *l2_cache_pmu_formats[] = {
	&format_attr_l2_prefix.attr,
	&format_attr_l2_reg.attr,
	&format_attr_l2_code.attr,
	&format_attr_l2_grp.attr,
	NULL,
};

static struct attribute_group l2_cache_pmu_format_group = {
	.name = "format",
	.attrs = l2_cache_pmu_formats,
};

static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
	&l2_cache_pmu_format_group,
	NULL,
};

/*
 * Generic device handlers
 */

static const struct of_device_id l2_cache_pmu_of_match[] = {
	{ .compatible = "qcom,qcom-l2cache-pmu", },
	{}
};
MODULE_DEVICE_TABLE(of, l2_cache_pmu_of_match);

static int get_num_counters(void)
{
	int val;

	val = get_l2_indirect_reg(L2PMCR);

	/*
	 * Read bits 15:11 of the L2PMCR and add 1
	 * for the cycle counter.
	 */
	return ((val >> PMCR_NUM_EV_SHIFT) & PMCR_NUM_EV_MASK) + 1;
}

static int l2_cache_pmu_probe(struct platform_device *pdev)
{
	int result, irq, err;
	struct device_node *of_node;
	struct hml2_pmu *slice;
	u32 res_idx;
	u32 affinity_cpu;
	const u32 *affinity_arr;
	int len = 0;	/* of_get_property() leaves len untouched on failure */
	struct cpumask affinity_mask;

	INIT_LIST_HEAD(&l2cache_pmu.pmus);

	l2cache_pmu.pmu = (struct pmu) {
		.task_ctx_nr	= perf_hw_context,

		.name		= "l2cache",
		.pmu_enable	= l2_cache__pmu_enable,
		.pmu_disable	= l2_cache__pmu_disable,
		.event_init	= l2_cache__event_init,
		.add		= l2_cache__event_add,
		.del		= l2_cache__event_del,
		.start		= l2_cache__event_start,
		.stop		= l2_cache__event_stop,
		.read		= l2_cache__event_read,
		.event_idx	= dummy_event_idx,
		.attr_groups	= l2_cache_pmu_attr_grps,
		.events_across_hotplug = 1,
	};

	l2cache_pmu.num_counters = get_num_counters();
	l2_cycle_ctr_idx = l2cache_pmu.num_counters - 1;
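	/* All event counter bits, plus the cycle counter's bit (L2PM_CC_ENABLE) */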
	l2_reset_mask = ((1 << (l2cache_pmu.num_counters - 1)) - 1) |
		L2PM_CC_ENABLE;

	of_node = pdev->dev.of_node;
	affinity_arr = of_get_property(of_node, "qcom,cpu-affinity", &len);
	if (!affinity_arr || len <= 0) {
		dev_err(&pdev->dev,
			"Error reading qcom,cpu-affinity property (%d)\n", len);
		return -ENODEV;
	}
	len = len / sizeof(u32);

	/* Read slice info and initialize each slice */
	for (res_idx = 0; res_idx < len; res_idx++) {
		slice = devm_kzalloc(&pdev->dev, sizeof(*slice), GFP_KERNEL);
		if (!slice)
			return -ENOMEM;

		irq = platform_get_irq(pdev, res_idx);
		if (irq <= 0) {
			dev_err(&pdev->dev,
				"Failed to get valid irq for slice %d\n",
				res_idx);
			return -ENODEV;
		}

		affinity_cpu = be32_to_cpup(&affinity_arr[res_idx]);
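		/*
		 * Allow the slice IRQ to be taken on either CPU of this
		 * two-CPU cluster.
		 */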
		cpumask_clear(&affinity_mask);
		cpumask_set_cpu(affinity_cpu, &affinity_mask);
		cpumask_set_cpu(affinity_cpu + 1, &affinity_mask);

		if (irq_set_affinity(irq, &affinity_mask)) {
			dev_err(&pdev->dev,
				"Unable to set irq affinity (irq=%d, cpu=%d)\n",
				irq, affinity_cpu);
			return -ENODEV;
		}

		err = devm_request_irq(
			&pdev->dev, irq, l2_cache__handle_irq,
			IRQF_NOBALANCING, "l2-cache-pmu", slice);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to request IRQ%d for L2 PMU counters\n",
				irq);
			return err;
		}

		slice->cluster = affinity_cpu >> 1;
		slice->pmu_lock = __SPIN_LOCK_UNLOCKED(slice->pmu_lock);

		hml2_pmu__init(slice);
		list_add(&slice->entry, &l2cache_pmu.pmus);
		l2cache_pmu.num_pmus++;
	}

	if (l2cache_pmu.num_pmus == 0) {
		dev_err(&pdev->dev, "No hardware L2 PMUs found\n");
		return -ENODEV;
	}

	result = perf_pmu_register(&l2cache_pmu.pmu,
				   l2cache_pmu.pmu.name, -1);

	if (result < 0)
		dev_err(&pdev->dev,
			"Failed to register L2 cache PMU (%d)\n",
			result);
	else
		dev_info(&pdev->dev,
			 "Registered L2 cache PMU using %d HW PMUs\n",
			 l2cache_pmu.num_pmus);

	return result;
}

static int l2_cache_pmu_remove(struct platform_device *pdev)
{
	perf_pmu_unregister(&l2cache_pmu.pmu);
	return 0;
}

static struct platform_driver l2_cache_pmu_driver = {
	.driver = {
		.name = "l2cache-pmu",
		.owner = THIS_MODULE,
		.of_match_table = l2_cache_pmu_of_match,
	},
	.probe = l2_cache_pmu_probe,
	.remove = l2_cache_pmu_remove,
};

static int __init register_l2_cache_pmu_driver(void)
{
	return platform_driver_register(&l2_cache_pmu_driver);
}
device_initcall(register_l2_cache_pmu_driver);