Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2c856e14 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull arm[64] perf updates from Will Deacon:
 "I have another mixed bag of ARM-related perf patches here.

  It's about 25% CPU and 75% interconnect, but with drivers/bus/
  languishing without an obvious maintainer or tree, Olof and I agreed
  to keep all of these PMU patches together.  I suspect a whole load of
  code from drivers/bus/arm-* can be moved under drivers/perf/, so
  that's on the radar for the future.

  Summary:

   - Initial support for ARMv8.1 CPU PMUs

   - Support for the CPU PMU in Cavium ThunderX

   - CPU PMU support for systems running 32-bit Linux in secure mode

   - Support for the system PMU in ARM CCI-550 (Cache Coherent Interconnect)"

* tag 'arm64-perf' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (26 commits)
  drivers/perf: arm_pmu: avoid NULL dereference when not using devicetree
  arm64: perf: Extend ARMV8_EVTYPE_MASK to include PMCR.LC
  arm-cci: remove unused variable
  arm-cci: don't return value from void function
  arm-cci: make private functions static
  arm-cci: CoreLink CCI-550 PMU driver
  arm-cci500: Rearrange PMU driver for code sharing with CCI-550 PMU
  arm-cci: CCI-500: Work around PMU counter writes
  arm-cci: Provide hook for writing to PMU counters
  arm-cci: Add helper to enable PMU without synchornising counters
  arm-cci: Add routines to save/restore all counters
  arm-cci: Get the status of a counter
  arm-cci: write_counter: Remove redundant check
  arm-cci: Delay PMU counter writes to pmu::pmu_enable
  arm-cci: Refactor CCI PMU enable/disable methods
  arm-cci: Group writes to counter
  arm-cci: fix handling cpumask_any_but return value
  arm-cci: simplify sysfs attr handling
  drivers/perf: arm_pmu: implement CPU_PM notifier
  arm64: dts: Add Cavium ThunderX specific PMU
  ...
parents d34687ab 357b565d
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ specific to ARM.
		Definition: must contain one of the following:
			    "arm,cci-400"
			    "arm,cci-500"
			    "arm,cci-550"

	- reg
		Usage: required
@@ -101,6 +102,7 @@ specific to ARM.
				 "arm,cci-400-pmu"  - DEPRECATED, permitted only where OS has
						      secure acces to CCI registers
				 "arm,cci-500-pmu,r0"
				 "arm,cci-550-pmu,r0"
		- reg:
			Usage: required
			Value type: Integer cells. A register entry, expressed
+11 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@ Required properties:
	"qcom,scorpion-pmu"
	"qcom,scorpion-mp-pmu"
	"qcom,krait-pmu"
	"cavium,thunder-pmu"
- interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu
               interrupt (PPI) then 1 interrupt should be specified.

@@ -46,6 +47,16 @@ Optional properties:
- qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd
                     events.

- secure-reg-access : Indicates that the ARMv7 Secure Debug Enable Register
		      (SDER) is accessible. This will cause the driver to do
		      any setup required that is only possible in ARMv7 secure
		      state. If not present the ARMv7 SDER will not be touched,
		      which means the PMU may fail to operate unless external
		      code (bootloader or security monitor) has performed the
		      appropriate initialisation. Note that this property is
		      not valid for non-ARMv7 CPUs or ARMv7 CPUs booting Linux
		      in Non-secure state.

Example:

pmu {
+12 −1
Original line number Diff line number Diff line
@@ -712,6 +712,11 @@ static const struct attribute_group *armv7_pmuv2_attr_groups[] = {
#define	ARMV7_EXCLUDE_USER	(1 << 30)
#define	ARMV7_INCLUDE_HYP	(1 << 27)

/*
 * Secure debug enable reg
 */
#define ARMV7_SDER_SUNIDEN	BIT(1) /* Permit non-invasive debug */

static inline u32 armv7_pmnc_read(void)
{
	u32 val;
@@ -1094,7 +1099,13 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event,
static void armv7pmu_reset(void *info)
{
	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
	u32 idx, nb_cnt = cpu_pmu->num_events;
	u32 idx, nb_cnt = cpu_pmu->num_events, val;

	if (cpu_pmu->secure_access) {
		asm volatile("mrc p15, 0, %0, c1, c1, 1" : "=r" (val));
		val |= ARMV7_SDER_SUNIDEN;
		asm volatile("mcr p15, 0, %0, c1, c1, 1" : : "r" (val));
	}

	/* The counter and interrupt enable registers are unknown at reset. */
	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+5 −0
Original line number Diff line number Diff line
@@ -360,6 +360,11 @@
		             <1 10 0xff01>;
	};

	pmu {
		compatible = "cavium,thunder-pmu", "arm,armv8-pmuv3";
		interrupts = <1 7 4>;
	};

	soc {
		compatible = "simple-bus";
		#address-cells = <2>;
+100 −22
Original line number Diff line number Diff line
@@ -88,16 +88,25 @@
#define ARMV8_PMUV3_PERFCTR_L2D_TLB				0x2F
#define ARMV8_PMUV3_PERFCTR_L21_TLB				0x30

/* ARMv8 implementation defined event types. */
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD		0x40
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST		0x41
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD		0x42
#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST		0x43
#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD			0x4C
#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST			0x4D
#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD			0x4E
#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST			0x4F

/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL			0xC2

/* ARMv8 Cortex-A57 and Cortex-A72 specific event types. */
#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD			0x40
#define ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST			0x41
#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD			0x42
#define ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST			0x43
#define ARMV8_A57_PERFCTR_DTLB_REFILL_LD			0x4c
#define ARMV8_A57_PERFCTR_DTLB_REFILL_ST			0x4d
/* ARMv8 Cavium ThunderX specific event types. */
#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST			0xE9
#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS		0xEA
#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS		0xEB
#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS		0xEC
#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS		0xED

/* PMUv3 HW events mapping. */
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
@@ -132,6 +141,18 @@ static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
};

static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = {
	PERF_MAP_ALL_UNSUPPORTED,
	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_PC_WRITE,
	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
};

static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
						[PERF_COUNT_HW_CACHE_OP_MAX]
						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -175,16 +196,46 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	PERF_CACHE_MAP_ALL_UNSUPPORTED,

	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD,
	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD,
	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST,
	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST,
	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST,

	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,

	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_DTLB_REFILL_LD,
	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_DTLB_REFILL_ST,
	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,

	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_ITLB_REFILL,

	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
};

static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
						   [PERF_COUNT_HW_CACHE_OP_MAX]
						   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	PERF_CACHE_MAP_ALL_UNSUPPORTED,

	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST,
	[C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS,
	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS,

	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
	[C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS,
	[C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS,

	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD,
	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST,
	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,

	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_ITLB_REFILL,

@@ -325,7 +376,6 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
	NULL,
};


/*
 * Perf Events' indices
 */
@@ -356,9 +406,10 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
#define ARMV8_PMCR_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV8_PMCR_X		(1 << 4) /* Export to ETM */
#define ARMV8_PMCR_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV8_PMCR_LC		(1 << 6) /* Overflow on 64 bit cycle counter */
#define	ARMV8_PMCR_N_SHIFT	11	 /* Number of counters supported */
#define	ARMV8_PMCR_N_MASK	0x1f
#define	ARMV8_PMCR_MASK		0x3f	 /* Mask for writable bits */
#define	ARMV8_PMCR_MASK		0x7f	 /* Mask for writable bits */

/*
 * PMOVSR: counters overflow flag status reg
@@ -369,8 +420,8 @@ static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
/*
 * PMXEVTYPER: Event selection reg
 */
#define	ARMV8_EVTYPE_MASK	0xc80003ff	/* Mask for writable bits */
#define	ARMV8_EVTYPE_EVENT	0x3ff		/* Mask for EVENT bits */
#define	ARMV8_EVTYPE_MASK	0xc800ffff	/* Mask for writable bits */
#define	ARMV8_EVTYPE_EVENT	0xffff		/* Mask for EVENT bits */

/*
 * Event filters for PMUv3
@@ -445,9 +496,16 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
	if (!armv8pmu_counter_valid(cpu_pmu, idx))
		pr_err("CPU%u writing wrong counter %d\n",
			smp_processor_id(), idx);
	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
		asm volatile("msr pmccntr_el0, %0" :: "r" (value));
	else if (armv8pmu_select_counter(idx) == idx)
	else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
		/*
		 * Set the upper 32bits as this is a 64bit counter but we only
		 * count using the lower 32bits and we want an interrupt when
		 * it overflows.
		 */
		u64 value64 = 0xffffffff00000000ULL | value;

		asm volatile("msr pmccntr_el0, %0" :: "r" (value64));
	} else if (armv8pmu_select_counter(idx) == idx)
		asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
}

@@ -722,8 +780,11 @@ static void armv8pmu_reset(void *info)
		armv8pmu_disable_intens(idx);
	}

	/* Initialize & Reset PMNC: C and P bits. */
	armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
	/*
	 * Initialize & Reset PMNC. Request overflow interrupt for
	 * 64 bit cycle counter but cheat in armv8pmu_write_counter().
	 */
	armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C | ARMV8_PMCR_LC);
}

static int armv8_pmuv3_map_event(struct perf_event *event)
@@ -747,6 +808,13 @@ static int armv8_a57_map_event(struct perf_event *event)
				ARMV8_EVTYPE_EVENT);
}

static int armv8_thunder_map_event(struct perf_event *event)
{
	return armpmu_map_event(event, &armv8_thunder_perf_map,
				&armv8_thunder_perf_cache_map,
				ARMV8_EVTYPE_EVENT);
}

static void armv8pmu_read_num_pmnc_events(void *info)
{
	int *nb_cnt = info;
@@ -815,11 +883,21 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
	return armv8pmu_probe_num_events(cpu_pmu);
}

static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
{
	armv8_pmu_init(cpu_pmu);
	cpu_pmu->name			= "armv8_cavium_thunder";
	cpu_pmu->map_event		= armv8_thunder_map_event;
	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
	return armv8pmu_probe_num_events(cpu_pmu);
}

static const struct of_device_id armv8_pmu_of_device_ids[] = {
	{.compatible = "arm,armv8-pmuv3",	.data = armv8_pmuv3_init},
	{.compatible = "arm,cortex-a53-pmu",	.data = armv8_a53_pmu_init},
	{.compatible = "arm,cortex-a57-pmu",	.data = armv8_a57_pmu_init},
	{.compatible = "arm,cortex-a72-pmu",	.data = armv8_a72_pmu_init},
	{.compatible = "cavium,thunder-pmu",	.data = armv8_thunder_pmu_init},
	{},
};

Loading