Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a072738e authored by Cyrill Gorcunov's avatar Cyrill Gorcunov Committed by Ingo Molnar
Browse files

perf, x86: Implement initial P4 PMU driver



The netburst PMU is way different from the "architectural
performance monitoring" specification that current CPUs use.
P4 uses a tuple of ESCR+CCCR+COUNTER MSR registers to handle
performance monitoring events.

A few implementation details:

1) We need a separate x86_pmu::hw_config helper in struct
   x86_pmu since register bit-fields are quite different from P6,
   Core and later cpu series.

2) For the same reason, an x86_pmu::schedule_events helper is
   introduced.

3) hw_perf_event::config consists of packed ESCR+CCCR values.
   It's allowed since in reality both registers only use a half
   of their size. Of course before making a real write into a
   particular MSR we need to unpack the value and extend it to
   a proper size.

4) The tuple of packed ESCR+CCCR in hw_perf_event::config
   doesn't describe the memory address of ESCR MSR register
   so that we need to keep a mapping between these tuples
   used and available ESCR (various P4 events may use same
   ESCRs but not simultaneously), for this sake every active
   event has a per-cpu map of hw_perf_event::idx <--> ESCR
   addresses.

5) Since hw_perf_event::idx is an offset to counter/control register
   we need to lift X86_PMC_MAX_GENERIC up, otherwise kernel
   strips it down to 8 registers and event armed may never be turned
   off (ie the bit in active_mask is set but the loop never reaches
   this index to check), thanks to Peter Zijlstra

Restrictions:

 - No cascaded counters support (do we ever need them?)
 - No dependent events support (so PERF_COUNT_HW_INSTRUCTIONS
   doesn't work for now)
 - There are events with same counters which can't work simultaneously
   (need to use intersected ones due to broken counter 1)
 - No PERF_COUNT_HW_CACHE_ events yet

Todo:

 - Implement dependent events
 - Need proper hashing for event opcodes (no linear search, good for
   debugging stage but not in real loads)
 - Some events counted during a clock cycle -- need to set threshold
   for them and count every clock cycle just to get summary statistics
   (ie to behave the same way as other PMUs do)
 - Need to switch to using event_constraints
 - To support RAW events we need to encode a global list of P4 events
   into p4_templates
 - Cache events need to be added

Event support status matrix:

 Event			status
 -----------------------------
 cycles			works
 cache-references	works
 cache-misses		works
 branch-misses		works
 bus-cycles		partially (does not work on 64bit cpu with HT enabled)
 instruction		doesn't work (needs dependent event [mop tagging])
 branches		doesn't work

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20100311165439.GB5129@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 9b33fa6b
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@
 * Performance event hw details:
 */

#define X86_PMC_MAX_GENERIC					8
#define X86_PMC_MAX_GENERIC				       32
#define X86_PMC_MAX_FIXED					3

#define X86_PMC_IDX_GENERIC				        0
+707 −0

File added.

Preview size limit exceeded, changes collapsed.

+29 −17
Original line number Diff line number Diff line
@@ -190,6 +190,8 @@ struct x86_pmu {
	void		(*enable_all)(void);
	void		(*enable)(struct perf_event *);
	void		(*disable)(struct perf_event *);
	int		(*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
	unsigned	eventsel;
	unsigned	perfctr;
	u64		(*event_map)(int);
@@ -415,6 +417,25 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
	return 0;
}

/*
 * Default hw_config helper: build the initial event-select value for @hwc
 * from the privilege-level filters requested in @attr.
 *
 * The value always carries ARCH_PERFMON_EVENTSEL_INT so the counter
 * generates PMC IRQs; no 'enabled' bit is set here.  The USR bit is added
 * unless attr->exclude_user is set, and the OS bit is added unless
 * attr->exclude_kernel is set.
 *
 * Always returns 0.
 */
static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
{
	hwc->config = ARCH_PERFMON_EVENTSEL_INT
		    | (attr->exclude_user   ? 0 : ARCH_PERFMON_EVENTSEL_USR)
		    | (attr->exclude_kernel ? 0 : ARCH_PERFMON_EVENTSEL_OS);

	return 0;
}

/*
 * Setup the hardware configuration for a given attr_type
 */
@@ -446,23 +467,13 @@ static int __hw_perf_event_init(struct perf_event *event)

	event->destroy = hw_perf_event_destroy;

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	hwc->config = ARCH_PERFMON_EVENTSEL_INT;

	hwc->idx = -1;
	hwc->last_cpu = -1;
	hwc->last_tag = ~0ULL;

	/*
	 * Count user and OS events unless requested not to.
	 */
	if (!attr->exclude_user)
		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!attr->exclude_kernel)
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
	/* Processor specifics */
	if (x86_pmu.hw_config(attr, hwc))
		return -EOPNOTSUPP;

	if (!hwc->sample_period) {
		hwc->sample_period = x86_pmu.max_period;
@@ -517,7 +528,7 @@ static int __hw_perf_event_init(struct perf_event *event)
			return -EOPNOTSUPP;

		/* BTS is currently only allowed for user-mode. */
		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		if (!attr->exclude_kernel)
			return -EOPNOTSUPP;
	}

@@ -931,7 +942,7 @@ static int x86_pmu_enable(struct perf_event *event)
	if (n < 0)
		return n;

	ret = x86_schedule_events(cpuc, n, assign);
	ret = x86_pmu.schedule_events(cpuc, n, assign);
	if (ret)
		return ret;
	/*
@@ -1263,7 +1274,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
	if (n0 < 0)
		return n0;

	ret = x86_schedule_events(cpuc, n0, assign);
	ret = x86_pmu.schedule_events(cpuc, n0, assign);
	if (ret)
		return ret;

@@ -1313,6 +1324,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,

#include "perf_event_amd.c"
#include "perf_event_p6.c"
#include "perf_event_p4.c"
#include "perf_event_intel_lbr.c"
#include "perf_event_intel_ds.c"
#include "perf_event_intel.c"
@@ -1515,7 +1527,7 @@ static int validate_group(struct perf_event *event)

	fake_cpuc->n_events = n;

	ret = x86_schedule_events(fake_cpuc, n, NULL);
	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);

out_free:
	kfree(fake_cpuc);
+2 −0
Original line number Diff line number Diff line
@@ -363,6 +363,8 @@ static __initconst struct x86_pmu amd_pmu = {
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= x86_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
+10 −5
Original line number Diff line number Diff line
@@ -749,6 +749,8 @@ static __initconst struct x86_pmu core_pmu = {
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= x86_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
@@ -786,6 +788,8 @@ static __initconst struct x86_pmu intel_pmu = {
	.enable_all		= intel_pmu_enable_all,
	.enable			= intel_pmu_enable_event,
	.disable		= intel_pmu_disable_event,
	.hw_config		= x86_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
@@ -839,12 +843,13 @@ static __init int intel_pmu_init(void)
	int version;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
		/* check for P6 processor family */
	   if (boot_cpu_data.x86 == 6) {
		switch (boot_cpu_data.x86) {
		case 0x6:
			return p6_pmu_init();
	   } else {
		return -ENODEV;
		case 0xf:
			return p4_pmu_init();
		}
		return -ENODEV;
	}

	/*
Loading