
Commit f34edbc1 authored by Lin Ming, committed by Ingo Molnar

perf, x86: Add a key to simplify template lookup in Pentium-4 PMU



Currently we look up a template in p4_templates by its opcode
(ESCR event plus CCCR selector) together with its emask.

But the cache events (L1-dcache-load-misses, LLC-load-misses, etc.)
are all counted with the same event (P4_REPLAY_EVENT), i.e. they
share the same opcode and emask, so the current lookup mechanism
cannot find the right template for them.

This patch introduces a "key": the index of the event's entry in
p4_templates. The low 12 bits of the CCCR are reserved, so the key
can be hidden in the low 12 bits of hwc->config.

At lookup time we extract the key from hwc->config and index
straight into the template table.
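
For illustration only, a minimal user-space sketch of the idea, with names that mirror the patch (the standalone program and the pack_key()/unpack_key() helpers are hypothetical, not kernel code): the template index rides in the low 12 reserved CCCR bits of the config word and is recovered with a single mask.

#include <stdint.h>
#include <stdio.h>

/* The low 12 bits of the CCCR are reserved by the hardware, so the driver
 * can reuse them to carry the p4_templates[] index ("key") in hwc->config. */
#define P4_CCCR_RESERVED	0x00000fffULL

/* Hypothetical helper: stash the template index in the reserved bits. */
static uint64_t pack_key(uint64_t config, unsigned int key)
{
	return (config & ~P4_CCCR_RESERVED) | ((uint64_t)key & P4_CCCR_RESERVED);
}

/* Mirrors the patch's p4_config_unpack_key(): recover the index. */
static unsigned int unpack_key(uint64_t config)
{
	return (unsigned int)(config & P4_CCCR_RESERVED);
}

int main(void)
{
	uint64_t config = 0x12345000ULL;	/* pretend ESCR/CCCR bits */

	config = pack_key(config, 3);		/* hide template index 3 */
	printf("key = %u\n", unpack_key(config));	/* prints: key = 3 */
	return 0;
}

Because the lookup no longer depends on opcode + emask, two templates that program the same hardware event (as the cache events do with P4_REPLAY_EVENT) can still be told apart by their distinct keys.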

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1268908387.13901.127.camel@minggr.sh.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 55632770
arch/x86/include/asm/perf_event_p4.h: +4 −1
@@ -65,6 +65,7 @@
 #define P4_CCCR_THREAD_SINGLE		0x00010000U
 #define P4_CCCR_THREAD_BOTH		0x00020000U
 #define P4_CCCR_THREAD_ANY		0x00030000U
+#define P4_CCCR_RESERVED		0x00000fffU
 
 /* Non HT mask */
 #define P4_CCCR_MASK				\
@@ -116,7 +117,7 @@
 #define p4_config_pack_escr(v)		(((u64)(v)) << 32)
 #define p4_config_pack_cccr(v)		(((u64)(v)) & 0xffffffffULL)
 #define p4_config_unpack_escr(v)	(((u64)(v)) >> 32)
-#define p4_config_unpack_cccr(v)	(((u64)(v)) & 0xffffffffULL)
+#define p4_config_unpack_cccr(v)	(((u64)(v)) & 0xfffff000ULL)
 
 #define p4_config_unpack_emask(v)			\
 	({						\
@@ -126,6 +127,8 @@
 		t;					\
 	})
 
+#define p4_config_unpack_key(v)		(((u64)(v)) & P4_CCCR_RESERVED)
+
 #define P4_CONFIG_HT_SHIFT		63
 #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
 
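The new p4_config_unpack_key() macro above is what enables the fast path: once the key rides in the reserved bits, template lookup reduces to a bounds-checked array index. Below is a rough, self-contained sketch of that simplified lookup (assuming a dummy 8-entry table; this is not the kernel's p4_pmu_template_lookup() itself, which is changed in the next file):

#include <stdint.h>
#include <stddef.h>

#define P4_CCCR_RESERVED		0x00000fffULL
#define p4_config_unpack_key(v)		(((uint64_t)(v)) & P4_CCCR_RESERVED)

/* Dummy template table standing in for the kernel's p4_templates[]. */
struct p4_event_template { uint32_t opcode; };
static struct p4_event_template p4_templates[8];

static struct p4_event_template *template_lookup(uint64_t config)
{
	uint64_t key = p4_config_unpack_key(config);

	/* A single bounds check replaces the old opcode + emask scan. */
	if (key < sizeof(p4_templates) / sizeof(p4_templates[0]))
		return &p4_templates[key];
	return NULL;
}

int main(void)
{
	/* A config whose low bits carry key 5 maps to entry 5 of the table. */
	return template_lookup(0x5) == &p4_templates[5] ? 0 : 1;
}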
arch/x86/kernel/cpu/perf_event_p4.c: +34 −52
@@ -18,6 +18,7 @@ struct p4_event_template {
 	u32 opcode;			/* ESCR event + CCCR selector */
 	u64 config;			/* packed predefined bits */
 	int dep;			/* upstream dependency event index */
+	int key;			/* index into p4_templates */
 	unsigned int emask;		/* ESCR EventMask */
 	unsigned int escr_msr[2];	/* ESCR MSR for this event */
 	unsigned int cntr[2];		/* counter index (offset) */
@@ -39,38 +40,31 @@ static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config);
  */
 struct p4_event_template p4_templates[] = {
 	[0] = {
-		.opcode	= P4_UOP_TYPE,
-		.config	= 0,
-		.dep	= -1,
-		.emask	=
-			P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS)	|
-			P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
-		.escr_msr	= { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
-		.cntr		= { 16, 17 },
-	},
-	[1] = {
 		.opcode	= P4_GLOBAL_POWER_EVENTS,
 		.config	= 0,
 		.dep	= -1,
+		.key	= 0,
 		.emask	=
 			P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING),
 		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
 		.cntr		= { 0, 2 },
 	},
-	[2] = {
+	[1] = {
 		.opcode	= P4_INSTR_RETIRED,
 		.config	= 0,
 		.dep	= -1, /* needs front-end tagging */
+		.key	= 1,
 		.emask	=
 			P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG)	|
 			P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG),
 		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
 		.cntr		= { 12, 14 },
 	},
-	[3] = {
+	[2] = {
 		.opcode	= P4_BSQ_CACHE_REFERENCE,
 		.config	= 0,
 		.dep	= -1,
+		.key	= 2,
 		.emask	=
 			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)	|
 			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)	|
@@ -81,10 +75,11 @@ struct p4_event_template p4_templates[] = {
 		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
 		.cntr		= { 0, 2 },
 	},
-	[4] = {
+	[3] = {
 		.opcode	= P4_BSQ_CACHE_REFERENCE,
 		.config	= 0,
 		.dep	= -1,
+		.key	= 3,
 		.emask	=
 			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)	|
 			P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)	|
@@ -92,10 +87,11 @@ struct p4_event_template p4_templates[] = {
 		.escr_msr	= { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
 		.cntr		= { 0, 3 },
 	},
-	[5] = {
+	[4] = {
 		.opcode	= P4_RETIRED_BRANCH_TYPE,
 		.config	= 0,
 		.dep	= -1,
+		.key	= 4,
 		.emask	=
 			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL)	|
 			P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL)		|
@@ -104,48 +100,38 @@ struct p4_event_template p4_templates[] = {
 		.escr_msr	= { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
 		.cntr		= { 4, 6 },
 	},
-	[6] = {
+	[5] = {
 		.opcode	= P4_MISPRED_BRANCH_RETIRED,
 		.config	= 0,
 		.dep	= -1,
+		.key	= 5,
 		.emask	=
 			P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS),
 		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
 		.cntr		= { 12, 14 },
 	},
-	[7] = {
+	[6] = {
 		.opcode	= P4_FSB_DATA_ACTIVITY,
 		.config	= p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
 		.dep	= -1,
+		.key	= 6,
 		.emask	=
 			P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV)	|
 			P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN),
 		.escr_msr	= { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
 		.cntr		= { 0, 2 },
 	},
-};
-
-static struct p4_event_template *p4_event_map[PERF_COUNT_HW_MAX] = {
-	/* non-halted CPU clocks */
-	[PERF_COUNT_HW_CPU_CYCLES]		= &p4_templates[1],
-
-	/* retired instructions: dep on tagging the FSB */
-	[PERF_COUNT_HW_INSTRUCTIONS]		= &p4_templates[2],
-
-	/* cache hits */
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= &p4_templates[3],
-
-	/* cache misses */
-	[PERF_COUNT_HW_CACHE_MISSES]		= &p4_templates[4],
-
-	/* branch instructions retired */
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= &p4_templates[5],
-
-	/* mispredicted branches retired */
-	[PERF_COUNT_HW_BRANCH_MISSES]		= &p4_templates[6],
-
-	/* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN):  */
-	[PERF_COUNT_HW_BUS_CYCLES]		= &p4_templates[7],
+	[7] = {
+		.opcode	= P4_UOP_TYPE,
+		.config	= 0,
+		.dep	= -1,
+		.key	= 7,
+		.emask	=
+			P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS)	|
+			P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
+		.escr_msr	= { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
+		.cntr		= { 16, 17 },
+	},
 };
 
 static u64 p4_pmu_event_map(int hw_event)
@@ -153,11 +139,11 @@ static u64 p4_pmu_event_map(int hw_event)
 	struct p4_event_template *tpl;
 	u64 config;
 
-	if (hw_event > ARRAY_SIZE(p4_event_map)) {
+	if (hw_event > ARRAY_SIZE(p4_templates)) {
 		printk_once(KERN_ERR "PMU: Incorrect event index\n");
 		return 0;
 	}
-	tpl = p4_event_map[hw_event];
+	tpl = &p4_templates[hw_event];
 
 	/*
 	 * fill config up according to
@@ -167,6 +153,7 @@ static u64 p4_pmu_event_map(int hw_event)
 	config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT);
 	config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT);
 	config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT);
+	config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED);
 
 	/* on HT machine we need a special bit */
 	if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id()))
@@ -187,16 +174,11 @@ static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src)
 
 static struct p4_event_template *p4_pmu_template_lookup(u64 config)
 {
-	u32 opcode = p4_config_unpack_opcode(config);
-	unsigned int emask = p4_config_unpack_emask(config);
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(p4_templates); i++) {
-		if (opcode == p4_templates[i].opcode &&
-			p4_pmu_emask_match(emask, p4_templates[i].emask))
-			return &p4_templates[i];
-	}
+	int key = p4_config_unpack_key(config);
 
-	return NULL;
+	if (key < ARRAY_SIZE(p4_templates))
+		return &p4_templates[key];
+	else
+		return NULL;
 }
 
@@ -564,7 +546,7 @@ static __initconst struct x86_pmu p4_pmu = {
 	.perfctr		= MSR_P4_BPU_PERFCTR0,
 	.event_map		= p4_pmu_event_map,
 	.raw_event		= p4_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(p4_event_map),
+	.max_events		= ARRAY_SIZE(p4_templates),
 	.get_event_constraints	= x86_get_event_constraints,
 	/*
 	 * IF HT disabled we may need to use all