Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6ae5fa61 authored by Andi Kleen's avatar Andi Kleen Committed by Ingo Molnar
Browse files

perf/x86: Fix data source decoding for Skylake



Skylake changed the encoding of the PEBS data source field.
Some combinations are not available anymore, but some new cases
e.g. for L4 cache hit are added.

Fix up the conversion table for Skylake, similar as had been done
for Nehalem.

On Skylake server the encoding for L4 actually means persistent
memory. Handle this case too.

To properly describe it in the abstracted perf format I had to add
some new fields. Since a hit can have only one level add a new
field that is an enumeration, not a bit field to describe
the level. It can describe any level. Some numbers are also
used to describe PMEM and LFB.

Also add a new generic remote flag that can be combined with
the generic level to signify a remote cache.

And there is an extension field for the snoop indication to handle
the Forward state.

I didn't add a generic flag for hops because it's not needed
for Skylake.

I changed the existing encodings for older CPUs to also fill in the
new level and remote fields.

Signed-off-by: default avatarAndi Kleen <ak@linux.intel.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: jolsa@kernel.org
Link: http://lkml.kernel.org/r/20170816222156.19953-3-andi@firstfloor.org


Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 95298355
Loading
Loading
Loading
Loading
+2 −0
Original line number Original line Diff line number Diff line
@@ -4208,6 +4208,8 @@ __init int intel_pmu_init(void)
						  skl_format_attr);
						  skl_format_attr);
		WARN_ON(!x86_pmu.format_attrs);
		WARN_ON(!x86_pmu.format_attrs);
		x86_pmu.cpu_events = hsw_events_attrs;
		x86_pmu.cpu_events = hsw_events_attrs;
		intel_pmu_pebs_data_source_skl(
			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
		pr_cont("Skylake events, ");
		pr_cont("Skylake events, ");
		break;
		break;


+32 −19
Original line number Original line Diff line number Diff line
@@ -49,34 +49,47 @@ union intel_x86_pebs_dse {
 */
 */
#define P(a, b) PERF_MEM_S(a, b)
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define LEVEL(x) P(LVLNUM, x)
#define REM P(REMOTE, REMOTE)
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))


/* Version for Sandy Bridge and later */
/* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
static u64 pebs_data_source[] = {
	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
	OP_LH | P(LVL, L2)  | P(SNOOP, NONE),	/* 0x03: L2 hit */
	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, NONE),	/* 0x04: L3 hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, MISS),	/* 0x05: L3 hit, snoop miss */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
	OP_LH | P(LVL, L3)  | P(SNOOP, HIT),	/* 0x06: L3 hit, snoop hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, HITM),	/* 0x07: L3 hit, snoop hitm */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
	OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
	OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
	OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
	OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
	OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
	OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
};
};


/* Patch up minor differences in the bits */
/* Patch up minor differences in the bits */
void __init intel_pmu_pebs_data_source_nhm(void)
void __init intel_pmu_pebs_data_source_nhm(void)
{
{
	pebs_data_source[0x05] = OP_LH | P(LVL, L3)  | P(SNOOP, HIT);
	pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
	pebs_data_source[0x06] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
	pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
	pebs_data_source[0x07] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
	pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}

void __init intel_pmu_pebs_data_source_skl(bool pmem)
{
	u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);

	pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
	pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
	pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
	pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
	pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}
}


static u64 precise_store_data(u64 status)
static u64 precise_store_data(u64 status)
+2 −0
Original line number Original line Diff line number Diff line
@@ -948,6 +948,8 @@ void intel_pmu_lbr_init_knl(void);


void intel_pmu_pebs_data_source_nhm(void);
void intel_pmu_pebs_data_source_nhm(void);


void intel_pmu_pebs_data_source_skl(bool pmem);

int intel_pmu_setup_lbr_filter(struct perf_event *event);
int intel_pmu_setup_lbr_filter(struct perf_event *event);


void intel_pt_interrupt(void);
void intel_pt_interrupt(void);
+28 −2
Original line number Original line Diff line number Diff line
@@ -954,14 +954,20 @@ union perf_mem_data_src {
			mem_snoop:5,	/* snoop mode */
			mem_snoop:5,	/* snoop mode */
			mem_lock:2,	/* lock instr */
			mem_lock:2,	/* lock instr */
			mem_dtlb:7,	/* tlb access */
			mem_dtlb:7,	/* tlb access */
			mem_rsvd:31;
			mem_lvl_num:4,	/* memory hierarchy level number */
			mem_remote:1,   /* remote */
			mem_snoopx:2,	/* snoop mode, ext */
			mem_rsvd:24;
	};
	};
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
union perf_mem_data_src {
	__u64 val;
	__u64 val;
	struct {
	struct {
		__u64	mem_rsvd:31,
		__u64	mem_rsvd:24,
			mem_snoopx:2,	/* snoop mode, ext */
			mem_remote:1,   /* remote */
			mem_lvl_num:4,	/* memory hierarchy level number */
			mem_dtlb:7,	/* tlb access */
			mem_dtlb:7,	/* tlb access */
			mem_lock:2,	/* lock instr */
			mem_lock:2,	/* lock instr */
			mem_snoop:5,	/* snoop mode */
			mem_snoop:5,	/* snoop mode */
@@ -998,6 +1004,22 @@ union perf_mem_data_src {
#define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
#define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
#define PERF_MEM_LVL_SHIFT	5
#define PERF_MEM_LVL_SHIFT	5


#define PERF_MEM_REMOTE_REMOTE	0x01  /* Remote */
#define PERF_MEM_REMOTE_SHIFT	37

#define PERF_MEM_LVLNUM_L1	0x01 /* L1 */
#define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
/* 5-0xa available */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
#define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
#define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
#define PERF_MEM_LVLNUM_PMEM	0x0e /* PMEM */
#define PERF_MEM_LVLNUM_NA	0x0f /* N/A */

#define PERF_MEM_LVLNUM_SHIFT	33

/* snoop mode */
/* snoop mode */
#define PERF_MEM_SNOOP_NA	0x01 /* not available */
#define PERF_MEM_SNOOP_NA	0x01 /* not available */
#define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
#define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
@@ -1006,6 +1028,10 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
#define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
#define PERF_MEM_SNOOP_SHIFT	19
#define PERF_MEM_SNOOP_SHIFT	19


#define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
/* 1 free */
#define PERF_MEM_SNOOPX_SHIFT	37

/* locked instruction */
/* locked instruction */
#define PERF_MEM_LOCK_NA	0x01 /* not available */
#define PERF_MEM_LOCK_NA	0x01 /* not available */
#define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */
#define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */