Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e994d7d2 authored by Andi Kleen, committed by Ingo Molnar
Browse files

perf: Fix LLC-* events on Intel Nehalem/Westmere



On Intel Nehalem and Westmere CPUs the generic perf LLC-* events count the
L2 caches, not the real L3 LLC - this was inconsistent with behavior on
other CPUs.

Fixing this requires the use of the special OFFCORE_RESPONSE
events which need a separate mask register.

This has been implemented by the previous patch; now use this infrastructure
to set the correct events for LLC-* on Nehalem and Westmere.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1299119690-13991-3-git-send-email-ming.m.lin@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent a7e3ed1e
Loading
Loading
Loading
Loading
+10 −5
Original line number Original line Diff line number Diff line
@@ -310,6 +310,10 @@ static u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
static u64 __read_mostly hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];


/*
/*
 * Propagate event elapsed time into the generic event.
 * Propagate event elapsed time into the generic event.
@@ -524,8 +528,9 @@ static inline int x86_pmu_initialized(void)
}
}


static inline int
static inline int
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
{
{
	struct perf_event_attr *attr = &event->attr;
	unsigned int cache_type, cache_op, cache_result;
	unsigned int cache_type, cache_op, cache_result;
	u64 config, val;
	u64 config, val;


@@ -552,8 +557,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
		return -EINVAL;
		return -EINVAL;


	hwc->config |= val;
	hwc->config |= val;

	attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
	return 0;
	return x86_pmu_extra_regs(val, event);
}
}


static int x86_setup_perfctr(struct perf_event *event)
static int x86_setup_perfctr(struct perf_event *event)
@@ -578,10 +583,10 @@ static int x86_setup_perfctr(struct perf_event *event)
	}
	}


	if (attr->type == PERF_TYPE_RAW)
	if (attr->type == PERF_TYPE_RAW)
		return 0;
		return x86_pmu_extra_regs(event->attr.config, event);


	if (attr->type == PERF_TYPE_HW_CACHE)
	if (attr->type == PERF_TYPE_HW_CACHE)
		return set_ext_hw_attr(hwc, attr);
		return set_ext_hw_attr(hwc, event);


	if (attr->config >= x86_pmu.max_events)
	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;
		return -EINVAL;
+69 −12
Original line number Original line Diff line number Diff line
@@ -285,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
 },
 },
 [ C(LL  ) ] = {
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01bb,
	},
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
		[ C(RESULT_ACCESS) ] = 0x01bb,
		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	},
	[ C(OP_PREFETCH) ] = {
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01bb,
	},
	},
 },
 },
 [ C(DTLB) ] = {
 [ C(DTLB) ] = {
@@ -341,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
 },
 },
};
};


/*
 * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
 */

/* Request-type bits: what kind of access the core issued. */
#define DMND_DATA_RD     (1 << 0)  /* demand data read */
#define DMND_RFO         (1 << 1)  /* demand read-for-ownership (store intent) */
#define DMND_WB          (1 << 3)  /* writeback */
#define PF_DATA_RD       (1 << 4)  /* prefetch data read */
#define PF_DATA_RFO      (1 << 5)  /* prefetch RFO */
/* Response-type bits: how the request was satisfied. */
#define RESP_UNCORE_HIT  (1 << 8)  /* satisfied by the uncore (L3) cache */
#define RESP_MISS        (0xf600) /* non uncore hit */

/*
 * OFFCORE_RESPONSE extra-MSR masks for the generic LLC-* cache events,
 * indexed identically to hw_cache_event_ids (cache / op / result).
 * Only the C(LL) slot is populated; all other slots stay zero, meaning
 * no extra register is programmed for those events.
 *
 * Copied into hw_cache_extra_regs for both the Nehalem and Westmere
 * model cases in intel_pmu_init() (Westmere shares the same bit layout).
 */
static __initconst const u64 nehalem_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		/* demand loads that hit / missed the L3 */
		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
		[ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
	},
	[ C(OP_WRITE) ] = {
		/* writes counted as RFO + writeback traffic */
		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
		[ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		/* hardware/software prefetches (reads and RFOs) */
		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
		[ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
	},
 }
};

static __initconst const u64 nehalem_hw_cache_event_ids
static __initconst const u64 nehalem_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -376,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 },
 },
 [ C(LL  ) ] = {
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	},
	[ C(OP_PREFETCH) ] = {
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	},
 },
 },
 [ C(DTLB) ] = {
 [ C(DTLB) ] = {
@@ -1340,6 +1393,8 @@ static __init int intel_pmu_init(void)
	case 46: /* 45 nm nehalem-ex, "Beckton" */
	case 46: /* 45 nm nehalem-ex, "Beckton" */
		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));


		intel_pmu_lbr_init_nhm();
		intel_pmu_lbr_init_nhm();


@@ -1366,6 +1421,8 @@ static __init int intel_pmu_init(void)
	case 44: /* 32 nm nehalem, "Gulftown" */
	case 44: /* 32 nm nehalem, "Gulftown" */
		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));


		intel_pmu_lbr_init_nhm();
		intel_pmu_lbr_init_nhm();