Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 351af072 authored by Zhang, Yanmin's avatar Zhang, Yanmin Committed by Ingo Molnar
Browse files

perf, x86: Fix Intel-nhm PMU programming errata workaround



Fix the Errata AAK100/AAP53/BD53 workaround, the officialy documented
workaround we implemented in:

 11164cd4: perf, x86: Add Nehelem PMU programming errata workaround

doesn't actually work fully and causes a stuck PMU state
under load and non-functioning perf profiling.

A functional workaround was found by trial & error.

Affects all Nehalem-class Intel PMUs.

Signed-off-by: default avatarZhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1281073148.2125.63.camel@ymzhang.sh.intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@kernel.org> # .35.x
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 9d5f3714
Loading
Loading
Loading
Loading
+63 −18
Original line number Diff line number Diff line
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
 *   Intel Errata AAP53  (model 30)
 *   Intel Errata BD53   (model 44)
 *
 * These chips need to be 'reset' when adding counters by programming
 * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
 * either in sequence on the same PMC or on different PMCs.
 * The official story:
 *   These chips need to be 'reset' when adding counters by programming the
 *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
 *   in sequence on the same PMC or on different PMCs.
 *
 * In practise it appears some of these events do in fact count, and
 * we need to programm all 4 events.
 */
static void intel_pmu_nhm_enable_all(int added)
static void intel_pmu_nhm_workaround(void)
{
	if (added) {
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	static const unsigned long nhm_magic[4] = {
		0x4300B5,
		0x4300D2,
		0x4300B1,
		0x4300B1
	};
	struct perf_event *event;
	int i;

		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
	/*
	 * The Errata requires below steps:
	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
	 *    the corresponding PMCx;
	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
	 * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
	 */

		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
	/*
	 * The real steps we choose are a little different from above.
	 * A) To reduce MSR operations, we don't run step 1) as they
	 *    are already cleared before this function is called;
	 * B) Call x86_perf_event_update to save PMCx before configuring
	 *    PERFEVTSELx with magic number;
	 * C) With step 5), we do clear only when the PERFEVTSELx is
	 *    not used currently.
	 * D) Call x86_perf_event_set_period to restore PMCx;
	 */

	/* We always operate 4 pairs of PERF Counters */
	for (i = 0; i < 4; i++) {
		event = cpuc->events[i];
		if (event)
			x86_perf_event_update(event);
	}

		for (i = 0; i < 3; i++) {
			struct perf_event *event = cpuc->events[i];
	for (i = 0; i < 4; i++) {
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
	}

			if (!event)
				continue;
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);

	for (i = 0; i < 4; i++) {
		event = cpuc->events[i];

		if (event) {
			x86_perf_event_set_period(event);
			__x86_pmu_enable_event(&event->hw,
					ARCH_PERFMON_EVENTSEL_ENABLE);
		} else
			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
	}
}

static void intel_pmu_nhm_enable_all(int added)
{
	if (added)
		intel_pmu_nhm_workaround();
	intel_pmu_enable_all(added);
}