Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 130768b8 authored by Andi Kleen, committed by Ingo Molnar
Browse files

perf/x86/intel: Add Haswell PEBS record support



Add support for the Haswell extended (fmt2) PEBS format.

It has a superset of the nhm (fmt1) PEBS fields, but has a
longer record so we need to adjust the code paths.

The main advantage is the new "EventingRip" support, which
directly gives the address of the instruction that triggered the
event rather than an off-by-one instruction address. So with
precise == 2 we use that directly and don't need to use LBRs and
walk basic blocks to correct the IP. This significantly lowers
the overhead of using precise.

Some other features are added in later patches.

Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Andi Kleen <ak@linux.jf.intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/1371515812-9646-2-git-send-email-andi@firstfloor.org


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent b2fa344d
Loading
Loading
Loading
Loading
+2 −1
Original line number Original line Diff line number Diff line
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
		 * check that PEBS LBR correction does not conflict with
		 * check that PEBS LBR correction does not conflict with
		 * whatever the user is asking with attr->branch_sample_type
		 * whatever the user is asking with attr->branch_sample_type
		 */
		 */
		if (event->attr.precise_ip > 1) {
		if (event->attr.precise_ip > 1 &&
		    x86_pmu.intel_cap.pebs_format < 2) {
			u64 *br_type = &event->attr.branch_sample_type;
			u64 *br_type = &event->attr.branch_sample_type;


			if (has_branch_stack(event)) {
			if (has_branch_stack(event)) {
+89 −21
Original line number Original line Diff line number Diff line
@@ -165,6 +165,22 @@ struct pebs_record_nhm {
	u64 status, dla, dse, lat;
	u64 status, dla, dse, lat;
};
};


/*
 * PEBS record layout for the Haswell extended format (PEBS fmt2).
 *
 * Same as pebs_record_nhm, with two additional fields. The nhm record
 * is embedded as the first member, so the leading part of a record can
 * also be read through a struct pebs_record_nhm pointer.
 */
struct pebs_record_hsw {
	struct pebs_record_nhm nhm;
	/*
	 * Real IP of the event. In the Intel documentation this
	 * is called eventingrip. It is used directly as the sample IP
	 * when precise_ip > 1 on PEBS format 2 or later.
	 */
	u64 real_ip;
	/*
	 * TSX tuning information field: abort cycles and abort flags.
	 */
	u64 tsx_tuning;
};

void init_debug_store_on_cpu(int cpu)
void init_debug_store_on_cpu(int cpu)
{
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -697,6 +713,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
	 */
	 */
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct pebs_record_nhm *pebs = __pebs;
	struct pebs_record_nhm *pebs = __pebs;
	struct pebs_record_hsw *pebs_hsw = __pebs;
	struct perf_sample_data data;
	struct perf_sample_data data;
	struct pt_regs regs;
	struct pt_regs regs;
	u64 sample_type;
	u64 sample_type;
@@ -753,7 +770,10 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
	regs.bp = pebs->bp;
	regs.bp = pebs->bp;
	regs.sp = pebs->sp;
	regs.sp = pebs->sp;


	if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
		regs.ip = pebs_hsw->real_ip;
		regs.flags |= PERF_EFLAGS_EXACT;
	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
		regs.flags |= PERF_EFLAGS_EXACT;
		regs.flags |= PERF_EFLAGS_EXACT;
	else
	else
		regs.flags &= ~PERF_EFLAGS_EXACT;
		regs.flags &= ~PERF_EFLAGS_EXACT;
@@ -806,35 +826,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
	__intel_pmu_pebs_event(event, iregs, at);
	__intel_pmu_pebs_event(event, iregs, at);
}
}


static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
					void *top)
{
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct debug_store *ds = cpuc->ds;
	struct pebs_record_nhm *at, *top;
	struct perf_event *event = NULL;
	struct perf_event *event = NULL;
	u64 status = 0;
	u64 status = 0;
	int bit, n;
	int bit;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;


	ds->pebs_index = ds->pebs_buffer_base;
	ds->pebs_index = ds->pebs_buffer_base;


	n = top - at;
	for (; at < top; at += x86_pmu.pebs_record_size) {
	if (n <= 0)
		struct pebs_record_nhm *p = at;
		return;


	/*
		for_each_set_bit(bit, (unsigned long *)&p->status,
	 * Should not happen, we program the threshold at 1 and do not
				 x86_pmu.max_pebs_events) {
	 * set a reset value.
	 */
	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);

	for ( ; at < top; at++) {
		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
			event = cpuc->events[bit];
			event = cpuc->events[bit];
			if (!test_bit(bit, cpuc->active_mask))
			if (!test_bit(bit, cpuc->active_mask))
				continue;
				continue;
@@ -857,6 +864,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
	}
	}
}
}


/*
 * Drain the PEBS buffer of the current CPU, treating it as an array of
 * struct pebs_record_nhm records.
 *
 * iregs: register state handed through unchanged to
 *        __intel_pmu_drain_pebs_nhm().
 *
 * Returns immediately when x86_pmu.pebs_active is clear. Otherwise the
 * debug store's PEBS index is rewound to the buffer base and, if at
 * least one record lies between the base and the old index, the range
 * is passed to __intel_pmu_drain_pebs_nhm() for processing.
 */
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct pebs_record_nhm *at, *top;
	int n;

	if (!x86_pmu.pebs_active)
		return;

	/* The debug store keeps base and index as integers; view them as records. */
	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	/* Rewind the buffer before the early exit below, so it is always reset. */
	ds->pebs_index = ds->pebs_buffer_base;

	/* Number of whole records between base and the old index. */
	n = top - at;
	if (n <= 0)
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ONCE(n > x86_pmu.max_pebs_events,
		  "Unexpected number of pebs records %d\n", n);

	/*
	 * Plain call: a void function must not "return" an expression
	 * (ISO C constraint), even one of type void.
	 */
	__intel_pmu_drain_pebs_nhm(iregs, at, top);
}

/*
 * Drain the PEBS buffer of the current CPU, treating it as an array of
 * struct pebs_record_hsw records (PEBS fmt2).
 *
 * iregs: register state handed through unchanged to
 *        __intel_pmu_drain_pebs_nhm().
 *
 * Returns immediately when x86_pmu.pebs_active is clear or when no
 * record lies between the buffer base and the current index. Otherwise
 * the range is passed to __intel_pmu_drain_pebs_nhm(), which walks it
 * in steps of x86_pmu.pebs_record_size.
 */
static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct pebs_record_hsw *at, *top;
	int n;

	if (!x86_pmu.pebs_active)
		return;

	/* The debug store keeps base and index as integers; view them as records. */
	at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;

	/*
	 * The index is not rewound here; __intel_pmu_drain_pebs_nhm()
	 * resets ds->pebs_index to the buffer base before it walks the
	 * records.
	 */
	n = top - at;
	if (n <= 0)
		return;
	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ONCE(n > x86_pmu.max_pebs_events,
		  "Unexpected number of pebs records %d\n", n);

	/*
	 * Plain call: a void function must not "return" an expression
	 * (ISO C constraint), even one of type void.
	 */
	__intel_pmu_drain_pebs_nhm(iregs, at, top);
}

/*
/*
 * BTS, PEBS probe and setup
 * BTS, PEBS probe and setup
 */
 */
@@ -888,6 +950,12 @@ void intel_ds_init(void)
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;
			break;


		case 2:
			pr_cont("PEBS fmt2%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
			break;

		default:
		default:
			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.pebs = 0;
			x86_pmu.pebs = 0;