Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ef21f683 authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar
Browse files

perf, x86: use LBR for PEBS IP+1 fixup



Use the LBR to fix up the PEBS IP+1 issue.

As noted, PEBS reports the address of the next instruction; here we use
the LBR to find the last branch and, from that, reconstruct the actual IP.
If the reported IP matches the LBR-TO address, we use LBR-FROM; otherwise
we treat the LBR-TO address as the beginning of the last basic block and
decode forward.

Once we find a match to the current IP, we use the previous location.

This patch introduces a new ABI element: PERF_RECORD_MISC_EXACT, which
conveys that the reported IP (PERF_SAMPLE_IP) is the exact instruction
that caused the event (barring CPU errata).

The fixup can fail due to various reasons:

 1) LBR contains invalid data (quite possible)
 2) part of the basic block got paged out
 3) the reported IP isn't part of the basic block (see 1)

Signed-off-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.619375431@chello.nl>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent caff2bef
Loading
Loading
Loading
Loading
+19 −0
Original line number Diff line number Diff line
@@ -136,6 +136,25 @@ extern void perf_events_lapic_init(void);

#define PERF_EVENT_INDEX_OFFSET			0

/*
 * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
 * This flag is otherwise unused and ABI specified to be 0, so nobody should
 * care what we do with it.
 */
#define PERF_EFLAGS_EXACT	(1UL << 3)

/*
 * Build the perf_event_header::misc flags for a sample taken at @regs:
 * user vs. kernel origin, plus PERF_RECORD_MISC_EXACT when the PEBS IP
 * fixup succeeded (signalled through the borrowed eflags bit above).
 * Statement-expression macro; note @regs is evaluated more than once.
 */
#define perf_misc_flags(regs)				\
({	int misc = 0;					\
	if (user_mode(regs))				\
		misc |= PERF_RECORD_MISC_USER;		\
	else						\
		misc |= PERF_RECORD_MISC_KERNEL;	\
	if (regs->flags & PERF_EFLAGS_EXACT)		\
		misc |= PERF_RECORD_MISC_EXACT;		\
	misc; })

/* The sampled instruction pointer is taken straight from the regs snapshot. */
#define perf_instruction_pointer(regs)	((regs)->ip)

#else
static inline void init_hw_perf_events(void)		{ }
static inline void perf_events_lapic_init(void)	{ }
+35 −35
Original line number Diff line number Diff line
@@ -29,6 +29,41 @@
#include <asm/stacktrace.h>
#include <asm/nmi.h>

/*
 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 */
static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
	unsigned long offset, addr = (unsigned long)from;
	/* Pick the atomic-kmap slot matching the current context (NMI vs IRQ). */
	int type = in_nmi() ? KM_NMI : KM_IRQ0;
	unsigned long size, len = 0;
	struct page *page;
	void *map;
	int ret;

	do {
		/* Lockless GUP; pins exactly one user page at @addr. */
		ret = __get_user_pages_fast(addr, 1, 0, &page);
		if (!ret)
			break;	/* page not present/pinnable: return short count */

		offset = addr & (PAGE_SIZE - 1);
		/* Copy at most to the end of this page, and no more than asked. */
		size = min(PAGE_SIZE - offset, n - len);

		map = kmap_atomic(page, type);
		memcpy(to, map+offset, size);
		kunmap_atomic(map, type);
		put_page(page);

		len  += size;
		to   += size;
		addr += size;

	} while (len < n);

	/* Number of bytes actually copied; may be less than @n on fault. */
	return len;
}

static u64 perf_event_mask __read_mostly;

struct event_constraint {
@@ -1550,41 +1585,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
}

/*
 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 */
static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
	unsigned long offset, addr = (unsigned long)from;
	/* Pick the atomic-kmap slot matching the current context (NMI vs IRQ). */
	int type = in_nmi() ? KM_NMI : KM_IRQ0;
	unsigned long size, len = 0;
	struct page *page;
	void *map;
	int ret;

	do {
		/* Lockless GUP; pins exactly one user page at @addr. */
		ret = __get_user_pages_fast(addr, 1, 0, &page);
		if (!ret)
			break;	/* page not present/pinnable: return short count */

		offset = addr & (PAGE_SIZE - 1);
		/* Copy at most to the end of this page, and no more than asked. */
		size = min(PAGE_SIZE - offset, n - len);

		map = kmap_atomic(page, type);
		memcpy(to, map+offset, size);
		kunmap_atomic(map, type);
		put_page(page);

		len  += size;
		to   += size;
		addr += size;

	} while (len < n);

	/* Number of bytes actually copied; may be less than @n on fault. */
	return len;
}

static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
	unsigned long bytes;
+2 −2
Original line number Diff line number Diff line
@@ -547,7 +547,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
	x86_pmu_disable_event(event);

	if (unlikely(event->attr.precise))
		intel_pmu_pebs_disable(hwc);
		intel_pmu_pebs_disable(event);
}

static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
@@ -600,7 +600,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
	}

	if (unlikely(event->attr.precise))
		intel_pmu_pebs_enable(hwc);
		intel_pmu_pebs_enable(event);

	__x86_pmu_enable_event(hwc);
}
+82 −2
Original line number Diff line number Diff line
@@ -331,26 +331,32 @@ intel_pebs_constraints(struct perf_event *event)
	return &emptyconstraint;
}

static void intel_pmu_pebs_enable(struct hw_perf_event *hwc)
/* NOTE(review): the line above is the pre-patch signature shown by the diff view. */
static void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	u64 val = cpuc->pebs_enabled;

	/* PEBS supplies its own record: mask the counter's overflow interrupt. */
	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

	/* Set this counter's enable bit in the per-cpu PEBS enable mask. */
	val |= 1ULL << hwc->idx;
	wrmsrl(MSR_IA32_PEBS_ENABLE, val);

	/* LBR is needed so intel_pmu_pebs_fixup_ip() can rewind the sampled IP. */
	intel_pmu_lbr_enable(event);
}

static void intel_pmu_pebs_disable(struct hw_perf_event *hwc)
/* NOTE(review): the line above is the pre-patch signature shown by the diff view. */
static void intel_pmu_pebs_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	u64 val = cpuc->pebs_enabled;

	/* Clear this counter's enable bit in the per-cpu PEBS enable mask. */
	val &= ~(1ULL << hwc->idx);
	wrmsrl(MSR_IA32_PEBS_ENABLE, val);

	/* Restore the overflow interrupt that intel_pmu_pebs_enable() masked. */
	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;

	intel_pmu_lbr_disable(event);
}

static void intel_pmu_pebs_enable_all(void)
@@ -369,6 +375,70 @@ static void intel_pmu_pebs_disable_all(void)
		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

#include <asm/insn.h>

/* Upper bound on x86 instruction length, used when decoding forward. */
#define MAX_INSN_SIZE	16

/*
 * Does @ip lie in the kernel half of the address space?  On 64-bit,
 * kernel addresses have the sign bit set; on 32-bit they sit above
 * PAGE_OFFSET.
 */
static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
	return ip > PAGE_OFFSET;
#else
	return (long)ip < 0;
#endif
}

/*
 * PEBS reports the IP of the instruction *after* the one that caused the
 * event.  Use LBR entry 0 (the most recent branch) to reconstruct the
 * exact IP: if the sampled IP equals the branch target we rewind to the
 * branch source; otherwise we decode forward from the target through the
 * basic block until we step onto the sampled IP, then report the
 * instruction before it.  Returns 1 and rewrites regs->ip on success,
 * 0 on failure (empty/invalid LBR, IP outside the basic block, or a
 * faulting user copy).
 */
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;

	/* No usable LBR data to rewind with. */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/* Sampled IP precedes the last branch target: not in this basic block. */
	if (ip < to)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		regs->ip = from;
		return 1;
	}

	/* Decode forward from the branch target until we reach @ip. */
	do {
		struct insn insn;
		u8 buf[MAX_INSN_SIZE];
		void *kaddr;

		old_to = to;
		if (!kernel_ip(ip)) {
			/* User text must be copied in NMI-safely before decoding. */
			int bytes, size = min_t(int, MAX_INSN_SIZE, ip - to);

			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
			if (bytes != size)
				return 0;

			kaddr = buf;
		} else
			kaddr = (void *)to;

		kernel_insn_init(&insn, kaddr);
		insn_get_length(&insn);
		to += insn.length;
	} while (to < ip);

	/*
	 * Landing exactly on @ip means the previous instruction (old_to)
	 * caused the event; overshooting means the decode went wrong
	 * (e.g. bad LBR data), so report failure.
	 */
	if (to == ip) {
		regs->ip = old_to;
		return 1;
	}

	return 0;
}

static int intel_pmu_save_and_restart(struct perf_event *event);
static void intel_pmu_disable_event(struct perf_event *event);

@@ -424,6 +494,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
	regs.bp = at->bp;
	regs.sp = at->sp;

	if (intel_pmu_pebs_fixup_ip(&regs))
		regs.flags |= PERF_EFLAGS_EXACT;
	else
		regs.flags &= ~PERF_EFLAGS_EXACT;

	if (perf_event_overflow(event, 1, &data, &regs))
		intel_pmu_disable_event(event);

@@ -487,6 +562,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
		regs.bp = at->bp;
		regs.sp = at->sp;

		if (intel_pmu_pebs_fixup_ip(&regs))
			regs.flags |= PERF_EFLAGS_EXACT;
		else
			regs.flags &= ~PERF_EFLAGS_EXACT;

		if (perf_event_overflow(event, 1, &data, &regs))
			intel_pmu_disable_event(event);
	}
+6 −0
Original line number Diff line number Diff line
@@ -294,6 +294,12 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_USER			(2 << 0)
#define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)

#define PERF_RECORD_MISC_EXACT			(1 << 14)
/*
 * Reserve the last bit to indicate some extended misc field
 */
#define PERF_RECORD_MISC_EXT_RESERVED		(1 << 15)

struct perf_event_header {
	__u32	type;
	__u16	misc;