
Commit 106544d8 authored by Linus Torvalds

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "A bit larger than what I'd wish for - half of it is due to hw driver
  updates to Intel Ivy-Bridge which info got recently released,
  cycles:pp should work there now too, amongst other things.  (but we
  are generally making exceptions for hardware enablement of this type.)

  There are also callchain fixes in it - responding to mostly
  theoretical (but valid) concerns.  The tooling side sports perf.data
  endianness/portability fixes which did not make it for the merge
  window - and various other fixes as well."

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  perf/x86: Check user address explicitly in copy_from_user_nmi()
  perf/x86: Check if user fp is valid
  perf: Limit callchains to 127
  perf/x86: Allow multiple stacks
  perf/x86: Update SNB PEBS constraints
  perf/x86: Enable/Add IvyBridge hardware support
  perf/x86: Implement cycles:p for SNB/IVB
  perf/x86: Fix Intel shared extra MSR allocation
  x86/decoder: Fix bsr/bsf/jmpe decoding with operand-size prefix
  perf: Remove duplicate invocation on perf_event_for_each
  perf uprobes: Remove unnecessary check before strlist__delete
  perf symbols: Check for valid dso before creating map
  perf evsel: Fix 32 bit values endianity swap for sample_id_all header
  perf session: Handle endianity swap on sample_id_all header data
  perf symbols: Handle different endians properly during symbol load
  perf evlist: Pass third argument to ioctl explicitly
  perf tools: Update ioctl documentation for PERF_IOC_FLAG_GROUP
  perf tools: Make --version show kernel version instead of pull req tag
  perf tools: Check if callchain is corrupted
  perf callchain: Make callchain cursors TLS
  ...
parents 03d8f540 db0dc75d
+6 −6
@@ -34,8 +34,7 @@
 
 #define user_addr_max() (current_thread_info()->addr_limit.seg)
 #define __addr_ok(addr) 	\
-	((unsigned long __force)(addr) <		\
-	 (current_thread_info()->addr_limit.seg))
+	((unsigned long __force)(addr) < user_addr_max())
 
 /*
  * Test whether a block of memory is a valid user space address.
@@ -47,14 +46,14 @@
  * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
  */
 
-#define __range_not_ok(addr, size)					\
+#define __range_not_ok(addr, size, limit)				\
 ({									\
 	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
 	asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0"		\
 	    : "=&r" (flag), "=r" (roksum)				\
 	    : "1" (addr), "g" ((long)(size)),				\
-	      "rm" (current_thread_info()->addr_limit.seg));		\
+	      "rm" (limit));						\
 	flag;								\
 })
 
@@ -77,7 +76,8 @@
  * checks that the pointer is in the user space range - after calling
  * this function, memory access functions may still return -EFAULT.
  */
-#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
+#define access_ok(type, addr, size) \
+	(likely(__range_not_ok(addr, size, user_addr_max()) == 0))
 
 /*
  * The exception table consists of pairs of addresses relative to the
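
For readers unfamiliar with the asm in __range_not_ok(): it is an overflow-safe "addr + size must stay within limit" test, and the change above only makes the limit an explicit argument so callers can check against TASK_SIZE instead of the current thread's addr_limit. A minimal stand-alone C sketch of the same check (illustrative only, hypothetical names, not part of the patch):

#include <stdbool.h>
#include <stdio.h>

/* Plain-C version of the check the asm performs: the range
 * [addr, addr + size) must not wrap and must not run past limit.
 * The kernel keeps the asm form for efficiency. */
static bool range_not_ok_sketch(unsigned long addr, unsigned long size,
				unsigned long limit)
{
	unsigned long sum = addr + size;

	if (sum < addr)		/* carry out of the add: the range wrapped */
		return true;	/* not ok */

	return sum > limit;	/* not ok if it runs past the limit */
}

int main(void)
{
	unsigned long task_size = 1UL << 47;	/* stand-in for TASK_SIZE */

	printf("%d\n", range_not_ok_sketch(0x1000, 0x100, task_size));   /* 0: ok    */
	printf("%d\n", range_not_ok_sketch(-4096UL, 0x2000, task_size)); /* 1: wraps */
	return 0;
}
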
+9 −2
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
@@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 }
 
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
 #ifdef CONFIG_COMPAT
 
 #include <asm/compat.h>
@@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if (bytes != sizeof(frame))
 			break;
 
-		if (fp < compat_ptr(regs->sp))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
@@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		if (bytes != sizeof(frame))
 			break;
 
-		if ((unsigned long)fp < regs->sp)
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
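
The two callchain hunks above replace the old "frame pointer below regs->sp" heuristic with an explicit range check on the saved frame pointer before it is followed. As a rough, self-contained illustration of that pattern (plain userspace C with hypothetical names, not the kernel code):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct stack_frame_sketch {
	struct stack_frame_sketch *next_frame;
	uintptr_t return_address;
};

/* hypothetical bound standing in for TASK_SIZE */
#define USER_LIMIT_SKETCH ((uintptr_t)1 << 47)

static int frame_in_range(const void *fp, size_t size)
{
	uintptr_t addr = (uintptr_t)fp;

	/* whole frame must lie inside the readable range, without wrapping */
	return addr + size >= addr && addr + size <= USER_LIMIT_SKETCH;
}

static void walk_frames(struct stack_frame_sketch *fp, int max_depth)
{
	while (fp && max_depth--) {
		if (!frame_in_range(fp, sizeof(*fp)))
			break;		/* frame points outside the user range */
		printf("return address: %#lx\n", (unsigned long)fp->return_address);
		fp = fp->next_frame;
	}
}

int main(void)
{
	struct stack_frame_sketch leaf   = { NULL, 0x401234 };
	struct stack_frame_sketch caller = { &leaf, 0x405678 };

	walk_frames(&caller, 127);	/* 127 matches the new callchain limit */
	return 0;
}
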
+2 −0
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
@@ -364,6 +365,7 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+	void		(*pebs_aliases)(struct perf_event *event);
 
 	/*
 	 * Intel LBR
+108 −37
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,18 +1213,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
 	return c;
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
-static int intel_pmu_hw_config(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
-
-	if (ret)
-		return ret;
-
-	if (event->attr.precise_ip &&
-	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
 		/*
 		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
 		 * (0x003c) so that we can use it with PEBS.
@@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		 */
 		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
 
+		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+		event->hw.config = alt_config;
+	}
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+		/*
+		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+		 * (0x003c) so that we can use it with PEBS.
+		 *
+		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+		 * PEBS capable. However we can use UOPS_RETIRED.ALL
+		 * (0x01c2), which is a PEBS capable event, to get the same
+		 * count.
+		 *
+		 * UOPS_RETIRED.ALL counts the number of cycles that retires
+		 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+		 * larger than the maximum number of micro-ops that can be
+		 * retired per cycle (4) and then inverting the condition, we
+		 * count all cycles that retire 16 or less micro-ops, which
+		 * is every cycle.
+		 *
+		 * Thereby we gain a PEBS capable cycle counter.
+		 */
+		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
+
 		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
 		event->hw.config = alt_config;
 	}
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+		x86_pmu.pebs_aliases(event);
 
 	if (intel_pmu_needs_lbr_smpl(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
@@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
 	.put_event_constraints	= intel_put_event_constraints,
+	.pebs_aliases		= intel_pebs_aliases_core2,
 
 	.format_attrs		= intel_arch3_formats_attr,
 
@@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 42: /* SandyBridge */
-		x86_add_quirk(intel_sandybridge_quirk);
 	case 45: /* SandyBridge, "Romely-EP" */
+		x86_add_quirk(intel_sandybridge_quirk);
+	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
@@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
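
The comments in intel_pebs_aliases_core2()/_snb() above explain why an inverted counter-mask of 16 on a *_RETIRED event counts every cycle. For orientation only, here is a small sketch of how the X86_CONFIG(.event, .umask, .inv, .cmask) fields land in the architectural event-select layout (hypothetical helper, not the kernel macro): event code in bits 0-7, unit mask in bits 8-15, invert in bit 23, counter mask in bits 24-31.

#include <stdint.h>
#include <stdio.h>

/* Sketch of building a raw event-select value from the fields used above. */
static uint64_t x86_config_sketch(uint8_t event, uint8_t umask,
				  int inv, uint8_t cmask)
{
	return (uint64_t)event |
	       ((uint64_t)umask << 8) |
	       ((uint64_t)(inv ? 1 : 0) << 23) |
	       ((uint64_t)cmask << 24);
}

int main(void)
{
	/* Core2/Nehalem alias: event 0xc0 with inv=1, cmask=16 */
	printf("core2 alias: %#llx\n",
	       (unsigned long long)x86_config_sketch(0xc0, 0x00, 1, 16));

	/* SNB/IVB alias: UOPS_RETIRED.ALL (0x01c2) with inv=1, cmask=16 */
	printf("snb alias:   %#llx\n",
	       (unsigned long long)x86_config_sketch(0xc2, 0x01, 1, 16));
	return 0;
}
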
+1 −8
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
-	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
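
The hunk above collapses eight per-umask MEM_UOP_RETIRED entries into one event-code-wide entry. The practical difference is which bits of the event-select value a constraint matches on; a rough, hedged sketch of that idea (hypothetical names and masks, not the kernel's EVENT_CONSTRAINT machinery):

#include <stdint.h>
#include <stdio.h>

#define EVSEL_EVENT_MASK 0x000000ffULL	/* event code, bits 0-7 */
#define EVSEL_UMASK_MASK 0x0000ff00ULL	/* unit mask, bits 8-15 */

struct constraint_sketch {
	uint64_t code;	/* value to match      */
	uint64_t mask;	/* which bits to match */
};

static int constraint_matches(const struct constraint_sketch *c, uint64_t config)
{
	return (config & c->mask) == c->code;
}

int main(void)
{
	/* event-only constraint, like INTEL_EVENT_CONSTRAINT(0xd0, ...) */
	struct constraint_sketch ev  = { 0xd0,   EVSEL_EVENT_MASK };
	/* event + umask constraint, like INTEL_UEVENT_CONSTRAINT(0x11d0, ...) */
	struct constraint_sketch uev = { 0x11d0, EVSEL_EVENT_MASK | EVSEL_UMASK_MASK };

	uint64_t stlb_miss_stores = 0x12d0;	/* MEM_UOP_RETIRED.STLB_MISS_STORES */

	printf("event-only matches:  %d\n", constraint_matches(&ev,  stlb_miss_stores)); /* 1 */
	printf("event+umask matches: %d\n", constraint_matches(&uev, stlb_miss_stores)); /* 0 */
	return 0;
}
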