Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ad83242a authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "Mostly tooling fixes, plus two uncore-PMU fixes, an uprobes fix, a
  perf-cgroups fix and an AUX events fix"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel/uncore: Add enable_box for client MSR uncore
  perf/x86/intel/uncore: Fix uncore num_counters
  uprobes/x86: Fix RIP-relative handling of EVEX-encoded instructions
  perf/core: Set cgroup in CPU contexts for new cgroup events
  perf/core: Fix sideband list-iteration vs. event ordering NULL pointer deference crash
  perf probe ppc64le: Fix probe location when using DWARF
  perf probe: Add function to post process kernel trace events
  tools: Sync cpufeatures headers with the kernel
  toops: Sync tools/include/uapi/linux/bpf.h with the kernel
  tools: Sync cpufeatures.h and vmx.h with the kernel
  perf probe: Support signedness casting
  perf stat: Avoid skew when reading events
  perf probe: Fix module name matching
  perf probe: Adjust map->reloc offset when finding kernel symbol from map
  perf hists: Trim libtraceevent trace_seq buffers
  perf script: Add 'bpf-output' field to usage message
parents 1f8083c6 95f3be79
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -100,6 +100,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
	}
}

static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
{
	wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
		SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
}

static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
{
	if (box->pmu->pmu_idx == 0)
@@ -127,6 +133,7 @@ static struct attribute_group snb_uncore_format_group = {

static struct intel_uncore_ops snb_uncore_msr_ops = {
	.init_box	= snb_uncore_msr_init_box,
	.enable_box	= snb_uncore_msr_enable_box,
	.exit_box	= snb_uncore_msr_exit_box,
	.disable_event	= snb_uncore_msr_disable_event,
	.enable_event	= snb_uncore_msr_enable_event,
@@ -192,6 +199,12 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
	}
}

static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
{
	wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
		SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
}

static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
{
	if (box->pmu->pmu_idx == 0)
@@ -200,6 +213,7 @@ static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)

static struct intel_uncore_ops skl_uncore_msr_ops = {
	.init_box	= skl_uncore_msr_init_box,
	.enable_box	= skl_uncore_msr_enable_box,
	.exit_box	= skl_uncore_msr_exit_box,
	.disable_event	= snb_uncore_msr_disable_event,
	.enable_event	= snb_uncore_msr_enable_event,
+5 −5
Original line number Diff line number Diff line
@@ -2626,7 +2626,7 @@ void hswep_uncore_cpu_init(void)

static struct intel_uncore_type hswep_uncore_ha = {
	.name		= "ha",
	.num_counters   = 5,
	.num_counters   = 4,
	.num_boxes	= 2,
	.perf_ctr_bits	= 48,
	SNBEP_UNCORE_PCI_COMMON_INIT(),
@@ -2645,7 +2645,7 @@ static struct uncore_event_desc hswep_uncore_imc_events[] = {

static struct intel_uncore_type hswep_uncore_imc = {
	.name		= "imc",
	.num_counters   = 5,
	.num_counters   = 4,
	.num_boxes	= 8,
	.perf_ctr_bits	= 48,
	.fixed_ctr_bits	= 48,
@@ -2691,7 +2691,7 @@ static struct intel_uncore_type hswep_uncore_irp = {

static struct intel_uncore_type hswep_uncore_qpi = {
	.name			= "qpi",
	.num_counters		= 5,
	.num_counters		= 4,
	.num_boxes		= 3,
	.perf_ctr_bits		= 48,
	.perf_ctr		= SNBEP_PCI_PMON_CTR0,
@@ -2773,7 +2773,7 @@ static struct event_constraint hswep_uncore_r3qpi_constraints[] = {

static struct intel_uncore_type hswep_uncore_r3qpi = {
	.name		= "r3qpi",
	.num_counters   = 4,
	.num_counters   = 3,
	.num_boxes	= 3,
	.perf_ctr_bits	= 44,
	.constraints	= hswep_uncore_r3qpi_constraints,
@@ -2972,7 +2972,7 @@ static struct intel_uncore_type bdx_uncore_ha = {

static struct intel_uncore_type bdx_uncore_imc = {
	.name		= "imc",
	.num_counters   = 5,
	.num_counters   = 4,
	.num_boxes	= 8,
	.perf_ctr_bits	= 48,
	.fixed_ctr_bits	= 48,
+11 −11
Original line number Diff line number Diff line
@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
		*cursor &= 0xfe;
	}
	/*
	 * Similar treatment for VEX3 prefix.
	 * TODO: add XOP/EVEX treatment when insn decoder supports them
	 * Similar treatment for VEX3/EVEX prefix.
	 * TODO: add XOP treatment when insn decoder supports them
	 */
	if (insn->vex_prefix.nbytes == 3) {
	if (insn->vex_prefix.nbytes >= 3) {
		/*
		 * vex2:     c5    rvvvvLpp   (has no b bit)
		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
		 *   (evex will need setting of both b and x since
		 *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
		 * Setting VEX3.b (setting because it has inverted meaning):
		 * Setting VEX3.b (setting because it has inverted meaning).
		 * Setting EVEX.x since (in non-SIB encoding) EVEX.x
		 * is the 4th bit of MODRM.rm, and needs the same treatment.
		 * For VEX3-encoded insns, VEX3.x value has no effect in
		 * non-SIB encoding, the change is superfluous but harmless.
		 */
		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
		*cursor |= 0x20;
		*cursor |= 0x60;
	}

	/*
@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)

	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
	reg2 = 0xff;		/* Fetch vex.vvvv */
	if (insn->vex_prefix.nbytes == 2)
		reg2 = insn->vex_prefix.bytes[1];
	else if (insn->vex_prefix.nbytes == 3)
	if (insn->vex_prefix.nbytes)
		reg2 = insn->vex_prefix.bytes[2];
	/*
	 * TODO: add XOP, EXEV vvvv reading.
	 * TODO: add XOP vvvv reading.
	 *
	 * vex.vvvv field is in bits 6-3, bits are inverted.
	 * But in 32-bit mode, high-order bit may be ignored.
+4 −0
Original line number Diff line number Diff line
@@ -743,7 +743,9 @@ struct perf_event_context {
	u64				parent_gen;
	u64				generation;
	int				pin_count;
#ifdef CONFIG_CGROUP_PERF
	int				nr_cgroups;	 /* cgroup evts */
#endif
	void				*task_ctx_data; /* pmu specific data */
	struct rcu_head			rcu_head;
};
@@ -769,7 +771,9 @@ struct perf_cpu_context {
	unsigned int			hrtimer_active;

	struct pmu			*unique_pmu;
#ifdef CONFIG_CGROUP_PERF
	struct perf_cgroup		*cgrp;
#endif
};

struct perf_output_handle {
+54 −23
Original line number Diff line number Diff line
@@ -843,6 +843,32 @@ perf_cgroup_mark_enabled(struct perf_event *event,
		}
	}
}

/*
 * Update cpuctx->cgrp so that it is set when first cgroup event is added and
 * cleared when last cgroup event is removed.
 */
static inline void
list_update_cgroup_event(struct perf_event *event,
			 struct perf_event_context *ctx, bool add)
{
	struct perf_cpu_context *cpuctx;

	if (!is_cgroup_event(event))
		return;

	if (add && ctx->nr_cgroups++)
		return;
	else if (!add && --ctx->nr_cgroups)
		return;
	/*
	 * Because cgroup events are always per-cpu events,
	 * this will always be called from the right CPU.
	 */
	cpuctx = __get_cpu_context(ctx);
	cpuctx->cgrp = add ? event->cgrp : NULL;
}

#else /* !CONFIG_CGROUP_PERF */

static inline bool
@@ -920,6 +946,13 @@ perf_cgroup_mark_enabled(struct perf_event *event,
			 struct perf_event_context *ctx)
{
}

static inline void
list_update_cgroup_event(struct perf_event *event,
			 struct perf_event_context *ctx, bool add)
{
}

#endif

/*
@@ -1392,6 +1425,7 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
static void
list_add_event(struct perf_event *event, struct perf_event_context *ctx)
{

	lockdep_assert_held(&ctx->lock);

	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1412,8 +1446,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
		list_add_tail(&event->group_entry, list);
	}

	if (is_cgroup_event(event))
		ctx->nr_cgroups++;
	list_update_cgroup_event(event, ctx, true);

	list_add_rcu(&event->event_entry, &ctx->event_list);
	ctx->nr_events++;
@@ -1581,8 +1614,6 @@ static void perf_group_attach(struct perf_event *event)
static void
list_del_event(struct perf_event *event, struct perf_event_context *ctx)
{
	struct perf_cpu_context *cpuctx;

	WARN_ON_ONCE(event->ctx != ctx);
	lockdep_assert_held(&ctx->lock);

@@ -1594,20 +1625,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)

	event->attach_state &= ~PERF_ATTACH_CONTEXT;

	if (is_cgroup_event(event)) {
		ctx->nr_cgroups--;
		/*
		 * Because cgroup events are always per-cpu events, this will
		 * always be called from the right CPU.
		 */
		cpuctx = __get_cpu_context(ctx);
		/*
		 * If there are no more cgroup events then clear cgrp to avoid
		 * stale pointer in update_cgrp_time_from_cpuctx().
		 */
		if (!ctx->nr_cgroups)
			cpuctx->cgrp = NULL;
	}
	list_update_cgroup_event(event, ctx, false);

	ctx->nr_events--;
	if (event->attr.inherit_stat)
@@ -1716,8 +1734,8 @@ static inline int pmu_filter_match(struct perf_event *event)
static inline int
event_filter_match(struct perf_event *event)
{
	return (event->cpu == -1 || event->cpu == smp_processor_id())
	    && perf_cgroup_match(event) && pmu_filter_match(event);
	return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
	       perf_cgroup_match(event) && pmu_filter_match(event);
}

static void
@@ -1737,8 +1755,8 @@ event_sched_out(struct perf_event *event,
	 * maintained, otherwise bogus information is return
	 * via read() for time_enabled, time_running:
	 */
	if (event->state == PERF_EVENT_STATE_INACTIVE
	    && !event_filter_match(event)) {
	if (event->state == PERF_EVENT_STATE_INACTIVE &&
	    !event_filter_match(event)) {
		delta = tstamp - event->tstamp_stopped;
		event->tstamp_running += delta;
		event->tstamp_stopped = tstamp;
@@ -2236,10 +2254,15 @@ perf_install_in_context(struct perf_event_context *ctx,

	lockdep_assert_held(&ctx->mutex);

	event->ctx = ctx;
	if (event->cpu != -1)
		event->cpu = cpu;

	/*
	 * Ensures that if we can observe event->ctx, both the event and ctx
	 * will be 'complete'. See perf_iterate_sb_cpu().
	 */
	smp_store_release(&event->ctx, ctx);

	if (!task) {
		cpu_function_call(cpu, __perf_install_in_context, event);
		return;
@@ -5969,6 +5992,14 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
	struct perf_event *event;

	list_for_each_entry_rcu(event, &pel->list, sb_list) {
		/*
		 * Skip events that are not fully formed yet; ensure that
		 * if we observe event->ctx, both event and ctx will be
		 * complete enough. See perf_install_in_context().
		 */
		if (!smp_load_acquire(&event->ctx))
			continue;

		if (event->state < PERF_EVENT_STATE_INACTIVE)
			continue;
		if (!event_filter_match(event))
Loading