Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c58267e9 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "Kernel side changes mostly consist of work on x86 PMU drivers:

   - x86 Intel PT (hardware CPU tracer) improvements (Alexander
     Shishkin)

   - x86 Intel CQM (cache quality monitoring) improvements (Thomas
     Gleixner)

   - x86 Intel PEBSv3 support (Peter Zijlstra)

   - x86 Intel PEBS interrupt batching support for lower overhead
     sampling (Zheng Yan, Kan Liang)

   - x86 PMU scheduler fixes and improvements (Peter Zijlstra)

  There's too many tooling improvements to list them all - here are a
  few select highlights:

  'perf bench':

      - Introduce new 'perf bench futex' benchmark: 'wake-parallel', to
        measure parallel waker threads generating contention for kernel
        locks (hb->lock). (Davidlohr Bueso)

  'perf top', 'perf report':

      - Allow disabling/enabling events dynamically in 'perf top':
        a 'perf top' session can instantly become a 'perf report'
        one, i.e. going from dynamic analysis to a static one,
        returning to a dynamic one is possible, to toggle the
        modes, just press 'f' to 'freeze/unfreeze' the sampling. (Arnaldo Carvalho de Melo)

      - Make Ctrl-C stop processing on TUI, allowing interrupting the load of big
        perf.data files (Namhyung Kim)

  'perf probe': (Masami Hiramatsu)

      - Support glob wildcards for function name
      - Support $params special probe argument: Collect all function arguments
      - Make --line checks validate C-style function name.
      - Add --no-inlines option to avoid searching inline functions
      - Greatly speed up 'perf probe --list' by caching debuginfo.
      - Improve --filter support for 'perf probe', allowing using its arguments
        on other commands, as --add, --del, etc.

  'perf sched':

      - Add option in 'perf sched' to merge like comms to lat output (Josef Bacik)

  Plus tons of infrastructure work - in particular preparation for
  upcoming threaded perf report support, but also lots of other work -
  and fixes and other improvements.  See (much) more details in the
  shortlog and in the git log"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (305 commits)
  perf tools: Configurable per thread proc map processing time out
  perf tools: Add time out to force stop proc map processing
  perf report: Fix sort__sym_cmp to also compare end of symbol
  perf hists browser: React to unassigned hotkey pressing
  perf top: Tell the user how to unfreeze events after pressing 'f'
  perf hists browser: Honour the help line provided by builtin-{top,report}.c
  perf hists browser: Do not exit when 'f' is pressed in 'report' mode
  perf top: Replace CTRL+z with 'f' as hotkey for enable/disable events
  perf annotate: Rename source_line_percent to source_line_samples
  perf annotate: Display total number of samples with --show-total-period
  perf tools: Ensure thread-stack is flushed
  perf top: Allow disabling/enabling events dynamicly
  perf evlist: Add toggle_enable() method
  perf trace: Fix race condition at the end of started workloads
  perf probe: Speed up perf probe --list by caching debuginfo
  perf probe: Show usage even if the last event is skipped
  perf tools: Move libtraceevent dynamic list to separated LDFLAGS variable
  perf tools: Fix a problem when opening old perf.data with different byte order
  perf tools: Ignore .config-detected in .gitignore
  perf probe: Fix to return error if no probe is added
  ...
parents 1bf7067c a9a3cd90
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -7634,7 +7634,6 @@ F: kernel/delayacct.c

PERFORMANCE EVENTS SUBSYSTEM
M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
M:	Paul Mackerras <paulus@samba.org>
M:	Ingo Molnar <mingo@redhat.com>
M:	Arnaldo Carvalho de Melo <acme@kernel.org>
L:	linux-kernel@vger.kernel.org
+2 −3
Original line number Diff line number Diff line
@@ -215,7 +215,6 @@ VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD))

export srctree objtree VPATH


# SUBARCH tells the usermode build what the underlying arch is.  That is set
# first, and if a usermode build is happening, the "ARCH=um" on the command
# line overrides the setting of ARCH below.  If a native build is happening,
@@ -1497,11 +1496,11 @@ image_name:
# Clear a bunch of variables before executing the submake
tools/: FORCE
	$(Q)mkdir -p $(objtree)/tools
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/

tools/%: FORCE
	$(Q)mkdir -p $(objtree)/tools
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/ $*
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/ $*

# Single targets
# ---------------------------------------------------------------------------
+10 −10
Original line number Diff line number Diff line
@@ -881,10 +881,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
			if (x86_pmu.commit_scheduling)
				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
		}
	}

	if (!assign || unsched) {

	} else {
		for (i = 0; i < n; i++) {
			e = cpuc->event_list[i];
			/*
@@ -1097,6 +1094,8 @@ int x86_perf_event_set_period(struct perf_event *event)

	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

	if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
	    local64_read(&hwc->prev_count) != (u64)-left) {
		/*
		 * The hw event starts counting from this event offset,
		 * mark it to be able to extra future deltas:
@@ -1104,6 +1103,7 @@ int x86_perf_event_set_period(struct perf_event *event)
		local64_set(&hwc->prev_count, (u64)-left);

		wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
	}

	/*
	 * Due to erratum on certan cpu we need
+18 −3
Original line number Diff line number Diff line
@@ -75,6 +75,8 @@ struct event_constraint {
#define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
#define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
#define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
#define PERF_X86_EVENT_AUTO_RELOAD	0x0400 /* use PEBS auto-reload */
#define PERF_X86_EVENT_FREERUNNING	0x0800 /* use freerunning PEBS */


struct amd_nb {
@@ -87,6 +89,18 @@ struct amd_nb {
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS		8

/*
 * Flags PEBS can handle without an PMI.
 *
 * TID can only be handled by flushing at context switch.
 *
 */
#define PEBS_FREERUNNING_FLAGS \
	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
	PERF_SAMPLE_TRANSACTION)

/*
 * A debug store configuration.
 *
@@ -133,7 +147,6 @@ enum intel_excl_state_type {
};

struct intel_excl_states {
	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
	bool sched_started; /* true if scheduling has started */
};
@@ -527,10 +540,10 @@ struct x86_pmu {
	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);

	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);

	void		(*start_scheduling)(struct cpu_hw_events *cpuc);

	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);

	void		(*stop_scheduling)(struct cpu_hw_events *cpuc);

	struct event_constraint *event_constraints;
@@ -866,6 +879,8 @@ void intel_pmu_pebs_enable_all(void);

void intel_pmu_pebs_disable_all(void);

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);

void intel_ds_init(void);

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+99 −125
Original line number Diff line number Diff line
@@ -1903,9 +1903,8 @@ static void
intel_start_scheduling(struct cpu_hw_events *cpuc)
{
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct intel_excl_states *xl, *xlo;
	struct intel_excl_states *xl;
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid; /* sibling thread */

	/*
	 * nothing needed if in group validation mode
@@ -1916,10 +1915,9 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
	/*
	 * no exclusion needed
	 */
	if (!excl_cntrs)
	if (WARN_ON_ONCE(!excl_cntrs))
		return;

	xlo = &excl_cntrs->states[o_tid];
	xl = &excl_cntrs->states[tid];

	xl->sched_started = true;
@@ -1928,22 +1926,41 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
	 * in stop_event_scheduling()
	 * makes scheduling appear as a transaction
	 */
	WARN_ON_ONCE(!irqs_disabled());
	raw_spin_lock(&excl_cntrs->lock);
}

	/*
	 * save initial state of sibling thread
	 */
	memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
{
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct event_constraint *c = cpuc->event_constraint[idx];
	struct intel_excl_states *xl;
	int tid = cpuc->excl_thread_id;

	if (cpuc->is_fake || !is_ht_workaround_enabled())
		return;

	if (WARN_ON_ONCE(!excl_cntrs))
		return;

	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
		return;

	xl = &excl_cntrs->states[tid];

	lockdep_assert_held(&excl_cntrs->lock);

	if (c->flags & PERF_X86_EVENT_EXCL)
		xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
	else
		xl->state[cntr] = INTEL_EXCL_SHARED;
}

static void
intel_stop_scheduling(struct cpu_hw_events *cpuc)
{
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct intel_excl_states *xl, *xlo;
	struct intel_excl_states *xl;
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid; /* sibling thread */

	/*
	 * nothing needed if in group validation mode
@@ -1953,17 +1970,11 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
	/*
	 * no exclusion needed
	 */
	if (!excl_cntrs)
	if (WARN_ON_ONCE(!excl_cntrs))
		return;

	xlo = &excl_cntrs->states[o_tid];
	xl = &excl_cntrs->states[tid];

	/*
	 * make new sibling thread state visible
	 */
	memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));

	xl->sched_started = false;
	/*
	 * release shared state lock (acquired in intel_start_scheduling())
@@ -1975,12 +1986,10 @@ static struct event_constraint *
intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
			   int idx, struct event_constraint *c)
{
	struct event_constraint *cx;
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct intel_excl_states *xl, *xlo;
	int is_excl, i;
	struct intel_excl_states *xlo;
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid; /* alternate */
	int is_excl, i;

	/*
	 * validating a group does not require
@@ -1992,27 +2001,8 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	/*
	 * no exclusion needed
	 */
	if (!excl_cntrs)
	if (WARN_ON_ONCE(!excl_cntrs))
		return c;
	/*
	 * event requires exclusive counter access
	 * across HT threads
	 */
	is_excl = c->flags & PERF_X86_EVENT_EXCL;
	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
		if (!cpuc->n_excl++)
			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
	}

	/*
	 * xl = state of current HT
	 * xlo = state of sibling HT
	 */
	xl = &excl_cntrs->states[tid];
	xlo = &excl_cntrs->states[o_tid];

	cx = c;

	/*
	 * because we modify the constraint, we need
@@ -2023,10 +2013,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	 * been cloned (marked dynamic)
	 */
	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {

		/* sanity check */
		if (idx < 0)
			return &emptyconstraint;
		struct event_constraint *cx;

		/*
		 * grab pre-allocated constraint entry
@@ -2037,13 +2024,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
		 * initialize dynamic constraint
		 * with static constraint
		 */
		memcpy(cx, c, sizeof(*cx));
		*cx = *c;

		/*
		 * mark constraint as dynamic, so we
		 * can free it later on
		 */
		cx->flags |= PERF_X86_EVENT_DYNAMIC;
		c = cx;
	}

	/*
@@ -2053,6 +2041,22 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	 * of this function
	 */

	/*
	 * state of sibling HT
	 */
	xlo = &excl_cntrs->states[tid ^ 1];

	/*
	 * event requires exclusive counter access
	 * across HT threads
	 */
	is_excl = c->flags & PERF_X86_EVENT_EXCL;
	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
		if (!cpuc->n_excl++)
			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
	}

	/*
	 * Modify static constraint with current dynamic
	 * state of thread
@@ -2061,37 +2065,37 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	 * SHARED   : sibling counter measuring non-exclusive event
	 * UNUSED   : sibling counter unused
	 */
	for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
	for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
		/*
		 * exclusive event in sibling counter
		 * our corresponding counter cannot be used
		 * regardless of our event
		 */
		if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
			__clear_bit(i, cx->idxmsk);
		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
			__clear_bit(i, c->idxmsk);
		/*
		 * if measuring an exclusive event, sibling
		 * measuring non-exclusive, then counter cannot
		 * be used
		 */
		if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
			__clear_bit(i, cx->idxmsk);
		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
			__clear_bit(i, c->idxmsk);
	}

	/*
	 * recompute actual bit weight for scheduling algorithm
	 */
	cx->weight = hweight64(cx->idxmsk64);
	c->weight = hweight64(c->idxmsk64);

	/*
	 * if we return an empty mask, then switch
	 * back to static empty constraint to avoid
	 * the cost of freeing later on
	 */
	if (cx->weight == 0)
		cx = &emptyconstraint;
	if (c->weight == 0)
		c = &emptyconstraint;

	return cx;
	return c;
}

static struct event_constraint *
@@ -2124,10 +2128,8 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
{
	struct hw_perf_event *hwc = &event->hw;
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct intel_excl_states *xlo, *xl;
	unsigned long flags = 0; /* keep compiler happy */
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid;
	struct intel_excl_states *xl;

	/*
	 * nothing needed if in group validation mode
@@ -2135,36 +2137,35 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
	if (cpuc->is_fake)
		return;

	WARN_ON_ONCE(!excl_cntrs);

	if (!excl_cntrs)
	if (WARN_ON_ONCE(!excl_cntrs))
		return;

	xl = &excl_cntrs->states[tid];
	xlo = &excl_cntrs->states[o_tid];
	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
		if (!--cpuc->n_excl)
			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
	}

	/*
	 * If event was actually assigned, then mark the counter state as
	 * unused now.
	 */
	if (hwc->idx >= 0) {
		xl = &excl_cntrs->states[tid];

		/*
		 * put_constraint may be called from x86_schedule_events()
		 * which already has the lock held so here make locking
	 * conditional
		 * conditional.
		 */
		if (!xl->sched_started)
		raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
			raw_spin_lock(&excl_cntrs->lock);

	/*
	 * if event was actually assigned, then mark the
	 * counter state as unused now
	 */
	if (hwc->idx >= 0)
		xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
		xl->state[hwc->idx] = INTEL_EXCL_UNUSED;

		if (!xl->sched_started)
		raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
			raw_spin_unlock(&excl_cntrs->lock);
	}
}

static void
@@ -2196,41 +2197,6 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
		intel_put_excl_constraints(cpuc, event);
}

static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
{
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct event_constraint *c = cpuc->event_constraint[idx];
	struct intel_excl_states *xlo, *xl;
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid;
	int is_excl;

	if (cpuc->is_fake || !c)
		return;

	is_excl = c->flags & PERF_X86_EVENT_EXCL;

	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
		return;

	WARN_ON_ONCE(!excl_cntrs);

	if (!excl_cntrs)
		return;

	xl = &excl_cntrs->states[tid];
	xlo = &excl_cntrs->states[o_tid];

	WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));

	if (cntr >= 0) {
		if (is_excl)
			xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
		else
			xlo->init_state[cntr] = INTEL_EXCL_SHARED;
	}
}

static void intel_pebs_aliases_core2(struct perf_event *event)
{
	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
@@ -2294,8 +2260,15 @@ static int intel_pmu_hw_config(struct perf_event *event)
	if (ret)
		return ret;

	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
	if (event->attr.precise_ip) {
		if (!event->attr.freq) {
			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
			if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
				event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
		}
		if (x86_pmu.pebs_aliases)
			x86_pmu.pebs_aliases(event);
	}

	if (needs_branch_stack(event)) {
		ret = intel_pmu_setup_lbr_filter(event);
@@ -2544,19 +2517,11 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
{
	struct intel_excl_cntrs *c;
	int i;

	c = kzalloc_node(sizeof(struct intel_excl_cntrs),
			 GFP_KERNEL, cpu_to_node(cpu));
	if (c) {
		raw_spin_lock_init(&c->lock);
		for (i = 0; i < X86_PMC_IDX_MAX; i++) {
			c->states[0].state[i] = INTEL_EXCL_UNUSED;
			c->states[0].init_state[i] = INTEL_EXCL_UNUSED;

			c->states[1].state[i] = INTEL_EXCL_UNUSED;
			c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
		}
		c->core_id = -1;
	}
	return c;
@@ -2677,6 +2642,15 @@ static void intel_pmu_cpu_dying(int cpu)
	fini_debug_store_on_cpu(cpu);
}

static void intel_pmu_sched_task(struct perf_event_context *ctx,
				 bool sched_in)
{
	if (x86_pmu.pebs_active)
		intel_pmu_pebs_sched_task(ctx, sched_in);
	if (x86_pmu.lbr_nr)
		intel_pmu_lbr_sched_task(ctx, sched_in);
}

PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");

PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -2766,7 +2740,7 @@ static __initconst const struct x86_pmu intel_pmu = {
	.cpu_starting		= intel_pmu_cpu_starting,
	.cpu_dying		= intel_pmu_cpu_dying,
	.guest_get_msrs		= intel_guest_get_msrs,
	.sched_task		= intel_pmu_lbr_sched_task,
	.sched_task		= intel_pmu_sched_task,
};

static __init void intel_clovertown_quirk(void)
@@ -2939,8 +2913,8 @@ static __init void intel_ht_bug(void)
{
	x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;

	x86_pmu.commit_scheduling = intel_commit_scheduling;
	x86_pmu.start_scheduling = intel_start_scheduling;
	x86_pmu.commit_scheduling = intel_commit_scheduling;
	x86_pmu.stop_scheduling = intel_stop_scheduling;
}

@@ -3396,8 +3370,8 @@ static __init int fixup_ht_bug(void)

	x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);

	x86_pmu.commit_scheduling = NULL;
	x86_pmu.start_scheduling = NULL;
	x86_pmu.commit_scheduling = NULL;
	x86_pmu.stop_scheduling = NULL;

	watchdog_nmi_enable_all();
Loading