Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c58267e9 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "Kernel side changes mostly consist of work on x86 PMU drivers:

   - x86 Intel PT (hardware CPU tracer) improvements (Alexander
     Shishkin)

   - x86 Intel CQM (cache quality monitoring) improvements (Thomas
     Gleixner)

   - x86 Intel PEBSv3 support (Peter Zijlstra)

   - x86 Intel PEBS interrupt batching support for lower overhead
     sampling (Zheng Yan, Kan Liang)

   - x86 PMU scheduler fixes and improvements (Peter Zijlstra)

  There's too many tooling improvements to list them all - here are a
  few select highlights:

  'perf bench':

      - Introduce new 'perf bench futex' benchmark: 'wake-parallel', to
        measure parallel waker threads generating contention for kernel
        locks (hb->lock). (Davidlohr Bueso)

  'perf top', 'perf report':

      - Allow disabling/enabling events dynamically in 'perf top':
        a 'perf top' session can instantly become a 'perf report'
        one, i.e. going from dynamic analysis to a static one,
        returning to a dynamic one is possible, to toggle the
        modes, just press 'f' to 'freeze/unfreeze' the sampling. (Arnaldo Carvalho de Melo)

      - Make Ctrl-C stop processing on TUI, allowing interrupting the load of big
        perf.data files (Namhyung Kim)

  'perf probe': (Masami Hiramatsu)

      - Support glob wildcards for function name
      - Support $params special probe argument: Collect all function arguments
      - Make --line checks validate C-style function name.
      - Add --no-inlines option to avoid searching inline functions
      - Greatly speed up 'perf probe --list' by caching debuginfo.
      - Improve --filter support for 'perf probe', allowing using its arguments
        on other commands, as --add, --del, etc.

  'perf sched':

      - Add option in 'perf sched' to merge like comms to lat output (Josef Bacik)

  Plus tons of infrastructure work - in particular preparation for
  upcoming threaded perf report support, but also lots of other work -
  and fixes and other improvements.  See (much) more details in the
  shortlog and in the git log"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (305 commits)
  perf tools: Configurable per thread proc map processing time out
  perf tools: Add time out to force stop proc map processing
  perf report: Fix sort__sym_cmp to also compare end of symbol
  perf hists browser: React to unassigned hotkey pressing
  perf top: Tell the user how to unfreeze events after pressing 'f'
  perf hists browser: Honour the help line provided by builtin-{top,report}.c
  perf hists browser: Do not exit when 'f' is pressed in 'report' mode
  perf top: Replace CTRL+z with 'f' as hotkey for enable/disable events
  perf annotate: Rename source_line_percent to source_line_samples
  perf annotate: Display total number of samples with --show-total-period
  perf tools: Ensure thread-stack is flushed
  perf top: Allow disabling/enabling events dynamicly
  perf evlist: Add toggle_enable() method
  perf trace: Fix race condition at the end of started workloads
  perf probe: Speed up perf probe --list by caching debuginfo
  perf probe: Show usage even if the last event is skipped
  perf tools: Move libtraceevent dynamic list to separated LDFLAGS variable
  perf tools: Fix a problem when opening old perf.data with different byte order
  perf tools: Ignore .config-detected in .gitignore
  perf probe: Fix to return error if no probe is added
  ...
parents 1bf7067c a9a3cd90
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -7634,7 +7634,6 @@ F: kernel/delayacct.c

PERFORMANCE EVENTS SUBSYSTEM
M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
M:	Paul Mackerras <paulus@samba.org>
M:	Ingo Molnar <mingo@redhat.com>
M:	Arnaldo Carvalho de Melo <acme@kernel.org>
L:	linux-kernel@vger.kernel.org
+2 −3
Original line number Diff line number Diff line
@@ -215,7 +215,6 @@ VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD))

export srctree objtree VPATH


# SUBARCH tells the usermode build what the underlying arch is.  That is set
# first, and if a usermode build is happening, the "ARCH=um" on the command
# line overrides the setting of ARCH below.  If a native build is happening,
@@ -1497,11 +1496,11 @@ image_name:
# Clear a bunch of variables before executing the submake
tools/: FORCE
	$(Q)mkdir -p $(objtree)/tools
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/

tools/%: FORCE
	$(Q)mkdir -p $(objtree)/tools
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/ $*
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/ $*

# Single targets
# ---------------------------------------------------------------------------
+10 −10
Original line number Diff line number Diff line
@@ -881,10 +881,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
			if (x86_pmu.commit_scheduling)
				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
		}
	}

	if (!assign || unsched) {

	} else {
		for (i = 0; i < n; i++) {
			e = cpuc->event_list[i];
			/*
@@ -1097,6 +1094,8 @@ int x86_perf_event_set_period(struct perf_event *event)

	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

	if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
	    local64_read(&hwc->prev_count) != (u64)-left) {
		/*
		 * The hw event starts counting from this event offset,
		 * mark it to be able to extra future deltas:
@@ -1104,6 +1103,7 @@ int x86_perf_event_set_period(struct perf_event *event)
		local64_set(&hwc->prev_count, (u64)-left);

		wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
	}

	/*
	 * Due to erratum on certan cpu we need
+18 −3
Original line number Diff line number Diff line
@@ -75,6 +75,8 @@ struct event_constraint {
#define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
#define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
#define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
#define PERF_X86_EVENT_AUTO_RELOAD	0x0400 /* use PEBS auto-reload */
#define PERF_X86_EVENT_FREERUNNING	0x0800 /* use freerunning PEBS */


struct amd_nb {
@@ -87,6 +89,18 @@ struct amd_nb {
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS		8

/*
 * Flags PEBS can handle without an PMI.
 *
 * TID can only be handled by flushing at context switch.
 *
 */
#define PEBS_FREERUNNING_FLAGS \
	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
	PERF_SAMPLE_TRANSACTION)

/*
 * A debug store configuration.
 *
@@ -133,7 +147,6 @@ enum intel_excl_state_type {
};

struct intel_excl_states {
	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
	bool sched_started; /* true if scheduling has started */
};
@@ -527,10 +540,10 @@ struct x86_pmu {
	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);

	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);

	void		(*start_scheduling)(struct cpu_hw_events *cpuc);

	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);

	void		(*stop_scheduling)(struct cpu_hw_events *cpuc);

	struct event_constraint *event_constraints;
@@ -866,6 +879,8 @@ void intel_pmu_pebs_enable_all(void);

void intel_pmu_pebs_disable_all(void);

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);

void intel_ds_init(void);

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+99 −125
Original line number Diff line number Diff line
@@ -1903,9 +1903,8 @@ static void
intel_start_scheduling(struct cpu_hw_events *cpuc)
{
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct intel_excl_states *xl, *xlo;
	struct intel_excl_states *xl;
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid; /* sibling thread */

	/*
	 * nothing needed if in group validation mode
@@ -1916,10 +1915,9 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
	/*
	 * no exclusion needed
	 */
	if (!excl_cntrs)
	if (WARN_ON_ONCE(!excl_cntrs))
		return;

	xlo = &excl_cntrs->states[o_tid];
	xl = &excl_cntrs->states[tid];

	xl->sched_started = true;
@@ -1928,22 +1926,41 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
	 * in stop_event_scheduling()
	 * makes scheduling appear as a transaction
	 */
	WARN_ON_ONCE(!irqs_disabled());
	raw_spin_lock(&excl_cntrs->lock);
}

	/*
	 * save initial state of sibling thread
	 */
	memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
{
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct event_constraint *c = cpuc->event_constraint[idx];
	struct intel_excl_states *xl;
	int tid = cpuc->excl_thread_id;

	if (cpuc->is_fake || !is_ht_workaround_enabled())
		return;

	if (WARN_ON_ONCE(!excl_cntrs))
		return;

	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
		return;

	xl = &excl_cntrs->states[tid];

	lockdep_assert_held(&excl_cntrs->lock);

	if (c->flags & PERF_X86_EVENT_EXCL)
		xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
	else
		xl->state[cntr] = INTEL_EXCL_SHARED;
}

static void
intel_stop_scheduling(struct cpu_hw_events *cpuc)
{
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct intel_excl_states *xl, *xlo;
	struct intel_excl_states *xl;
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid; /* sibling thread */

	/*
	 * nothing needed if in group validation mode
@@ -1953,17 +1970,11 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
	/*
	 * no exclusion needed
	 */
	if (!excl_cntrs)
	if (WARN_ON_ONCE(!excl_cntrs))
		return;

	xlo = &excl_cntrs->states[o_tid];
	xl = &excl_cntrs->states[tid];

	/*
	 * make new sibling thread state visible
	 */
	memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));

	xl->sched_started = false;
	/*
	 * release shared state lock (acquired in intel_start_scheduling())
@@ -1975,12 +1986,10 @@ static struct event_constraint *
intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
			   int idx, struct event_constraint *c)
{
	struct event_constraint *cx;
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct intel_excl_states *xl, *xlo;
	int is_excl, i;
	struct intel_excl_states *xlo;
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid; /* alternate */
	int is_excl, i;

	/*
	 * validating a group does not require
@@ -1992,27 +2001,8 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	/*
	 * no exclusion needed
	 */
	if (!excl_cntrs)
	if (WARN_ON_ONCE(!excl_cntrs))
		return c;
	/*
	 * event requires exclusive counter access
	 * across HT threads
	 */
	is_excl = c->flags & PERF_X86_EVENT_EXCL;
	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
		if (!cpuc->n_excl++)
			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
	}

	/*
	 * xl = state of current HT
	 * xlo = state of sibling HT
	 */
	xl = &excl_cntrs->states[tid];
	xlo = &excl_cntrs->states[o_tid];

	cx = c;

	/*
	 * because we modify the constraint, we need
@@ -2023,10 +2013,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	 * been cloned (marked dynamic)
	 */
	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {

		/* sanity check */
		if (idx < 0)
			return &emptyconstraint;
		struct event_constraint *cx;

		/*
		 * grab pre-allocated constraint entry
@@ -2037,13 +2024,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
		 * initialize dynamic constraint
		 * with static constraint
		 */
		memcpy(cx, c, sizeof(*cx));
		*cx = *c;

		/*
		 * mark constraint as dynamic, so we
		 * can free it later on
		 */
		cx->flags |= PERF_X86_EVENT_DYNAMIC;
		c = cx;
	}

	/*
@@ -2053,6 +2041,22 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	 * of this function
	 */

	/*
	 * state of sibling HT
	 */
	xlo = &excl_cntrs->states[tid ^ 1];

	/*
	 * event requires exclusive counter access
	 * across HT threads
	 */
	is_excl = c->flags & PERF_X86_EVENT_EXCL;
	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
		if (!cpuc->n_excl++)
			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
	}

	/*
	 * Modify static constraint with current dynamic
	 * state of thread
@@ -2061,37 +2065,37 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	 * SHARED   : sibling counter measuring non-exclusive event
	 * UNUSED   : sibling counter unused
	 */
	for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
	for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
		/*
		 * exclusive event in sibling counter
		 * our corresponding counter cannot be used
		 * regardless of our event
		 */
		if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
			__clear_bit(i, cx->idxmsk);
		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
			__clear_bit(i, c->idxmsk);
		/*
		 * if measuring an exclusive event, sibling
		 * measuring non-exclusive, then counter cannot
		 * be used
		 */
		if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
			__clear_bit(i, cx->idxmsk);
		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
			__clear_bit(i, c->idxmsk);
	}

	/*
	 * recompute actual bit weight for scheduling algorithm
	 */
	cx->weight = hweight64(cx->idxmsk64);
	c->weight = hweight64(c->idxmsk64);

	/*
	 * if we return an empty mask, then switch
	 * back to static empty constraint to avoid
	 * the cost of freeing later on
	 */
	if (cx->weight == 0)
		cx = &emptyconstraint;
	if (c->weight == 0)
		c = &emptyconstraint;

	return cx;
	return c;
}

static struct event_constraint *
@@ -2124,10 +2128,8 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
{
	struct hw_perf_event *hwc = &event->hw;
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct intel_excl_states *xlo, *xl;
	unsigned long flags = 0; /* keep compiler happy */
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid;
	struct intel_excl_states *xl;

	/*
	 * nothing needed if in group validation mode
@@ -2135,36 +2137,35 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
	if (cpuc->is_fake)
		return;

	WARN_ON_ONCE(!excl_cntrs);

	if (!excl_cntrs)
	if (WARN_ON_ONCE(!excl_cntrs))
		return;

	xl = &excl_cntrs->states[tid];
	xlo = &excl_cntrs->states[o_tid];
	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
		if (!--cpuc->n_excl)
			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
	}

	/*
	 * If event was actually assigned, then mark the counter state as
	 * unused now.
	 */
	if (hwc->idx >= 0) {
		xl = &excl_cntrs->states[tid];

		/*
		 * put_constraint may be called from x86_schedule_events()
		 * which already has the lock held so here make locking
	 * conditional
		 * conditional.
		 */
		if (!xl->sched_started)
		raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
			raw_spin_lock(&excl_cntrs->lock);

	/*
	 * if event was actually assigned, then mark the
	 * counter state as unused now
	 */
	if (hwc->idx >= 0)
		xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
		xl->state[hwc->idx] = INTEL_EXCL_UNUSED;

		if (!xl->sched_started)
		raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
			raw_spin_unlock(&excl_cntrs->lock);
	}
}

static void
@@ -2196,41 +2197,6 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
		intel_put_excl_constraints(cpuc, event);
}

static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
{
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct event_constraint *c = cpuc->event_constraint[idx];
	struct intel_excl_states *xlo, *xl;
	int tid = cpuc->excl_thread_id;
	int o_tid = 1 - tid;
	int is_excl;

	if (cpuc->is_fake || !c)
		return;

	is_excl = c->flags & PERF_X86_EVENT_EXCL;

	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
		return;

	WARN_ON_ONCE(!excl_cntrs);

	if (!excl_cntrs)
		return;

	xl = &excl_cntrs->states[tid];
	xlo = &excl_cntrs->states[o_tid];

	WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));

	if (cntr >= 0) {
		if (is_excl)
			xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
		else
			xlo->init_state[cntr] = INTEL_EXCL_SHARED;
	}
}

static void intel_pebs_aliases_core2(struct perf_event *event)
{
	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
@@ -2294,8 +2260,15 @@ static int intel_pmu_hw_config(struct perf_event *event)
	if (ret)
		return ret;

	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
	if (event->attr.precise_ip) {
		if (!event->attr.freq) {
			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
			if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
				event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
		}
		if (x86_pmu.pebs_aliases)
			x86_pmu.pebs_aliases(event);
	}

	if (needs_branch_stack(event)) {
		ret = intel_pmu_setup_lbr_filter(event);
@@ -2544,19 +2517,11 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
{
	struct intel_excl_cntrs *c;
	int i;

	c = kzalloc_node(sizeof(struct intel_excl_cntrs),
			 GFP_KERNEL, cpu_to_node(cpu));
	if (c) {
		raw_spin_lock_init(&c->lock);
		for (i = 0; i < X86_PMC_IDX_MAX; i++) {
			c->states[0].state[i] = INTEL_EXCL_UNUSED;
			c->states[0].init_state[i] = INTEL_EXCL_UNUSED;

			c->states[1].state[i] = INTEL_EXCL_UNUSED;
			c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
		}
		c->core_id = -1;
	}
	return c;
@@ -2677,6 +2642,15 @@ static void intel_pmu_cpu_dying(int cpu)
	fini_debug_store_on_cpu(cpu);
}

static void intel_pmu_sched_task(struct perf_event_context *ctx,
				 bool sched_in)
{
	if (x86_pmu.pebs_active)
		intel_pmu_pebs_sched_task(ctx, sched_in);
	if (x86_pmu.lbr_nr)
		intel_pmu_lbr_sched_task(ctx, sched_in);
}

PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");

PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -2766,7 +2740,7 @@ static __initconst const struct x86_pmu intel_pmu = {
	.cpu_starting		= intel_pmu_cpu_starting,
	.cpu_dying		= intel_pmu_cpu_dying,
	.guest_get_msrs		= intel_guest_get_msrs,
	.sched_task		= intel_pmu_lbr_sched_task,
	.sched_task		= intel_pmu_sched_task,
};

static __init void intel_clovertown_quirk(void)
@@ -2939,8 +2913,8 @@ static __init void intel_ht_bug(void)
{
	x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;

	x86_pmu.commit_scheduling = intel_commit_scheduling;
	x86_pmu.start_scheduling = intel_start_scheduling;
	x86_pmu.commit_scheduling = intel_commit_scheduling;
	x86_pmu.stop_scheduling = intel_stop_scheduling;
}

@@ -3396,8 +3370,8 @@ static __init int fixup_ht_bug(void)

	x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);

	x86_pmu.commit_scheduling = NULL;
	x86_pmu.start_scheduling = NULL;
	x86_pmu.commit_scheduling = NULL;
	x86_pmu.stop_scheduling = NULL;

	watchdog_nmi_enable_all();
Loading