Merge branch 'perf/urgent' into perf/core, to fix up fixes before queueing up new changes (84c48d8d) · Commits · e / devices / android_kernel_fairphone_FP4

arch/x86/events/amd/ibs.c

+45 −7

Original line number	Diff line number	Diff line
		@@ -28,10 +28,46 @@ static u32 ibs_caps;
		#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN \| IBS_FETCH_MAX_CNT)
		#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT


		/*
		* IBS states:
		*
		* ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
		* and any further add()s must fail.
		*
		* STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
		* complicated by the fact that the IBS hardware can send late NMIs (ie. after
		* we've cleared the EN bit).
		*
		* In order to consume these late NMIs we have the STOPPED state, any NMI that
		* happens after we've cleared the EN state will clear this bit and report the
		* NMI handled (this is fundamentally racy in the face or multiple NMI sources,
		* someone else can consume our BIT and our NMI will go unhandled).
		*
		* And since we cannot set/clear this separate bit together with the EN bit,
		* there are races; if we cleared STARTED early, an NMI could land in
		* between clearing STARTED and clearing the EN bit (in fact multiple NMIs
		* could happen if the period is small enough), and consume our STOPPED bit
		* and trigger streams of unhandled NMIs.
		*
		* If, however, we clear STARTED late, an NMI can hit between clearing the
		* EN bit and clearing STARTED, still see STARTED set and process the event.
		* If this event will have the VALID bit clear, we bail properly, but this
		* is not a given. With VALID set we can end up calling pmu::stop() again
		* (the throttle logic) and trigger the WARNs in there.
		*
		* So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
		* nesting, and clear STARTED late, so that we have a well defined state over
		* the clearing of the EN bit.
		*
		* XXX: we could probably be using !atomic bitops for all this.
		*/

		enum ibs_states {
		IBS_ENABLED = 0,
		IBS_STARTED = 1,
		IBS_STOPPING = 2,
		IBS_STOPPED = 3,

		IBS_MAX_STATES,
		};
		@@ -377,9 +413,8 @@ static void perf_ibs_start(struct perf_event *event, int flags)

		perf_ibs_set_period(perf_ibs, hwc, &period);
		/*
		* Set STARTED before enabling the hardware, such that
		* a subsequent NMI must observe it. Then clear STOPPING
		* such that we don't consume NMIs by accident.
		* Set STARTED before enabling the hardware, such that a subsequent NMI
		* must observe it.
		*/
		set_bit(IBS_STARTED, pcpu->state);
		clear_bit(IBS_STOPPING, pcpu->state);
		@@ -396,6 +431,9 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
		u64 config;
		int stopping;

		if (test_and_set_bit(IBS_STOPPING, pcpu->state))
		return;

		stopping = test_bit(IBS_STARTED, pcpu->state);

		if (!stopping && (hwc->state & PERF_HES_UPTODATE))
		@@ -405,12 +443,12 @@ static void perf_ibs_stop(struct perf_event *event, int flags)

		if (stopping) {
		/*
		* Set STOPPING before disabling the hardware, such that it
		* Set STOPPED before disabling the hardware, such that it
		* must be visible to NMIs the moment we clear the EN bit,
		* at which point we can generate an !VALID sample which
		* we need to consume.
		*/
		set_bit(IBS_STOPPING, pcpu->state);
		set_bit(IBS_STOPPED, pcpu->state);
		perf_ibs_disable_event(perf_ibs, hwc, config);
		/*
		* Clear STARTED after disabling the hardware; if it were
		@@ -556,7 +594,7 @@ static int perf_ibs_handle_irq(struct perf_ibs perf_ibs, struct pt_regs iregs)
		* with samples that even have the valid bit cleared.
		* Mark all this NMIs as handled.
		*/
		if (test_and_clear_bit(IBS_STOPPING, pcpu->state))
		if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
		return 1;

		return 0;

kernel/events/core.c

+13 −2

Original line number	Diff line number	Diff line
		@@ -2417,14 +2417,24 @@ static void ctx_sched_out(struct perf_event_context *ctx,
		cpuctx->task_ctx = NULL;
		}

		is_active ^= ctx->is_active; /* changed bits */

		/*
		* Always update time if it was set; not only when it changes.
		* Otherwise we can 'forget' to update time for any but the last
		* context we sched out. For example:
		*
		* ctx_sched_out(.event_type = EVENT_FLEXIBLE)
		* ctx_sched_out(.event_type = EVENT_PINNED)
		*
		* would only update time for the pinned events.
		*/
		if (is_active & EVENT_TIME) {
		/* update (and stop) ctx time */
		update_context_time(ctx);
		update_cgrp_time_from_cpuctx(cpuctx);
		}

		is_active ^= ctx->is_active; /* changed bits */

		if (!ctx->nr_active \|\| !(is_active & EVENT_ALL))
		return;

		@@ -8532,6 +8542,7 @@ SYSCALL_DEFINE5(perf_event_open,
		f_flags);
		if (IS_ERR(event_file)) {
		err = PTR_ERR(event_file);
		event_file = NULL;
		goto err_context;
		}