
Commit 0c1cbc18 authored by Peter Zijlstra, committed by Ingo Molnar

perf/core: Fix perf_event_read()

perf_event_read() has a number of issues regarding the timekeeping bits.

 - The IPI didn't update group times when it found INACTIVE

 - The direct call would not re-check ->state after taking ctx->lock
   which can result in ->count and timestamps getting out of sync.

And we can make use of the ordering introduced for perf_event_stop()
to make it more accurate for ACTIVE.
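
As a reader's aid, the ordering and retry logic this patch relies on can be modeled in plain C11. The sketch below is illustrative only: every name in it is invented, atomic_thread_fence() stands in for the kernel's smp_wmb()/smp_rmb(), and a pthread mutex stands in for ctx->lock.

	/* Illustrative user-space model only; not the kernel's code. */
	#include <pthread.h>
	#include <stdatomic.h>

	enum ev_state { EV_INACTIVE, EV_ACTIVE };

	struct ev {
		_Atomic int state;		/* models event->state */
		_Atomic int oncpu;		/* models event->oncpu */
		pthread_mutex_t lock;		/* models ctx->lock */
	};

	/* Writer, as in event_sched_in(): make ->oncpu visible before ACTIVE is. */
	static void ev_sched_in(struct ev *e, int cpu)
	{
		atomic_store_explicit(&e->oncpu, cpu, memory_order_relaxed);
		atomic_thread_fence(memory_order_release);	/* smp_wmb() */
		atomic_store_explicit(&e->state, EV_ACTIVE, memory_order_relaxed);
	}

	/*
	 * Reader, as in perf_event_read(): snapshot ->state once, locklessly.
	 * If the snapshot went stale, re-check it under the lock and retry,
	 * so counts and timestamps are never read against the wrong state.
	 */
	static int ev_read(struct ev *e)
	{
		int state = atomic_load_explicit(&e->state, memory_order_relaxed);

	again:
		if (state == EV_ACTIVE) {
			/* Pairs with the release fence in ev_sched_in(): seeing
			 * ACTIVE guarantees we also see the matching ->oncpu. */
			atomic_thread_fence(memory_order_acquire);	/* smp_rmb() */
			return atomic_load_explicit(&e->oncpu, memory_order_relaxed);
		}

		pthread_mutex_lock(&e->lock);
		state = atomic_load_explicit(&e->state, memory_order_relaxed);
		if (state != EV_INACTIVE) {
			/* Raced with ev_sched_in(); drop the lock and retry. */
			pthread_mutex_unlock(&e->lock);
			goto again;
		}
		/* INACTIVE is stable while we hold the lock; read saved values here. */
		pthread_mutex_unlock(&e->lock);
		return -1;
	}

The key property is the pairing: if the reader's relaxed load of ->state observes ACTIVE, the acquire fence guarantees the subsequent ->oncpu load sees the value written before the release fence.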

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 7f0ec325
@@ -2081,8 +2081,9 @@ event_sched_in(struct perf_event *event,
 
 	WRITE_ONCE(event->oncpu, smp_processor_id());
 	/*
-	 * Order event::oncpu write to happen before the ACTIVE state
-	 * is visible.
+	 * Order event::oncpu write to happen before the ACTIVE state is
+	 * visible. This allows perf_event_{stop,read}() to observe the correct
+	 * ->oncpu if it sees ACTIVE.
 	 */
 	smp_wmb();
 	WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
@@ -3638,12 +3639,16 @@ static void __perf_event_read(void *info)
 		return;
 
 	raw_spin_lock(&ctx->lock);
-	if (ctx->is_active) {
+	if (ctx->is_active & EVENT_TIME) {
 		update_context_time(ctx);
 		update_cgrp_time_from_event(event);
 	}
 
+	if (!data->group)
+		update_event_times(event);
+	else
+		update_group_times(event);
+
 	if (event->state != PERF_EVENT_STATE_ACTIVE)
 		goto unlock;
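
Context for the ctx->is_active change above: is_active is a bitmask, not a boolean. The values below reflect the enum in kernel/events/core.c of this era (quoted from memory, so treat as approximate):

	enum event_type_t {
		EVENT_FLEXIBLE	= 0x1,
		EVENT_PINNED	= 0x2,
		EVENT_TIME	= 0x4,
		EVENT_ALL	= EVENT_FLEXIBLE | EVENT_PINNED,
	};

A plain truth test fires whenever any event class is scheduled in; testing the EVENT_TIME bit advances the context clocks only while time is actually being accounted, which is what __perf_event_read() needs here.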

@@ -3658,7 +3663,6 @@ static void __perf_event_read(void *info)
 	pmu->read(event);
 
 	list_for_each_entry(sub, &event->sibling_list, group_entry) {
-		update_event_times(sub);
 		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
 			/*
 			 * Use sibling's PMU rather than @event's since
@@ -3748,23 +3752,35 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
 
 static int perf_event_read(struct perf_event *event, bool group)
 {
+	enum perf_event_state state = READ_ONCE(event->state);
 	int event_cpu, ret = 0;
 
 	/*
 	 * If event is enabled and currently active on a CPU, update the
 	 * value in the event structure:
 	 */
-	if (event->state == PERF_EVENT_STATE_ACTIVE) {
-		struct perf_read_data data = {
-			.event = event,
-			.group = group,
-			.ret = 0,
-		};
+again:
+	if (state == PERF_EVENT_STATE_ACTIVE) {
+		struct perf_read_data data;
+
+		/*
+		 * Orders the ->state and ->oncpu loads such that if we see
+		 * ACTIVE we must also see the right ->oncpu.
+		 *
+		 * Matches the smp_wmb() from event_sched_in().
+		 */
+		smp_rmb();
 
 		event_cpu = READ_ONCE(event->oncpu);
 		if ((unsigned)event_cpu >= nr_cpu_ids)
 			return 0;
 
+		data = (struct perf_read_data){
+			.event = event,
+			.group = group,
+			.ret = 0,
+		};
+
 		preempt_disable();
 		event_cpu = __perf_event_read_cpu(event, event_cpu);
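
A side note on the (unsigned)event_cpu >= nr_cpu_ids test kept above: it doubles as the "not on any CPU" check, because ->oncpu is -1 when the event is scheduled out and the cast turns -1 into the largest unsigned value, which always fails the range test. A standalone sketch (names invented):

	#include <stdio.h>

	#define NR_CPU_IDS 8U	/* stand-in for the kernel's nr_cpu_ids */

	/* 1 if cpu names a possible CPU; -1 (event not running) also fails,
	 * since (unsigned)-1 is the largest unsigned int. */
	static int cpu_in_range(int cpu)
	{
		return (unsigned int)cpu < NR_CPU_IDS;
	}

	int main(void)
	{
		printf("%d %d %d\n", cpu_in_range(3), cpu_in_range(-1), cpu_in_range(64));
		/* prints: 1 0 0 */
		return 0;
	}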

@@ -3781,20 +3797,27 @@ static int perf_event_read(struct perf_event *event, bool group)
 		(void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
 		preempt_enable();
 		ret = data.ret;
-	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
+
+	} else if (state == PERF_EVENT_STATE_INACTIVE) {
 		struct perf_event_context *ctx = event->ctx;
 		unsigned long flags;
 
 		raw_spin_lock_irqsave(&ctx->lock, flags);
+		state = event->state;
+		if (state != PERF_EVENT_STATE_INACTIVE) {
+			raw_spin_unlock_irqrestore(&ctx->lock, flags);
+			goto again;
+		}
+
 		/*
-		 * may read while context is not active
-		 * (e.g., thread is blocked), in that case
-		 * we cannot update context time
+		 * May read while context is not active (e.g., thread is
+		 * blocked), in that case we cannot update context time
 		 */
-		if (ctx->is_active) {
+		if (ctx->is_active & EVENT_TIME) {
 			update_context_time(ctx);
 			update_cgrp_time_from_event(event);
 		}
+
 		if (group)
 			update_group_times(event);
 		else