Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0f5a2601 authored by Peter Zijlstra, committed by Ingo Molnar
Browse files

perf: Avoid a useless pmu_disable() in the perf-tick



Gleb writes:

 > Currently pmu is disabled and re-enabled on each timer interrupt even
 > when no rotation or frequency adjustment is needed. On Intel CPU this
 > results in two writes into PERF_GLOBAL_CTRL MSR per tick. On bare metal
 > it does not cause significant slowdown, but when running perf in a virtual
 > machine it leads to 20% slowdown on my machine.

Cure this by keeping a perf_event_context::nr_freq counter that counts the
number of active events that require frequency adjustments, and using it in a
similar fashion to the already existing nr_events != nr_active test in
perf_rotate_context().

By being able to exclude both rotation and frequency adjustments a priori for
the common case, we can avoid the otherwise superfluous PMU disable.

Suggested-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-515yhoatehd3gza7we9fapaa@git.kernel.org


Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent d6c1c49d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -890,6 +890,7 @@ struct perf_event_context {
	int				nr_active;
	int				is_active;
	int				nr_stat;
	int				nr_freq;
	int				rotate_disable;
	atomic_t			refcount;
	struct task_struct		*task;
+32 −16
Original line number Diff line number Diff line
@@ -1130,6 +1130,8 @@ event_sched_out(struct perf_event *event,
	if (!is_software_event(event))
		cpuctx->active_oncpu--;
	ctx->nr_active--;
	if (event->attr.freq && event->attr.sample_freq)
		ctx->nr_freq--;
	if (event->attr.exclusive || !cpuctx->active_oncpu)
		cpuctx->exclusive = 0;
}
@@ -1407,6 +1409,8 @@ event_sched_in(struct perf_event *event,
	if (!is_software_event(event))
		cpuctx->active_oncpu++;
	ctx->nr_active++;
	if (event->attr.freq && event->attr.sample_freq)
		ctx->nr_freq++;

	if (event->attr.exclusive)
		cpuctx->exclusive = 1;
@@ -2329,6 +2333,9 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
	u64 interrupts, now;
	s64 delta;

	if (!ctx->nr_freq)
		return;

	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
		if (event->state != PERF_EVENT_STATE_ACTIVE)
			continue;
@@ -2384,12 +2391,14 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
{
	u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
	struct perf_event_context *ctx = NULL;
	int rotate = 0, remove = 1;
	int rotate = 0, remove = 1, freq = 0;

	if (cpuctx->ctx.nr_events) {
		remove = 0;
		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
			rotate = 1;
		if (cpuctx->ctx.nr_freq)
			freq = 1;
	}

	ctx = cpuctx->task_ctx;
@@ -2397,17 +2406,23 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
		remove = 0;
		if (ctx->nr_events != ctx->nr_active)
			rotate = 1;
		if (ctx->nr_freq)
			freq = 1;
	}

	if (!rotate && !freq)
		goto done;

	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
	perf_pmu_disable(cpuctx->ctx.pmu);

	if (freq) {
		perf_ctx_adjust_freq(&cpuctx->ctx, interval);
		if (ctx)
			perf_ctx_adjust_freq(ctx, interval);
	}

	if (!rotate)
		goto done;

	if (rotate) {
		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
		if (ctx)
			ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
@@ -2417,13 +2432,14 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
			rotate_ctx(ctx);

		perf_event_sched_in(cpuctx, ctx, current);
	}

	perf_pmu_enable(cpuctx->ctx.pmu);
	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);

done:
	if (remove)
		list_del_init(&cpuctx->rotation_list);

	perf_pmu_enable(cpuctx->ctx.pmu);
	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}

void perf_event_task_tick(void)