Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 91a612ee authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar
Browse files

perf/core: Fix dynamic interrupt throttle



There were two problems with the dynamic interrupt throttle mechanism,
both triggered by the same action.

When you (or perf_fuzzer) write a huge value into
/proc/sys/kernel/perf_event_max_sample_rate the computed
perf_sample_allowed_ns becomes 0. This effectively disables the whole
dynamic throttle.

This is fixed by ensuring update_perf_cpu_limits() never sets the
value to 0. However, we allow disabling of the dynamic throttle by
writing 100 to /proc/sys/kernel/perf_cpu_time_max_percent. This will
generate a warning in dmesg.

The second problem is that by setting the max_sample_rate to a huge
number, the adaptive process can take a few tries, since it halfs the
limit each time. Change that to directly compute a new value based on
the observed duration.

Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent c2872d38
Loading
Loading
Loading
Loading
+51 −36
Original line number Diff line number Diff line
@@ -376,8 +376,11 @@ static void update_perf_cpu_limits(void)
	u64 tmp = perf_sample_period_ns;

	tmp *= sysctl_perf_cpu_time_max_percent;
	do_div(tmp, 100);
	ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
	tmp = div_u64(tmp, 100);
	if (!tmp)
		tmp = 1;

	WRITE_ONCE(perf_sample_allowed_ns, tmp);
}

static int perf_rotate_context(struct perf_cpu_context *cpuctx);
@@ -409,7 +412,13 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
	if (ret || !write)
		return ret;

	if (sysctl_perf_cpu_time_max_percent == 100) {
		printk(KERN_WARNING
		       "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
		WRITE_ONCE(perf_sample_allowed_ns, 0);
	} else {
		update_perf_cpu_limits();
	}

	return 0;
}
@@ -423,19 +432,15 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
#define NR_ACCUMULATED_SAMPLES 128
static DEFINE_PER_CPU(u64, running_sample_length);

static u64 __report_avg;
static u64 __report_allowed;

static void perf_duration_warn(struct irq_work *w)
{
	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
	u64 avg_local_sample_len;
	u64 local_samples_len;

	local_samples_len = __this_cpu_read(running_sample_length);
	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;

	printk_ratelimited(KERN_WARNING
			"perf interrupt took too long (%lld > %lld), lowering "
		"perf: interrupt took too long (%lld > %lld), lowering "
		"kernel.perf_event_max_sample_rate to %d\n",
			avg_local_sample_len, allowed_ns >> 1,
		__report_avg, __report_allowed,
		sysctl_perf_event_sample_rate);
}

@@ -443,42 +448,52 @@ static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);

void perf_sample_event_took(u64 sample_len_ns)
{
	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
	u64 avg_local_sample_len;
	u64 local_samples_len;
	u64 max_len = READ_ONCE(perf_sample_allowed_ns);
	u64 running_len;
	u64 avg_len;
	u32 max;

	if (allowed_ns == 0)
	if (max_len == 0)
		return;

	/* decay the counter by 1 average sample */
	local_samples_len = __this_cpu_read(running_sample_length);
	local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES;
	local_samples_len += sample_len_ns;
	__this_cpu_write(running_sample_length, local_samples_len);
	/* Decay the counter by 1 average sample. */
	running_len = __this_cpu_read(running_sample_length);
	running_len -= running_len/NR_ACCUMULATED_SAMPLES;
	running_len += sample_len_ns;
	__this_cpu_write(running_sample_length, running_len);

	/*
	 * note: this will be biased artifically low until we have
	 * Note: this will be biased artifically low until we have
	 * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
	 * from having to maintain a count.
	 */
	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;

	if (avg_local_sample_len <= allowed_ns)
	avg_len = running_len/NR_ACCUMULATED_SAMPLES;
	if (avg_len <= max_len)
		return;

	if (max_samples_per_tick <= 1)
		return;
	__report_avg = avg_len;
	__report_allowed = max_len;

	max_samples_per_tick = DIV_ROUND_UP(max_samples_per_tick, 2);
	sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
	/*
	 * Compute a throttle threshold 25% below the current duration.
	 */
	avg_len += avg_len / 4;
	max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent;
	if (avg_len < max)
		max /= (u32)avg_len;
	else
		max = 1;

	update_perf_cpu_limits();
	WRITE_ONCE(perf_sample_allowed_ns, avg_len);
	WRITE_ONCE(max_samples_per_tick, max);

	sysctl_perf_event_sample_rate = max * HZ;
	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;

	if (!irq_work_queue(&perf_duration_work)) {
		early_printk("perf interrupt took too long (%lld > %lld), lowering "
		early_printk("perf: interrupt took too long (%lld > %lld), lowering "
			     "kernel.perf_event_max_sample_rate to %d\n",
			     avg_local_sample_len, allowed_ns >> 1,
			     __report_avg, __report_allowed,
			     sysctl_perf_event_sample_rate);
	}
}