Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0fe1ac48 authored by Paul Mackerras, committed by Benjamin Herrenschmidt
Browse files

powerpc/perf_event: Fix oops due to perf_event_do_pending call



Anton Blanchard found that large POWER systems would occasionally
crash in the exception exit path when profiling with perf_events.
The symptom was that an interrupt would occur late in the exit path
when the MSR[RI] (recoverable interrupt) bit was clear.  Interrupts
should be hard-disabled at this point but they were enabled.  Because
the interrupt was not recoverable the system panicked.

The reason is that the exception exit path was calling
perf_event_do_pending after hard-disabling interrupts, and
perf_event_do_pending will re-enable interrupts.

The simplest and cleanest fix for this is to use the same mechanism
that 32-bit powerpc does, namely to cause a self-IPI by setting the
decrementer to 1.  This means we can remove the tests in the exception
exit path and raw_local_irq_restore.

This also makes sure that the call to perf_event_do_pending from
timer_interrupt() happens within irq_enter/irq_exit.  (Note that
calling perf_event_do_pending from timer_interrupt does not mean that
there is a possible 1/HZ latency; setting the decrementer to 1 ensures
that the timer interrupt will happen immediately, i.e. within one
timebase tick, which is a few nanoseconds or 10s of nanoseconds.)

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: stable@kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent cea0d767
Loading
Loading
Loading
Loading
+0 −38
Original line number Diff line number Diff line
@@ -130,43 +130,5 @@ static inline int irqs_disabled_flags(unsigned long flags)
 */
struct irq_chip;

#ifdef CONFIG_PERF_EVENTS

#ifdef CONFIG_PPC64
static inline unsigned long test_perf_event_pending(void)
{
	unsigned long x;

	asm volatile("lbz %0,%1(13)"
		: "=r" (x)
		: "i" (offsetof(struct paca_struct, perf_event_pending)));
	return x;
}

static inline void set_perf_event_pending(void)
{
	asm volatile("stb %0,%1(13)" : :
		"r" (1),
		"i" (offsetof(struct paca_struct, perf_event_pending)));
}

static inline void clear_perf_event_pending(void)
{
	asm volatile("stb %0,%1(13)" : :
		"r" (0),
		"i" (offsetof(struct paca_struct, perf_event_pending)));
}
#endif /* CONFIG_PPC64 */

#else  /* CONFIG_PERF_EVENTS */

static inline unsigned long test_perf_event_pending(void)
{
	return 0;
}

static inline void clear_perf_event_pending(void) {}
#endif /* CONFIG_PERF_EVENTS */

#endif	/* __KERNEL__ */
#endif	/* _ASM_POWERPC_HW_IRQ_H */
+0 −1
Original line number Diff line number Diff line
@@ -133,7 +133,6 @@ int main(void)
	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
	DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending));
	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
#ifdef CONFIG_PPC_MM_SLICES
	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
+0 −9
Original line number Diff line number Diff line
@@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES)
2:
	TRACE_AND_RESTORE_IRQ(r5);

#ifdef CONFIG_PERF_EVENTS
	/* check paca->perf_event_pending if we're enabling ints */
	lbz	r3,PACAPERFPEND(r13)
	and.	r3,r3,r5
	beq	27f
	bl	.perf_event_do_pending
27:
#endif /* CONFIG_PERF_EVENTS */

	/* extract EE bit and use it to restore paca->hard_enabled */
	ld	r3,_MSR(r1)
	rldicl	r4,r3,49,63		/* r0 = (r3 >> 15) & 1 */
+0 −6
Original line number Diff line number Diff line
@@ -53,7 +53,6 @@
#include <linux/bootmem.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <linux/perf_event.h>

#include <asm/uaccess.h>
#include <asm/system.h>
@@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en)
	}
#endif /* CONFIG_PPC_STD_MMU_64 */

	if (test_perf_event_pending()) {
		clear_perf_event_pending();
		perf_event_do_pending();
	}

	/*
	 * if (get_paca()->hard_enabled) return;
	 * But again we need to take care that gcc gets hard_enabled directly
+48 −12
Original line number Diff line number Diff line
@@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void)
}
#endif /* CONFIG_PPC_ISERIES */

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
DEFINE_PER_CPU(u8, perf_event_pending);
#ifdef CONFIG_PERF_EVENTS

void set_perf_event_pending(void)
/*
 * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
 */
#ifdef CONFIG_PPC64
static inline unsigned long test_perf_event_pending(void)
{
	get_cpu_var(perf_event_pending) = 1;
	set_dec(1);
	put_cpu_var(perf_event_pending);
	unsigned long x;

	asm volatile("lbz %0,%1(13)"
		: "=r" (x)
		: "i" (offsetof(struct paca_struct, perf_event_pending)));
	return x;
}

static inline void set_perf_event_pending_flag(void)
{
	asm volatile("stb %0,%1(13)" : :
		"r" (1),
		"i" (offsetof(struct paca_struct, perf_event_pending)));
}

static inline void clear_perf_event_pending(void)
{
	asm volatile("stb %0,%1(13)" : :
		"r" (0),
		"i" (offsetof(struct paca_struct, perf_event_pending)));
}

#else /* 32-bit */

DEFINE_PER_CPU(u8, perf_event_pending);

#define set_perf_event_pending_flag()	__get_cpu_var(perf_event_pending) = 1
#define test_perf_event_pending()	__get_cpu_var(perf_event_pending)
#define clear_perf_event_pending()	__get_cpu_var(perf_event_pending) = 0

#else  /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
#endif /* 32 vs 64 bit */

void set_perf_event_pending(void)
{
	preempt_disable();
	set_perf_event_pending_flag();
	set_dec(1);
	preempt_enable();
}

#else  /* CONFIG_PERF_EVENTS */

#define test_perf_event_pending()	0
#define clear_perf_event_pending()

#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
#endif /* CONFIG_PERF_EVENTS */

/*
 * For iSeries shared processors, we have to let the hypervisor
@@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs)
	set_dec(DECREMENTER_MAX);

#ifdef CONFIG_PPC32
	if (test_perf_event_pending()) {
		clear_perf_event_pending();
		perf_event_do_pending();
	}
	if (atomic_read(&ppc_n_lost_interrupts) != 0)
		do_IRQ(regs);
#endif
@@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs)

	calculate_steal_time();

	if (test_perf_event_pending()) {
		clear_perf_event_pending();
		perf_event_do_pending();
	}

#ifdef CONFIG_PPC_ISERIES
	if (firmware_has_feature(FW_FEATURE_ISERIES))
		get_lppaca()->int_dword.fields.decr_int = 0;