Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8da5adda authored by Don Zickus, committed by Andi Kleen
Browse files

[PATCH] x86: Allow users to force a panic on NMI



To quote Alan Cox:

The default Linux behaviour on an NMI of either memory or unknown is to
continue operation. For many environments such as scientific computing
it is preferable that the box is taken out and the error dealt with than
an uncorrected parity/ECC error get propagated.

A small number of systems do generate NMIs for bizarre random reasons
such as power management so the default is unchanged. In other respects
the new /proc/sys entry works like the existing panic controls already in
that directory.

This is separate from the EDAC support - EDAC allows supported chipsets to
handle ECC errors well, this change allows unsupported cases to at least
panic rather than cause problems further down the line.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
parent e33e89ab
Loading
Loading
Loading
Loading
+6 −0
Original line number Original line Diff line number Diff line
@@ -635,6 +635,8 @@ static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
			"to continue\n");
			"to continue\n");
	printk(KERN_EMERG "You probably have a hardware problem with your RAM "
	printk(KERN_EMERG "You probably have a hardware problem with your RAM "
			"chips\n");
			"chips\n");
	if (panic_on_unrecovered_nmi)
                panic("NMI: Not continuing");


	/* Clear and disable the memory parity error line. */
	/* Clear and disable the memory parity error line. */
	clear_mem_error(reason);
	clear_mem_error(reason);
@@ -670,6 +672,10 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
		reason, smp_processor_id());
		reason, smp_processor_id());
	printk("Dazed and confused, but trying to continue\n");
	printk("Dazed and confused, but trying to continue\n");
	printk("Do you have a strange power saving mode enabled?\n");
	printk("Do you have a strange power saving mode enabled?\n");

	if (panic_on_unrecovered_nmi)
                panic("NMI: Not continuing");

}
}


static DEFINE_SPINLOCK(nmi_print_lock);
static DEFINE_SPINLOCK(nmi_print_lock);
+6 −0
Original line number Original line Diff line number Diff line
@@ -732,6 +732,8 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
{
	printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
	printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
	printk("You probably have a hardware problem with your RAM chips\n");
	printk("You probably have a hardware problem with your RAM chips\n");
	if (panic_on_unrecovered_nmi)
               panic("NMI: Not continuing");


	/* Clear and disable the memory parity error line. */
	/* Clear and disable the memory parity error line. */
	reason = (reason & 0xf) | 4;
	reason = (reason & 0xf) | 4;
@@ -757,6 +759,10 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{	printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
{	printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
	printk("Dazed and confused, but trying to continue\n");
	printk("Dazed and confused, but trying to continue\n");
	printk("Do you have a strange power saving mode enabled?\n");
	printk("Do you have a strange power saving mode enabled?\n");

	if (panic_on_unrecovered_nmi)
                panic("NMI: Not continuing");

}
}


/* Runs on IST stack. This code must keep interrupts off all the time.
/* Runs on IST stack. This code must keep interrupts off all the time.
+1 −0
Original line number Original line Diff line number Diff line
@@ -186,6 +186,7 @@ extern void bust_spinlocks(int yes);
extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
extern int panic_timeout;
extern int panic_timeout;
extern int panic_on_oops;
extern int panic_on_oops;
extern int panic_on_unrecovered_nmi;
extern int tainted;
extern int tainted;
extern const char *print_tainted(void);
extern const char *print_tainted(void);
extern void add_taint(unsigned);
extern void add_taint(unsigned);
+1 −0
Original line number Original line Diff line number Diff line
@@ -151,6 +151,7 @@ enum
	KERN_COMPAT_LOG=73,	/* int: print compat layer  messages */
	KERN_COMPAT_LOG=73,	/* int: print compat layer  messages */
	KERN_MAX_LOCK_DEPTH=74,
	KERN_MAX_LOCK_DEPTH=74,
	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
	KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered NMI */
};
};




+1 −0
Original line number Original line Diff line number Diff line
@@ -21,6 +21,7 @@
#include <linux/debug_locks.h>
#include <linux/debug_locks.h>


int panic_on_oops;
int panic_on_oops;
int panic_on_unrecovered_nmi;
int tainted;
int tainted;
static int pause_on_oops;
static int pause_on_oops;
static int pause_on_oops_flag;
static int pause_on_oops_flag;
Loading