Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c0d12172, authored by Dave Jiang, committed by Linus Torvalds
Browse files

drivers/edac: add new nmi rescan



Provides a way for NMI-reported errors on x86 to notify the EDAC
subsystem of pending ECC errors by writing to a software state variable.

Here's the reworked patch. I added an EDAC stub to the kernel so we can
have variables that are in the kernel even if EDAC is a module. I also
implemented the idea of using the chip driver to select the error detection
mode via a module parameter, and eliminated the kernel compile option.
Please review/test. Thx!

Also, I only made changes to some of the chipset drivers since I am
unfamiliar with the other ones. We can add similar changes as we go.

Signed-off-by: Dave Jiang <djiang@mvista.com>
Signed-off-by: Douglas Thompson <dougthompson@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 28f96eea
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -41,6 +41,10 @@
#include <linux/mca.h>
#endif

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -638,6 +642,14 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
		"CPU %d.\n", reason, smp_processor_id());
	printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");

#if defined(CONFIG_EDAC)
	if(edac_handler_set()) {
		edac_atomic_assert_error();
		return;
	}
#endif

	if (panic_on_unrecovered_nmi)
                panic("NMI: Not continuing");

+11 −0
Original line number Diff line number Diff line
@@ -34,6 +34,10 @@
#include <linux/bug.h>
#include <linux/kdebug.h>

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
@@ -719,6 +723,13 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
		reason);
	printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");

#if defined(CONFIG_EDAC)
	if(edac_handler_set()) {
		edac_atomic_assert_error();
		return;
	}
#endif

	if (panic_on_unrecovered_nmi)
		panic("NMI: Not continuing");

+0 −11
Original line number Diff line number Diff line
@@ -109,15 +109,4 @@ config EDAC_I5000
	  Support for error detection and correction the Intel
	  Greekcreek/Blackford chipsets.

choice
	prompt "Error detecting method"
	default EDAC_POLL

config EDAC_POLL
	bool "Poll for errors"
	help
	  Poll the chipset periodically to detect errors.

endchoice

endif # EDAC
+1 −1
Original line number Diff line number Diff line
@@ -5,9 +5,9 @@
# This file may be distributed under the terms of the
# GNU General Public License.
#
# $Id: Makefile,v 1.4.2.3 2005/07/08 22:05:38 dsp_llnl Exp $


obj-$(CONFIG_EDAC)			:= edac_stub.o
obj-$(CONFIG_EDAC_MM_EDAC)		+= edac_core.o

edac_core-objs	:= edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
+13 −1
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/edac.h>
#include "edac_mc.h"

#define E752X_REVISION	" Ver: 2.0.1 " __DATE__
@@ -948,6 +949,16 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
	debugf0("%s(): mci\n", __func__);
	debugf0("Starting Probe1\n");

	/* make sure error reporting method is sane */
	switch(edac_op_state) {
		case EDAC_OPSTATE_POLL:
		case EDAC_OPSTATE_NMI:
			break;
		default:
			edac_op_state = EDAC_OPSTATE_POLL;
			break;
	}

	/* check to see if device 0 function 1 is enabled; if it isn't, we
	 * assume the BIOS has reserved it for a reason and is expecting
	 * exclusive access, we take care not to violate that assumption and
@@ -1123,4 +1134,5 @@ MODULE_DESCRIPTION("MC support for Intel e752x memory controllers");
module_param(force_function_unhide, int, 0444);
MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:"
" 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
Loading