Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8375d490 authored by Mauro Carvalho Chehab's avatar Mauro Carvalho Chehab Committed by Linus Torvalds
Browse files

edac: driver for i5400 MCH (update)



Signed-off-by: default avatarBen Woodard <woodard@redhat.com>
Signed-off-by: default avatarMauro Carvalho Chehab <mchehab@redhat.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 920c8df6
Loading
Loading
Loading
Loading
+62 −57
Original line number Original line Diff line number Diff line
/*
/*
 * Intel 5400 class Memory Controllers kernel module
 * Intel 5400 class Memory Controllers kernel module (Seaburg)
 *
 *
 * This file may be distributed under the terms of the
 * This file may be distributed under the terms of the
 * GNU General Public License.
 * GNU General Public License.
@@ -55,13 +55,11 @@
 * Function 1: Memory Branch Map, Control, Errors Register
 * Function 1: Memory Branch Map, Control, Errors Register
 * Function 2: FSB Error Registers
 * Function 2: FSB Error Registers
 *
 *
 * All 3 functions of Device 16 (0,1,2) share the SAME DID
 * All 3 functions of Device 16 (0,1,2) share the SAME DID and
 * uses PCI_DEVICE_ID_INTEL_5400_ERR for device 16 (0,1,2),
 * PCI_DEVICE_ID_INTEL_5400_FBD0 and PCI_DEVICE_ID_INTEL_5400_FBD1
 * for device 21 (0,1).
 */
 */
#ifndef PCI_DEVICE_ID_INTEL_5400_ERR
#define PCI_DEVICE_ID_INTEL_5400_ERR  0x4030	/* Device 16 (0,1,2) */
#define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035	/* Device 21 (0,1) */
#define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036	/* Device 21 (0,1) */
#endif


	/* OFFSETS for Function 0 */
	/* OFFSETS for Function 0 */
#define		AMBASE			0x48 /* AMB Mem Mapped Reg Region Base */
#define		AMBASE			0x48 /* AMB Mem Mapped Reg Region Base */
@@ -174,17 +172,17 @@ enum error_mask {
/*
/*
 * Names to translate bit error into something useful
 * Names to translate bit error into something useful
 */
 */
char *error_name[] = {
static const char *error_name[] = {
	[0]  = "Memory Write error on non-redundant retry",
	[0]  = "Memory Write error on non-redundant retry",
	[1]  = "Memory or FB-DIMM configuration CRC read error",
	[1]  = "Memory or FB-DIMM configuration CRC read error",
	/* Reserved */
	/* Reserved */
	[3]  = "Uncorrectable Data ECC on Replay",
	[3]  = "Uncorrectable Data ECC on Replay",
	[4]  = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
	[4]  = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
	/* Unsupported on i5400 */
	/* M6 Unsupported on i5400 */
	[6]  = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
	[6]  = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
	[7]  = "Aliased Uncorrectable Patrol Data ECC",
	[7]  = "Aliased Uncorrectable Patrol Data ECC",
	[8]  = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
	[8]  = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
	/* Unsupported */
	/* M10 Unsupported on i5400 */
	[10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
	[10] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
	[11] = "Non-Aliased Uncorrectable Patrol Data ECC",
	[11] = "Non-Aliased Uncorrectable Patrol Data ECC",
	[12] = "Memory Write error on first attempt",
	[12] = "Memory Write error on first attempt",
@@ -192,7 +190,7 @@ char *error_name[] = {
	[14] = "Memory or FB-DIMM configuration CRC read error",
	[14] = "Memory or FB-DIMM configuration CRC read error",
	[15] = "Channel Failed-Over Occurred",
	[15] = "Channel Failed-Over Occurred",
	[16] = "Correctable Non-Mirrored Demand Data ECC",
	[16] = "Correctable Non-Mirrored Demand Data ECC",
	/* Unsupported */
	/* M18 Unsupported on i5400 */
	[18] = "Correctable Resilver- or Spare-Copy Data ECC",
	[18] = "Correctable Resilver- or Spare-Copy Data ECC",
	[19] = "Correctable Patrol Data ECC",
	[19] = "Correctable Patrol Data ECC",
	[20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status",
	[20] = "FB-DIMM Northbound parity error on FB-DIMM Sync Status",
@@ -259,16 +257,24 @@ char *error_name[] = {
#define FERR_FAT_MASK ERROR_FAT_MASK
#define FERR_FAT_MASK ERROR_FAT_MASK


/* masks for non-fatal error register */
/* masks for non-fatal error register */
#define TO_NF_MASK(a)		(((a) & EMASK_M29) | ((a) >> 3))
static inline int to_nf_mask(unsigned int mask)
#define FROM_NF_FERR(a)		(((a) & EMASK_M29) | (((a) << 3) & ((1 << 30)-1)))
{
	return (mask & EMASK_M29) | (mask >> 3);
};

static inline int from_nf_ferr(unsigned int mask)
{
	return (mask & EMASK_M29) |		/* Bit 28 */
	       (mask & ((1 << 28) - 1) << 3);	/* Bits 0 to 27 */
};


#define FERR_NF_MASK		TO_NF_MASK(ERROR_NF_MASK)
#define FERR_NF_MASK		to_nf_mask(ERROR_NF_MASK)
#define FERR_NF_CORRECTABLE	TO_NF_MASK(ERROR_NF_CORRECTABLE)
#define FERR_NF_CORRECTABLE	to_nf_mask(ERROR_NF_CORRECTABLE)
#define FERR_NF_DIMM_SPARE	TO_NF_MASK(ERROR_NF_DIMM_SPARE)
#define FERR_NF_DIMM_SPARE	to_nf_mask(ERROR_NF_DIMM_SPARE)
#define FERR_NF_SPD_PROTOCOL	TO_NF_MASK(ERROR_NF_SPD_PROTOCOL)
#define FERR_NF_SPD_PROTOCOL	to_nf_mask(ERROR_NF_SPD_PROTOCOL)
#define FERR_NF_NORTH_CRC	TO_NF_MASK(ERROR_NF_NORTH_CRC)
#define FERR_NF_NORTH_CRC	to_nf_mask(ERROR_NF_NORTH_CRC)
#define FERR_NF_RECOVERABLE	TO_NF_MASK(ERROR_NF_RECOVERABLE)
#define FERR_NF_RECOVERABLE	to_nf_mask(ERROR_NF_RECOVERABLE)
#define FERR_NF_UNCORRECTABLE	TO_NF_MASK(ERROR_NF_UNCORRECTABLE)
#define FERR_NF_UNCORRECTABLE	to_nf_mask(ERROR_NF_UNCORRECTABLE)


/* Defines to extract the vaious fields from the
/* Defines to extract the vaious fields from the
 *	MTRx - Memory Technology Registers
 *	MTRx - Memory Technology Registers
@@ -293,7 +299,7 @@ static inline int extract_fbdchan_indx(u32 x)


#ifdef CONFIG_EDAC_DEBUG
#ifdef CONFIG_EDAC_DEBUG
/* MTR NUMROW */
/* MTR NUMROW */
static char *numrow_toString[] = {
static const char *numrow_toString[] = {
	"8,192 - 13 rows",
	"8,192 - 13 rows",
	"16,384 - 14 rows",
	"16,384 - 14 rows",
	"32,768 - 15 rows",
	"32,768 - 15 rows",
@@ -301,7 +307,7 @@ static char *numrow_toString[] = {
};
};


/* MTR NUMCOL */
/* MTR NUMCOL */
static char *numcol_toString[] = {
static const char *numcol_toString[] = {
	"1,024 - 10 columns",
	"1,024 - 10 columns",
	"2,048 - 11 columns",
	"2,048 - 11 columns",
	"4,096 - 12 columns",
	"4,096 - 12 columns",
@@ -370,7 +376,7 @@ struct i5400_error_info {
	u16 recmema;		/* Recoverable Mem Error log A */
	u16 recmema;		/* Recoverable Mem Error log A */
	u32 recmemb;		/* Recoverable Mem Error log B */
	u32 recmemb;		/* Recoverable Mem Error log B */


	/* These registers are input ONLY if there was a Non-Recoverable Error */
	/* These registers are input ONLY if there was a Non-Rec Error */
	u16 nrecmema;		/* Non-Recoverable Mem log A */
	u16 nrecmema;		/* Non-Recoverable Mem log A */
	u16 nrecmemb;		/* Non-Recoverable Mem log B */
	u16 nrecmemb;		/* Non-Recoverable Mem log B */


@@ -560,10 +566,10 @@ static void i5400_proccess_non_recoverable_info(struct mem_ctl_info *mci,


	/* Form out message */
	/* Form out message */
	snprintf(msg, sizeof(msg),
	snprintf(msg, sizeof(msg),
		 "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s RAS=%d CAS=%d "
		 "%s (Branch=%d DRAM-Bank=%d Buffer ID = %d RDWR=%s "
		 "%s Err=0x%lx (%s))",
		 "RAS=%d CAS=%d %s Err=0x%lx (%s))",
		 type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas, type,
		 type, branch >> 1, bank, buf_id, rdwr_str(rdwr), ras, cas,
		 allErrors, error_name[errnum]);
		 type, allErrors, error_name[errnum]);


	/* Call the helper to output message */
	/* Call the helper to output message */
	edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
	edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
@@ -590,7 +596,7 @@ static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,
	int errnum;
	int errnum;


	/* mask off the Error bits that are possible */
	/* mask off the Error bits that are possible */
	allErrors = FROM_NF_FERR(info->ferr_nf_fbd & FERR_NF_MASK);
	allErrors = from_nf_ferr(info->ferr_nf_fbd & FERR_NF_MASK);
	if (!allErrors)
	if (!allErrors)
		return;		/* if no error, return now */
		return;		/* if no error, return now */


@@ -631,10 +637,10 @@ static void i5400_process_nonfatal_error_info(struct mem_ctl_info *mci,


		/* Form out message */
		/* Form out message */
		snprintf(msg, sizeof(msg),
		snprintf(msg, sizeof(msg),
			 "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
			 "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s "
			 "CAS=%d, CE Err=0x%lx (%s))", branch >> 1, bank,
			 "RAS=%d CAS=%d, CE Err=0x%lx (%s))",
			 rdwr_str(rdwr), ras, cas, allErrors,
			 branch >> 1, bank, rdwr_str(rdwr), ras, cas,
			error_name[errnum]);
			 allErrors, error_name[errnum]);


		/* Call the helper to output message */
		/* Call the helper to output message */
		edac_mc_handle_fbd_ce(mci, rank, channel, msg);
		edac_mc_handle_fbd_ce(mci, rank, channel, msg);
@@ -704,16 +710,9 @@ static void i5400_put_devices(struct mem_ctl_info *mci)
	pvt = mci->pvt_info;
	pvt = mci->pvt_info;


	/* Decrement usage count for devices */
	/* Decrement usage count for devices */
	if (pvt->branch_1)
	pci_dev_put(pvt->branch_1);
	pci_dev_put(pvt->branch_1);

	if (pvt->branch_0)
	pci_dev_put(pvt->branch_0);
	pci_dev_put(pvt->branch_0);

	if (pvt->fsb_error_regs)
	pci_dev_put(pvt->fsb_error_regs);
	pci_dev_put(pvt->fsb_error_regs);

	if (pvt->branchmap_werrors)
	pci_dev_put(pvt->branchmap_werrors);
	pci_dev_put(pvt->branchmap_werrors);
}
}


@@ -810,9 +809,9 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
/*
/*
 *	determine_amb_present
 *	determine_amb_present
 *
 *
 *		the information is contained in NUM_MTRS_PER_BRANCH different registers
 *		the information is contained in NUM_MTRS_PER_BRANCH different
 *		determining which of the NUM_MTRS_PER_BRANCH requires knowing
 *		registers determining which of the NUM_MTRS_PER_BRANCH requires
 *		which channel is in question
 *              knowing which channel is in question
 *
 *
 *	2 branches, each with 2 channels
 *	2 branches, each with 2 channels
 *		b0_ambpresent0 for channel '0'
 *		b0_ambpresent0 for channel '0'
@@ -842,7 +841,7 @@ static int determine_amb_present_reg(struct i5400_pvt *pvt, int channel)
/*
/*
 * determine_mtr(pvt, csrow, channel)
 * determine_mtr(pvt, csrow, channel)
 *
 *
 *	return the proper MTR register as determine by the csrow and channel desired
 * return the proper MTR register as determine by the csrow and desired channel
 */
 */
static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel)
static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel)
{
{
@@ -857,7 +856,8 @@ static int determine_mtr(struct i5400_pvt *pvt, int csrow, int channel)
	n = csrow >> 3;
	n = csrow >> 3;


	if (n >= NUM_MTRS_PER_BRANCH) {
	if (n >= NUM_MTRS_PER_BRANCH) {
		debugf0("ERROR: trying to access an invalid csrow: %d\n", csrow);
		debugf0("ERROR: trying to access an invalid csrow: %d\n",
			csrow);
		return 0;
		return 0;
	}
	}


@@ -1251,6 +1251,9 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
	int num_dimms_per_channel;
	int num_dimms_per_channel;
	int num_csrows;
	int num_csrows;


	if (dev_idx >= ARRAY_SIZE(i5400_devs))
		return -EINVAL;

	debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n",
	debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n",
		__func__,
		__func__,
		pdev->bus->number,
		pdev->bus->number,
@@ -1425,7 +1428,7 @@ MODULE_DEVICE_TABLE(pci, i5400_pci_tbl);
 *
 *
 */
 */
static struct pci_driver i5400_driver = {
static struct pci_driver i5400_driver = {
	.name = KBUILD_BASENAME,
	.name = "i5400_edac",
	.probe = i5400_init_one,
	.probe = i5400_init_one,
	.remove = __devexit_p(i5400_remove_one),
	.remove = __devexit_p(i5400_remove_one),
	.id_table = i5400_pci_tbl,
	.id_table = i5400_pci_tbl,
@@ -1463,9 +1466,11 @@ module_init(i5400_init);
module_exit(i5400_exit);
module_exit(i5400_exit);


MODULE_LICENSE("GPL");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ben Woodard <woodard@redhat.com> Red Hat Inc. (http://www.redhat.com)");
MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com> Red Hat Inc. (http://www.redhat.com)");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - " I5400_REVISION);
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - "
		   I5400_REVISION);


module_param(edac_op_state, int, 0444);
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");