Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 87a5af24 authored by Linus Torvalds
Browse files
Pull EDAC internal API changes from Mauro Carvalho Chehab:
 "This changeset is the first part of a series of patches that fixes the
  EDAC subsystem.  This set changes the kernel EDAC API in order to
  properly represent the Intel i3/i5/i7, Xeon 3xxx/5xxx/7xxx, and
  Intel E5-xxxx memory controllers.

  The EDAC core used to assume that:

       - the DRAM chip select pin is directly accessed by the memory
         controller

       - when multiple channels are used, they're all filled with the
         same type of memory.

  Neither of the above premises has been true on Intel memory
  controllers since 2002, when RAMBUS and FB-DIMMs were introduced and
  the Advanced Memory Buffer (or some similar technology) started to
  hide the direct access to the DRAM pins.

  So, the existing drivers for those chipsets had to lie to the EDAC
  core, generally reporting that just one channel is filled.  That
  produces hard-to-understand error messages like:

       EDAC MC0: CE row 3, channel 0, label "DIMM1": 1 Unknown error(s): memory read error on FATAL area : cpu=0 Err=0008:00c2 (ch=2), addr = 0xad1f73480 => socket=0, Channel=0(mask=2), rank=1

  The location information there (row 3, channel 0) is completely bogus:
  it has no physical meaning, and is just a pair of arbitrary values
  that the driver uses to talk to the EDAC core.  The error actually
  happened at CPU socket 0, channel 0, slot 1, but this is not reported
  anywhere, as the EDAC core doesn't know anything about the memory
  layout.  So, only advanced users who know how the EDAC driver works
  and who test their systems to see how DIMMs are mapped can actually
  benefit from such error logs.

  This patch series fixes the error report logic, in order to allow the
  EDAC drivers to expose the memory architecture they use to the EDAC
  core.  So, as the EDAC core now understands how the memory is
  organized, it can provide a useful report:

       EDAC MC0: CE memory read error on DIMM1 (channel:0 slot:1 page:0x364b1b offset:0x600 grain:32 syndrome:0x0 - count:1 area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:4)

  The location of the DIMM where the error happened is reported by "MC0"
  (cpu socket #0), at "channel:0 slot:1" location, and matches the
  physical location of the DIMM.

  There are two remaining issues not covered by this patch series:

       - The EDAC sysfs API will still report bogus values.  So,
         userspace tools like edac-utils will still use the bogus data;

       - Add a new tracepoint-based way to get the binary information
         about the errors.

  Those are on a second series of patches (also at -next), but will
  probably miss the train for 3.5, due to the slow review process."

Fix up trivial conflict (due to spelling correction of removed code) in
drivers/edac/edac_device.c

* git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: (42 commits)
  i7core: fix ranks information at the per-channel struct
  i5000: Fix the fatal error handling
  i5100_edac: Fix a warning when compiled with 32 bits
  i82975x_edac: Test nr_pages earlier to save a few CPU cycles
  e752x_edac: provide more info about how DIMMS/ranks are mapped
  i5000_edac: Fix the logic that retrieves memory information
  i5400_edac: improve debug messages to better represent the filled memory
  edac: Cleanup the logs for i7core and sb edac drivers
  edac: Initialize the dimm label with the known information
  edac: Remove the legacy EDAC ABI
  x38_edac: convert driver to use the new edac ABI
  tile_edac: convert driver to use the new edac ABI
  sb_edac: convert driver to use the new edac ABI
  r82600_edac: convert driver to use the new edac ABI
  ppc4xx_edac: convert driver to use the new edac ABI
  pasemi_edac: convert driver to use the new edac ABI
  mv64x60_edac: convert driver to use the new edac ABI
  mpc85xx_edac: convert driver to use the new edac ABI
  i82975x_edac: convert driver to use the new edac ABI
  i82875p_edac: convert driver to use the new edac ABI
  ...
parents 7e5b2db7 0bf09e82
Loading
Loading
Loading
Loading
+111 −89
Original line number Diff line number Diff line
@@ -715,25 +715,6 @@ static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
				     input_addr_to_dram_addr(mci, input_addr));
}

/*
 * Compute the lowest and highest InputAddr covered by chip select @csrow
 * and hand them back through @input_addr_min and @input_addr_max.
 *
 * @csrow must lie in [0, pvt->csels[0].b_cnt); any other value trips
 * BUG_ON().  The base/mask pair comes from get_cs_base_and_mask(), called
 * with 0 as its third argument.
 */
static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
			      u64 *input_addr_min, u64 *input_addr_max)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u64 cs_base, cs_mask;

	BUG_ON((csrow < 0) || (csrow >= pvt->csels[0].b_cnt));

	get_cs_base_and_mask(pvt, csrow, 0, &cs_base, &cs_mask);

	/* Clearing the mask bits gives the minimum, setting them the maximum. */
	*input_addr_min = cs_base & ~cs_mask;
	*input_addr_max = cs_base | cs_mask;
}

/* Map the Error address to a PAGE and PAGE OFFSET. */
static inline void error_address_to_page_and_offset(u64 error_address,
						    u32 *page, u32 *offset)
@@ -1058,6 +1039,37 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
	int channel, csrow;
	u32 page, offset;

	error_address_to_page_and_offset(sys_addr, &page, &offset);

	/*
	 * Find out which node the error address belongs to. This may be
	 * different from the node that detected the error.
	 */
	src_mci = find_mc_by_sys_addr(mci, sys_addr);
	if (!src_mci) {
		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
			     (unsigned long)sys_addr);
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				     page, offset, syndrome,
				     -1, -1, -1,
				     EDAC_MOD_STR,
				     "failed to map error addr to a node",
				     NULL);
		return;
	}

	/* Now map the sys_addr to a CSROW */
	csrow = sys_addr_to_csrow(src_mci, sys_addr);
	if (csrow < 0) {
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				     page, offset, syndrome,
				     -1, -1, -1,
				     EDAC_MOD_STR,
				     "failed to map error addr to a csrow",
				     NULL);
		return;
	}

	/* CHIPKILL enabled */
	if (pvt->nbcfg & NBCFG_CHIPKILL) {
		channel = get_channel_from_ecc_syndrome(mci, syndrome);
@@ -1067,9 +1079,15 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
			 * 2 DIMMs is in error. So we need to ID 'both' of them
			 * as suspect.
			 */
			amd64_mc_warn(mci, "unknown syndrome 0x%04x - possible "
					   "error reporting race\n", syndrome);
			edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
			amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
				      "possible error reporting race\n",
				      syndrome);
			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
					     page, offset, syndrome,
					     csrow, -1, -1,
					     EDAC_MOD_STR,
					     "unknown syndrome - possible error reporting race",
					     NULL);
			return;
		}
	} else {
@@ -1084,28 +1102,10 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
		channel = ((sys_addr & BIT(3)) != 0);
	}

	/*
	 * Find out which node the error address belongs to. This may be
	 * different from the node that detected the error.
	 */
	src_mci = find_mc_by_sys_addr(mci, sys_addr);
	if (!src_mci) {
		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
			     (unsigned long)sys_addr);
		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
		return;
	}

	/* Now map the sys_addr to a CSROW */
	csrow = sys_addr_to_csrow(src_mci, sys_addr);
	if (csrow < 0) {
		edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR);
	} else {
		error_address_to_page_and_offset(sys_addr, &page, &offset);

		edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow,
				  channel, EDAC_MOD_STR);
	}
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci,
			     page, offset, syndrome,
			     csrow, channel, -1,
			     EDAC_MOD_STR, "", NULL);
}

static int ddr2_cs_size(unsigned i, bool dct_width)
@@ -1611,15 +1611,20 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
	u32 page, offset;
	int nid, csrow, chan = 0;

	error_address_to_page_and_offset(sys_addr, &page, &offset);

	csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);

	if (csrow < 0) {
		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				     page, offset, syndrome,
				     -1, -1, -1,
				     EDAC_MOD_STR,
				     "failed to map error addr to a csrow",
				     NULL);
		return;
	}

	error_address_to_page_and_offset(sys_addr, &page, &offset);

	/*
	 * We need the syndromes for channel detection only when we're
	 * ganged. Otherwise @chan should already contain the channel at
@@ -1628,16 +1633,10 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
	if (dct_ganging_enabled(pvt))
		chan = get_channel_from_ecc_syndrome(mci, syndrome);

	if (chan >= 0)
		edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan,
				  EDAC_MOD_STR);
	else
		/*
		 * Channel unknown, report all channels on this CSROW as failed.
		 */
		for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++)
			edac_mc_handle_ce(mci, page, offset, syndrome,
					  csrow, chan, EDAC_MOD_STR);
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
			     page, offset, syndrome,
			     csrow, chan, -1,
			     EDAC_MOD_STR, "", NULL);
}

/*
@@ -1918,7 +1917,12 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
	/* Ensure that the Error Address is VALID */
	if (!(m->status & MCI_STATUS_ADDRV)) {
		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
		edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				     0, 0, 0,
				     -1, -1, -1,
				     EDAC_MOD_STR,
				     "HW has no ERROR_ADDRESS available",
				     NULL);
		return;
	}

@@ -1942,11 +1946,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)

	if (!(m->status & MCI_STATUS_ADDRV)) {
		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				     0, 0, 0,
				     -1, -1, -1,
				     EDAC_MOD_STR,
				     "HW has no ERROR_ADDRESS available",
				     NULL);
		return;
	}

	sys_addr = get_error_address(m);
	error_address_to_page_and_offset(sys_addr, &page, &offset);

	/*
	 * Find out which node the error address belongs to. This may be
@@ -1956,7 +1966,11 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
	if (!src_mci) {
		amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
				  (unsigned long)sys_addr);
		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				     page, offset, 0,
				     -1, -1, -1,
				     EDAC_MOD_STR,
				     "ERROR ADDRESS NOT mapped to a MC", NULL);
		return;
	}

@@ -1966,10 +1980,17 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
	if (csrow < 0) {
		amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
				  (unsigned long)sys_addr);
		edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				     page, offset, 0,
				     -1, -1, -1,
				     EDAC_MOD_STR,
				     "ERROR ADDRESS NOT mapped to CS",
				     NULL);
	} else {
		error_address_to_page_and_offset(sys_addr, &page, &offset);
		edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR);
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				     page, offset, 0,
				     csrow, -1, -1,
				     EDAC_MOD_STR, "", NULL);
	}
}

@@ -2171,7 +2192,7 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
	nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);

	debugf0("  (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode);
	debugf0("    nr_pages= %u  channel-count = %d\n",
	debugf0("    nr_pages/channel= %u  channel-count = %d\n",
		nr_pages, pvt->channel_count);

	return nr_pages;
@@ -2185,9 +2206,12 @@ static int init_csrows(struct mem_ctl_info *mci)
{
	struct csrow_info *csrow;
	struct amd64_pvt *pvt = mci->pvt_info;
	u64 input_addr_min, input_addr_max, sys_addr, base, mask;
	u64 base, mask;
	u32 val;
	int i, empty = 1;
	int i, j, empty = 1;
	enum mem_type mtype;
	enum edac_type edac_mode;
	int nr_pages = 0;

	amd64_read_pci_cfg(pvt->F3, NBCFG, &val);

@@ -2211,41 +2235,32 @@ static int init_csrows(struct mem_ctl_info *mci)

		empty = 0;
		if (csrow_enabled(i, 0, pvt))
			csrow->nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
			nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
		if (csrow_enabled(i, 1, pvt))
			csrow->nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
		find_csrow_limits(mci, i, &input_addr_min, &input_addr_max);
		sys_addr = input_addr_to_sys_addr(mci, input_addr_min);
		csrow->first_page = (u32) (sys_addr >> PAGE_SHIFT);
		sys_addr = input_addr_to_sys_addr(mci, input_addr_max);
		csrow->last_page = (u32) (sys_addr >> PAGE_SHIFT);
			nr_pages += amd64_csrow_nr_pages(pvt, 1, i);

		get_cs_base_and_mask(pvt, i, 0, &base, &mask);
		csrow->page_mask = ~mask;
		/* 8 bytes of resolution */

		csrow->mtype = amd64_determine_memory_type(pvt, i);
		mtype = amd64_determine_memory_type(pvt, i);

		debugf1("  for MC node %d csrow %d:\n", pvt->mc_node_id, i);
		debugf1("    input_addr_min: 0x%lx input_addr_max: 0x%lx\n",
			(unsigned long)input_addr_min,
			(unsigned long)input_addr_max);
		debugf1("    sys_addr: 0x%lx  page_mask: 0x%lx\n",
			(unsigned long)sys_addr, csrow->page_mask);
		debugf1("    nr_pages: %u  first_page: 0x%lx "
			"last_page: 0x%lx\n",
			(unsigned)csrow->nr_pages,
			csrow->first_page, csrow->last_page);
		debugf1("    nr_pages: %u\n", nr_pages * pvt->channel_count);

		/*
		 * determine whether CHIPKILL or JUST ECC or NO ECC is operating
		 */
		if (pvt->nbcfg & NBCFG_ECC_ENABLE)
			csrow->edac_mode =
			    (pvt->nbcfg & NBCFG_CHIPKILL) ?
			edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ?
				    EDAC_S4ECD4ED : EDAC_SECDED;
		else
			csrow->edac_mode = EDAC_NONE;
			edac_mode = EDAC_NONE;

		for (j = 0; j < pvt->channel_count; j++) {
			csrow->channels[j].dimm->mtype = mtype;
			csrow->channels[j].dimm->edac_mode = edac_mode;
			csrow->channels[j].dimm->nr_pages = nr_pages;
		}
	}

	return empty;
@@ -2540,6 +2555,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
	struct amd64_pvt *pvt = NULL;
	struct amd64_family_type *fam_type = NULL;
	struct mem_ctl_info *mci = NULL;
	struct edac_mc_layer layers[2];
	int err = 0, ret;
	u8 nid = get_node_id(F2);

@@ -2574,7 +2590,13 @@ static int amd64_init_one_instance(struct pci_dev *F2)
		goto err_siblings;

	ret = -ENOMEM;
	mci = edac_mc_alloc(0, pvt->csels[0].b_cnt, pvt->channel_count, nid);
	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = pvt->csels[0].b_cnt;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = pvt->channel_count;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
	if (!mci)
		goto err_siblings;

+27 −15
Original line number Diff line number Diff line
@@ -29,7 +29,6 @@
	edac_mc_chipset_printk(mci, level, "amd76x", fmt, ##arg)

#define AMD76X_NR_CSROWS 8
#define AMD76X_NR_CHANS  1
#define AMD76X_NR_DIMMS  4

/* AMD 76x register addresses - device 0 function 0 - PCI bridge */
@@ -146,8 +145,10 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,

		if (handle_errors) {
			row = (info->ecc_mode_status >> 4) & 0xf;
			edac_mc_handle_ue(mci, mci->csrows[row].first_page, 0,
					row, mci->ctl_name);
			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
					     mci->csrows[row].first_page, 0, 0,
					     row, 0, -1,
					     mci->ctl_name, "", NULL);
		}
	}

@@ -159,8 +160,10 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,

		if (handle_errors) {
			row = info->ecc_mode_status & 0xf;
			edac_mc_handle_ce(mci, mci->csrows[row].first_page, 0,
					0, row, 0, mci->ctl_name);
			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
					     mci->csrows[row].first_page, 0, 0,
					     row, 0, -1,
					     mci->ctl_name, "", NULL);
		}
	}

@@ -186,11 +189,13 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
			enum edac_type edac_mode)
{
	struct csrow_info *csrow;
	struct dimm_info *dimm;
	u32 mba, mba_base, mba_mask, dms;
	int index;

	for (index = 0; index < mci->nr_csrows; index++) {
		csrow = &mci->csrows[index];
		dimm = csrow->channels[0].dimm;

		/* find the DRAM Chip Select Base address and mask */
		pci_read_config_dword(pdev,
@@ -203,13 +208,13 @@ static void amd76x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
		mba_mask = ((mba & 0xff80) << 16) | 0x7fffffUL;
		pci_read_config_dword(pdev, AMD76X_DRAM_MODE_STATUS, &dms);
		csrow->first_page = mba_base >> PAGE_SHIFT;
		csrow->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
		dimm->nr_pages = (mba_mask + 1) >> PAGE_SHIFT;
		csrow->last_page = csrow->first_page + dimm->nr_pages - 1;
		csrow->page_mask = mba_mask >> PAGE_SHIFT;
		csrow->grain = csrow->nr_pages << PAGE_SHIFT;
		csrow->mtype = MEM_RDDR;
		csrow->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
		csrow->edac_mode = edac_mode;
		dimm->grain = dimm->nr_pages << PAGE_SHIFT;
		dimm->mtype = MEM_RDDR;
		dimm->dtype = ((dms >> index) & 0x1) ? DEV_X4 : DEV_UNKNOWN;
		dimm->edac_mode = edac_mode;
	}
}

@@ -230,7 +235,8 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx)
		EDAC_SECDED,
		EDAC_SECDED
	};
	struct mem_ctl_info *mci = NULL;
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	u32 ems;
	u32 ems_mode;
	struct amd76x_error_info discard;
@@ -238,11 +244,17 @@ static int amd76x_probe1(struct pci_dev *pdev, int dev_idx)
	debugf0("%s()\n", __func__);
	pci_read_config_dword(pdev, AMD76X_ECC_MODE_STATUS, &ems);
	ems_mode = (ems >> 10) & 0x3;
	mci = edac_mc_alloc(0, AMD76X_NR_CSROWS, AMD76X_NR_CHANS, 0);

	if (mci == NULL) {
	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = AMD76X_NR_CSROWS;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = 1;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);

	if (mci == NULL)
		return -ENOMEM;
	}

	debugf0("%s(): mci = %p\n", __func__, mci);
	mci->dev = &pdev->dev;
+31 −11
Original line number Diff line number Diff line
@@ -48,8 +48,9 @@ static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar)
	syndrome = (ar & 0x000000001fe00000ul) >> 21;

	/* TODO: Decoding of the error address */
	edac_mc_handle_ce(mci, csrow->first_page + pfn, offset,
			  syndrome, 0, chan, "");
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
			     csrow->first_page + pfn, offset, syndrome,
			     0, chan, -1, "", "", NULL);
}

static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
@@ -69,7 +70,9 @@ static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
	offset = address & ~PAGE_MASK;

	/* TODO: Decoding of the error address */
	edac_mc_handle_ue(mci, csrow->first_page + pfn, offset, 0, "");
	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
			     csrow->first_page + pfn, offset, 0,
			     0, chan, -1, "", "", NULL);
}

static void cell_edac_check(struct mem_ctl_info *mci)
@@ -124,8 +127,11 @@ static void cell_edac_check(struct mem_ctl_info *mci)
static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
{
	struct csrow_info		*csrow = &mci->csrows[0];
	struct dimm_info		*dimm;
	struct cell_edac_priv		*priv = mci->pvt_info;
	struct device_node		*np;
	int				j;
	u32				nr_pages;

	for (np = NULL;
	     (np = of_find_node_by_name(np, "memory")) != NULL;) {
@@ -140,15 +146,20 @@ static void __devinit cell_edac_init_csrows(struct mem_ctl_info *mci)
		if (of_node_to_nid(np) != priv->node)
			continue;
		csrow->first_page = r.start >> PAGE_SHIFT;
		csrow->nr_pages = resource_size(&r) >> PAGE_SHIFT;
		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
		csrow->mtype = MEM_XDR;
		csrow->edac_mode = EDAC_SECDED;
		nr_pages = resource_size(&r) >> PAGE_SHIFT;
		csrow->last_page = csrow->first_page + nr_pages - 1;

		for (j = 0; j < csrow->nr_channels; j++) {
			dimm = csrow->channels[j].dimm;
			dimm->mtype = MEM_XDR;
			dimm->edac_mode = EDAC_SECDED;
			dimm->nr_pages = nr_pages / csrow->nr_channels;
		}
		dev_dbg(mci->dev,
			"Initialized on node %d, chanmask=0x%x,"
			" first_page=0x%lx, nr_pages=0x%x\n",
			priv->node, priv->chanmask,
			csrow->first_page, csrow->nr_pages);
			csrow->first_page, nr_pages);
		break;
	}
}
@@ -157,9 +168,10 @@ static int __devinit cell_edac_probe(struct platform_device *pdev)
{
	struct cbe_mic_tm_regs __iomem	*regs;
	struct mem_ctl_info		*mci;
	struct edac_mc_layer		layers[2];
	struct cell_edac_priv		*priv;
	u64				reg;
	int				rc, chanmask;
	int				rc, chanmask, num_chans;

	regs = cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(pdev->id));
	if (regs == NULL)
@@ -184,8 +196,16 @@ static int __devinit cell_edac_probe(struct platform_device *pdev)
		in_be64(&regs->mic_fir));

	/* Allocate & init EDAC MC data structure */
	mci = edac_mc_alloc(sizeof(struct cell_edac_priv), 1,
			    chanmask == 3 ? 2 : 1, pdev->id);
	num_chans = chanmask == 3 ? 2 : 1;

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = 1;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = num_chans;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers,
			    sizeof(struct cell_edac_priv));
	if (mci == NULL)
		return -ENOMEM;
	priv = mci->pvt_info;
+55 −36
Original line number Diff line number Diff line
@@ -329,9 +329,10 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci)
{
	struct cpc925_mc_pdata *pdata = mci->pvt_info;
	struct csrow_info *csrow;
	int index;
	struct dimm_info *dimm;
	int index, j;
	u32 mbmr, mbbar, bba;
	unsigned long row_size, last_nr_pages = 0;
	unsigned long row_size, nr_pages, last_nr_pages = 0;

	get_total_mem(pdata);

@@ -350,20 +351,24 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci)

		row_size = bba * (1UL << 28);	/* 256M */
		csrow->first_page = last_nr_pages;
		csrow->nr_pages = row_size >> PAGE_SHIFT;
		csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
		nr_pages = row_size >> PAGE_SHIFT;
		csrow->last_page = csrow->first_page + nr_pages - 1;
		last_nr_pages = csrow->last_page + 1;

		csrow->mtype = MEM_RDDR;
		csrow->edac_mode = EDAC_SECDED;
		for (j = 0; j < csrow->nr_channels; j++) {
			dimm = csrow->channels[j].dimm;

			dimm->nr_pages = nr_pages / csrow->nr_channels;
			dimm->mtype = MEM_RDDR;
			dimm->edac_mode = EDAC_SECDED;

			switch (csrow->nr_channels) {
			case 1: /* Single channel */
			csrow->grain = 32; /* four-beat burst of 32 bytes */
				dimm->grain = 32; /* four-beat burst of 32 bytes */
				break;
			case 2: /* Dual channel */
			default:
			csrow->grain = 64; /* four-beat burst of 64 bytes */
				dimm->grain = 64; /* four-beat burst of 64 bytes */
				break;
			}

@@ -371,18 +376,19 @@ static void cpc925_init_csrows(struct mem_ctl_info *mci)
			case 6: /* 0110, no way to differentiate X8 VS X16 */
			case 5:	/* 0101 */
			case 8: /* 1000 */
			csrow->dtype = DEV_X16;
				dimm->dtype = DEV_X16;
				break;
			case 7: /* 0111 */
			case 9: /* 1001 */
			csrow->dtype = DEV_X8;
				dimm->dtype = DEV_X8;
				break;
			default:
			csrow->dtype = DEV_UNKNOWN;
				dimm->dtype = DEV_UNKNOWN;
				break;
			}
		}
	}
}

/* Enable memory controller ECC detection */
static void cpc925_mc_init(struct mem_ctl_info *mci)
@@ -549,13 +555,18 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
	if (apiexcp & CECC_EXCP_DETECTED) {
		cpc925_mc_printk(mci, KERN_INFO, "DRAM CECC Fault\n");
		channel = cpc925_mc_find_channel(mci, syndrome);
		edac_mc_handle_ce(mci, pfn, offset, syndrome,
				  csrow, channel, mci->ctl_name);
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				     pfn, offset, syndrome,
				     csrow, channel, -1,
				     mci->ctl_name, "", NULL);
	}

	if (apiexcp & UECC_EXCP_DETECTED) {
		cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n");
		edac_mc_handle_ue(mci, pfn, offset, csrow, mci->ctl_name);
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				     pfn, offset, 0,
				     csrow, -1, -1,
				     mci->ctl_name, "", NULL);
	}

	cpc925_mc_printk(mci, KERN_INFO, "Dump registers:\n");
@@ -927,6 +938,7 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
{
	static int edac_mc_idx;
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	void __iomem *vbase;
	struct cpc925_mc_pdata *pdata;
	struct resource *r;
@@ -962,9 +974,16 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
		goto err2;
	}

	nr_channels = cpc925_mc_get_channels(vbase);
	mci = edac_mc_alloc(sizeof(struct cpc925_mc_pdata),
			CPC925_NR_CSROWS, nr_channels + 1, edac_mc_idx);
	nr_channels = cpc925_mc_get_channels(vbase) + 1;

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = CPC925_NR_CSROWS;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = nr_channels;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
			    sizeof(struct cpc925_mc_pdata));
	if (!mci) {
		cpc925_printk(KERN_ERR, "No memory for mem_ctl_info\n");
		res = -ENOMEM;
+79 −37
Original line number Diff line number Diff line
@@ -4,7 +4,11 @@
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * See "enum e752x_chips" below for supported chipsets
 * Implement support for the e7520, E7525, e7320 and i3100 memory controllers.
 *
 * Datasheets:
 *	http://www.intel.in/content/www/in/en/chipsets/e7525-memory-controller-hub-datasheet.html
 *	ftp://download.intel.com/design/intarch/datashts/31345803.pdf
 *
 * Written by Tom Zimmerman
 *
@@ -13,8 +17,6 @@
 * 	Wang Zhenyu at intel.com
 * 	Dave Jiang at mvista.com
 *
 * $Id: edac_e752x.c,v 1.5.2.11 2005/10/05 00:43:44 dsp_llnl Exp $
 *
 */

#include <linux/module.h>
@@ -187,6 +189,25 @@ enum e752x_chips {
	I3100 = 3
};

/*
 * These chips support single-rank and dual-rank memories only.
 *
 * On e752x chips, the odd rows are present only on dual-rank memories.
 * Dividing the rank by two will provide the dimm#
 *
 * i3100 MC has a different mapping: it supports only 4 ranks.
 *
 * The mapping is (from 1 to n):
 *	slot	   single-ranked	double-ranked
 *	dimm #1 -> rank #4		NA
 *	dimm #2 -> rank #3		NA
 *	dimm #3 -> rank #2		Ranks 2 and 3
 *	dimm #4 -> rank #1		Ranks 1 and 4
 *
 * FIXME: The current mapping for i3100 assumes that it supports up to 8
 *	  ranks/channel, but the datasheet says that the MC supports only 4 ranks.
 */

struct e752x_pvt {
	struct pci_dev *bridge_ck;
	struct pci_dev *dev_d0f0;
@@ -350,8 +371,10 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one,
	channel = !(error_one & 1);

	/* e752x mc reads 34:6 of the DRAM linear address */
	edac_mc_handle_ce(mci, page, offset_in_page(sec1_add << 4),
			sec1_syndrome, row, channel, "e752x CE");
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
			     page, offset_in_page(sec1_add << 4), sec1_syndrome,
			     row, channel, -1,
			     "e752x CE", "", NULL);
}

static inline void process_ce(struct mem_ctl_info *mci, u16 error_one,
@@ -385,9 +408,12 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
			edac_mc_find_csrow_by_page(mci, block_page);

		/* e752x mc reads 34:6 of the DRAM linear address */
		edac_mc_handle_ue(mci, block_page,
				offset_in_page(error_2b << 4),
				row, "e752x UE from Read");
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
					block_page,
					offset_in_page(error_2b << 4), 0,
					 row, -1, -1,
					"e752x UE from Read", "", NULL);

	}
	if (error_one & 0x0404) {
		error_2b = scrb_add;
@@ -401,9 +427,11 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
			edac_mc_find_csrow_by_page(mci, block_page);

		/* e752x mc reads 34:6 of the DRAM linear address */
		edac_mc_handle_ue(mci, block_page,
				offset_in_page(error_2b << 4),
				row, "e752x UE from Scruber");
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
					block_page,
					offset_in_page(error_2b << 4), 0,
					row, -1, -1,
					"e752x UE from Scruber", "", NULL);
	}
}

@@ -426,7 +454,9 @@ static inline void process_ue_no_info_wr(struct mem_ctl_info *mci,
		return;

	debugf3("%s()\n", __func__);
	edac_mc_handle_ue_no_info(mci, "e752x UE log memory write");
	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
			     -1, -1, -1,
			     "e752x UE log memory write", "", NULL);
}

static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error,
@@ -1044,7 +1074,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
	int drc_drbg;		/* DRB granularity 0=64mb, 1=128mb */
	int drc_ddim;		/* DRAM Data Integrity Mode 0=none, 2=edac */
	u8 value;
	u32 dra, drc, cumul_size;
	u32 dra, drc, cumul_size, i, nr_pages;

	dra = 0;
	for (index = 0; index < 4; index++) {
@@ -1053,7 +1083,7 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
		dra |= dra_reg << (index * 8);
	}
	pci_read_config_dword(pdev, E752X_DRC, &drc);
	drc_chan = dual_channel_active(ddrcsr);
	drc_chan = dual_channel_active(ddrcsr) ? 1 : 0;
	drc_drbg = drc_chan + 1;	/* 128 in dual mode, 64 in single */
	drc_ddim = (drc >> 20) & 0x3;

@@ -1078,11 +1108,17 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,

		csrow->first_page = last_cumul_size;
		csrow->last_page = cumul_size - 1;
		csrow->nr_pages = cumul_size - last_cumul_size;
		nr_pages = cumul_size - last_cumul_size;
		last_cumul_size = cumul_size;
		csrow->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
		csrow->mtype = MEM_RDDR;	/* only one type supported */
		csrow->dtype = mem_dev ? DEV_X4 : DEV_X8;

		for (i = 0; i < csrow->nr_channels; i++) {
			struct dimm_info *dimm = csrow->channels[i].dimm;

			debugf3("Initializing rank at (%i,%i)\n", index, i);
			dimm->nr_pages = nr_pages / csrow->nr_channels;
			dimm->grain = 1 << 12;	/* 4KiB - resolution of CELOG */
			dimm->mtype = MEM_RDDR;	/* only one type supported */
			dimm->dtype = mem_dev ? DEV_X4 : DEV_X8;

			/*
			* if single channel or x8 devices then SECDED
@@ -1090,14 +1126,15 @@ static void e752x_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev,
			*/
			if (drc_ddim) {
				if (drc_chan && mem_dev) {
				csrow->edac_mode = EDAC_S4ECD4ED;
					dimm->edac_mode = EDAC_S4ECD4ED;
					mci->edac_cap |= EDAC_FLAG_S4ECD4ED;
				} else {
				csrow->edac_mode = EDAC_SECDED;
					dimm->edac_mode = EDAC_SECDED;
					mci->edac_cap |= EDAC_FLAG_SECDED;
				}
			} else
			csrow->edac_mode = EDAC_NONE;
				dimm->edac_mode = EDAC_NONE;
		}
	}
}

@@ -1226,6 +1263,7 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
	u16 pci_data;
	u8 stat8;
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct e752x_pvt *pvt;
	u16 ddrcsr;
	int drc_chan;		/* Number of channels 0=1chan,1=2chan */
@@ -1252,11 +1290,15 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
	/* Dual channel = 1, Single channel = 0 */
	drc_chan = dual_channel_active(ddrcsr);

	mci = edac_mc_alloc(sizeof(*pvt), E752X_NR_CSROWS, drc_chan + 1, 0);

	if (mci == NULL) {
	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = E752X_NR_CSROWS;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = drc_chan + 1;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
	if (mci == NULL)
		return -ENOMEM;
	}

	debugf3("%s(): init mci\n", __func__);
	mci->mtype_cap = MEM_FLAG_RDDR;
Loading