Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9eb07a7f authored by Mauro Carvalho Chehab
Browse files

edac: edac_mc_handle_error(): add an error_count parameter



In order to avoid losing error events, it is desirable to group
error events together and generate a single trace for several identical
errors.

The trace API already allows reporting multiple errors. Change the
handle_error function to also allow that.

The changes to the drivers were made with this small script:

	$file .=$_ while (<>);
	$file =~ s/(edac_mc_handle_error)\s*\(([^\,]+)\,([^\,]+)\,/$1($2,$3, 1,/g;
	print $file;

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
parent 03f7eae8
Loading
Loading
Loading
Loading
+11 −11
Original line number Diff line number Diff line
@@ -1046,7 +1046,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
	if (!src_mci) {
		amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
			     (unsigned long)sys_addr);
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
				     page, offset, syndrome,
				     -1, -1, -1,
				     "failed to map error addr to a node",
@@ -1057,7 +1057,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
	/* Now map the sys_addr to a CSROW */
	csrow = sys_addr_to_csrow(src_mci, sys_addr);
	if (csrow < 0) {
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
				     page, offset, syndrome,
				     -1, -1, -1,
				     "failed to map error addr to a csrow",
@@ -1077,7 +1077,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
			amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
				      "possible error reporting race\n",
				      syndrome);
			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
					     page, offset, syndrome,
					     csrow, -1, -1,
					     "unknown syndrome - possible error reporting race",
@@ -1096,7 +1096,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
		channel = ((sys_addr & BIT(3)) != 0);
	}

	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci,
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1,
			     page, offset, syndrome,
			     csrow, channel, -1,
			     "", "");
@@ -1608,7 +1608,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
	csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);

	if (csrow < 0) {
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
				     page, offset, syndrome,
				     -1, -1, -1,
				     "failed to map error addr to a csrow",
@@ -1624,7 +1624,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
	if (dct_ganging_enabled(pvt))
		chan = get_channel_from_ecc_syndrome(mci, syndrome);

	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
			     page, offset, syndrome,
			     csrow, chan, -1,
			     "", "");
@@ -1909,7 +1909,7 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
	/* Ensure that the Error Address is VALID */
	if (!(m->status & MCI_STATUS_ADDRV)) {
		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
				     0, 0, 0,
				     -1, -1, -1,
				     "HW has no ERROR_ADDRESS available",
@@ -1937,7 +1937,7 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)

	if (!(m->status & MCI_STATUS_ADDRV)) {
		amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
				     0, 0, 0,
				     -1, -1, -1,
				     "HW has no ERROR_ADDRESS available",
@@ -1956,7 +1956,7 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
	if (!src_mci) {
		amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
				  (unsigned long)sys_addr);
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
				     page, offset, 0,
				     -1, -1, -1,
				     "ERROR ADDRESS NOT mapped to a MC",
@@ -1970,13 +1970,13 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
	if (csrow < 0) {
		amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
				  (unsigned long)sys_addr);
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
				     page, offset, 0,
				     -1, -1, -1,
				     "ERROR ADDRESS NOT mapped to CS",
				     "");
	} else {
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
				     page, offset, 0,
				     csrow, -1, -1,
				     "", "");
+2 −2
Original line number Diff line number Diff line
@@ -145,7 +145,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,

		if (handle_errors) {
			row = (info->ecc_mode_status >> 4) & 0xf;
			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
					     mci->csrows[row]->first_page, 0, 0,
					     row, 0, -1,
					     mci->ctl_name, "");
@@ -160,7 +160,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,

		if (handle_errors) {
			row = info->ecc_mode_status & 0xf;
			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
					     mci->csrows[row]->first_page, 0, 0,
					     row, 0, -1,
					     mci->ctl_name, "");
+2 −2
Original line number Diff line number Diff line
@@ -48,7 +48,7 @@ static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar)
	syndrome = (ar & 0x000000001fe00000ul) >> 21;

	/* TODO: Decoding of the error address */
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
			     csrow->first_page + pfn, offset, syndrome,
			     0, chan, -1, "", "");
}
@@ -70,7 +70,7 @@ static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
	offset = address & ~PAGE_MASK;

	/* TODO: Decoding of the error address */
	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
			     csrow->first_page + pfn, offset, 0,
			     0, chan, -1, "", "");
}
+2 −2
Original line number Diff line number Diff line
@@ -554,7 +554,7 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
	if (apiexcp & CECC_EXCP_DETECTED) {
		cpc925_mc_printk(mci, KERN_INFO, "DRAM CECC Fault\n");
		channel = cpc925_mc_find_channel(mci, syndrome);
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
				     pfn, offset, syndrome,
				     csrow, channel, -1,
				     mci->ctl_name, "");
@@ -562,7 +562,7 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)

	if (apiexcp & UECC_EXCP_DETECTED) {
		cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n");
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
				     pfn, offset, 0,
				     csrow, -1, -1,
				     mci->ctl_name, "");
+4 −4
Original line number Diff line number Diff line
@@ -371,7 +371,7 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one,
	channel = !(error_one & 1);

	/* e752x mc reads 34:6 of the DRAM linear address */
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
			     page, offset_in_page(sec1_add << 4), sec1_syndrome,
			     row, channel, -1,
			     "e752x CE", "");
@@ -408,7 +408,7 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
			edac_mc_find_csrow_by_page(mci, block_page);

		/* e752x mc reads 34:6 of the DRAM linear address */
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
					block_page,
					offset_in_page(error_2b << 4), 0,
					 row, -1, -1,
@@ -427,7 +427,7 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
			edac_mc_find_csrow_by_page(mci, block_page);

		/* e752x mc reads 34:6 of the DRAM linear address */
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
					block_page,
					offset_in_page(error_2b << 4), 0,
					row, -1, -1,
@@ -454,7 +454,7 @@ static inline void process_ue_no_info_wr(struct mem_ctl_info *mci,
		return;

	edac_dbg(3, "\n");
	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
			     -1, -1, -1,
			     "e752x UE log memory write", "");
}
Loading