Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 713ad546 authored by Yazen Ghannam's avatar Yazen Ghannam Committed by Borislav Petkov
Browse files

EDAC, amd64: Define and register UMC error decode function



How we need to decode UMC errors is different from how we decode bus
errors, so let's define a new function for this. We also need a way to
determine the UMC channel since we're not guaranteed that there is a
fixed relation between channel and MCA bank.

Signed-off-by: default avatarYazen Ghannam <Yazen.Ghannam@amd.com>
Cc: Aravind Gopalakrishnan <aravindksg.lkml@gmail.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/1480359593-80369-1-git-send-email-Yazen.Ghannam@amd.com


[ Fold in decode_synd_reg(), simplify. ]
Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
parent d27f3a34
Loading
Loading
Loading
Loading
+86 −3
Original line number Diff line number Diff line
@@ -2392,7 +2392,13 @@ static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
		string = "Failed to map error addr to a csrow";
		break;
	case ERR_CHANNEL:
		string = "unknown syndrome - possible error reporting race";
		string = "Unknown syndrome - possible error reporting race";
		break;
	case ERR_SYND:
		string = "MCA_SYND not valid - unknown syndrome and csrow";
		break;
	case ERR_NORM_ADDR:
		string = "Cannot decode normalized address";
		break;
	default:
		string = "WTF error";
@@ -2441,6 +2447,76 @@ static inline void decode_bus_error(int node_id, struct mce *m)
	__log_ecc_error(mci, &err, ecc_type);
}

/*
 * To find the UMC channel represented by this bank we need to match on its
 * instance_id. The instance_id of a bank is held in the lower 32 bits of its
 * IPID.
 */
static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
{
	u32 umc_instance_id[] = {0x50f00, 0x150f00};
	u32 instance_id = m->ipid & GENMASK(31, 0);
	int i, channel = -1;

	for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
		if (umc_instance_id[i] == instance_id)
			channel = i;

	return channel;
}

static void decode_umc_error(int node_id, struct mce *m)
{
	u8 ecc_type = (m->status >> 45) & 0x3;
	struct mem_ctl_info *mci;
	struct amd64_pvt *pvt;
	struct err_info err;
	u64 sys_addr;

	mci = edac_mc_find(node_id);
	if (!mci)
		return;

	pvt = mci->pvt_info;

	memset(&err, 0, sizeof(err));

	if (m->status & MCI_STATUS_DEFERRED)
		ecc_type = 3;

	err.channel = find_umc_channel(pvt, m);
	if (err.channel < 0) {
		err.err_code = ERR_CHANNEL;
		goto log_error;
	}

	if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
		err.err_code = ERR_NORM_ADDR;
		goto log_error;
	}

	error_address_to_page_and_offset(sys_addr, &err);

	if (!(m->status & MCI_STATUS_SYNDV)) {
		err.err_code = ERR_SYND;
		goto log_error;
	}

	if (ecc_type == 2) {
		u8 length = (m->synd >> 18) & 0x3f;

		if (length)
			err.syndrome = (m->synd >> 32) & GENMASK(length - 1, 0);
		else
			err.err_code = ERR_CHANNEL;
	}

	err.csrow = m->synd & 0x7;

log_error:
	__log_ecc_error(mci, &err, ecc_type);
}

/*
 * Use pvt->F3 which contains the F3 CPU PCI device to get the related
 * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
@@ -3232,6 +3308,9 @@ static int init_one_instance(unsigned int nid)
	if (report_gart_errors)
		amd_report_gart_errors(true);

	if (pvt->umc)
		amd_register_ecc_decoder(decode_umc_error);
	else
		amd_register_ecc_decoder(decode_bus_error);

	return 0;
@@ -3323,6 +3402,10 @@ static void remove_one_instance(unsigned int nid)

	/* unregister from EDAC MCE */
	amd_report_gart_errors(false);

	if (pvt->umc)
		amd_unregister_ecc_decoder(decode_umc_error);
	else
		amd_unregister_ecc_decoder(decode_bus_error);

	kfree(ecc_stngs[nid]);
+2 −0
Original line number Diff line number Diff line
@@ -382,6 +382,8 @@ enum err_codes {
	ERR_NODE	= -1,
	ERR_CSROW	= -2,
	ERR_CHANNEL	= -3,
	ERR_SYND	= -4,
	ERR_NORM_ADDR	= -5,
};

struct err_info {