Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0b21f21a authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull more EDAC updates from Borislav Petkov:
 "The second part of the EDAC pile which contains the ADXL user and a
  build fix which addresses a not-so-sensical .config but fixes
  randconfig builds people do:

   - skx_edac: Address translation for NVDIMMs (Tony Luck and Qiuxu Zhuo)

   - ACPI_ADXL build fix"

[ I don't think "sensical" is a word, particularly when used in the
  context of actually meaning "nonsensical", but I like it   - Linus ]

* tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  EDAC, skx: Fix randconfig builds
  EDAC, skx_edac: Add address translation for non-volatile DIMMs
parents 54480aa7 a324e939
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -234,6 +234,7 @@ config EDAC_SKX
	depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
	depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
	select DMI
	select ACPI_ADXL if ACPI
	help
	  Support for error detection and correction on the Intel
	  Skylake server Integrated Memory Controllers. If your
+180 −13
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/bitmap.h>
#include <linux/math64.h>
#include <linux/mod_devicetable.h>
#include <linux/adxl.h>
#include <acpi/nfit.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
@@ -35,6 +36,7 @@
#include "edac_module.h"

#define EDAC_MOD_STR    "skx_edac"
#define MSG_SIZE	1024

/*
 * Debug macros
@@ -54,6 +56,29 @@
static LIST_HEAD(skx_edac_list);

/* Address bounds consulted by skx_adxl_decode() to reject out-of-range addresses. */
static u64 skx_tolm, skx_tohm;
/* MSG_SIZE-byte buffer allocated in skx_init(); holds the formatted error report. */
static char *skx_msg;
/* Incremented by get_nvdimm_info(); non-zero enables the ADXL setup in skx_init(). */
static unsigned int nvdimm_count;

/*
 * Positions of the address components this driver requires from ADXL.
 * Used as indices into component_names[] and component_indices[].
 */
enum {
	INDEX_SOCKET,
	INDEX_MEMCTRL,
	INDEX_CHANNEL,
	INDEX_DIMM,
	INDEX_MAX
};

/*
 * Component names that skx_adxl_get() looks up (exact strcmp match) in the
 * list returned by adxl_get_component_names().
 */
static const char * const component_names[] = {
	[INDEX_SOCKET]	= "ProcessorSocketId",
	[INDEX_MEMCTRL]	= "MemoryControllerId",
	[INDEX_CHANNEL]	= "ChannelId",
	[INDEX_DIMM]	= "DimmSlotId",
};

/* For each INDEX_*, the position of that component in the ADXL name list. */
static int component_indices[ARRAY_SIZE(component_names)];
/* Number of ADXL components; zero means ADXL decoding is not in use. */
static int adxl_component_count;
/* Name list obtained from adxl_get_component_names(). */
static const char * const *adxl_component_names;
/* Array of adxl_component_count values filled in by adxl_decode(). */
static u64 *adxl_values;
/* MSG_SIZE-byte buffer holding the "name:value" text built by skx_adxl_decode(). */
static char *adxl_msg;

#define NUM_IMC			2	/* memory controllers per socket */
#define NUM_CHANNELS		3	/* channels per memory controller */
@@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
	u16 flags;
	u64 size = 0;

	nvdimm_count++;

	dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
						   imc->src_id, 0);

@@ -941,12 +968,46 @@ static void teardown_skx_debug(void)
}
#endif /*CONFIG_EDAC_DEBUG*/

/*
 * skx_adxl_decode:
 *	Translate res->addr with adxl_decode() and fill in the socket, imc,
 *	channel and dimm fields of @res from the decoded component values.
 *	Also formats every valid component as " name:0xvalue" into adxl_msg
 *	for later inclusion in the error report.
 *
 *	Returns true on success, false when the address lies outside the
 *	range bounded by skx_tolm/skx_tohm or when adxl_decode() fails.
 */
static bool skx_adxl_decode(struct decoded_addr *res)
{
	int i, len = 0;

	if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
				      res->addr < BIT_ULL(32))) {
		edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
		return false;
	}

	if (adxl_decode(res->addr, adxl_values)) {
		edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
		return false;
	}

	res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
	res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
	res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
	res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];

	/*
	 * Start from an empty message: if every component below is skipped,
	 * the text left over from a previous error must not be reported.
	 */
	adxl_msg[0] = '\0';

	for (i = 0; i < adxl_component_count; i++) {
		/* All-ones entries are skipped and not printed. */
		if (adxl_values[i] == ~0x0ull)
			continue;

		len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
				adxl_component_names[i], adxl_values[i]);
		/* snprintf() returns the untruncated length; stop once full. */
		if (MSG_SIZE - len <= 0)
			break;
	}

	return true;
}

static void skx_mce_output_error(struct mem_ctl_info *mci,
				 const struct mce *m,
				 struct decoded_addr *res)
{
	enum hw_event_mc_err_type tp_event;
	char *type, *optype, msg[256];
	char *type, *optype;
	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
	bool overflow = GET_BITFIELD(m->status, 62, 62);
	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
			break;
		}
	}

	snprintf(msg, sizeof(msg),
	if (adxl_component_count) {
		snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s",
			 overflow ? " OVERFLOW" : "",
			 (uncorrected_error && recoverable) ? " recoverable" : "",
			 mscod, errcode, adxl_msg);
	} else {
		snprintf(skx_msg, MSG_SIZE,
			 "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
			 overflow ? " OVERFLOW" : "",
			 (uncorrected_error && recoverable) ? " recoverable" : "",
			 mscod, errcode,
			 res->socket, res->imc, res->rank,
			 res->bank_group, res->bank_address, res->row, res->column);
	}

	edac_dbg(0, "%s\n", msg);
	edac_dbg(0, "%s\n", skx_msg);

	/* Call the helper to output message */
	edac_mc_handle_error(tp_event, mci, core_err_cnt,
			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
			     res->channel, res->dimm, -1,
			     optype, msg);
			     optype, skx_msg);
}

/*
 * get_mci:
 *	Find the mem_ctl_info for memory controller @lmc on the device whose
 *	first controller reports source id @src_id.
 *
 *	Returns NULL (after logging an error) when @lmc is outside
 *	[0, NUM_IMC - 1] or when no device matches @src_id.
 */
static struct mem_ctl_info *get_mci(int src_id, int lmc)
{
	struct skx_dev *d;

	/*
	 * @lmc indexes d->imc[], so both bounds must hold. Callers pass a
	 * value cast from a u64 ADXL component; an all-ones component would
	 * arrive here as -1.
	 */
	if (lmc < 0 || lmc > NUM_IMC - 1) {
		skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
		return NULL;
	}

	list_for_each_entry(d, &skx_edac_list, list) {
		if (d->imc[0].src_id == src_id)
			return d->imc[lmc].mci;
	}

	skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);

	return NULL;
}

static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
@@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
		return NOTIFY_DONE;

	memset(&res, 0, sizeof(res));
	res.addr = mce->addr;

	if (adxl_component_count) {
		if (!skx_adxl_decode(&res))
			return NOTIFY_DONE;

		mci = get_mci(res.socket, res.imc);
	} else {
		if (!skx_decode(&res))
			return NOTIFY_DONE;

		mci = res.dev->imc[res.imc].mci;
	}

	if (!mci)
		return NOTIFY_DONE;

	if (mce->mcgstatus & MCG_STATUS_MCIP)
		type = "Exception";
@@ -1094,6 +1193,62 @@ static void skx_remove(void)
	}
}

/*
 * skx_adxl_get:
 *	Set up ADXL-based address decoding. Fetches the component name list
 *	from adxl_get_component_names(), records where each name in
 *	component_names[] sits in that list, then allocates the value array
 *	and the message buffer used by skx_adxl_decode().
 *
 *	On any failure adxl_component_count is left at (or reset to) zero,
 *	which is what the rest of the driver tests to decide whether ADXL
 *	decoding is in use.
 */
static void __init skx_adxl_get(void)
{
	const char * const *names;
	int i, j;

	names = adxl_get_component_names();
	if (!names) {
		skx_printk(KERN_NOTICE, "No firmware support for address translation.");
		skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
		return;
	}

	/* Every required component must appear in the firmware's list. */
	for (i = 0; i < INDEX_MAX; i++) {
		for (j = 0; names[j]; j++) {
			if (!strcmp(component_names[i], names[j])) {
				component_indices[i] = j;
				break;
			}
		}

		/* Inner loop ran off the NULL-terminated list: no match. */
		if (!names[j])
			goto err;
	}

	adxl_component_names = names;
	while (*names++)
		adxl_component_count++;

	adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
			      GFP_KERNEL);
	if (!adxl_values) {
		adxl_component_count = 0;
		return;
	}

	adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
	if (!adxl_msg) {
		adxl_component_count = 0;
		kfree(adxl_values);
		/*
		 * skx_adxl_put() frees adxl_values again at module exit;
		 * clear the pointer so that call does not double free.
		 */
		adxl_values = NULL;
	}

	return;
err:
	skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
		   component_names[i]);
	for (j = 0; names[j]; j++)
		skx_printk(KERN_CONT, "%s ", names[j]);
	skx_printk(KERN_CONT, "\n");
}

/*
 * skx_adxl_put:
 *	Release the two buffers allocated by skx_adxl_get(): the formatted
 *	component message and the decoded value array.
 */
static void __exit skx_adxl_put(void)
{
	kfree(adxl_msg);
	kfree(adxl_values);
}

/*
 * skx_init:
 *	make sure we are running on the correct cpu model
@@ -1158,6 +1313,15 @@ static int __init skx_init(void)
		}
	}

	skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
	if (!skx_msg) {
		rc = -ENOMEM;
		goto fail;
	}

	if (nvdimm_count)
		skx_adxl_get();

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

@@ -1176,6 +1340,9 @@ static void __exit skx_exit(void)
	edac_dbg(2, "\n");
	mce_unregister_decode_chain(&skx_mce_dec);
	skx_remove();
	if (nvdimm_count)
		skx_adxl_put();
	kfree(skx_msg);
	teardown_skx_debug();
}

+5 −0
Original line number Diff line number Diff line
@@ -7,7 +7,12 @@
#ifndef _LINUX_ADXL_H
#define _LINUX_ADXL_H

#ifdef CONFIG_ACPI_ADXL
/* With CONFIG_ACPI_ADXL defined, only the prototypes are declared here. */
const char * const *adxl_get_component_names(void);
int adxl_decode(u64 addr, u64 component_values[]);
#else
/*
 * Without CONFIG_ACPI_ADXL, callers get inline stubs: no component names
 * are available and every decode request fails with -EOPNOTSUPP.
 */
static inline const char * const *adxl_get_component_names(void)
{
	return NULL;
}

static inline int adxl_decode(u64 addr, u64 component_values[])
{
	return -EOPNOTSUPP;
}
#endif

#endif /* _LINUX_ADXL_H */