Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cbd4ebcb authored by H. Peter Anvin
Browse files

Merge tag 'please-pull-extlog-trace' into x86/ras



Report extended error information ("extlog") using
a trace/event.  Provide a mechanism for a smart
daemon collecting this information to tell the kernel
to skip logging corrected errors to the console.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
parents 27c93415 7c76bb5f
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -176,4 +176,6 @@ source "drivers/powercap/Kconfig"

source "drivers/mcb/Kconfig"

source "drivers/ras/Kconfig"

endmenu
+1 −0
Original line number Diff line number Diff line
@@ -158,3 +158,4 @@ obj-$(CONFIG_NTB) += ntb/
obj-$(CONFIG_FMC)		+= fmc/
obj-$(CONFIG_POWERCAP)		+= powercap/
obj-$(CONFIG_MCB)		+= mcb/
obj-$(CONFIG_RAS)		+= ras/
+3 −1
Original line number Diff line number Diff line
@@ -370,6 +370,7 @@ config ACPI_EXTLOG
	tristate "Extended Error Log support"
	depends on X86_MCE && X86_LOCAL_APIC
	select UEFI_CPER
	select RAS
	default n
	help
	  Certain usages such as Predictive Failure Analysis (PFA) require
@@ -384,6 +385,7 @@ config ACPI_EXTLOG

	  Enhanced MCA Logging allows firmware to provide additional error
	  information to system software, synchronous with MCE or CMCI. This
	  driver adds support for that functionality.
	  driver adds support for that functionality with corresponding
	  tracepoint which carries that information to userspace.

endif	# ACPI
+35 −11
Original line number Diff line number Diff line
@@ -12,10 +12,12 @@
#include <linux/cper.h>
#include <linux/ratelimit.h>
#include <linux/edac.h>
#include <linux/ras.h>
#include <asm/cpu.h>
#include <asm/mce.h>

#include "apei/apei-internal.h"
#include <ras/ras_event.h>

#define EXT_ELOG_ENTRY_MASK	GENMASK_ULL(51, 0) /* elog entry address mask */

@@ -137,8 +139,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
	struct mce *mce = (struct mce *)data;
	int	bank = mce->bank;
	int	cpu = mce->extcpu;
	struct acpi_generic_status *estatus;
	int rc;
	struct acpi_generic_status *estatus, *tmp;
	struct acpi_generic_data *gdata;
	const uuid_le *fru_id = &NULL_UUID_LE;
	char *fru_text = "";
	uuid_le *sec_type;
	static u32 err_seq;

	estatus = extlog_elog_entry_check(cpu, bank);
	if (estatus == NULL)
@@ -148,8 +154,29 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
	/* clear record status to enable BIOS to update it again */
	estatus->block_status = 0;

	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
	tmp = (struct acpi_generic_status *)elog_buf;

	if (!ras_userspace_consumers()) {
		print_extlog_rcd(NULL, tmp, cpu);
		goto out;
	}

	/* log event via trace */
	err_seq++;
	gdata = (struct acpi_generic_data *)(tmp + 1);
	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
		fru_id = (uuid_le *)gdata->fru_id;
	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
		fru_text = gdata->fru_text;
	sec_type = (uuid_le *)gdata->section_type;
	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
		struct cper_sec_mem_err *mem = (void *)(gdata + 1);
		if (gdata->error_data_length >= sizeof(*mem))
			trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
					       (u8)gdata->error_severity);
	}

out:
	return NOTIFY_STOP;
}

@@ -196,19 +223,16 @@ static int __init extlog_init(void)
	u64 cap;
	int rc;

	rdmsrl(MSR_IA32_MCG_CAP, cap);

	if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr())
		return -ENODEV;

	if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
		pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
		return -EPERM;
	}

	rc = -ENODEV;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	if (!(cap & MCG_ELOG_P))
		return rc;

	if (!extlog_get_l1addr())
		return rc;

	rc = -EINVAL;
	/* get L1 header to fetch necessary information */
	l1_hdr_size = sizeof(struct extlog_l1_head);
+1 −0
Original line number Diff line number Diff line
@@ -72,6 +72,7 @@ config EDAC_MCE_INJ

config EDAC_MM_EDAC
	tristate "Main Memory EDAC (Error Detection And Correction) reporting"
	select RAS
	help
	  Some systems are able to detect and correct errors in main
	  memory.  EDAC can report statistics on memory error
Loading