Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fab5669d authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS changes from Ingo Molnar:

 - SCI reporting for other error types, not only correctable ones

 - GHES cleanups

 - Add the functionality to override error reporting agents as some
   machines are sporting a new extended error logging capability which,
   if done properly in the BIOS, makes a corresponding EDAC module
   redundant

 - PCIe AER tracepoint severity levels fix

 - Error path correction for the mce device init

 - MCE timer fix

 - Add more flexibility to the error injection (EINJ) debugfs interface

* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, mce: Fix mce_start_timer semantics
  ACPI, APEI, GHES: Cleanup ghes memory error handling
  ACPI, APEI: Cleanup alignment-aware accesses
  ACPI, APEI, GHES: Do not report only correctable errors with SCI
  ACPI, APEI, EINJ: Changes to the ACPI/APEI/EINJ debugfs interface
  ACPI, eMCA: Combine eMCA/EDAC event reporting priority
  EDAC, sb_edac: Modify H/W event reporting policy
  EDAC: Add an edac_report parameter to EDAC
  PCI, AER: Fix severity usage in aer trace event
  x86, mce: Call put_device on device_register failure
parents 74e8ee82 b769e014
Loading
Loading
Loading
Loading
+18 −1
Original line number Diff line number Diff line
@@ -45,11 +45,22 @@ directory apei/einj. The following files are provided.
  injection. Before this, please specify all necessary error
  parameters.

- flags
  Present for kernel version 3.13 and above. Used to specify which
  of param{1..4} are valid and should be used by BIOS during injection.
  Value is a bitmask as specified in ACPI5.0 spec for the
  SET_ERROR_TYPE_WITH_ADDRESS data structure:
	Bit 0 - Processor APIC field valid (see param3 below)
	Bit 1 - Memory address and mask valid (param1 and param2)
	Bit 2 - PCIe (seg,bus,dev,fn) valid (param4 below)
  If set to zero, legacy behaviour is used where the type of injection
  specifies just one bit set, and param1 is multiplexed.

- param1
  This file is used to set the first error parameter value. Effect of
  parameter depends on error_type specified. For example, if error
  type is memory related type, the param1 should be a valid physical
  memory address.
  memory address. [Unless "flags" is set - see above]

- param2
  This file is used to set the second error parameter value. Effect of
@@ -58,6 +69,12 @@ directory apei/einj. The following files are provided.
  address mask. Linux requires page or narrower granularity, say,
  0xfffffffffffff000.

- param3
  Used when the 0x1 bit is set in "flags" to specify the APIC id

- param4
  Used when the 0x4 bit is set in "flags" to specify the target PCIe device

- notrigger
  The EINJ mechanism is a two step process. First inject the error, then
  perform some actions to trigger it. Setting "notrigger" to 1 skips the
+8 −0
Original line number Diff line number Diff line
@@ -890,6 +890,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.

			The xen output can only be used by Xen PV guests.

	edac_report=	[HW,EDAC] Control how EDAC events are reported
			Format: {"on" | "off" | "force"}
			on: enable EDAC to report H/W events. May be overridden
			by another, higher-priority error reporting module.
			off: disable H/W event reporting through EDAC.
			force: enforce the use of EDAC to report H/W events.
			default: on.

	ekgdboc=	[X86,KGDB] Allow early kernel console debugging
			ekgdboc=kbd

+10 −4
Original line number Diff line number Diff line
@@ -33,22 +33,28 @@
#include <linux/acpi.h>
#include <linux/cper.h>
#include <acpi/apei.h>
#include <acpi/ghes.h>
#include <asm/mce.h>

#include "mce-internal.h"

void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
{
	struct mce m;

	/* Only corrected MC is reported */
	if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA))
	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
		return;

	mce_setup(&m);
	m.bank = 1;
	/* Fake a memory read corrected error with unknown channel */
	/* Fake a memory read error with unknown channel */
	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;

	if (severity >= GHES_SEV_RECOVERABLE)
		m.status |= MCI_STATUS_UC;
	if (severity >= GHES_SEV_PANIC)
		m.status |= MCI_STATUS_PCC;

	m.addr = mem_err->physical_addr;
	mce_log(&m);
	mce_notify_irq();
+7 −5
Original line number Diff line number Diff line
@@ -1638,15 +1638,15 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)

static void mce_start_timer(unsigned int cpu, struct timer_list *t)
{
	unsigned long iv = mce_adjust_timer(check_interval * HZ);

	__this_cpu_write(mce_next_interval, iv);
	unsigned long iv = check_interval * HZ;

	if (mca_cfg.ignore_ce || !iv)
		return;

	per_cpu(mce_next_interval, cpu) = iv;

	t->expires = round_jiffies(jiffies + iv);
	add_timer_on(t, smp_processor_id());
	add_timer_on(t, cpu);
}

static void __mcheck_cpu_init_timer(void)
@@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu)
	dev->release = &mce_device_release;

	err = device_register(dev);
	if (err)
	if (err) {
		put_device(dev);
		return err;
	}

	for (i = 0; mce_device_attrs[i]; i++) {
		err = device_create_file(dev, mce_device_attrs[i]);
+16 −2
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include <acpi/acpi_bus.h>
#include <linux/cper.h>
#include <linux/ratelimit.h>
#include <linux/edac.h>
#include <asm/cpu.h>
#include <asm/mce.h>

@@ -43,6 +44,8 @@ struct extlog_l1_head {
	u8  rev1[12];
};

static int old_edac_report_status;

static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295";

/* L1 table related physical address */
@@ -150,7 +153,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,

	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);

	return NOTIFY_DONE;
	return NOTIFY_STOP;
}

static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret)
@@ -231,8 +234,12 @@ static int __init extlog_init(void)
	u64 cap;
	int rc;

	rc = -ENODEV;
	if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
		pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
		return -EPERM;
	}

	rc = -ENODEV;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	if (!(cap & MCG_ELOG_P))
		return rc;
@@ -287,6 +294,12 @@ static int __init extlog_init(void)
	if (elog_buf == NULL)
		goto err_release_elog;

	/*
	 * eMCA event report method has higher priority than EDAC method,
	 * unless EDAC event report method is mandatory.
	 */
	old_edac_report_status = get_edac_report_status();
	set_edac_report_status(EDAC_REPORTING_DISABLED);
	mce_register_decode_chain(&extlog_mce_dec);
	/* enable OS to be involved to take over management from BIOS */
	((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
@@ -308,6 +321,7 @@ static int __init extlog_init(void)

static void __exit extlog_exit(void)
{
	set_edac_report_status(old_edac_report_status);
	mce_unregister_decode_chain(&extlog_mce_dec);
	((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
	if (extlog_l1_addr)
Loading