Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cd9c57ca authored by Borislav Petkov, committed by Thomas Gleixner
Browse files

x86/MCE: Dump MCE to dmesg if no consumers



When there are no error record consumers registered with the kernel, the
only thing that appears in dmesg is something like:

  [  300.000326] mce: [Hardware Error]: Machine check events logged

and the error records are gone, which is seriously counterproductive.

So let's dump them to dmesg instead, in such a case.

Requested-by: Eric Morton <Eric.Morton@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20161101120911.13163-4-bp@alien8.de


Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 8c203dbb
Loading
Loading
Loading
Loading
+46 −6
Original line number Diff line number Diff line
@@ -207,8 +207,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log);

static struct notifier_block mce_srao_nb;

/*
 * Count of notifier blocks added through mce_register_decode_chain() and not
 * yet removed through mce_unregister_decode_chain(). mce_default_notifier()
 * reads it to decide whether any consumer besides the built-in ones exists.
 */
static atomic_t num_notifiers;

void mce_register_decode_chain(struct notifier_block *nb)
{
	atomic_inc(&num_notifiers);

	/* Ensure SRAO notifier has the highest priority in the decode chain. */
	if (nb != &mce_srao_nb && nb->priority == INT_MAX)
		nb->priority -= 1;
@@ -219,6 +223,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain);

/*
 * Remove @nb from the MCE decoder chain.
 *
 * @nb: notifier block previously passed to mce_register_decode_chain()
 *
 * num_notifiers is only decremented when the block was actually found on the
 * chain (atomic_notifier_chain_unregister() returns 0 on success). Otherwise
 * a stray unregister of a block that was never registered would make the
 * counter drift below the real number of consumers and skew the decision
 * mce_default_notifier() bases on it.
 */
void mce_unregister_decode_chain(struct notifier_block *nb)
{
	if (!atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb))
		atomic_dec(&num_notifiers);
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
@@ -270,12 +276,12 @@ struct mca_msr_regs msr_ops = {
	.misc	= misc_reg
};

static void print_mce(struct mce *m)
static void __print_mce(struct mce *m)
{
	int ret = 0;

	pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
	       m->extcpu, m->mcgstatus, m->bank, m->status);
	pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
		 m->extcpu,
		 (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
		 m->mcgstatus, m->bank, m->status);

	if (m->ip) {
		pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
@@ -308,6 +314,13 @@ static void print_mce(struct mce *m)
	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
		cpu_data(m->extcpu).microcode);
}

static void print_mce(struct mce *m)
{
	int ret = 0;

	__print_mce(m);

	/*
	 * Print out human-readable details about the MCE error,
@@ -569,6 +582,32 @@ static struct notifier_block mce_srao_nb = {
	.priority = INT_MAX,
};

/*
 * Fallback decoder-chain callback: dump the raw MCE record with __print_mce()
 * when nobody else is around to consume it.
 *
 * @nb:   notifier block this callback belongs to (not used)
 * @val:  notifier event value (not used)
 * @data: the struct mce being reported; may be NULL, in which case nothing
 *        is printed
 *
 * Always returns NOTIFY_DONE.
 */
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct mce *err = data;

	/*
	 * Only dump the record when the chain holds nothing beyond the SRAO
	 * notifier and this default one, i.e. at most two registrations.
	 */
	if (err && atomic_read(&num_notifiers) <= 2)
		__print_mce(err);

	return NOTIFY_DONE;
}

/* Notifier block hooking mce_default_notifier() into the decode chain. */
static struct notifier_block mce_default_nb = {
	/* Lowest priority: this one is meant to run after everything else. */
	.priority	= 0,
	.notifier_call	= mce_default_notifier,
};

/*
 * Read ADDR and MISC registers.
 */
@@ -2138,6 +2177,7 @@ int __init mcheck_init(void)
{
	mcheck_intel_therm_init();
	mce_register_decode_chain(&mce_srao_nb);
	mce_register_decode_chain(&mce_default_nb);
	mcheck_vendor_init_severity();

	INIT_WORK(&mce_work, mce_process_work);