Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8f0b8056 authored by Mahesh Salgaonkar's avatar Mahesh Salgaonkar Committed by Michael Ellerman
Browse files

powerpc/pseries: Display machine check error details.



Extract the MCE error details from RTAS extended log and display it to
console.

With this patch you should now see mce logs like below:

[  142.371818] Severe Machine check interrupt [Recovered]
[  142.371822]   NIP [d00000000ca301b8]: init_module+0x1b8/0x338 [bork_kernel]
[  142.371822]   Initiator: CPU
[  142.371823]   Error type: SLB [Multihit]
[  142.371824]     Effective address: d00000000ca70000

Signed-off-by: default avatarMahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
parent a43c1590
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -197,6 +197,11 @@ static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog)
	return (elog->byte1 & 0x04) >> 2;
}

static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog)
{
	return (elog->byte2 & 0xf0) >> 4;
}

#define rtas_error_type(x)	((x)->byte3)

static inline
+133 −0
Original line number Diff line number Diff line
@@ -523,6 +523,136 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
	return 0; /* need to perform reset */
}

#define VAL_TO_STRING(ar, val)	\
	(((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")

static void pseries_print_mce_info(struct pt_regs *regs,
				   struct rtas_error_log *errp)
{
	const char *level, *sevstr;
	struct pseries_errorlog *pseries_log;
	struct pseries_mc_errorlog *mce_log;
	u8 error_type, err_sub_type;
	u64 addr;
	u8 initiator = rtas_error_initiator(errp);
	int disposition = rtas_error_disposition(errp);

	static const char * const initiators[] = {
		"Unknown",
		"CPU",
		"PCI",
		"ISA",
		"Memory",
		"Power Mgmt",
	};
	static const char * const mc_err_types[] = {
		"UE",
		"SLB",
		"ERAT",
		"TLB",
		"D-Cache",
		"Unknown",
		"I-Cache",
	};
	static const char * const mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};

	/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
	static const char * const mc_slb_types[] = {
		"Parity",
		"Multihit",
		"Indeterminate",
	};

	/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
	static const char * const mc_soft_types[] = {
		"Unknown",
		"Parity",
		"Multihit",
		"Indeterminate",
	};

	if (!rtas_error_extended(errp)) {
		pr_err("Machine check interrupt: Missing extended error log\n");
		return;
	}

	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
	if (pseries_log == NULL)
		return;

	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;

	error_type = mce_log->error_type;
	err_sub_type = rtas_mc_error_sub_type(mce_log);

	switch (rtas_error_severity(errp)) {
	case RTAS_SEVERITY_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case RTAS_SEVERITY_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case RTAS_SEVERITY_ERROR:
	case RTAS_SEVERITY_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case RTAS_SEVERITY_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
	       disposition == RTAS_DISP_FULLY_RECOVERED ?
	       "Recovered" : "Not recovered");
	if (user_mode(regs)) {
		printk("%s  NIP: [%016lx] PID: %d Comm: %s\n", level,
		       regs->nip, current->pid, current->comm);
	} else {
		printk("%s  NIP [%016lx]: %pS\n", level, regs->nip,
		       (void *)regs->nip);
	}
	printk("%s  Initiator: %s\n", level,
	       VAL_TO_STRING(initiators, initiator));

	switch (error_type) {
	case MC_ERROR_TYPE_UE:
		printk("%s  Error type: %s [%s]\n", level,
		       VAL_TO_STRING(mc_err_types, error_type),
		       VAL_TO_STRING(mc_ue_types, err_sub_type));
		break;
	case MC_ERROR_TYPE_SLB:
		printk("%s  Error type: %s [%s]\n", level,
		       VAL_TO_STRING(mc_err_types, error_type),
		       VAL_TO_STRING(mc_slb_types, err_sub_type));
		break;
	case MC_ERROR_TYPE_ERAT:
	case MC_ERROR_TYPE_TLB:
		printk("%s  Error type: %s [%s]\n", level,
		       VAL_TO_STRING(mc_err_types, error_type),
		       VAL_TO_STRING(mc_soft_types, err_sub_type));
		break;
	default:
		printk("%s  Error type: %s\n", level,
		       VAL_TO_STRING(mc_err_types, error_type));
		break;
	}

	addr = rtas_mc_get_effective_addr(mce_log);
	if (addr)
		printk("%s    Effective address: %016llx\n", level, addr);
}

static int mce_handle_error(struct rtas_error_log *errp)
{
	struct pseries_errorlog *pseries_log;
@@ -585,8 +715,11 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
	int recovered = 0;
	int disposition = rtas_error_disposition(err);

	pseries_print_mce_info(regs, err);

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
		recovered = 0;

	} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {