Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d47d1d8a authored by Anton Blanchard's avatar Anton Blanchard Committed by Benjamin Herrenschmidt
Browse files

powerpc: Rework pseries machine check handler



Rework pseries machine check handler:

- If MSR_RI isn't set, we cannot recover even if the machine check was fully
  recovered

- Rename nonfatal to recovered

- Handle RTAS_DISP_LIMITED_RECOVERY

- Use BUS_MCEERR_AR instead of BUS_ADRERR

- Don't check all the RTAS error log fields when receiving a synchronous
  machine check. Recent versions of the pseries firmware do not fill them
  in during a machine check and instead send a follow up error log with
  the detailed information. If we see a synchronous machine check, and we
  came from userspace then kill the task.

Signed-off-by: default avatarAnton Blanchard <anton@samba.org>
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
parent e49b1fae
Loading
Loading
Loading
Loading
+30 −18
Original line number Diff line number Diff line
@@ -261,29 +261,41 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
 */
static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
{
	int nonfatal = 0;
	int recovered = 0;

	if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		recovered = 0;

	} else if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
		/* Platform corrected itself */
		nonfatal = 1;
	} else if ((regs->msr & MSR_RI) &&
		   user_mode(regs) &&
		   err->severity == RTAS_SEVERITY_ERROR_SYNC &&
		   err->disposition == RTAS_DISP_NOT_RECOVERED &&
		   err->target == RTAS_TARGET_MEMORY &&
		   err->type == RTAS_TYPE_ECC_UNCORR &&
		   !(current->pid == 0 || is_global_init(current))) {
		/* Kill off a user process with an ECC error */
		printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
		       current->pid);
		/* XXX something better for ECC error? */
		_exception(SIGBUS, regs, BUS_ADRERR, regs->nip);
		nonfatal = 1;
		recovered = 1;

	} else if (err->disposition == RTAS_DISP_LIMITED_RECOVERY) {
		/* Platform corrected itself but could be degraded */
		printk(KERN_ERR "MCE: limited recovery, system may "
		       "be degraded\n");
		recovered = 1;

	} else if (user_mode(regs) && !is_global_init(current) &&
		   err->severity == RTAS_SEVERITY_ERROR_SYNC) {

		/*
		 * If we received a synchronous error when in userspace
		 * kill the task. Firmware may report details of the fail
		 * asynchronously, so we can't rely on the target and type
		 * fields being valid here.
		 */
		printk(KERN_ERR "MCE: uncorrectable error, killing task "
		       "%s:%d\n", current->comm, current->pid);

		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}

	log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);

	return nonfatal;
	return recovered;
}

/*