Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bf80bbd7 authored by Aravind Gopalakrishnan's avatar Aravind Gopalakrishnan Committed by Borislav Petkov
Browse files

x86/mce: Add an AMD severities-grading function



Add a severities function that caters to AMD processors. This allows us
to do some vendor-specific work within the function if necessary.

Also, introduce a vendor flag bitfield for vendor-specific settings. The
severities code uses this to define error scope based on the prescence
of the flags field.

This is based off of work by Boris Petkov.

Testing details:
Fam10h, Model 9h (Greyhound)
Fam15h: Models 0h-0fh (Orochi), 30h-3fh (Kaveri) and 60h-6fh (Carrizo),
Fam16h Model 00h-0fh (Kabini)

Boris:
Intel SNB
AMD K8 (JH-E0)

Signed-off-by: default avatarAravind Gopalakrishnan <aravind.gopalakrishnan@amd.com>
Acked-by: default avatarTony Luck <tony.luck@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Chen Yucong <slaoub@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/1427125373-2918-2-git-send-email-Aravind.Gopalakrishnan@amd.com


[ Fixup build, clean up comments. ]
Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
parent c9ce8712
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -116,6 +116,12 @@ struct mca_config {
	u32 rip_msr;
};

struct mce_vendor_flags {
	__u64		overflow_recov	: 1, /* cpuid_ebx(80000007) */
			__reserved_0	: 63;
};
extern struct mce_vendor_flags mce_flags;

extern struct mca_config mca_cfg;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
+56 −0
Original line number Diff line number Diff line
@@ -186,12 +186,68 @@ static int error_context(struct mce *m)
	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
}

/*
 * See AMD Error Scope Hierarchy table in a newer BKDG. For example
 * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
 */
static int mce_severity_amd(struct mce *m, enum context ctx)
{
	/* Processor Context Corrupt, no need to fumble too much, die! */
	if (m->status & MCI_STATUS_PCC)
		return MCE_PANIC_SEVERITY;

	if (m->status & MCI_STATUS_UC) {

		/*
		 * On older systems where overflow_recov flag is not present, we
		 * should simply panic if an error overflow occurs. If
		 * overflow_recov flag is present and set, then software can try
		 * to at least kill process to prolong system operation.
		 */
		if (mce_flags.overflow_recov) {
			/* software can try to contain */
			if (!(m->mcgstatus & MCG_STATUS_RIPV))
				if (ctx == IN_KERNEL)
					return MCE_PANIC_SEVERITY;

				/* kill current process */
				return MCE_AR_SEVERITY;
		} else {
			/* at least one error was not logged */
			if (m->status & MCI_STATUS_OVER)
				return MCE_PANIC_SEVERITY;
		}

		/*
		 * For any other case, return MCE_UC_SEVERITY so that we log the
		 * error and exit #MC handler.
		 */
		return MCE_UC_SEVERITY;
	}

	/*
	 * deferred error: poll handler catches these and adds to mce_ring so
	 * memory-failure can take recovery actions.
	 */
	if (m->status & MCI_STATUS_DEFERRED)
		return MCE_DEFERRED_SEVERITY;

	/*
	 * corrected error: poll handler catches these and passes responsibility
	 * of decoding the error to EDAC
	 */
	return MCE_KEEP_SEVERITY;
}

int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
{
	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
	enum context ctx = error_context(m);
	struct severity *s;

	if (m->cpuvendor == X86_VENDOR_AMD)
		return mce_severity_amd(m, ctx);

	for (s = severities;; s++) {
		if ((m->status & s->mask) != s->result)
			continue;
+9 −0
Original line number Diff line number Diff line
@@ -64,6 +64,7 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
DEFINE_PER_CPU(unsigned, mce_exception_count);

struct mce_bank *mce_banks __read_mostly;
struct mce_vendor_flags mce_flags __read_mostly;

struct mca_config mca_cfg __read_mostly = {
	.bootlog  = -1,
@@ -1534,6 +1535,13 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
		if (c->x86 == 6 && cfg->banks > 0)
			mce_banks[0].ctl = 0;

		/*
		 * overflow_recov is supported for F15h Models 00h-0fh
		 * even though we don't have a CPUID bit for it.
		 */
		if (c->x86 == 0x15 && c->x86_model <= 0xf)
			mce_flags.overflow_recov = 1;

		/*
		 * Turn off MC4_MISC thresholding banks on those models since
		 * they're not supported there.
@@ -1633,6 +1641,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
		break;
	case X86_VENDOR_AMD:
		mce_amd_feature_init(c);
		mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
		break;
	default:
		break;