Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 786f02b7 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull x86/mce merge window patches from Tony Luck:
 "Including two that make error_context() checks less sucky"

* tag 'x86-mce-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  x86/mce: Add instruction recovery signatures to mce-severity table
  x86/mce: Fix check for processor context when machine check was taken.
  MCE: Fix vm86 handling for 32bit mce handler
  x86/mce Add validation check before GHES error is recorded
  x86/mce: Avoid reading every machine check bank register twice.
parents fa2af6e4 37c3459b
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -42,7 +42,8 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
	struct mce m;

	/* Only corrected MC is reported */
	if (!corrected)
	if (!corrected || !(mem_err->validation_bits &
				CPER_MEM_VALID_PHYSICAL_ADDRESS))
		return;

	mce_setup(&m);
+20 −6
Original line number Diff line number Diff line
@@ -126,6 +126,16 @@ static struct severity {
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		USER
		),
	MCESEV(
		KEEP, "HT thread notices Action required: instruction fetch error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		MCGMASK(MCG_STATUS_EIPV, 0)
		),
	MCESEV(
		AR, "Action required: instruction fetch error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		USER
		),
#endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
@@ -165,15 +175,19 @@ static struct severity {
};

/*
 * If the EIPV bit is set, it means the saved IP is the
 * instruction which caused the MCE.
 * If mcgstatus indicated that ip/cs on the stack were
 * no good, then "m->cs" will be zero and we will have
 * to assume the worst case (IN_KERNEL) as we actually
 * have no idea what we were executing when the machine
 * check hit.
 * If we do have a good "m->cs" (or a faked one in the
 * case we were executing in VM86 mode) we can use it to
 * distinguish an exception taken in user from from one
 * taken in the kernel.
 */
static int error_context(struct mce *m)
{
	if (m->mcgstatus & MCG_STATUS_EIPV)
		return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
	/* Unknown, assume kernel */
	return IN_KERNEL;
	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
}

int mce_severity(struct mce *m, int tolerant, char **msg)
+19 −5
Original line number Diff line number Diff line
@@ -437,6 +437,14 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
		if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
			m->ip = regs->ip;
			m->cs = regs->cs;

			/*
			 * When in VM86 mode make the cs look like ring 3
			 * always. This is a lie, but it's better than passing
			 * the additional vm86 bit around everywhere.
			 */
			if (v8086_mode(regs))
				m->cs |= 3;
		}
		/* Use accurate RIP reporting if available. */
		if (rip_msr)
@@ -641,16 +649,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
 * Do a quick check if any of the events requires a panic.
 * This decides if we keep the events around or clear them.
 */
static int mce_no_way_out(struct mce *m, char **msg)
static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
{
	int i;
	int i, ret = 0;

	for (i = 0; i < banks; i++) {
		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
		if (m->status & MCI_STATUS_VAL)
			__set_bit(i, validp);
		if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
			return 1;
			ret = 1;
	}
	return 0;
	return ret;
}

/*
@@ -1013,6 +1023,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
	 */
	int kill_it = 0;
	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
	char *msg = "Unknown";

	atomic_inc(&mce_entry);
@@ -1027,7 +1038,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
	final = &__get_cpu_var(mces_seen);
	*final = m;

	no_way_out = mce_no_way_out(&m, &msg);
	memset(valid_banks, 0, sizeof(valid_banks));
	no_way_out = mce_no_way_out(&m, &msg, valid_banks);

	barrier();

@@ -1047,6 +1059,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
	order = mce_start(&no_way_out);
	for (i = 0; i < banks; i++) {
		__clear_bit(i, toclear);
		if (!test_bit(i, valid_banks))
			continue;
		if (!mce_banks[i].ctl)
			continue;