Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 37d43acf authored by Yazen Ghannam's avatar Yazen Ghannam Committed by Thomas Gleixner
Browse files

x86/mce/AMD: Redo error logging from APIC LVT interrupt handlers



We have support for the new SMCA MCA_DE{STAT,ADDR} registers in Linux.
So we've used these registers in place of MCA_{STATUS,ADDR} on SMCA
systems.

However, the guidance for current SMCA implementations of is to continue
using MCA_{STATUS,ADDR} and to use MCA_DE{STAT,ADDR} only if a Deferred
error was not found in the former registers. If we logged a Deferred
error in MCA_STATUS then we should also clear MCA_DESTAT. This also
means we shouldn't clear MCA_CONFIG[LogDeferredInMcaStat].

Rework __log_error() to only log an error and add helpers for the
different error types being logged from the corresponding interrupt
handlers.

Boris: carve out common functionality into a _log_error_bank(). Cleanup
comments, check MCi_STATUS bits before reading MSRs. Streamline flow.

Signed-off-by: default avatarYazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1493147772-2721-1-git-send-email-Yazen.Ghannam@amd.com


Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
parent 473e90b2
Loading
Loading
Loading
Loading
+74 −73
Original line number Diff line number Diff line
@@ -471,20 +471,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
		 */
		smca_high |= BIT(0);

		/*
		 * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
		 * registers with the option of additionally logging to
		 * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
		 *
		 * This bit is usually set by BIOS to retain the old behavior
		 * for OSes that don't use the new registers. Linux supports the
		 * new registers so let's disable that additional logging here.
		 *
		 * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
		 * portion of the MSR).
		 */
		smca_high &= ~BIT(2);

		/*
		 * SMCA sets the Deferred Error Interrupt type per bank.
		 *
@@ -755,37 +741,19 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
}
EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);

static void
__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
	u32 msr_status = msr_ops.status(bank);
	u32 msr_addr = msr_ops.addr(bank);
	struct mce m;
	u64 status;

	WARN_ON_ONCE(deferred_err && threshold_err);

	if (deferred_err && mce_flags.smca) {
		msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
		msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
	}

	rdmsrl(msr_status, status);

	if (!(status & MCI_STATUS_VAL))
		return;

	mce_setup(&m);

	m.status = status;
	m.misc   = misc;
	m.bank   = bank;
	m.tsc	 = rdtsc();

	if (threshold_err)
		m.misc = misc;

	if (m.status & MCI_STATUS_ADDRV) {
		rdmsrl(msr_addr, m.addr);
		m.addr = addr;

		/*
		 * Extract [55:<lsb>] where lsb is the least significant
@@ -806,8 +774,6 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
	}

	mce_log(&m);

	wrmsrl(msr_status, 0);
}

static inline void __smp_deferred_error_interrupt(void)
@@ -832,45 +798,85 @@ asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
	exiting_ack_irq();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
/*
 * Returns true if the logged error is deferred. False, otherwise.
 */
static inline bool
_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
{
	unsigned int bank;
	u32 msr_status;
	u64 status;
	u64 status, addr = 0;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
					      : msr_ops.status(bank);
	rdmsrl(msr_stat, status);
	if (!(status & MCI_STATUS_VAL))
		return false;

		rdmsrl(msr_status, status);
	if (status & MCI_STATUS_ADDRV)
		rdmsrl(msr_addr, addr);

		if (!(status & MCI_STATUS_VAL) ||
		    !(status & MCI_STATUS_DEFERRED))
			continue;
	__log_error(bank, status, addr, misc);

		__log_error(bank, true, false, 0);
		break;
	}
	wrmsrl(status, 0);

	return status & MCI_STATUS_DEFERRED;
}

/*
 * APIC Interrupt Handler
 * We have three scenarios for checking for Deferred errors:
 *
 * 1) Non-SMCA systems check MCA_STATUS and log error if found.
 * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
 *    clear MCA_DESTAT.
 * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
 *    log it.
 */
static void log_error_deferred(unsigned int bank)
{
	bool defrd;

	defrd = _log_error_bank(bank, msr_ops.status(bank),
					msr_ops.addr(bank), 0);

	if (!mce_flags.smca)
		return;

	/* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
	if (defrd) {
		wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
		return;
	}

	/*
 * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
 * the interrupt goes off when error_count reaches threshold_limit.
 * the handler will simply log mcelog w/ software defined bank number.
	 * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
	 * for a valid error.
	 */
	_log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
			      MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank)
		log_error_deferred(bank);
}

static void log_error_thresholding(unsigned int bank, u64 misc)
{
	_log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
}

/*
 * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
 * goes off when error_count reaches threshold_limit.
 */
static void amd_threshold_interrupt(void)
{
	u32 low = 0, high = 0, address = 0;
	unsigned int bank, block, cpu = smp_processor_id();
	struct thresh_restart tr;

	/* assume first bank caused it */
	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
@@ -893,24 +899,19 @@ static void amd_threshold_interrupt(void)
			     (high & MASK_LOCKED_HI))
				continue;

			/*
			 * Log the machine check that caused the threshold
			 * event.
			 */
			if (high & MASK_OVERFLOW_HI)
				goto log;
		}
	}
	return;
			if (!(high & MASK_OVERFLOW_HI))
				continue;

log:
	__log_error(bank, false, true, ((u64)high << 32) | low);
			/* Log the MCE which caused the threshold event. */
			log_error_thresholding(bank, ((u64)high << 32) | low);

			/* Reset threshold block after logging error. */
			memset(&tr, 0, sizeof(tr));
			tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block];
			threshold_restart_bank(&tr);
		}
	}
}

/*
 * Sysfs Interface