Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ffa6f55e authored by Linus Torvalds
Browse files

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:

 - Support for varying MCA bank numbers per CPU: this is in preparation
   for future CPU enablement (Yazen Ghannam)

 - MCA banks read race fix (Tony Luck)

 - Facility to filter MCEs which should not be logged (Yazen Ghannam)

 - The usual round of cleanups and fixes

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/MCE/AMD: Don't report L1 BTB MCA errors on some family 17h models
  x86/MCE: Add an MCE-record filtering function
  RAS/CEC: Increment cec_entered under the mutex lock
  x86/mce: Fix debugfs_simple_attr.cocci warnings
  x86/mce: Remove mce_report_event()
  x86/mce: Handle varying MCA bank counts
  x86/mce: Fix machine_check_poll() tests for error types
  MAINTAINERS: Fix file pattern for X86 MCE INFRASTRUCTURE
  x86/MCE: Group AMD function prototypes in <asm/mce.h>
parents 275b103a 71a84402
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -16941,7 +16941,7 @@ M: Tony Luck <tony.luck@intel.com>
M:	Borislav Petkov <bp@alien8.de>
M:	Borislav Petkov <bp@alien8.de>
L:	linux-edac@vger.kernel.org
L:	linux-edac@vger.kernel.org
S:	Maintained
S:	Maintained
F:	arch/x86/kernel/cpu/mcheck/*
F:	arch/x86/kernel/cpu/mce/*


X86 MICROCODE UPDATE SUPPORT
X86 MICROCODE UPDATE SUPPORT
M:	Borislav Petkov <bp@alien8.de>
M:	Borislav Petkov <bp@alien8.de>
+11 −14
Original line number Original line Diff line number Diff line
@@ -210,16 +210,6 @@ static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
static inline void cmci_recheck(void) {}
#endif
#endif


#ifdef CONFIG_X86_MCE_AMD
void mce_amd_feature_init(struct cpuinfo_x86 *c);
int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
#else
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
#endif

static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }

int mce_available(struct cpuinfo_x86 *c);
int mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
bool mce_is_memory_error(struct mce *m);
bool mce_is_correctable(struct mce *m);
bool mce_is_correctable(struct mce *m);
@@ -345,12 +335,19 @@ extern bool amd_mce_is_memory_error(struct mce *m);
extern int mce_threshold_create_device(unsigned int cpu);
extern int mce_threshold_create_device(unsigned int cpu);
extern int mce_threshold_remove_device(unsigned int cpu);
extern int mce_threshold_remove_device(unsigned int cpu);


void mce_amd_feature_init(struct cpuinfo_x86 *c);
int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);

#else
#else


static inline int mce_threshold_create_device(unsigned int cpu)		{ return 0; };
static inline int mce_threshold_create_device(unsigned int cpu)		{ return 0; };
static inline int mce_threshold_remove_device(unsigned int cpu)		{ return 0; };
static inline int mce_threshold_remove_device(unsigned int cpu)		{ return 0; };
static inline bool amd_mce_is_memory_error(struct mce *m)		{ return false; };
static inline bool amd_mce_is_memory_error(struct mce *m)		{ return false; };

static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)		{ }
static inline int
umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)	{ return -EINVAL; };
#endif
#endif


static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c)	{ return mce_amd_feature_init(c); }

#endif /* _ASM_X86_MCE_H */
#endif /* _ASM_X86_MCE_H */
+39 −13
Original line number Original line Diff line number Diff line
@@ -563,33 +563,59 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
	return offset;
	return offset;
}
}


bool amd_filter_mce(struct mce *m)
{
	enum smca_bank_types bank_type = smca_get_bank_type(m->bank);
	struct cpuinfo_x86 *c = &boot_cpu_data;
	u8 xec = (m->status >> 16) & 0x3F;

	/* See Family 17h Models 10h-2Fh Erratum #1114. */
	if (c->x86 == 0x17 &&
	    c->x86_model >= 0x10 && c->x86_model <= 0x2F &&
	    bank_type == SMCA_IF && xec == 10)
		return true;

	return false;
}

/*
/*
 * Turn off MC4_MISC thresholding banks on all family 0x15 models since
 * Turn off thresholding banks for the following conditions:
 * they're not supported there.
 * - MC4_MISC thresholding is not supported on Family 0x15.
 * - Prevent possible spurious interrupts from the IF bank on Family 0x17
 *   Models 0x10-0x2F due to Erratum #1114.
 */
 */
void disable_err_thresholding(struct cpuinfo_x86 *c)
void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
{
{
	int i;
	int i, num_msrs;
	u64 hwcr;
	u64 hwcr;
	bool need_toggle;
	bool need_toggle;
	u32 msrs[] = {
	u32 msrs[NR_BLOCKS];
		0x00000413, /* MC4_MISC0 */

		0xc0000408, /* MC4_MISC1 */
	if (c->x86 == 0x15 && bank == 4) {
	};
		msrs[0] = 0x00000413; /* MC4_MISC0 */
		msrs[1] = 0xc0000408; /* MC4_MISC1 */
		num_msrs = 2;
	} else if (c->x86 == 0x17 &&
		   (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) {


	if (c->x86 != 0x15)
		if (smca_get_bank_type(bank) != SMCA_IF)
			return;
			return;


		msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank);
		num_msrs = 1;
	} else {
		return;
	}

	rdmsrl(MSR_K7_HWCR, hwcr);
	rdmsrl(MSR_K7_HWCR, hwcr);


	/* McStatusWrEn has to be set */
	/* McStatusWrEn has to be set */
	need_toggle = !(hwcr & BIT(18));
	need_toggle = !(hwcr & BIT(18));

	if (need_toggle)
	if (need_toggle)
		wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
		wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));


	/* Clear CntP bit safely */
	/* Clear CntP bit safely */
	for (i = 0; i < ARRAY_SIZE(msrs); i++)
	for (i = 0; i < num_msrs; i++)
		msr_clear_bit(msrs[i], 62);
		msr_clear_bit(msrs[i], 62);


	/* restore old settings */
	/* restore old settings */
@@ -604,12 +630,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
	unsigned int bank, block, cpu = smp_processor_id();
	unsigned int bank, block, cpu = smp_processor_id();
	int offset = -1;
	int offset = -1;


	disable_err_thresholding(c);

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
	for (bank = 0; bank < mca_cfg.banks; ++bank) {
		if (mce_flags.smca)
		if (mce_flags.smca)
			smca_configure(bank, cpu);
			smca_configure(bank, cpu);


		disable_err_thresholding(c, bank);

		for (block = 0; block < NR_BLOCKS; ++block) {
		for (block = 0; block < NR_BLOCKS; ++block) {
			address = get_block_address(address, low, high, bank, block);
			address = get_block_address(address, low, high, bank, block);
			if (!address)
			if (!address)
+58 −44
Original line number Original line Diff line number Diff line
@@ -460,23 +460,6 @@ static void mce_irq_work_cb(struct irq_work *entry)
	mce_schedule_work();
	mce_schedule_work();
}
}


/*
 * Report a machine check event using one of two paths, selected by the
 * saved flags in @regs:
 *  - if X86_VM_MASK or X86_EFLAGS_IF is set, call mce_notify_irq() and
 *    mce_schedule_work() directly;
 *  - otherwise queue mce_irq_work via irq_work_queue().
 */
static void mce_report_event(struct pt_regs *regs)
{
	/*
	 * NOTE(review): presumably these flag bits mean the interrupted
	 * context is one where the direct calls are safe - confirm.
	 */
	if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
		mce_notify_irq();
		/*
		 * Triggering the work queue here is just an insurance
		 * policy in case the syscall exit notify handler
		 * doesn't run soon enough or ends up running on the
		 * wrong CPU (can happen when audit sleeps)
		 */
		mce_schedule_work();
		return;
	}

	/* Neither flag set: queue mce_irq_work instead of calling directly. */
	irq_work_queue(&mce_irq_work);
}

/*
/*
 * Check if the address reported by the CPU is in a format we can parse.
 * Check if the address reported by the CPU is in a format we can parse.
 * It would be possible to add code for most other cases, but all would
 * It would be possible to add code for most other cases, but all would
@@ -712,19 +695,49 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)


		barrier();
		barrier();
		m.status = mce_rdmsrl(msr_ops.status(i));
		m.status = mce_rdmsrl(msr_ops.status(i));

		/* If this entry is not valid, ignore it */
		if (!(m.status & MCI_STATUS_VAL))
		if (!(m.status & MCI_STATUS_VAL))
			continue;
			continue;


		/*
		/*
		 * Uncorrected or signalled events are handled by the exception
		 * If we are logging everything (at CPU online) or this
		 * handler when it is enabled, so don't process those here.
		 * is a corrected error, then we must log it.
		 *
		 */
		 * TBD do the same check for MCI_STATUS_EN here?
		if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC))
			goto log_it;

		/*
		 * Newer Intel systems that support software error
		 * recovery need to make additional checks. Other
		 * CPUs should skip over uncorrected errors, but log
		 * everything else.
		 */
		if (!mca_cfg.ser) {
			if (m.status & MCI_STATUS_UC)
				continue;
			goto log_it;
		}

		/* Log "not enabled" (speculative) errors */
		if (!(m.status & MCI_STATUS_EN))
			goto log_it;

		/*
		 * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
		 * UC == 1 && PCC == 0 && S == 0
		 */
		if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S))
			goto log_it;

		/*
		 * Skip anything else. Presumption is that our read of this
		 * bank is racing with a machine check. Leave the log alone
		 * for do_machine_check() to deal with it.
		 */
		 */
		if (!(flags & MCP_UC) &&
		    (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
		continue;
		continue;


log_it:
		error_seen = true;
		error_seen = true;


		mce_read_aux(&m, i);
		mce_read_aux(&m, i);
@@ -1301,7 +1314,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
		mce_panic("Fatal machine check on current CPU", &m, msg);
		mce_panic("Fatal machine check on current CPU", &m, msg);


	if (worst > 0)
	if (worst > 0)
		mce_report_event(regs);
		irq_work_queue(&mce_irq_work);

	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);


	sync_core();
	sync_core();
@@ -1451,13 +1465,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
static int __mcheck_cpu_mce_banks_init(void)
static int __mcheck_cpu_mce_banks_init(void)
{
{
	int i;
	int i;
	u8 num_banks = mca_cfg.banks;


	mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL);
	mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL);
	if (!mce_banks)
	if (!mce_banks)
		return -ENOMEM;
		return -ENOMEM;


	for (i = 0; i < num_banks; i++) {
	for (i = 0; i < MAX_NR_BANKS; i++) {
		struct mce_bank *b = &mce_banks[i];
		struct mce_bank *b = &mce_banks[i];


		b->ctl = -1ULL;
		b->ctl = -1ULL;
@@ -1471,28 +1484,19 @@ static int __mcheck_cpu_mce_banks_init(void)
 */
 */
static int __mcheck_cpu_cap_init(void)
static int __mcheck_cpu_cap_init(void)
{
{
	unsigned b;
	u64 cap;
	u64 cap;
	u8 b;


	rdmsrl(MSR_IA32_MCG_CAP, cap);
	rdmsrl(MSR_IA32_MCG_CAP, cap);


	b = cap & MCG_BANKCNT_MASK;
	b = cap & MCG_BANKCNT_MASK;
	if (!mca_cfg.banks)
	if (WARN_ON_ONCE(b > MAX_NR_BANKS))
		pr_info("CPU supports %d MCE banks\n", b);

	if (b > MAX_NR_BANKS) {
		pr_warn("Using only %u machine check banks out of %u\n",
			MAX_NR_BANKS, b);
		b = MAX_NR_BANKS;
		b = MAX_NR_BANKS;
	}


	/* Don't support asymmetric configurations today */
	mca_cfg.banks = max(mca_cfg.banks, b);
	WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
	mca_cfg.banks = b;


	if (!mce_banks) {
	if (!mce_banks) {
		int err = __mcheck_cpu_mce_banks_init();
		int err = __mcheck_cpu_mce_banks_init();

		if (err)
		if (err)
			return err;
			return err;
	}
	}
@@ -1771,6 +1775,14 @@ static void __mcheck_cpu_init_timer(void)
	mce_start_timer(t);
	mce_start_timer(t);
}
}


/*
 * Vendor dispatch for MCE record filtering.
 *
 * Returns the result of amd_filter_mce() when the boot CPU vendor is
 * X86_VENDOR_AMD; returns false for every other vendor.
 */
bool filter_mce(struct mce *m)
{
	/* No filter exists for non-AMD vendors. */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return false;

	return amd_filter_mce(m);
}

/* Handle unconfigured int18 (should never happen) */
/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
{
@@ -2425,8 +2437,8 @@ static int fake_panic_set(void *data, u64 val)
	return 0;
	return 0;
}
}


DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
			fake_panic_set, "%llu\n");
			 "%llu\n");


static int __init mcheck_debugfs_init(void)
static int __init mcheck_debugfs_init(void)
{
{
@@ -2435,8 +2447,8 @@ static int __init mcheck_debugfs_init(void)
	dmce = mce_get_debugfs_dir();
	dmce = mce_get_debugfs_dir();
	if (!dmce)
	if (!dmce)
		return -ENOMEM;
		return -ENOMEM;
	ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
	ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce,
					  &fake_panic_fops);
						 NULL, &fake_panic_fops);
	if (!ffake_panic)
	if (!ffake_panic)
		return -ENOMEM;
		return -ENOMEM;


@@ -2451,6 +2463,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key);


static int __init mcheck_late_init(void)
static int __init mcheck_late_init(void)
{
{
	pr_info("Using %d MCE banks\n", mca_cfg.banks);

	if (mca_cfg.recovery)
	if (mca_cfg.recovery)
		static_branch_inc(&mcsafe_key);
		static_branch_inc(&mcsafe_key);


+3 −0
Original line number Original line Diff line number Diff line
@@ -99,6 +99,9 @@ int mce_gen_pool_add(struct mce *mce)
{
{
	struct mce_evt_llist *node;
	struct mce_evt_llist *node;


	if (filter_mce(mce))
		return -EINVAL;

	if (!mce_evt_pool)
	if (!mce_evt_pool)
		return -EINVAL;
		return -EINVAL;


Loading