Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7687b80a authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/MCE update from Ingo Molnar:
 "Various MCE robustness enhancements.

  One of the changes adds CMCI (Corrected Machine Check Interrupt) poll
  mode on Intel Nehalem+ CPUs, which mode is automatically entered when
  the rate of messages is too high - and exited once the storm is over.

  An MCE events storm will roughly look like this:

   [ 5342.740616] mce: [Hardware Error]: Machine check events logged
   [ 5342.746501] mce: [Hardware Error]: Machine check events logged
   [ 5342.757971] CMCI storm detected: switching to poll mode
   [ 5372.674957] CMCI storm subsided: switching to interrupt mode

  This should make such events more survivable"

* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Provide boot argument to honour bios-set CMCI threshold
  x86, MCE: Remove unused defines
  x86, mce: Enable MCA support by default
  x86/mce: Add CMCI poll mode
  x86/mce: Make cmci_discover() quiet
  x86: mce: Remove the frozen cases in the hotplug code
  x86: mce: Split timer init
  x86: mce: Serialize mce injection
  x86: mce: Disable preemption when calling raise_local()
parents ac07f5c3 39ba5010
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -50,6 +50,13 @@ Machine check
		monarchtimeout:
		Sets the time in us to wait for other CPUs on machine checks. 0
		to disable.
   mce=bios_cmci_threshold
		Don't overwrite the bios-set CMCI threshold. This boot option
		prevents Linux from overwriting the CMCI threshold set by the
		bios. Without this option, Linux always sets the CMCI
		threshold to 1. Enabling this may make memory predictive failure
		analysis less effective if the bios sets thresholds for memory
		errors since we will not see details for all errors.

   nomce (for compatibility with i386): same as mce=off

+1 −0
Original line number Diff line number Diff line
@@ -874,6 +874,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS

config X86_MCE
	bool "Machine Check / overheating reporting"
	default y
	---help---
	  Machine Check support allows the processor to notify the
	  kernel if it detects a problem (e.g. overheating, data corruption).
+2 −11
Original line number Diff line number Diff line
@@ -116,19 +116,9 @@ struct mce_log {
/* Software defined banks */
#define MCE_EXTENDED_BANK	128
#define MCE_THERMAL_BANK	MCE_EXTENDED_BANK + 0

#define K8_MCE_THRESHOLD_BASE      (MCE_EXTENDED_BANK + 1)      /* MCE_AMD */
#define K8_MCE_THRESHOLD_BANK_0    (MCE_THRESHOLD_BASE + 0 * 9)
#define K8_MCE_THRESHOLD_BANK_1    (MCE_THRESHOLD_BASE + 1 * 9)
#define K8_MCE_THRESHOLD_BANK_2    (MCE_THRESHOLD_BASE + 2 * 9)
#define K8_MCE_THRESHOLD_BANK_3    (MCE_THRESHOLD_BASE + 3 * 9)
#define K8_MCE_THRESHOLD_BANK_4    (MCE_THRESHOLD_BASE + 4 * 9)
#define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
#define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)

#define K8_MCE_THRESHOLD_BASE      (MCE_EXTENDED_BANK + 1)

#ifdef __KERNEL__

extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);

@@ -171,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device);
#ifdef CONFIG_X86_MCE_INTEL
extern int mce_cmci_disabled;
extern int mce_ignore_ce;
extern int mce_bios_cmci_threshold;
void mce_intel_feature_init(struct cpuinfo_x86 *c);
void cmci_clear(void);
void cmci_reenable(void);
+8 −0
Original line number Diff line number Diff line
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
}

static cpumask_var_t mce_inject_cpumask;
static DEFINE_MUTEX(mce_inject_mutex);

static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
@@ -194,7 +195,11 @@ static void raise_mce(struct mce *m)
		put_online_cpus();
	} else
#endif
	{
		preempt_disable();
		raise_local();
		preempt_enable();
	}
}

/* Error injection interface */
@@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
	 * so do it a jiffie or two later everywhere.
	 */
	schedule_timeout(2);

	mutex_lock(&mce_inject_mutex);
	raise_mce(&m);
	mutex_unlock(&mce_inject_mutex);
	return usize;
}

+12 −0
Original line number Diff line number Diff line
@@ -28,6 +28,18 @@ extern int mce_ser;

extern struct mce_bank *mce_banks;

#ifdef CONFIG_X86_MCE_INTEL
unsigned long mce_intel_adjust_timer(unsigned long interval);
void mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
#else
# define mce_intel_adjust_timer mce_adjust_timer_default
static inline void mce_intel_cmci_poll(void) { }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
#endif

void mce_timer_kick(unsigned long interval);

#ifdef CONFIG_ACPI_APEI
int apei_write_mce(struct mce *m);
ssize_t apei_read_mce(struct mce *m, u64 *record_id);
Loading