Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit caab36b5 authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge branch 'x86/mce2' into x86/core

parents a1413c89 73af76df
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -783,6 +783,11 @@ config X86_MCE_AMD
	   Additional support for AMD specific MCE features such as
	   the DRAM Error Threshold.

# Hidden helper symbol (it has no prompt): defaults to y whenever AMD or
# Intel MCE support is enabled.
config X86_MCE_THRESHOLD
	depends on X86_MCE_AMD || X86_MCE_INTEL
	bool
	default y

config X86_MCE_NONFATAL
	tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
	depends on X86_32 && X86_MCE
+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@
#define		APIC_ESR_SENDILL	0x00020
#define		APIC_ESR_RECVILL	0x00040
#define		APIC_ESR_ILLREGA	0x00080
#define 	APIC_LVTCMCI	0x2f0
#define	APIC_ICR	0x300
#define		APIC_DEST_SELF		0x40000
#define		APIC_DEST_ALLINC	0x80000
+32 −3
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@
 */

#define MCG_CTL_P	 (1UL<<8)   /* MCG_CTL register available */
#define MCG_EXT_P	 (1ULL<<9)   /* Extended registers available */
#define MCG_CMCI_P	 (1ULL<<10)  /* CMCI supported */

#define MCG_STATUS_RIPV  (1UL<<0)   /* restart ip valid */
#define MCG_STATUS_EIPV  (1UL<<1)   /* ip points to correct instruction */
@@ -90,14 +92,29 @@ extern int mce_disabled;

#include <asm/atomic.h>

/* Both helpers take a pointer to the struct mce record they operate on. */
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
/* Per-CPU struct sys_device instance named device_mce. */
DECLARE_PER_CPU(struct sys_device, device_mce);
/*
 * Function-pointer hook taking an action code and a CPU number.
 * NOTE(review): looks like a CPU-hotplug notification hook -- confirm
 * against the code that sets and calls it.
 */
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);

/*
 * Upper bound on the number of MCE banks: one less than MCE_EXTENDED_BANK.
 * Supporting more than 128 banks would first require working around the
 * predefined, Linux-defined extended banks.
 */
#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)

/*
 * Intel-specific MCE/CMCI entry points.  When CONFIG_X86_MCE_INTEL is not
 * set, each one collapses to an empty inline stub so that callers do not
 * need their own #ifdefs.
 */
#ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c);
void cmci_clear(void);
void cmci_reenable(void);
void cmci_rediscover(int dying);
void cmci_recheck(void);
#else /* !CONFIG_X86_MCE_INTEL */
static inline void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
}

static inline void cmci_clear(void)
{
}

static inline void cmci_reenable(void)
{
}

static inline void cmci_rediscover(int dying)
{
}

static inline void cmci_recheck(void)
{
}
#endif /* CONFIG_X86_MCE_INTEL */

#ifdef CONFIG_X86_MCE_AMD
@@ -106,11 +123,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif

void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
extern int mce_available(struct cpuinfo_x86 *c);

void mce_log_therm_throt_event(__u64 status);

extern atomic_t mce_entry;

extern void do_machine_check(struct pt_regs *, long);

/* Bitmap type sized for MAX_NR_BANKS bits. */
typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
/* Per-CPU mce_banks_t bitmap named mce_poll_banks. */
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);

/* Flag bits accepted by machine_check_poll(); they may be OR-ed together. */
enum mcp_flags {
	/* bit 0: log time stamp */
	MCP_TIMESTAMP	= 0x1,
	/* bit 1: log uncorrected errors */
	MCP_UC		= 0x2,
};
/* Takes a set of mcp_flags bits and a pointer to a bank bitmap. */
extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);

extern int mce_notify_user(void);

#endif /* !CONFIG_X86_32 */
@@ -120,8 +149,8 @@ extern void mcheck_init(struct cpuinfo_x86 *c);
#else
#define mcheck_init(c) do { } while (0)
#endif
extern void stop_mce(void);
extern void restart_mce(void);

/*
 * Function-pointer hook taking no arguments and returning nothing.
 * NOTE(review): presumably the handler invoked for the threshold
 * interrupt -- confirm against the code that installs and calls it.
 */
extern void (*mce_threshold_vector)(void);

#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
+5 −0
Original line number Diff line number Diff line
@@ -77,6 +77,11 @@
#define MSR_IA32_MC0_ADDR		0x00000402
#define MSR_IA32_MC0_MISC		0x00000403

/*
 * The CTL2 registers are consecutive, starting at MC0_CTL2, and are not
 * part of the normal four-register-per-bank MCE block.
 */
#define MSR_IA32_MC0_CTL2		0x00000280
#define CMCI_EN			(1ULL << 30)	/* bit 30 */
#define CMCI_THRESHOLD_MASK		0xffffULL	/* low 16 bits */

#define MSR_P6_PERFCTR0			0x000000c1
#define MSR_P6_PERFCTR1			0x000000c2
#define MSR_P6_EVNTSEL0			0x00000186
+11 −6
Original line number Diff line number Diff line
@@ -414,9 +414,17 @@ void __init alternative_instructions(void)
	   that might execute the to be patched code.
	   Other CPUs are not running. */
	stop_nmi();
#ifdef CONFIG_X86_MCE
	stop_mce();
#endif

	/*
	 * Don't stop machine check exceptions while patching.
	 * MCEs only happen when something got corrupted and in this
	 * case we must do something about the corruption.
	 * Ignoring it is worse than an unlikely patching race.
	 * Also machine checks tend to be broadcast and if one CPU
	 * goes into machine check the others follow quickly, so we don't
	 * expect a machine check to cause undue problems during code
	 * patching.
	 */

	apply_alternatives(__alt_instructions, __alt_instructions_end);

@@ -456,9 +464,6 @@ void __init alternative_instructions(void)
				(unsigned long)__smp_locks_end);

	restart_nmi();
#ifdef CONFIG_X86_MCE
	restart_mce();
#endif
}

/**
Loading