
Commit e606d81d authored by Linus Torvalds

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar:
 "The main changes were:

   - Lots of enhancements for AMD SMCA (Scalable MCA
     features/extensions) systems: extract, decode and print more
     hardware error information and add matching support on the
     injection/testing side as well. (Yazen Ghannam)

   - Various MCE handling improvements on modern Intel Xeons. (Tony
     Luck)

   - Plus misc fixes and enhancements"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86/RAS/mce_amd_inj: Remove debugfs dir recursively on exit
  x86/RAS/mce_amd_inj: Fix signed wrap around when decrementing index 'i'
  x86/RAS/mce_amd_inj: Fix some W= warnings
  x86/MCE/AMD, EDAC: Handle reserved bank 4 on Fam17h properly
  x86/mce/AMD: Extract the error address on SMCA systems
  x86/mce, EDAC/mce_amd: Print MCA_SYND and MCA_IPID during MCE on SMCA systems
  x86/mce/AMD: Save MCA_IPID in MCE struct on SMCA systems
  x86/mce/AMD: Ensure the deferred error interrupt is of type APIC on SMCA systems
  x86/mce/AMD: Update sysfs bank names for SMCA systems
  x86/mce/AMD, EDAC/mce_amd: Define and use tables for known SMCA IP types
  EDAC/mce_amd: Use SMCA prefix for error descriptions arrays
  EDAC/mce_amd: Add missing SMCA error descriptions
  x86/mce/AMD: Read MSRs on the CPU allocating the threshold blocks
  x86/RAS: Add syndrome support to mce_amd_inj
  EDAC/mce_amd: Print syndrome register value on SMCA systems
  x86/mce: Add support for new MCA_SYND register
  x86/mce/AMD: Use msr_ops.misc() in allocate_threshold_blocks()
  x86/mce: Drop X86_FEATURE_MCE_RECOVERY and the related model string test
  x86/mce: Improve memcpy_mcsafe()
  x86/mce: Add PCI quirks to identify Xeons with machine check recovery
  ...
parents 12b7bcb4 b199ac6c
arch/x86/include/asm/cpufeatures.h +0 −1
@@ -106,7 +106,6 @@
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
 #define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
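
Why the bit could go: per the "Add PCI quirks to identify Xeons with machine check recovery" commit in the shortlog above, recovery capability is now signaled at runtime through the mcsafe_key static key rather than a synthetic CPUID feature bit. A minimal sketch of such a quirk follows; the device ID, config-space offset, and capability bit are assumptions for illustration, not values taken from this merge:

#include <linux/pci.h>
#include <linux/jump_label.h>

/* Sketch only: enable mcsafe_key when the platform reports MCE
 * recovery. The device ID (0xabcd), register offset (0x84) and
 * capability bit (0x10) below are illustrative placeholders. */
static void quirk_xeon_ras_cap(struct pci_dev *pdev)
{
	u32 capid;

	pci_read_config_dword(pdev, 0x84, &capid);	/* assumed offset */
	if (capid & 0x10)				/* assumed RAS-capable bit */
		static_branch_inc(&mcsafe_key);
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xabcd, quirk_xeon_ras_cap);

With the key enabled, static_branch_unlikely(&mcsafe_key) in the reworked memcpy_mcsafe() inline (further down in this diff) selects the recovery-aware copy at runtime, with no overhead when the key is off.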
arch/x86/include/asm/mce.h +36 −30
@@ -40,9 +40,10 @@
 #define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
 
 /* AMD-specific bits */
+#define MCI_STATUS_TCC		(1ULL<<55)  /* Task context corrupt */
+#define MCI_STATUS_SYNDV	(1ULL<<53)  /* synd reg. valid */
 #define MCI_STATUS_DEFERRED	(1ULL<<44)  /* uncorrected error, deferred exception */
 #define MCI_STATUS_POISON	(1ULL<<43)  /* access poisonous data */
-#define MCI_STATUS_TCC		(1ULL<<55)  /* Task context corrupt */
 
 /*
  * McaX field if set indicates a given bank supports MCA extensions:
@@ -110,6 +111,7 @@
 #define MSR_AMD64_SMCA_MC0_MISC0	0xc0002003
 #define MSR_AMD64_SMCA_MC0_CONFIG	0xc0002004
 #define MSR_AMD64_SMCA_MC0_IPID		0xc0002005
+#define MSR_AMD64_SMCA_MC0_SYND		0xc0002006
 #define MSR_AMD64_SMCA_MC0_DESTAT	0xc0002008
 #define MSR_AMD64_SMCA_MC0_DEADDR	0xc0002009
 #define MSR_AMD64_SMCA_MC0_MISC1	0xc000200a
@@ -119,6 +121,7 @@
 #define MSR_AMD64_SMCA_MCx_MISC(x)	(MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_CONFIG(x)	(MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_IPID(x)	(MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND(x)	(MSR_AMD64_SMCA_MC0_SYND + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_DESTAT(x)	(MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_DEADDR(x)	(MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISCy(x, y)	((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
@@ -334,44 +337,47 @@ extern void apei_mce_report_mem_error(int corrected,
  * Scalable MCA.
  */
 #ifdef CONFIG_X86_MCE_AMD
-enum amd_ip_types {
-	SMCA_F17H_CORE = 0,	/* Core errors */
-	SMCA_DF,		/* Data Fabric */
+
+/* These may be used by multiple smca_hwid_mcatypes */
+enum smca_bank_types {
+	SMCA_LS = 0,	/* Load Store */
+	SMCA_IF,	/* Instruction Fetch */
+	SMCA_L2_CACHE,	/* L2 Cache */
+	SMCA_DE,	/* Decoder Unit */
+	SMCA_EX,	/* Execution Unit */
+	SMCA_FP,	/* Floating Point */
+	SMCA_L3_CACHE,	/* L3 Cache */
+	SMCA_CS,	/* Coherent Slave */
+	SMCA_PIE,	/* Power, Interrupts, etc. */
 	SMCA_UMC,	/* Unified Memory Controller */
 	SMCA_PB,	/* Parameter Block */
 	SMCA_PSP,	/* Platform Security Processor */
 	SMCA_SMU,	/* System Management Unit */
-	N_AMD_IP_TYPES
+	N_SMCA_BANK_TYPES
 };
 
-struct amd_hwid {
-	const char *name;
-	unsigned int hwid;
+struct smca_bank_name {
+	const char *name;	/* Short name for sysfs */
+	const char *long_name;	/* Long name for pretty-printing */
 };
 
-extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
+extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
 
-enum amd_core_mca_blocks {
-	SMCA_LS = 0,	/* Load Store */
-	SMCA_IF,	/* Instruction Fetch */
-	SMCA_L2_CACHE,	/* L2 cache */
-	SMCA_DE,	/* Decoder unit */
-	RES,		/* Reserved */
-	SMCA_EX,	/* Execution unit */
-	SMCA_FP,	/* Floating Point */
-	SMCA_L3_CACHE,	/* L3 cache */
-	N_CORE_MCA_BLOCKS
-};
+#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
 
-extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
+struct smca_hwid_mcatype {
+	unsigned int bank_type;	/* Use with smca_bank_types for easy indexing. */
+	u32 hwid_mcatype;	/* (hwid,mcatype) tuple */
+	u32 xec_bitmap;		/* Bitmap of valid ExtErrorCodes; current max is 21. */
+};
 
-enum amd_df_mca_blocks {
-	SMCA_CS = 0,	/* Coherent Slave */
-	SMCA_PIE,	/* Power management, Interrupts, etc */
-	N_DF_BLOCKS
+struct smca_bank_info {
+	struct smca_hwid_mcatype *type;
+	u32 type_instance;
 };
 
-extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
 
 #endif
 
 #endif /* _ASM_X86_MCE_H */
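
How the new types are meant to fit together: a bank's MCA_IPID register supplies a hardware ID and MCA type, which HWID_MCATYPE() packs into the hwid_mcatype key, so a single equality check compares both at once. A hedged sketch of a table and lookup; the numeric HWID/McaType/xec values are illustrative placeholders, and the real table lives in the AMD MCE code, not in this header:

#include <linux/kernel.h>	/* ARRAY_SIZE() */

/* Example entries; the numeric values are placeholders. */
static struct smca_hwid_mcatype example_types[] = {
	/* { bank_type, hwid_mcatype,            xec_bitmap } */
	{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
	{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
};

/* Resolve a bank's packed (hwid, mcatype) key to a known IP type. */
static struct smca_hwid_mcatype *smca_find_type(u32 hwid_mcatype)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(example_types); i++)
		if (example_types[i].hwid_mcatype == hwid_mcatype)
			return &example_types[i];

	return NULL;
}

The flat enum plus table replaces the old per-IP enums: several banks can share one smca_bank_types entry, and xec_bitmap records which extended error codes are valid for that IP.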
arch/x86/include/asm/pmem.h +1 −4
@@ -46,10 +46,7 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 
 static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 {
-	if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
-		return memcpy_mcsafe(dst, src, n);
-	memcpy(dst, src, n);
-	return 0;
+	return memcpy_mcsafe(dst, src, n);
 }
 
 /**
arch/x86/include/asm/string_64.h +18 −1
@@ -2,6 +2,7 @@
 #define _ASM_X86_STRING_64_H
 
 #ifdef __KERNEL__
+#include <linux/jump_label.h>
 
 /* Written 2002 by Andi Kleen */
 
@@ -78,6 +79,9 @@ int strcmp(const char *cs, const char *ct);
 #define memset(s, c, n) __memset(s, c, n)
 #endif
 
+__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
+DECLARE_STATIC_KEY_FALSE(mcsafe_key);
+
 /**
  * memcpy_mcsafe - copy memory with indication if a machine check happened
  *
@@ -86,10 +90,23 @@ int strcmp(const char *cs, const char *ct);
  * @cnt:	number of bytes to copy
  *
  * Low level memory copy function that catches machine checks
+ * We only call into the "safe" function on systems that can
+ * actually do machine check recovery. Everyone else can just
+ * use memcpy().
  *
  * Return 0 for success, -EFAULT for fail
  */
-int memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+static __always_inline __must_check int
+memcpy_mcsafe(void *dst, const void *src, size_t cnt)
+{
+#ifdef CONFIG_X86_MCE
+	if (static_branch_unlikely(&mcsafe_key))
+		return memcpy_mcsafe_unrolled(dst, src, cnt);
+	else
+#endif
+		memcpy(dst, src, cnt);
+	return 0;
+}
 
 #endif /* __KERNEL__ */

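Caller's view of the reworked memcpy_mcsafe(): because the inline degrades to plain memcpy() and returns 0 when mcsafe_key is off, callers such as arch_memcpy_from_pmem() above can invoke it unconditionally. A short usage sketch; read_pmem_block() is a hypothetical caller, not part of this merge:

/* Hypothetical caller: copy from possibly-poisoned memory and report
 * failure instead of crashing on a consumed machine check. */
static int read_pmem_block(void *dst, const void *src, size_t len)
{
	if (memcpy_mcsafe(dst, src, len))
		return -EIO;	/* copy hit poisoned data (-EFAULT) */

	return 0;	/* success; non-recovery systems always land here */
}
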
arch/x86/include/uapi/asm/mce.h +2 −0
@@ -26,6 +26,8 @@ struct mce {
 	__u32 socketid;	/* CPU socket ID */
 	__u32 apicid;	/* CPU initial apic ID */
 	__u64 mcgcap;	/* MCGCAP MSR: machine check capabilities of CPU */
+	__u64 synd;	/* MCA_SYND MSR: only valid on SMCA systems */
+	__u64 ipid;	/* MCA_IPID MSR: only valid on SMCA systems */
 };
 
 #define MCE_GET_RECORD_LEN   _IOR('M', 1, int)
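
The two new fields grow the record handed to userspace, which is why consumers should size reads via the MCE_GET_RECORD_LEN ioctl shown above rather than hard-coding sizeof(struct mce). A hedged userspace sketch, assuming the traditional /dev/mcelog character device:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/mce.h>	/* struct mce, MCE_GET_RECORD_LEN */

int main(void)
{
	int fd = open("/dev/mcelog", O_RDONLY);
	int len = 0;

	if (fd < 0)
		return 1;

	/* Ask the kernel how large its struct mce records are, so a
	 * tool built against older headers still reads whole records. */
	if (ioctl(fd, MCE_GET_RECORD_LEN, &len) == 0)
		printf("kernel mce record length: %d bytes\n", len);

	close(fd);
	return 0;
}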