Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d88bfe1d authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar:
 "Various RAS updates:

   - AMD MCE support updates for future CPUs, fixes and 'SMCA' (Scalable
     MCA) error decoding support (Aravind Gopalakrishnan)

   - x86 memcpy_mcsafe() support, to enable smart(er) hardware error
     recovery in NVDIMM drivers, based on an extension of the x86
     exception handling code.  (Tony Luck)"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  EDAC/sb_edac: Fix computation of channel address
  x86/mm, x86/mce: Add memcpy_mcsafe()
  x86/mce/AMD: Document some functionality
  x86/mce: Clarify comments regarding deferred error
  x86/mce/AMD: Fix logic to obtain block address
  x86/mce/AMD, EDAC: Enable error decoding of Scalable MCA errors
  x86/mce: Move MCx_CONFIG MSR definitions
  x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries
  x86/mm: Expand the exception table logic to allow new handling options
  x86/mce/AMD: Set MCAX Enable bit
  x86/mce/AMD: Carve out threshold block preparation
  x86/mce/AMD: Fix LVT offset configuration for thresholding
  x86/mce/AMD: Reduce number of blocks scanned per bank
  x86/mce/AMD: Do not perform shared bank check for future processors
  x86/mce: Fix order of AMD MCE init function call
parents e71c2c1e eb1af3b7
Loading
Loading
Loading
Loading
+35 −0
Original line number Diff line number Diff line
@@ -290,3 +290,38 @@ Due to the way that the exception table is built and needs to be ordered,
only use exceptions for code in the .text section.  Any other section
will cause the exception table to not be sorted correctly, and the
exceptions will fail.

Things changed when 64-bit support was added to x86 Linux. Rather than
double the size of the exception table by expanding the two entries
from 32 bits to 64 bits, a clever trick was used to store addresses
as relative offsets from the table itself. The assembly code changed
from:
	.long 1b,3b
to:
        .long (from) - .
        .long (to) - .

and the C-code that uses these values converts back to absolute addresses
like this:

	ex_insn_addr(const struct exception_table_entry *x)
	{
		return (unsigned long)&x->insn + x->insn;
	}

In v4.6 the exception table entry was expanded with a new field "handler".
This is also 32 bits wide and contains a third relative function
pointer which points to one of:

1) int ex_handler_default(const struct exception_table_entry *fixup)
   This is the legacy case that just jumps to the fixup code.
2) int ex_handler_fault(const struct exception_table_entry *fixup)
   This case provides the fault number of the trap that occurred at
   entry->insn. It is used to distinguish page faults from machine
   checks.
3) int ex_handler_ext(const struct exception_table_entry *fixup)
   This case is used for uaccess_err ... we need to set a flag
   in the task structure. Before the handler functions existed this
   case was handled by adding a large offset to the fixup to tag
   it as special.
More functions can easily be added.
+17 −9
Original line number Diff line number Diff line
@@ -27,15 +27,23 @@ struct amd_l3_cache {
};

struct threshold_block {
	unsigned int		block;
	unsigned int		bank;
	unsigned int		cpu;
	u32			address;
	u16			interrupt_enable;
	bool			interrupt_capable;
	u16			threshold_limit;
	struct kobject		kobj;
	struct list_head	miscj;
	unsigned int	 block;			/* Number within bank */
	unsigned int	 bank;			/* MCA bank the block belongs to */
	unsigned int	 cpu;			/* CPU which controls MCA bank */
	u32		 address;		/* MSR address for the block */
	u16		 interrupt_enable;	/* Enable/Disable APIC interrupt */
	bool		 interrupt_capable;	/* Bank can generate an interrupt. */

	u16		 threshold_limit;	/*
						 * Value upon which threshold
						 * interrupt is generated.
						 */

	struct kobject	 kobj;			/* sysfs object */
	struct list_head miscj;			/*
						 * List of threshold blocks
						 * within a bank.
						 */
};

struct threshold_bank {
+24 −16
Original line number Diff line number Diff line
@@ -44,19 +44,22 @@

/* Exception table entry */
#ifdef __ASSEMBLY__
# define _ASM_EXTABLE(from,to)					\
# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
	.pushsection "__ex_table","a" ;				\
	.balign 8 ;						\
	.balign 4 ;						\
	.long (from) - . ;					\
	.long (to) - . ;					\
	.long (handler) - . ;					\
	.popsection

# define _ASM_EXTABLE(from, to)					\
	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)

# define _ASM_EXTABLE_FAULT(from, to)				\
	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)

# define _ASM_EXTABLE_EX(from, to)				\
	.pushsection "__ex_table","a" ;				\
	.balign 8 ;						\
	.long (from) - . ;					\
	.long (to) - . + 0x7ffffff0 ;				\
	.popsection
	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)

# define _ASM_NOKPROBE(entry)					\
	.pushsection "_kprobe_blacklist","aw" ;			\
@@ -89,19 +92,24 @@
	.endm

#else
# define _ASM_EXTABLE(from,to)					\
# define _EXPAND_EXTABLE_HANDLE(x) #x
# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
	" .pushsection \"__ex_table\",\"a\"\n"			\
	" .balign 8\n"						\
	" .balign 4\n"						\
	" .long (" #from ") - .\n"				\
	" .long (" #to ") - .\n"				\
	" .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n"	\
	" .popsection\n"

# define _ASM_EXTABLE(from, to)					\
	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)

# define _ASM_EXTABLE_FAULT(from, to)				\
	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)

# define _ASM_EXTABLE_EX(from, to)				\
	" .pushsection \"__ex_table\",\"a\"\n"			\
	" .balign 8\n"						\
	" .long (" #from ") - .\n"				\
	" .long (" #to ") - . + 0x7ffffff0\n"			\
	" .popsection\n"
	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)

/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif

+68 −1
Original line number Diff line number Diff line
@@ -40,8 +40,20 @@
#define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */

/* AMD-specific bits */
#define MCI_STATUS_DEFERRED	(1ULL<<44)  /* declare an uncorrected error */
#define MCI_STATUS_DEFERRED	(1ULL<<44)  /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON	(1ULL<<43)  /* access poisonous data */
#define MCI_STATUS_TCC		(1ULL<<55)  /* Task context corrupt */

/*
 * McaX field if set indicates a given bank supports MCA extensions:
 *  - Deferred error interrupt type is specifiable by bank.
 *  - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers,
 *    but should not be used to determine MSR numbers.
 *  - TCC bit is present in MCx_STATUS.
 */
#define MCI_CONFIG_MCAX		0x1
#define MCI_IPID_MCATYPE	0xFFFF0000
#define MCI_IPID_HWID		0xFFF

/*
 * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
@@ -91,6 +103,16 @@
#define MCE_LOG_LEN 32
#define MCE_LOG_SIGNATURE	"MACHINECHECK"

/*
 * AMD Scalable MCA
 *
 * The MC0_* values are the base MSR numbers. Each MCx_*(x) helper yields
 * the corresponding base plus 0x10 * x; MCx_MISCy(x, y) additionally adds
 * y to the MISC1 base.
 *
 * Every macro argument is parenthesized in the expansion so that callers
 * may pass arbitrary expressions (e.g. "a | b") without precedence
 * surprises.
 */
#define MSR_AMD64_SMCA_MC0_MISC0	0xc0002003
#define MSR_AMD64_SMCA_MC0_CONFIG	0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID		0xc0002005
#define MSR_AMD64_SMCA_MC0_MISC1	0xc000200a
#define MSR_AMD64_SMCA_MCx_MISC(x)	(MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_CONFIG(x)	(MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x)	(MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y)	((MSR_AMD64_SMCA_MC0_MISC1 + (y)) + (0x10*(x)))

/*
 * This structure contains all data related to the MCE log.  Also
 * carries a signature to make it easier to find from external
@@ -287,4 +309,49 @@ struct cper_sec_mem_err;
extern void apei_mce_report_mem_error(int corrected,
				      struct cper_sec_mem_err *mem_err);

/*
 * Enumerate new IP types and HWID values in AMD processors which support
 * Scalable MCA.
 */
#ifdef CONFIG_X86_MCE_AMD
enum amd_ip_types {
	/* Core errors */
	SMCA_F17H_CORE = 0,
	/* Data Fabric */
	SMCA_DF,
	/* Unified Memory Controller */
	SMCA_UMC,
	/* Parameter Block */
	SMCA_PB,
	/* Platform Security Processor */
	SMCA_PSP,
	/* System Management Unit */
	SMCA_SMU,
	/* Count of the IP types listed above; must stay last */
	N_AMD_IP_TYPES
};

/* Associates a name string with a hardware ID (HWID) value. */
struct amd_hwid {
	const char	*name;	/* name string for this entry */
	unsigned int	 hwid;	/* HWID value for this entry */
};

extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];

enum amd_core_mca_blocks {
	/* Load Store */
	SMCA_LS = 0,
	/* Instruction Fetch */
	SMCA_IF,
	/* L2 cache */
	SMCA_L2_CACHE,
	/* Decoder unit */
	SMCA_DE,
	/* Reserved */
	RES,
	/* Execution unit */
	SMCA_EX,
	/* Floating Point */
	SMCA_FP,
	/* L3 cache */
	SMCA_L3_CACHE,
	/* Count of the block types listed above; must stay last */
	N_CORE_MCA_BLOCKS
};

extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];

enum amd_df_mca_blocks {
	/* Coherent Slave */
	SMCA_CS = 0,
	/* Power management, Interrupts, etc */
	SMCA_PIE,
	/* Count of the block types listed above; must stay last */
	N_DF_BLOCKS
};

extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
#endif

#endif /* _ASM_X86_MCE_H */
+13 −0
Original line number Diff line number Diff line
@@ -78,6 +78,19 @@ int strcmp(const char *cs, const char *ct);
#define memset(s, c, n) __memset(s, c, n)
#endif

/**
 * memcpy_mcsafe - copy memory with indication if a machine check happened
 *
 * @dst:	destination address
 * @src:	source address
 * @cnt:	number of bytes to copy
 *
 * Low-level memory copy function that catches machine checks.
 *
 * Return: true for success, false for failure.
 */
bool memcpy_mcsafe(void *dst, const void *src, size_t cnt);

#endif /* __KERNEL__ */

#endif /* _ASM_X86_STRING_64_H */
Loading