Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d5fc1d51 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  amd64_edac: Minor formatting fix
  amd64_edac: Fix operator precendence error
  edac, mc: Improve scrub rate handling
  amd64_edac: Correct scrub rate setting
  amd64_edac: Fix DCT base address selector
  amd64_edac: Remove polling mechanism
  x86, mce: Notify about corrected events too
  amd64_edac: Remove unneeded defines
  edac: Remove EDAC_DEBUG_VERBOSE
  amd64_edac: Sanitize syndrome extraction
parents 694f690d c4799c75
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -600,6 +600,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
		 */
		if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
			mce_log(&m);
			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
			add_taint(TAINT_MACHINE_CHECK);
		}

+0 −8
Original line number Diff line number Diff line
@@ -39,14 +39,6 @@ config EDAC_DEBUG
	  there're four debug levels (x=0,1,2,3 from low to high).
	  Usually you should select 'N'.

config EDAC_DEBUG_VERBOSE
	bool "More verbose debugging"
	depends on EDAC_DEBUG
	help
	  This option makes debugging information more verbose.
	  Source file name and line number where debugging message
	  printed will be added to debugging message.

 config EDAC_DECODE_MCE
	tristate "Decode MCEs in human-readable form (only on AMD for now)"
	depends on CPU_SUP_AMD && X86_MCE
+56 −157
Original line number Diff line number Diff line
@@ -160,7 +160,7 @@ static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw,
	return 0;
}

static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 *bandwidth)
static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u32 min_scrubrate = 0x0;
@@ -178,9 +178,9 @@ static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 *bandwidth)

	default:
		amd64_printk(KERN_ERR, "Unsupported family!\n");
		break;
		return -EINVAL;
	}
	return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, *bandwidth,
	return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, bandwidth,
					   min_scrubrate);
}

@@ -796,6 +796,11 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)

static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);

static u16 extract_syndrome(struct err_regs *err)
{
	return ((err->nbsh >> 15) & 0xff) | ((err->nbsl >> 16) & 0xff00);
}

static void amd64_cpu_display_info(struct amd64_pvt *pvt)
{
	if (boot_cpu_data.x86 == 0x11)
@@ -888,6 +893,9 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt)
		return;
	}

	amd64_printk(KERN_INFO, "using %s syndromes.\n",
		     ((pvt->syn_type == 8) ? "x8" : "x4"));

	/* Only if NOT ganged does dclr1 have valid info */
	if (!dct_ganging_enabled(pvt))
		amd64_dump_dramcfg_low(pvt->dclr1, 1);
@@ -1101,20 +1109,17 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
}

static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
					struct err_regs *info,
					u64 sys_addr)
				    struct err_regs *err_info, u64 sys_addr)
{
	struct mem_ctl_info *src_mci;
	unsigned short syndrome;
	int channel, csrow;
	u32 page, offset;
	u16 syndrome;

	/* Extract the syndrome parts and form a 16-bit syndrome */
	syndrome  = HIGH_SYNDROME(info->nbsl) << 8;
	syndrome |= LOW_SYNDROME(info->nbsh);
	syndrome = extract_syndrome(err_info);

	/* CHIPKILL enabled */
	if (info->nbcfg & K8_NBCFG_CHIPKILL) {
	if (err_info->nbcfg & K8_NBCFG_CHIPKILL) {
		channel = get_channel_from_ecc_syndrome(mci, syndrome);
		if (channel < 0) {
			/*
@@ -1123,8 +1128,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
			 * as suspect.
			 */
			amd64_mc_printk(mci, KERN_WARNING,
				       "unknown syndrome 0x%x - possible error "
				       "reporting race\n", syndrome);
					"unknown syndrome 0x%04x - possible "
					"error reporting race\n", syndrome);
			edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
			return;
		}
@@ -1430,7 +1435,7 @@ static inline u64 f10_get_base_addr_offset(u64 sys_addr, int hi_range_sel,
	u64 chan_off;

	if (hi_range_sel) {
		if (!(dct_sel_base_addr & 0xFFFFF800) &&
		if (!(dct_sel_base_addr & 0xFFFF0000) &&
		   hole_valid && (sys_addr >= 0x100000000ULL))
			chan_off = hole_off << 16;
		else
@@ -1654,13 +1659,13 @@ static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
 * (MCX_ADDR).
 */
static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
				     struct err_regs *info,
				     struct err_regs *err_info,
				     u64 sys_addr)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u32 page, offset;
	unsigned short syndrome;
	int nid, csrow, chan = 0;
	u16 syndrome;

	csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);

@@ -1671,15 +1676,14 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci,

	error_address_to_page_and_offset(sys_addr, &page, &offset);

	syndrome  = HIGH_SYNDROME(info->nbsl) << 8;
	syndrome |= LOW_SYNDROME(info->nbsh);
	syndrome = extract_syndrome(err_info);

	/*
	 * We need the syndromes for channel detection only when we're
	 * ganged. Otherwise @chan should already contain the channel at
	 * this point.
	 */
	if (dct_ganging_enabled(pvt) && pvt->nbcfg & K8_NBCFG_CHIPKILL)
	if (dct_ganging_enabled(pvt) && (pvt->nbcfg & K8_NBCFG_CHIPKILL))
		chan = get_channel_from_ecc_syndrome(mci, syndrome);

	if (chan >= 0)
@@ -1955,124 +1959,23 @@ static int map_err_sym_to_channel(int err_sym, int sym_size)
static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
{
	struct amd64_pvt *pvt = mci->pvt_info;
	u32 value = 0;
	int err_sym = 0;

	if (boot_cpu_data.x86 == 0x10) {
	int err_sym = -1;

		amd64_read_pci_cfg(pvt->misc_f3_ctl, 0x180, &value);

		/* F3x180[EccSymbolSize]=1 => x8 symbols */
		if (boot_cpu_data.x86_model > 7 &&
		    value & BIT(25)) {
	if (pvt->syn_type == 8)
		err_sym = decode_syndrome(syndrome, x8_vectors,
						  ARRAY_SIZE(x8_vectors), 8);
			return map_err_sym_to_channel(err_sym, 8);
		}
	}
	err_sym = decode_syndrome(syndrome, x4_vectors, ARRAY_SIZE(x4_vectors), 4);
	return map_err_sym_to_channel(err_sym, 4);
					  ARRAY_SIZE(x8_vectors),
					  pvt->syn_type);
	else if (pvt->syn_type == 4)
		err_sym = decode_syndrome(syndrome, x4_vectors,
					  ARRAY_SIZE(x4_vectors),
					  pvt->syn_type);
	else {
		amd64_printk(KERN_WARNING, "%s: Illegal syndrome type: %u\n",
					   __func__, pvt->syn_type);
		return err_sym;
	}

/*
 * Check for valid error in the NB Status High register. If so, proceed to read
 * NB Status Low, NB Address Low and NB Address High registers and store data
 * into error structure.
 *
 * Returns:
 *	- 1: if hardware regs contains valid error info
 *	- 0: if no valid error is indicated
 */
static int amd64_get_error_info_regs(struct mem_ctl_info *mci,
				     struct err_regs *regs)
{
	struct amd64_pvt *pvt;
	struct pci_dev *misc_f3_ctl;

	pvt = mci->pvt_info;
	misc_f3_ctl = pvt->misc_f3_ctl;

	if (amd64_read_pci_cfg(misc_f3_ctl, K8_NBSH, &regs->nbsh))
		return 0;

	if (!(regs->nbsh & K8_NBSH_VALID_BIT))
		return 0;

	/* valid error, read remaining error information registers */
	if (amd64_read_pci_cfg(misc_f3_ctl, K8_NBSL, &regs->nbsl) ||
	    amd64_read_pci_cfg(misc_f3_ctl, K8_NBEAL, &regs->nbeal) ||
	    amd64_read_pci_cfg(misc_f3_ctl, K8_NBEAH, &regs->nbeah) ||
	    amd64_read_pci_cfg(misc_f3_ctl, K8_NBCFG, &regs->nbcfg))
		return 0;

	return 1;
}

/*
 * This function is called to retrieve the error data from hardware and store it
 * in the info structure.
 *
 * Returns:
 *	- 1: if a valid error is found
 *	- 0: if no error is found
 */
static int amd64_get_error_info(struct mem_ctl_info *mci,
				struct err_regs *info)
{
	struct amd64_pvt *pvt;
	struct err_regs regs;

	pvt = mci->pvt_info;

	if (!amd64_get_error_info_regs(mci, info))
		return 0;

	/*
	 * Here's the problem with the K8's EDAC reporting: There are four
	 * registers which report pieces of error information. They are shared
	 * between CEs and UEs. Furthermore, contrary to what is stated in the
	 * BKDG, the overflow bit is never used! Every error always updates the
	 * reporting registers.
	 *
	 * Can you see the race condition? All four error reporting registers
	 * must be read before a new error updates them! There is no way to read
	 * all four registers atomically. The best than can be done is to detect
	 * that a race has occured and then report the error without any kind of
	 * precision.
	 *
	 * What is still positive is that errors are still reported and thus
	 * problems can still be detected - just not localized because the
	 * syndrome and address are spread out across registers.
	 *
	 * Grrrrr!!!!!  Here's hoping that AMD fixes this in some future K8 rev.
	 * UEs and CEs should have separate register sets with proper overflow
	 * bits that are used! At very least the problem can be fixed by
	 * honoring the ErrValid bit in 'nbsh' and not updating registers - just
	 * set the overflow bit - unless the current error is CE and the new
	 * error is UE which would be the only situation for overwriting the
	 * current values.
	 */

	regs = *info;

	/* Use info from the second read - most current */
	if (unlikely(!amd64_get_error_info_regs(mci, info)))
		return 0;

	/* clear the error bits in hardware */
	pci_write_bits32(pvt->misc_f3_ctl, K8_NBSH, 0, K8_NBSH_VALID_BIT);

	/* Check for the possible race condition */
	if ((regs.nbsh != info->nbsh) ||
	     (regs.nbsl != info->nbsl) ||
	     (regs.nbeah != info->nbeah) ||
	     (regs.nbeal != info->nbeal)) {
		amd64_mc_printk(mci, KERN_WARNING,
				"hardware STATUS read access race condition "
				"detected!\n");
		return 0;
	}
	return 1;
	return map_err_sym_to_channel(err_sym, pvt->syn_type);
}

/*
@@ -2198,20 +2101,6 @@ void amd64_decode_bus_error(int node_id, struct err_regs *regs)

}

/*
 * The main polling 'check' function, called FROM the edac core to perform the
 * error checking and if an error is encountered, error processing.
 */
static void amd64_check(struct mem_ctl_info *mci)
{
	struct err_regs regs;

	if (amd64_get_error_info(mci, &regs)) {
		struct amd64_pvt *pvt = mci->pvt_info;
		amd_decode_nb_mce(pvt->mc_node_id, &regs, 1);
	}
}

/*
 * Input:
 *	1) struct amd64_pvt which contains pvt->dram_f2_ctl pointer
@@ -2284,6 +2173,7 @@ static void amd64_free_mc_sibling_devices(struct amd64_pvt *pvt)
static void amd64_read_mc_registers(struct amd64_pvt *pvt)
{
	u64 msr_val;
	u32 tmp;
	int dram;

	/*
@@ -2349,10 +2239,22 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt)
	amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
	amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0);

	if (!dct_ganging_enabled(pvt) && boot_cpu_data.x86 >= 0x10) {
	if (boot_cpu_data.x86 >= 0x10) {
		if (!dct_ganging_enabled(pvt)) {
			amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1);
			amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_1, &pvt->dchr1);
		}
		amd64_read_pci_cfg(pvt->misc_f3_ctl, EXT_NB_MCA_CFG, &tmp);
	}

	if (boot_cpu_data.x86 == 0x10 &&
	    boot_cpu_data.x86_model > 7 &&
	    /* F3x180[EccSymbolSize]=1 => x8 symbols */
	    tmp & BIT(25))
		pvt->syn_type = 8;
	else
		pvt->syn_type = 4;

	amd64_dump_misc_regs(pvt);
}

@@ -2739,9 +2641,6 @@ static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
	mci->dev_name		= pci_name(pvt->dram_f2_ctl);
	mci->ctl_page_to_phys	= NULL;

	/* IMPORTANT: Set the polling 'check' function in this module */
	mci->edac_check		= amd64_check;

	/* memory scrubber interface */
	mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
	mci->get_sdram_scrub_rate = amd64_get_scrub_rate;
+13 −35
Original line number Diff line number Diff line
@@ -244,44 +244,17 @@


#define F10_DCTL_SEL_LOW		0x110

#define dct_sel_baseaddr(pvt)    \
	((pvt->dram_ctl_select_low) & 0xFFFFF800)

#define dct_sel_interleave_addr(pvt)    \
	(((pvt->dram_ctl_select_low) >> 6) & 0x3)

enum {
	F10_DCTL_SEL_LOW_DctSelHiRngEn	= BIT(0),
	F10_DCTL_SEL_LOW_DctSelIntLvEn	= BIT(2),
	F10_DCTL_SEL_LOW_DctGangEn	= BIT(4),
	F10_DCTL_SEL_LOW_DctDatIntLv	= BIT(5),
	F10_DCTL_SEL_LOW_DramEnable	= BIT(8),
	F10_DCTL_SEL_LOW_MemCleared	= BIT(10),
};

#define    dct_high_range_enabled(pvt)    \
	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctSelHiRngEn)

#define dct_interleave_enabled(pvt)	   \
	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctSelIntLvEn)

#define dct_ganging_enabled(pvt)        \
	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctGangEn)

#define dct_data_intlv_enabled(pvt)    \
	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctDatIntLv)

#define dct_dram_enabled(pvt)    \
	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DramEnable)

#define dct_memory_cleared(pvt)    \
	(pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_MemCleared)

#define dct_sel_baseaddr(pvt)		((pvt->dram_ctl_select_low) & 0xFFFFF800)
#define dct_sel_interleave_addr(pvt)	(((pvt->dram_ctl_select_low) >> 6) & 0x3)
#define dct_high_range_enabled(pvt)	(pvt->dram_ctl_select_low & BIT(0))
#define dct_interleave_enabled(pvt)	(pvt->dram_ctl_select_low & BIT(2))
#define dct_ganging_enabled(pvt)	(pvt->dram_ctl_select_low & BIT(4))
#define dct_data_intlv_enabled(pvt)	(pvt->dram_ctl_select_low & BIT(5))
#define dct_dram_enabled(pvt)		(pvt->dram_ctl_select_low & BIT(8))
#define dct_memory_cleared(pvt)		(pvt->dram_ctl_select_low & BIT(10))

#define F10_DCTL_SEL_HIGH		0x114


/*
 * Function 3 - Misc Control
 */
@@ -382,6 +355,8 @@ enum {
#define K8_NBCAP_SECDED			BIT(3)
#define K8_NBCAP_DCT_DUAL		BIT(0)

#define EXT_NB_MCA_CFG			0x180

/* MSRs */
#define K8_MSR_MCGCTL_NBE		BIT(4)

@@ -471,6 +446,9 @@ struct amd64_pvt {
	u32 dram_ctl_select_high;	/* DRAM Controller Select High Reg */
	u32 online_spare;               /* On-Line spare Reg */

	/* x4 or x8 syndromes in use */
	u8 syn_type;

	/* temp storage for when input is received from sysfs */
	struct err_regs ctl_error_info;

+2 −2
Original line number Diff line number Diff line
@@ -958,7 +958,7 @@ static void e752x_check(struct mem_ctl_info *mci)
}

/* Program byte/sec bandwidth scrub rate to hardware */
static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *new_bw)
static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
{
	const struct scrubrate *scrubrates;
	struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info;
@@ -975,7 +975,7 @@ static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *new_bw)
	 * desired rate and program the cooresponding register value.
	 */
	for (i = 0; scrubrates[i].bandwidth != SDRATE_EOT; i++)
		if (scrubrates[i].bandwidth >= *new_bw)
		if (scrubrates[i].bandwidth >= new_bw)
			break;

	if (scrubrates[i].bandwidth == SDRATE_EOT)
Loading