Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 53595345 authored by Tony Luck's avatar Tony Luck Committed by Borislav Petkov
Browse files

EDAC, i7core: Remove double buffering of error records



In the bad old days the functions from x86_mce_decoder_chain could be
called in machine check context. So we used to carefully copy them and
defer processing until later. But in

  f29a7aff ("x86/mce: Avoid potential deadlock due to printk() in MCE context")

we switched the logging code to save the record in a genpool, and call
the functions that registered to be notified later from a work queue.

So drop all the double buffering and do all the work we want to do as
soon as i7core_mce_check_error() is called.

Signed-off-by: default avatarTony Luck <tony.luck@intel.com>
Acked-by: default avatarMauro Carvalho Chehab <mchehab@osg.samsung.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/29ab2c370915c6e132fc5d88e7b72cb834bedbfe.1461855008.git.tony.luck@intel.com


Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
parent de0336b3
Loading
Loading
Loading
Loading
+5 −76
Original line number Diff line number Diff line
@@ -271,16 +271,6 @@ struct i7core_pvt {

	bool		is_registered, enable_scrub;

	/* Fifo double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned		mce_in, mce_out;

	/* Count indicator to show errors not got */
	unsigned		mce_overrun;

	/* DCLK Frequency used for computing scrub rate */
	int			dclk_freq;

@@ -1792,56 +1782,15 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
 *	i7core_check_error	Retrieve and process errors reported by the
 *				hardware. Called by the Core module.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	 * losing an error.
	 */
	smp_rmb();
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		goto check_ce_error;

	m = pvt->mce_outentry;
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
	i7core_mce_output_error(mci, m);

	/*
	 * Now, let's increment CE error counts
	 */
check_ce_error:
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
@@ -1849,12 +1798,8 @@ static void i7core_check_error(struct mem_ctl_info *mci)
}

/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				return. The error itself should be handled later
 *				by i7core_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 * Check that logging is enabled and that this is the right type
 * of error for us to handle.
 */
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
				  void *data)
@@ -1882,21 +1827,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
	if (mce->bank != 8)
		return NOTIFY_DONE;

	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return NOTIFY_DONE;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);
	i7core_check_error(mci, mce);

	/* Advise mcelog that the errors were handled */
	return NOTIFY_STOP;
@@ -2243,8 +2174,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
	get_dimm_config(mci);
	/* record ptr to the generic device */
	mci->pdev = &i7core_dev->pdev[0]->dev;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;

	/* Enable scrubrate setting */
	if (pvt->enable_scrub)