Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 78d1e02f authored by Dave Olson, committed by Roland Dreier
Browse files

IB/ipath: Workaround problem of errormask register being overwritten



On some system hardware, we are seeing moderately common cases of the
chip errormask register being overwritten due to a chip bug in iba6120
that is triggered by a vendor-specific PCIe broadcast message.  This
patch merely checks periodically, and corrects it if needed (the
overwrite can cause us to not get error and hardware error
interrupts).  Also, make dd->ipath_errormask the one, true canonical
source for kr_errormask, and remove references to ipath_ignorederrs as
it is currently unused.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 3810f2a8
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -851,13 +851,14 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
			 dd->ipath_hwerrmask);

	dd->ipath_maskederrs = dd->ipath_ignorederrs;
	/* clear all */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
	/* enable errors that are masked, at least this first time. */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
			 ~dd->ipath_maskederrs);
	/* clear any interrups up to this point (ints still not enabled) */
	dd->ipath_errormask = ipath_read_kreg64(dd,
		dd->ipath_kregs->kr_errormask);
	/* clear any interrupts up to this point (ints still not enabled) */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);

	/*
+13 −12
Original line number Diff line number Diff line
@@ -517,10 +517,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)

	supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);

	/*
	 * don't report errors that are masked (includes those always
	 * ignored)
	 */
	/* don't report errors that are masked */
	errs &= ~dd->ipath_maskederrs;

	/* do these first, they are most important */
@@ -566,19 +563,19 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
		 * ones on this particular interrupt, which also isn't great
		 */
		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
		dd->ipath_errormask &= ~dd->ipath_maskederrs;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
				 ~dd->ipath_maskederrs);
			dd->ipath_errormask);
		s_iserr = ipath_decode_err(msg, sizeof msg,
				 (dd->ipath_maskederrs & ~dd->
				  ipath_ignorederrs));
			dd->ipath_maskederrs);

		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
		if (dd->ipath_maskederrs &
			~(INFINIPATH_E_RRCVEGRFULL |
			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
			ipath_dev_err(dd, "Temporarily disabling "
			    "error(s) %llx reporting; too frequent (%s)\n",
				(unsigned long long) (dd->ipath_maskederrs &
				~dd->ipath_ignorederrs), msg);
				(unsigned long long)dd->ipath_maskederrs,
				msg);
		else {
			/*
			 * rcvegrfull and rcvhdrqfull are "normal",
@@ -793,6 +790,9 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
	/* disable error interrupts, to avoid confusion */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);

	/* also disable interrupts; errormask is sometimes overwritten */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);

	/*
	 * clear all sends, because they may have been
	 * completed by usercode while in freeze mode, and
@@ -817,7 +817,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
	for (i = 0; i < dd->ipath_pioavregs; i++) {
		/* deal with 6110 chip bug */
		im = i > 3 ? ((i&1) ? i-1 : i+1) : i;
		val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64)));
		val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im);
		dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
			= le64_to_cpu(val);
	}
@@ -832,7 +832,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
		E_SPKT_ERRS_IGNORE);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
		~dd->ipath_maskederrs);
		dd->ipath_errormask);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
}

+2 −9
Original line number Diff line number Diff line
@@ -261,18 +261,10 @@ struct ipath_devdata {
	 * limiting of hwerror reporting
	 */
	ipath_err_t ipath_lasthwerror;
	/*
	 * errors masked because they occur too fast, also includes errors
	 * that are always ignored (ipath_ignorederrs)
	 */
	/* errors masked because they occur too fast */
	ipath_err_t ipath_maskederrs;
	/* time in jiffies at which to re-enable maskederrs */
	unsigned long ipath_unmasktime;
	/*
	 * errors always ignored (masked), at least for a given
	 * chip/device, because they are wrong or not useful
	 */
	ipath_err_t ipath_ignorederrs;
	/* count of egrfull errors, combined for all ports */
	u64 ipath_last_tidfull;
	/* for ipath_qcheck() */
@@ -436,6 +428,7 @@ struct ipath_devdata {
	u64 ipath_lastibcstat;
	/* hwerrmask shadow */
	ipath_err_t ipath_hwerrmask;
	ipath_err_t ipath_errormask; /* errormask shadow */
	/* interrupt config reg shadow */
	u64 ipath_intconfig;
	/* kr_sendpiobufbase value */
+48 −6
Original line number Diff line number Diff line
@@ -196,6 +196,45 @@ static void ipath_qcheck(struct ipath_devdata *dd)
	}
}

/**
 * ipath_chk_errormask - detect and repair an overwritten errormask register
 * @dd: the infinipath device
 *
 * Compares the chip's kr_errormask register against the driver's shadow
 * copy (dd->ipath_errormask) and rewrites the register from the shadow
 * when they differ.  Nothing is done while the shadow is zero or before
 * the device has IPATH_INITTED set.
 *
 * The register values are held in unsigned long long rather than
 * unsigned long so that no upper bits are lost where unsigned long is
 * only 32 bits wide; a truncated value would otherwise compare unequal
 * to the shadow on every call and be logged incorrectly.  This matches
 * the (unsigned long long)/%llx convention used for error masks
 * elsewhere in the driver.
 */
static void ipath_chk_errormask(struct ipath_devdata *dd)
{
	/*
	 * Number of repairs performed so far.  Being a function-local
	 * static, the count is shared by every device passed in here.
	 */
	static u32 fixed;
	u32 ctrl;
	unsigned long long errormask;
	unsigned long long hwerrs;

	if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
		return;

	errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);

	/* register still matches the shadow: nothing to repair */
	if (errormask == dd->ipath_errormask)
		return;
	fixed++;

	/* sample hardware error status and control before restoring */
	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);

	/* restore the register from the shadow copy */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
		dd->ipath_errormask);

	if ((hwerrs & dd->ipath_hwerrmask) ||
		(ctrl & INFINIPATH_C_FREEZEMODE)) {
		/* force re-interrupt of pending events, just in case */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
		dev_info(&dd->pcidev->dev,
			"errormask fixed(%u) %llx -> %llx, ctrl %x hwerr %llx\n",
			fixed, errormask,
			(unsigned long long)dd->ipath_errormask,
			ctrl, hwerrs);
	} else {
		/* no unmasked hardware error and no freeze: debug log only */
		ipath_dbg("errormask fixed(%u) %llx -> %llx, no freeze\n",
			fixed, errormask,
			(unsigned long long)dd->ipath_errormask);
	}
}


/**
 * ipath_get_faststats - get word counters from chip before they overflow
 * @opaque - contains a pointer to the infinipath device ipath_devdata
@@ -251,14 +290,13 @@ void ipath_get_faststats(unsigned long opaque)
		dd->ipath_lasterror = 0;
	if (dd->ipath_lasthwerror)
		dd->ipath_lasthwerror = 0;
	if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
	if (dd->ipath_maskederrs
	    && time_after(jiffies, dd->ipath_unmasktime)) {
		char ebuf[256];
		int iserr;
		iserr = ipath_decode_err(ebuf, sizeof ebuf,
				 (dd->ipath_maskederrs & ~dd->
				  ipath_ignorederrs));
		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
			dd->ipath_maskederrs);
		if (dd->ipath_maskederrs &
				~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
				INFINIPATH_E_PKTERRS ))
			ipath_dev_err(dd, "Re-enabling masked errors "
@@ -278,9 +316,12 @@ void ipath_get_faststats(unsigned long opaque)
				ipath_cdbg(ERRPKT, "Re-enabling packet"
						" problem interrupt (%s)\n", ebuf);
		}
		dd->ipath_maskederrs = dd->ipath_ignorederrs;

		/* re-enable masked errors */
		dd->ipath_errormask |= dd->ipath_maskederrs;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
				 ~dd->ipath_maskederrs);
			dd->ipath_errormask);
		dd->ipath_maskederrs = 0;
	}

	/* limit qfull messages to ~one per minute per port */
@@ -294,6 +335,7 @@ void ipath_get_faststats(unsigned long opaque)
		}
	}

	ipath_chk_errormask(dd);
done:
	mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
}