Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit aecd3b5a authored by Michael Albaugh's avatar Michael Albaugh Committed by Roland Dreier
Browse files

IB/ipath: Log "active" time and some errors to EEPROM



We currently track various errors, now we enhance that capability by
logging some of them to EEPROM.  We also now log a cumulative "active"
time defined by traffic though the InfiniPath HCA beyond the normal SM
traffic.

Signed-off-by: default avatarMichael Albaugh <michael.albaugh@qlogic.com>
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent 8e9ab3f1
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -2005,6 +2005,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
			 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);

	ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
	ipath_update_eeprom_log(dd);
}

/**
+227 −6
Original line number Diff line number Diff line
@@ -367,8 +367,8 @@ bail:
 * @len: number of bytes to receive
 */

int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
		      void *buffer, int len)
static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
					u8 eeprom_offset, void *buffer, int len)
{
	/* compiler complains unless initialized */
	u8 single_byte = 0;
@@ -418,6 +418,7 @@ bail:
	return ret;
}


/**
 * ipath_eeprom_write - writes data to the eeprom via I2C
 * @dd: the infinipath device
@@ -425,7 +426,7 @@ bail:
 * @buffer: data to write
 * @len: number of bytes to write
 */
int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
				const void *buffer, int len)
{
	u8 single_byte;
@@ -500,6 +501,38 @@ bail:
	return ret;
}

/*
 * The public entry-points ipath_eeprom_read() and ipath_eeprom_write()
 * are now just wrappers around the internal functions.
 */
int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
			void *buff, int len)
{
	int ret;

	ret = down_interruptible(&dd->ipath_eep_sem);
	if (!ret) {
		ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
		up(&dd->ipath_eep_sem);
	}

	return ret;
}

int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
			const void *buff, int len)
{
	int ret;

	ret = down_interruptible(&dd->ipath_eep_sem);
	if (!ret) {
		ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
		up(&dd->ipath_eep_sem);
	}

	return ret;
}

static u8 flash_csum(struct ipath_flash *ifp, int adjust)
{
	u8 *ip = (u8 *) ifp;
@@ -527,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
	void *buf;
	struct ipath_flash *ifp;
	__be64 guid;
	int len;
	int len, eep_stat;
	u8 csum, *bguid;
	int t = dd->ipath_unit;
	struct ipath_devdata *dd0 = ipath_lookup(0);
@@ -571,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
		goto bail;
	}

	if (ipath_eeprom_read(dd, 0, buf, len)) {
	down(&dd->ipath_eep_sem);
	eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
	up(&dd->ipath_eep_sem);

	if (eep_stat) {
		ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
		goto done;
	}
@@ -646,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
	ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
		   (unsigned long long) be64_to_cpu(dd->ipath_guid));

	memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
	/*
	 * Power-on (actually "active") hours are kept as little-endian value
	 * in EEPROM, but as seconds in a (possibly as small as 24-bit)
	 * atomic_t while running.
	 */
	atomic_set(&dd->ipath_active_time, 0);
	dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);

done:
	vfree(buf);

bail:;
}

/**
 * ipath_update_eeprom_log - copy active-time and error counters to eeprom
 * @dd: the infinipath device
 *
 * Although the time is kept as seconds in the ipath_devdata struct, it is
 * rounded to hours for re-write, as we have only 16 bits in EEPROM.
 * First-cut code reads whole (expected) struct ipath_flash, modifies,
 * re-writes. Future direction: read/write only what we need, assuming
 * that the EEPROM had to have been "good enough" for driver init, and
 * if not, we aren't making it worse.
 *
 */

int ipath_update_eeprom_log(struct ipath_devdata *dd)
{
	void *buf;
	struct ipath_flash *ifp;
	int len, hi_water;
	uint32_t new_time, new_hrs;
	u8 csum;
	int ret, idx;
	unsigned long flags;

	/* first, check if we actually need to do anything. */
	ret = 0;
	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
		if (dd->ipath_eep_st_new_errs[idx]) {
			ret = 1;
			break;
		}
	}
	new_time = atomic_read(&dd->ipath_active_time);

	if (ret == 0 && new_time < 3600)
		return 0;

	/*
	 * The quick-check above determined that there is something worthy
	 * of logging, so get current contents and do a more detailed idea.
	 */
	len = offsetof(struct ipath_flash, if_future);
	buf = vmalloc(len);
	ret = 1;
	if (!buf) {
		ipath_dev_err(dd, "Couldn't allocate memory to read %u "
				"bytes from eeprom for logging\n", len);
		goto bail;
	}

	/* Grab semaphore and read current EEPROM. If we get an
	 * error, let go, but if not, keep it until we finish write.
	 */
	ret = down_interruptible(&dd->ipath_eep_sem);
	if (ret) {
		ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
		goto free_bail;
	}
	ret = ipath_eeprom_internal_read(dd, 0, buf, len);
	if (ret) {
		up(&dd->ipath_eep_sem);
		ipath_dev_err(dd, "Unable read EEPROM for logging\n");
		goto free_bail;
	}
	ifp = (struct ipath_flash *)buf;

	csum = flash_csum(ifp, 0);
	if (csum != ifp->if_csum) {
		up(&dd->ipath_eep_sem);
		ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
				csum, ifp->if_csum);
		ret = 1;
		goto free_bail;
	}
	hi_water = 0;
	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
		int new_val = dd->ipath_eep_st_new_errs[idx];
		if (new_val) {
			/*
			 * If we have seen any errors, add to EEPROM values
			 * We need to saturate at 0xFF (255) and we also
			 * would need to adjust the checksum if we were
			 * trying to minimize EEPROM traffic
			 * Note that we add to actual current count in EEPROM,
			 * in case it was altered while we were running.
			 */
			new_val += ifp->if_errcntp[idx];
			if (new_val > 0xFF)
				new_val = 0xFF;
			if (ifp->if_errcntp[idx] != new_val) {
				ifp->if_errcntp[idx] = new_val;
				hi_water = offsetof(struct ipath_flash,
						if_errcntp) + idx;
			}
			/*
			 * update our shadow (used to minimize EEPROM
			 * traffic), to match what we are about to write.
			 */
			dd->ipath_eep_st_errs[idx] = new_val;
			dd->ipath_eep_st_new_errs[idx] = 0;
		}
	}
	/*
	 * now update active-time. We would like to round to the nearest hour
	 * but unless atomic_t are sure to be proper signed ints we cannot,
	 * because we need to account for what we "transfer" to EEPROM and
	 * if we log an hour at 31 minutes, then we would need to set
	 * active_time to -29 to accurately count the _next_ hour.
	 */
	if (new_time > 3600) {
		new_hrs = new_time / 3600;
		atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
		new_hrs += dd->ipath_eep_hrs;
		if (new_hrs > 0xFFFF)
			new_hrs = 0xFFFF;
		dd->ipath_eep_hrs = new_hrs;
		if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
			ifp->if_powerhour[0] = new_hrs & 0xFF;
			hi_water = offsetof(struct ipath_flash, if_powerhour);
		}
		if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
			ifp->if_powerhour[1] = new_hrs >> 8;
			hi_water = offsetof(struct ipath_flash, if_powerhour)
					+ 1;
		}
	}
	/*
	 * There is a tiny possibility that we could somehow fail to write
	 * the EEPROM after updating our shadows, but problems from holding
	 * the spinlock too long are a much bigger issue.
	 */
	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
	if (hi_water) {
		/* we made some change to the data, uopdate cksum and write */
		csum = flash_csum(ifp, 1);
		ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
	}
	up(&dd->ipath_eep_sem);
	if (ret)
		ipath_dev_err(dd, "Failed updating EEPROM\n");

free_bail:
	vfree(buf);
bail:
	return ret;

}

/**
 * ipath_inc_eeprom_err - increment one of the four error counters
 * that are logged to EEPROM.
 * @dd: the infinipath device
 * @eidx: 0..3, the counter to increment
 * @incr: how much to add
 *
 * Each counter is 8-bits, and saturates at 255 (0xFF). They
 * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
 * is called, but it can only be called in a context that allows sleep.
 * This function can be called even at interrupt level.
 */

void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
{
	uint new_val;
	unsigned long flags;

	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
	new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
	if (new_val > 255)
		new_val = 255;
	dd->ipath_eep_st_new_errs[eidx] = new_val;
	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
	return;
}
+22 −0
Original line number Diff line number Diff line
@@ -440,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
	u32 bits, ctrl;
	int isfatal = 0;
	char bitsmsg[64];
	int log_idx;

	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);

@@ -468,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,

	hwerrs &= dd->ipath_hwerrmask;

	/* We log some errors to EEPROM, check if we have any of those. */
	for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
		if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
			ipath_inc_eeprom_err(dd, log_idx, 1);

	/*
	 * make sure we get this much out, unless told to be quiet,
	 * it's a parity error we may recover from,
@@ -1171,6 +1177,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd)

	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;

	/*
	 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
	 * 2 is Some Misc, 3 is reserved for future.
	 */
	dd->ipath_eep_st_masks[0].hwerrs_to_log =
		INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
		INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;

	dd->ipath_eep_st_masks[1].hwerrs_to_log =
		INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
		INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;

	dd->ipath_eep_st_masks[2].errs_to_log =
		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;

}

/**
+27 −0
Original line number Diff line number Diff line
@@ -340,6 +340,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
	u32 bits, ctrl;
	int isfatal = 0;
	char bitsmsg[64];
	int log_idx;

	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
	if (!hwerrs) {
@@ -367,6 +368,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,

	hwerrs &= dd->ipath_hwerrmask;

	/* We log some errors to EEPROM, check if we have any of those. */
	for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
		if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
			ipath_inc_eeprom_err(dd, log_idx, 1);

	/*
	 * make sure we get this much out, unless told to be quiet,
	 * or it's occurred within the last 5 seconds
@@ -950,6 +956,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)

	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;

	/*
	 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
	 * 2 is Some Misc, 3 is reserved for future.
	 */
	dd->ipath_eep_st_masks[0].hwerrs_to_log =
		INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
		INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;

	/* Ignore errors in PIO/PBC on systems with unordered write-combining */
	if (ipath_unordered_wc())
		dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;

	dd->ipath_eep_st_masks[1].hwerrs_to_log =
		INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
		INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;

	dd->ipath_eep_st_masks[2].errs_to_log =
		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;


}

/* setup the MSI stuff again after a reset.  I'd like to just call
+2 −0
Original line number Diff line number Diff line
@@ -341,6 +341,8 @@ static int init_chip_first(struct ipath_devdata *dd,
	spin_lock_init(&dd->ipath_tid_lock);

	spin_lock_init(&dd->ipath_gpio_lock);
	spin_lock_init(&dd->ipath_eep_st_lock);
	sema_init(&dd->ipath_eep_sem, 1);

done:
	*pdp = pd;
Loading