Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 673b975f authored by Dean Luick's avatar Dean Luick Committed by Doug Ledford
Browse files

IB/hfi1: Add QSFP sanity pre-check



Sometimes a QSFP device does not respond in the expected time
after a power-on.  Add a read pre-check/retry when starting
the link on driver load.

Reviewed-by: default avatarEaswar Hariharan <easwar.hariharan@intel.com>
Signed-off-by: default avatarDean Luick <dean.luick@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent af534939
Loading
Loading
Loading
Loading
+78 −8
Original line number Diff line number Diff line
@@ -9490,6 +9490,78 @@ static void init_lcb(struct hfi1_devdata *dd)
	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00);
}

/*
 * Perform a test read on the QSFP.  Return 0 on success, -ERRNO
 * on error.
 */
static int test_qsfp_read(struct hfi1_pportdata *ppd)
{
	int ret;
	u8 status;

	/* report success if not a QSFP */
	if (ppd->port_type != PORT_TYPE_QSFP)
		return 0;

	/* read byte 2, the status byte */
	ret = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1);
	if (ret < 0)
		return ret;
	if (ret != 1)
		return -EIO;

	return 0; /* success */
}

/*
 * Values for QSFP retry.
 *
 * Give up after 10s (20 x 500ms).  The overall timeout was empirically
 * arrived at from experience on a large cluster.
 */
#define MAX_QSFP_RETRIES 20
#define QSFP_RETRY_WAIT 500 /* msec */

/*
 * Try a QSFP read.  If it fails, schedule a retry for later.
 * Called on first link activation after driver load.
 */
static void try_start_link(struct hfi1_pportdata *ppd)
{
	if (test_qsfp_read(ppd)) {
		/* read failed */
		if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) {
			dd_dev_err(ppd->dd, "QSFP not responding, giving up\n");
			return;
		}
		dd_dev_info(ppd->dd,
			    "QSFP not responding, waiting and retrying %d\n",
			    (int)ppd->qsfp_retry_count);
		ppd->qsfp_retry_count++;
		queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
				   msecs_to_jiffies(QSFP_RETRY_WAIT));
		return;
	}
	ppd->qsfp_retry_count = 0;

	/*
	 * Tune the SerDes to a ballpark setting for optimal signal and bit
	 * error rate.  Needs to be done before starting the link.
	 */
	tune_serdes(ppd);
	start_link(ppd);
}

/*
 * Workqueue function to start the link after a delay.
 */
void handle_start_link(struct work_struct *work)
{
	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
						  start_link_work.work);
	try_start_link(ppd);
}

int bringup_serdes(struct hfi1_pportdata *ppd)
{
	struct hfi1_devdata *dd = ppd->dd;
@@ -9525,14 +9597,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
		set_qsfp_int_n(ppd, 1);
	}

	/*
	 * Tune the SerDes to a ballpark setting for
	 * optimal signal and bit error rate
	 * Needs to be done before starting the link
	 */
	tune_serdes(ppd);

	return start_link(ppd);
	try_start_link(ppd);
	return 0;
}

void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
@@ -9549,6 +9615,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
	ppd->driver_link_ready = 0;
	ppd->link_enabled = 0;

	ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */
	flush_delayed_work(&ppd->start_link_work);
	cancel_delayed_work_sync(&ppd->start_link_work);

	ppd->offline_disabled_reason =
			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
+1 −0
Original line number Diff line number Diff line
@@ -706,6 +706,7 @@ void handle_link_up(struct work_struct *work);
void handle_link_down(struct work_struct *work);
void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
void reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work);
+2 −0
Original line number Diff line number Diff line
@@ -605,6 +605,7 @@ struct hfi1_pportdata {
	struct work_struct freeze_work;
	struct work_struct link_downgrade_work;
	struct work_struct link_bounce_work;
	struct delayed_work start_link_work;
	/* host link state variables */
	struct mutex hls_lock;
	u32 host_link_state;
@@ -659,6 +660,7 @@ struct hfi1_pportdata {
	u8 linkinit_reason;
	u8 local_tx_rate;	/* rate given to 8051 firmware */
	u8 last_pstate;		/* info only */
	u8 qsfp_retry_count;

	/* placeholders for IB MAD packet settings */
	u8 overrun_threshold;
+1 −0
Original line number Diff line number Diff line
@@ -500,6 +500,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
	INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
	INIT_WORK(&ppd->sma_message_work, handle_sma_message);
	INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
	INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link);
	INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);