Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 64515dc8 authored by Tomer Tayar's avatar Tomer Tayar Committed by David S. Miller
Browse files

qed: Add infrastructure for error detection and recovery



This patch adds the detection and handling of a parity error ("process kill
event"), including the update of the protocol drivers, and the prevention
of any HW access that will lead to device access towards the host while
recovery is in progress.
It also provides the means for the protocol drivers to trigger a recovery
process on their decision.

Signed-off-by: default avatarTomer Tayar <tomer.tayar@cavium.com>
Signed-off-by: default avatarAriel Elior <ariel.elior@cavium.com>
Signed-off-by: default avatarMichal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 666db486
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -804,6 +804,9 @@ struct qed_dev {

	u32				mcp_nvm_resp;

	/* Recovery */
	bool recov_in_prog;

	/* Linux specific here */
	struct  qede_dev		*edev;
	struct  pci_dev			*pdev;
@@ -943,6 +946,7 @@ void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt);
u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
		   u32 input_len, u8 *input_buf,
		   u32 max_size, u8 *unzip_buf);
void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
void qed_get_protocol_stats(struct qed_dev *cdev,
			    enum qed_mcp_protocol_type type,
			    union qed_mcp_protocol_stats *stats);
+27 −14
Original line number Diff line number Diff line
@@ -2140,6 +2140,11 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
			   "Load request was sent. Load code: 0x%x\n",
			   load_code);

		/* Only relevant for recovery:
		 * Clear the indication after LOAD_REQ is responded by the MFW.
		 */
		cdev->recov_in_prog = false;

		qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt);

		qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt);
@@ -2291,6 +2296,9 @@ static void qed_hw_timers_stop(struct qed_dev *cdev,
	qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_CONN, 0x0);
	qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_TASK, 0x0);

	if (cdev->recov_in_prog)
		return;

	for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) {
		if ((!qed_rd(p_hwfn, p_ptt,
			     TM_REG_PF_SCAN_ACTIVE_CONN)) &&
@@ -2353,6 +2361,7 @@ int qed_hw_stop(struct qed_dev *cdev)
		p_hwfn->hw_init_done = false;

		/* Send unload command to MCP */
		if (!cdev->recov_in_prog) {
			rc = qed_mcp_unload_req(p_hwfn, p_ptt);
			if (rc) {
				DP_NOTICE(p_hwfn,
@@ -2360,6 +2369,7 @@ int qed_hw_stop(struct qed_dev *cdev)
					  rc);
				rc2 = -EINVAL;
			}
		}

		qed_slowpath_irq_sync(p_hwfn);

@@ -2400,7 +2410,8 @@ int qed_hw_stop(struct qed_dev *cdev)
		qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0);
		qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0);

		qed_mcp_unload_done(p_hwfn, p_ptt);
		if (!cdev->recov_in_prog) {
			rc = qed_mcp_unload_done(p_hwfn, p_ptt);
			if (rc) {
				DP_NOTICE(p_hwfn,
					  "Failed sending a UNLOAD_DONE command. rc = %d.\n",
@@ -2408,8 +2419,9 @@ int qed_hw_stop(struct qed_dev *cdev)
				rc2 = -EINVAL;
			}
		}
	}

	if (IS_PF(cdev)) {
	if (IS_PF(cdev) && !cdev->recov_in_prog) {
		p_hwfn = QED_LEADING_HWFN(cdev);
		p_ptt = QED_LEADING_HWFN(cdev)->p_main_ptt;

@@ -3459,6 +3471,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
				 void __iomem *p_doorbells,
				 enum qed_pci_personality personality)
{
	struct qed_dev *cdev = p_hwfn->cdev;
	int rc = 0;

	/* Split PCI bars evenly between hwfns */
@@ -3511,7 +3524,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
	/* Sending a mailbox to the MFW should be done after qed_get_hw_info()
	 * is called as it sets the ports number in an engine.
	 */
	if (IS_LEAD_HWFN(p_hwfn)) {
	if (IS_LEAD_HWFN(p_hwfn) && !cdev->recov_in_prog) {
		rc = qed_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
		if (rc)
			DP_NOTICE(p_hwfn, "Failed to initiate PF FLR\n");
+1 −1
Original line number Diff line number Diff line
@@ -12827,7 +12827,7 @@ enum MFW_DRV_MSG_TYPE {
	MFW_DRV_MSG_LLDP_DATA_UPDATED,
	MFW_DRV_MSG_DCBX_REMOTE_MIB_UPDATED,
	MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED,
	MFW_DRV_MSG_RESERVED4,
	MFW_DRV_MSG_ERROR_RECOVERY,
	MFW_DRV_MSG_BW_UPDATE,
	MFW_DRV_MSG_S_TAG_UPDATE,
	MFW_DRV_MSG_GET_LAN_STATS,
+11 −0
Original line number Diff line number Diff line
@@ -703,6 +703,17 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
	int qed_status = 0;
	u32 offset = 0;

	if (p_hwfn->cdev->recov_in_prog) {
		DP_VERBOSE(p_hwfn,
			   NETIF_MSG_HW,
			   "Recovery is in progress. Avoid DMAE transaction [{src: addr 0x%llx, type %d}, {dst: addr 0x%llx, type %d}, size %d].\n",
			   src_addr, src_type, dst_addr, dst_type,
			   size_in_dwords);

		/* Let the flow complete w/o any error handling */
		return 0;
	}

	qed_dmae_opcode(p_hwfn,
			(src_type == QED_DMAE_ADDRESS_GRC),
			(dst_type == QED_DMAE_ADDRESS_GRC),
+30 −0
Original line number Diff line number Diff line
@@ -359,6 +359,8 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev,

	qed_init_dp(cdev, params->dp_module, params->dp_level);

	cdev->recov_in_prog = params->recov_in_prog;

	rc = qed_init_pci(cdev, pdev);
	if (rc) {
		DP_ERR(cdev, "init pci failed\n");
@@ -2203,6 +2205,15 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type,
	return qed_mcp_get_nvm_image(hwfn, type, buf, len);
}

void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
{
	struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
	void *cookie = p_hwfn->cdev->ops_cookie;

	if (ops && ops->schedule_recovery_handler)
		ops->schedule_recovery_handler(cookie);
}

static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
			    void *handle)
{
@@ -2226,6 +2237,23 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
	return status;
}

static int qed_recovery_process(struct qed_dev *cdev)
{
	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
	struct qed_ptt *p_ptt;
	int rc = 0;

	p_ptt = qed_ptt_acquire(p_hwfn);
	if (!p_ptt)
		return -EAGAIN;

	rc = qed_start_recovery_process(p_hwfn, p_ptt);

	qed_ptt_release(p_hwfn, p_ptt);

	return rc;
}

static int qed_update_wol(struct qed_dev *cdev, bool enabled)
{
	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
@@ -2380,6 +2408,8 @@ const struct qed_common_ops qed_common_ops_pass = {
	.nvm_get_image = &qed_nvm_get_image,
	.set_coalesce = &qed_set_coalesce,
	.set_led = &qed_set_led,
	.recovery_process = &qed_recovery_process,
	.recovery_prolog = &qed_recovery_prolog,
	.update_drv_state = &qed_update_drv_state,
	.update_mac = &qed_update_mac,
	.update_mtu = &qed_update_mtu,
Loading