Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2c53b898 authored by Jilai Wang's avatar Jilai Wang
Browse files

msm: npu: Trigger SSR if RSC handshake times out



When RSC handshake times out, it means that firmware is in bad
state. It is necessary to trigger SSR in order to capture a
firmware dump to analyze the root cause.

Change-Id: Ic1a8e51004120538791ef3aa325103d7f308b4c9
Signed-off-by: default avatarJilai Wang <jilaiw@codeaurora.org>
parent 2bd8f670
Loading
Loading
Loading
Loading
+60 −31
Original line number Diff line number Diff line
@@ -66,7 +66,7 @@ static int npu_notify_aop(struct npu_device *npu_dev, bool on);
static int npu_notify_fw_pwr_state(struct npu_device *npu_dev,
	uint32_t pwr_level, bool post);
static int load_fw_nolock(struct npu_device *npu_dev, bool enable);
static void disable_fw_nolock(struct npu_device *npu_dev);
static int disable_fw_nolock(struct npu_device *npu_dev);
static int update_dcvs_activity(struct npu_device *npu_dev, uint32_t activity);
static void npu_queue_network_cmd(struct npu_network *network,
	struct npu_network_cmd *cmd);
@@ -110,7 +110,7 @@ static int wait_npu_cpc_power_off(struct npu_device *npu_dev)
		wait_cnt += NPU_FW_TIMEOUT_POLL_INTERVAL_MS;
		if (wait_cnt > max_wait_ms) {
			NPU_ERR("timeout wait for cpc power off\n");
			return -EPERM;
			return -ETIMEDOUT;
		}
		msleep(NPU_FW_TIMEOUT_POLL_INTERVAL_MS);
	} while (1);
@@ -327,7 +327,10 @@ static int enable_fw_nolock(struct npu_device *npu_dev)
	reinit_completion(&host_ctx->fw_bringup_done);
	ret = npu_notify_fw_pwr_state(npu_dev, npu_dev->pwrctrl.active_pwrlevel,
		true);
	if (ret) {
	if (ret == -ETIMEDOUT) {
		NPU_ERR("notify fw power state timed out\n");
		goto enable_pw_fail;
	} else if (ret) {
		NPU_ERR("notify fw power state failed\n");
		goto notify_fw_pwr_fail;
	}
@@ -337,7 +340,7 @@ static int enable_fw_nolock(struct npu_device *npu_dev)
	if (!ret) {
		NPU_ERR("Wait for fw bringup timedout\n");
		ret = -ETIMEDOUT;
		goto notify_fw_pwr_fail;
		goto enable_pw_fail;
	} else {
		ret = 0;
	}
@@ -375,17 +378,21 @@ int enable_fw(struct npu_device *npu_dev)
	ret = enable_fw_nolock(npu_dev);
	mutex_unlock(&host_ctx->lock);

	if (ret == -ETIMEDOUT) {
		NPU_ERR("Enable fw timedout, force SSR\n");
		host_error_hdlr(npu_dev, true);
	}
	return ret;
}

static void disable_fw_nolock(struct npu_device *npu_dev)
static int disable_fw_nolock(struct npu_device *npu_dev)
{
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
	int ret = 0;

	if (!host_ctx->fw_ref_cnt) {
		NPU_WARN("fw_ref_cnt is 0\n");
		return;
		return ret;
	}

	host_ctx->fw_ref_cnt--;
@@ -393,16 +400,20 @@ static void disable_fw_nolock(struct npu_device *npu_dev)

	if (host_ctx->fw_state != FW_ENABLED) {
		NPU_ERR("fw is not enabled\n");
		return;
		return ret;
	}

	if (host_ctx->fw_ref_cnt > 0)
		return;
		return ret;

	/* turn on auto ACK for warm shuts down */
	npu_cc_reg_write(npu_dev, NPU_CC_NPU_CPC_RSC_CTRL, 3);
	reinit_completion(&host_ctx->fw_shutdown_done);
	if (npu_notify_fw_pwr_state(npu_dev, NPU_PWRLEVEL_OFF, false)) {
	ret = npu_notify_fw_pwr_state(npu_dev, NPU_PWRLEVEL_OFF, false);
	if (ret == -ETIMEDOUT) {
		NPU_ERR("notify fw pwr off timed out\n");
		goto fail;
	} else if (ret) {
		NPU_WARN("notify fw pwr off failed\n");
		msleep(500);
	}
@@ -410,10 +421,15 @@ static void disable_fw_nolock(struct npu_device *npu_dev)
	if (!host_ctx->auto_pil_disable) {
		ret = wait_for_completion_timeout(
			&host_ctx->fw_shutdown_done, NW_RSC_TIMEOUT_MS);
		if (!ret)
		if (!ret) {
			NPU_ERR("Wait for fw shutdown timedout\n");
		else
			ret = -ETIMEDOUT;
			goto fail;
		} else {
			ret = wait_npu_cpc_power_off(npu_dev);
			if (ret)
				goto fail;
		}
	}

	npu_disable_irq(npu_dev);
@@ -430,15 +446,24 @@ static void disable_fw_nolock(struct npu_device *npu_dev)
		host_ctx->fw_state = FW_UNLOADED;
		NPU_DBG("fw is unloaded\n");
	}

fail:
	return ret;
}

void disable_fw(struct npu_device *npu_dev)
{
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
	int ret = 0;

	mutex_lock(&host_ctx->lock);
	disable_fw_nolock(npu_dev);
	ret = disable_fw_nolock(npu_dev);
	mutex_unlock(&host_ctx->lock);

	if (ret == -ETIMEDOUT) {
		NPU_ERR("disable fw timedout, force SSR\n");
		host_error_hdlr(npu_dev, true);
	}
}

/* notify fw current power level */
@@ -844,10 +869,11 @@ static int host_error_hdlr(struct npu_device *npu_dev, bool force)

	if (host_ctx->wdg_irq_sts) {
		NPU_INFO("watchdog irq triggered\n");
		npu_dump_debug_info(npu_dev);
		fw_alive = false;
	}

	npu_dump_debug_info(npu_dev);

	/*
	 * if fw is still alive, notify fw before power off
	 * otherwise if ssr happens or notify fw returns failure
@@ -1116,7 +1142,7 @@ static int wait_for_status_ready(struct npu_device *npu_dev,
		if (!wait_cnt) {
			NPU_ERR("timeout wait for status %x[%x] in reg %x\n",
				status_bits, ctrl_sts, status_reg);
			return -EPERM;
			return -ETIMEDOUT;
		}

		if (poll)
@@ -2216,7 +2242,6 @@ int32_t npu_host_load_network_v2(struct npu_client *client,
	struct npu_pwrctrl *pwr = &npu_dev->pwrctrl;
	struct npu_network *network;
	struct ipc_cmd_load_pkt_v2 *load_packet = NULL;
	struct ipc_cmd_unload_pkt unload_packet;
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
	struct npu_network_cmd *load_cmd = NULL;
	uint32_t num_patch_params, pkt_size;
@@ -2321,9 +2346,8 @@ int32_t npu_host_load_network_v2(struct npu_client *client,
	if (!ret) {
		NPU_ERR("npu: NPU_IPC_CMD_LOAD time out %lld:%d\n",
			network->id, load_cmd->trans_id);
		npu_dump_debug_info(npu_dev);
		ret = -ETIMEDOUT;
		goto error_load_network;
		goto free_load_cmd;
	}

	ret = load_cmd->ret_status;
@@ -2342,18 +2366,6 @@ int32_t npu_host_load_network_v2(struct npu_client *client,

	return ret;

error_load_network:
	NPU_DBG("Unload network %lld\n", network->id);
	/* send NPU_IPC_CMD_UNLOAD command to fw */
	unload_packet.header.cmd_type = NPU_IPC_CMD_UNLOAD;
	unload_packet.header.size = sizeof(struct ipc_cmd_unload_pkt);
	unload_packet.header.trans_id =
		atomic_add_return(1, &host_ctx->ipc_trans_id);
	unload_packet.header.flags = 0;
	unload_packet.network_hdl = (uint32_t)network->network_hdl;
	npu_send_network_cmd(npu_dev, network, &unload_packet, NULL);
	/* wait 200 ms to make sure fw has processed this command */
	msleep(200);
free_load_cmd:
	npu_dequeue_network_cmd(network, load_cmd);
	npu_free_network_cmd(host_ctx, load_cmd);
@@ -2363,6 +2375,16 @@ int32_t npu_host_load_network_v2(struct npu_client *client,
	free_network(host_ctx, client, network->id);
err_deinit_fw:
	mutex_unlock(&host_ctx->lock);

	/*
	 * treat load network timed out as error in order to
	 * force SSR
	 */
	if (ret == -ETIMEDOUT) {
		NPU_ERR("Error handling after load network failure\n");
		host_error_hdlr(npu_dev, true);
	}

	disable_fw(npu_dev);
	return ret;
}
@@ -2459,7 +2481,6 @@ int32_t npu_host_unload_network(struct npu_client *client,
	if (!ret) {
		NPU_ERR("npu: NPU_IPC_CMD_UNLOAD time out %llx:%d\n",
			network->id, unload_cmd->trans_id);
		npu_dump_debug_info(npu_dev);
		ret = -ETIMEDOUT;
		goto free_unload_cmd;
	}
@@ -2484,6 +2505,15 @@ int32_t npu_host_unload_network(struct npu_client *client,

	mutex_unlock(&host_ctx->lock);

	/*
	 * treat unload network timed out as error in order to
	 * force SSR
	 */
	if (ret == -ETIMEDOUT) {
		NPU_ERR("Error handling after load network failure\n");
		host_error_hdlr(npu_dev, true);
	}

	disable_fw(npu_dev);

	return ret;
@@ -2616,7 +2646,6 @@ int32_t npu_host_exec_network_v2(struct npu_client *client,
	if (!ret) {
		NPU_ERR("npu: %llx:%d NPU_IPC_CMD_EXECUTE_V2 time out\n",
			network->id, exec_cmd->trans_id);
		npu_dump_debug_info(npu_dev);
		ret = -ETIMEDOUT;
		goto free_exec_packet;
	}