Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 90811ee5 authored by Jilai Wang's avatar Jilai Wang Committed by Gerrit - the friendly Code Review server
Browse files

msm: npu: fix network and misc commands race condition problem



misc command doesn't do npu_init before sending to firmware, so
it's possible that npu will be turned off while npu firmware is
handling misc commands which causes times out in host driver.
To fix this issue, npu_init/npu_deinit need to be called before
and after misc commands are handled. In addition, only one misc
command should be handled at any moment. The misc command should
be failed right away while the other one is being handled.

Change-Id: I523255da15fa4e3b8b21d74b28f90d3458ebeb3c
Signed-off-by: default avatarJilai Wang <jilaiw@codeaurora.org>
parent 78d37755
Loading
Loading
Loading
Loading
+48 −20
Original line number Diff line number Diff line
@@ -292,9 +292,8 @@ int npu_host_init(struct npu_device *npu_dev)
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;

	memset(host_ctx, 0, sizeof(*host_ctx));
	init_completion(&host_ctx->loopback_done);
	init_completion(&host_ctx->misc_done);
	init_completion(&host_ctx->fw_deinit_done);
	init_completion(&host_ctx->property_done);
	mutex_init(&host_ctx->lock);
	atomic_set(&host_ctx->ipc_trans_id, 1);
	host_ctx->npu_dev = npu_dev;
@@ -308,6 +307,8 @@ int npu_host_init(struct npu_device *npu_dev)
	if (!host_ctx->prop_buf)
		return -ENOMEM;

	host_ctx->misc_pending = false;

	return 0;
}

@@ -368,6 +369,7 @@ static int host_error_hdlr(struct npu_device *npu_dev, bool force)
		network = &host_ctx->networks[i];
		if (network->is_valid && network->cmd_pending &&
			network->fw_error) {
			network->cmd_pending = false;
			if (network->cmd_async) {
				pr_debug("async cmd, queue ssr event\n");
				kevt.evt.type = MSM_NPU_EVENT_TYPE_SSR;
@@ -382,7 +384,8 @@ static int host_error_hdlr(struct npu_device *npu_dev, bool force)
			}
		}
	}
	complete_all(&host_ctx->loopback_done);
	host_ctx->misc_pending = false;
	complete_all(&host_ctx->misc_done);
	mutex_unlock(&host_ctx->lock);

	return 1;
@@ -927,7 +930,9 @@ static void app_msg_proc(struct npu_host_ctx *host_ctx, uint32_t *msg)

		pr_debug("NPU_IPC_MSG_LOOPBACK_DONE loopbackParams: 0x%x\n",
			lb_rsp_pkt->loopbackParams);
		complete_all(&host_ctx->loopback_done);
		host_ctx->misc_pending = false;

		complete_all(&host_ctx->misc_done);
		break;
	}
	case NPU_IPC_MSG_SET_PROPERTY_DONE:
@@ -942,8 +947,9 @@ static void app_msg_proc(struct npu_host_ctx *host_ctx, uint32_t *msg)
			param[0]);

		host_ctx->cmd_ret_status = prop_rsp_pkt->header.status;
		host_ctx->misc_pending = false;

		complete_all(&host_ctx->property_done);
		complete_all(&host_ctx->misc_done);
		break;
	}
	case NPU_IPC_MSG_GET_PROPERTY_DONE:
@@ -968,8 +974,9 @@ static void app_msg_proc(struct npu_host_ctx *host_ctx, uint32_t *msg)
				sizeof(struct ipc_msg_header_pkt);
			memcpy(host_ctx->prop_buf, prop_data, prop_size);
		}
		host_ctx->misc_pending = false;

		complete_all(&host_ctx->property_done);
		complete_all(&host_ctx->misc_done);
		break;
	}
	case NPU_IPC_MSG_GENERAL_NOTIFY:
@@ -1143,7 +1150,7 @@ static int npu_send_network_cmd(struct npu_device *npu_dev,
		network->cmd_async = async;
		network->cmd_ret_status = 0;
		network->cmd_pending = true;
		network->trans_id = atomic_read(&host_ctx->ipc_trans_id);
		network->trans_id = ((struct ipc_cmd_header_pkt *)cmd_ptr)->trans_id;
		ret = npu_host_ipc_send_cmd(npu_dev,
			IPC_QUEUE_APPS_EXEC, cmd_ptr);
		if (ret)
@@ -1163,11 +1170,18 @@ static int npu_send_misc_cmd(struct npu_device *npu_dev, uint32_t q_idx,
	if (host_ctx->fw_error || (host_ctx->fw_state == FW_DISABLED)) {
		pr_err("fw is in error state or disabled, can't send misc cmd\n");
		ret = -EIO;
	} else if (host_ctx->misc_pending) {
		pr_err("Another misc cmd is pending\n");
		ret = -EBUSY;
	} else {
		pr_debug("Send cmd %d\n",
			((struct ipc_cmd_header_pkt *)cmd_ptr)->cmd_type);
		host_ctx->cmd_ret_status = 0;
		reinit_completion(&host_ctx->misc_done);
		host_ctx->misc_pending = true;
		ret = npu_host_ipc_send_cmd(npu_dev, q_idx, cmd_ptr);
		if (ret)
			host_ctx->misc_pending = false;
	}
	mutex_unlock(&host_ctx->lock);

@@ -1306,6 +1320,12 @@ int32_t npu_host_set_fw_property(struct npu_device *npu_dev,
		goto set_prop_exit;
	}

	ret = fw_init(npu_dev);
	if (ret) {
		pr_err("fw_init fail\n");
		goto set_prop_exit;
	}

	prop_packet->header.cmd_type = NPU_IPC_CMD_SET_PROPERTY;
	prop_packet->header.size = pkt_size;
	prop_packet->header.trans_id =
@@ -1318,34 +1338,36 @@ int32_t npu_host_set_fw_property(struct npu_device *npu_dev,
	for (i = 0; i < num_of_params; i++)
		prop_packet->prop_param[i] = property->prop_param[i];

	reinit_completion(&host_ctx->property_done);
	ret = npu_send_misc_cmd(npu_dev, IPC_QUEUE_APPS_EXEC,
		prop_packet);

	pr_debug("NPU_IPC_CMD_SET_PROPERTY sent status: %d\n", ret);

	if (ret) {
		pr_err("NPU_IPC_CMD_SET_PROPERTY failed\n");
		goto set_prop_exit;
		goto deinit_fw;
	}

	ret = wait_for_completion_interruptible_timeout(
		&host_ctx->property_done,
		&host_ctx->misc_done,
		(host_ctx->fw_dbg_mode & FW_DBG_MODE_INC_TIMEOUT) ?
		NW_DEBUG_TIMEOUT : NW_CMD_TIMEOUT);

	if (!ret) {
		pr_err_ratelimited("npu: NPU_IPC_CMD_SET_PROPERTY time out\n");
		ret = -ETIMEDOUT;
		goto set_prop_exit;
		goto deinit_fw;
	} else if (ret < 0) {
		pr_err("Wait for set_property done interrupted by signal\n");
		goto set_prop_exit;
		goto deinit_fw;
	}

	ret = host_ctx->cmd_ret_status;
	if (ret)
		pr_err("set fw property failed %d\n", ret);

deinit_fw:
	fw_deinit(npu_dev, false, true);
set_prop_exit:
	kfree(prop_packet);
	return ret;
@@ -1368,6 +1390,12 @@ int32_t npu_host_get_fw_property(struct npu_device *npu_dev,
	if (!prop_packet)
		return -ENOMEM;

	ret = fw_init(npu_dev);
	if (ret) {
		pr_err("fw_init fail\n");
		goto get_prop_exit;
	}

	prop_packet->header.cmd_type = NPU_IPC_CMD_GET_PROPERTY;
	prop_packet->header.size = pkt_size;
	prop_packet->header.trans_id =
@@ -1380,28 +1408,27 @@ int32_t npu_host_get_fw_property(struct npu_device *npu_dev,
	for (i = 0; i < num_of_params; i++)
		prop_packet->prop_param[i] = property->prop_param[i];

	reinit_completion(&host_ctx->property_done);
	ret = npu_send_misc_cmd(npu_dev, IPC_QUEUE_APPS_EXEC,
		prop_packet);
	pr_debug("NPU_IPC_CMD_GET_PROPERTY sent status: %d\n", ret);

	if (ret) {
		pr_err("NPU_IPC_CMD_GET_PROPERTY failed\n");
		goto get_prop_exit;
		goto deinit_fw;
	}

	ret = wait_for_completion_interruptible_timeout(
		&host_ctx->property_done,
		&host_ctx->misc_done,
		(host_ctx->fw_dbg_mode & FW_DBG_MODE_INC_TIMEOUT) ?
		NW_DEBUG_TIMEOUT : NW_CMD_TIMEOUT);

	if (!ret) {
		pr_err_ratelimited("npu: NPU_IPC_CMD_GET_PROPERTY time out\n");
		ret = -ETIMEDOUT;
		goto get_prop_exit;
		goto deinit_fw;
	} else if (ret < 0) {
		pr_err("Wait for get_property done interrupted by signal\n");
		goto get_prop_exit;
		goto deinit_fw;
	}

	ret = host_ctx->cmd_ret_status;
@@ -1419,6 +1446,8 @@ int32_t npu_host_get_fw_property(struct npu_device *npu_dev,
		pr_err("get fw property failed %d\n", ret);
	}

deinit_fw:
	fw_deinit(npu_dev, false, true);
get_prop_exit:
	kfree(prop_packet);
	return ret;
@@ -1609,7 +1638,7 @@ int32_t npu_host_load_network_v2(struct npu_client *client,
	mutex_lock(&host_ctx->lock);

	if (!ret) {
		pr_err_ratelimited("npu: NPU_IPC_CMD_LOAD time out\n");
		pr_err_ratelimited("npu: NPU_IPC_CMD_LOAD_V2 time out\n");
		ret = -ETIMEDOUT;
		goto error_free_network;
	}
@@ -2052,7 +2081,6 @@ int32_t npu_host_loopback_test(struct npu_device *npu_dev)
	loopback_packet.header.flags = 0;
	loopback_packet.loopbackParams = 15;

	reinit_completion(&host_ctx->loopback_done);
	ret = npu_send_misc_cmd(npu_dev, IPC_QUEUE_APPS_EXEC, &loopback_packet);

	if (ret) {
@@ -2061,7 +2089,7 @@ int32_t npu_host_loopback_test(struct npu_device *npu_dev)
	}

	ret = wait_for_completion_interruptible_timeout(
		&host_ctx->loopback_done,
		&host_ctx->misc_done,
		(host_ctx->fw_dbg_mode & FW_DBG_MODE_INC_TIMEOUT) ?
		NW_DEBUG_TIMEOUT : NW_CMD_TIMEOUT);

+2 −2
Original line number Diff line number Diff line
@@ -77,9 +77,9 @@ struct npu_host_ctx {
	struct delayed_work fw_deinit_work;
	atomic_t fw_deinit_work_cnt;
	struct workqueue_struct *wq;
	struct completion loopback_done;
	struct completion misc_done;
	struct completion fw_deinit_done;
	struct completion property_done;
	bool misc_pending;
	void *prop_buf;
	int32_t network_num;
	struct npu_network networks[MAX_LOADED_NETWORK];