Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 48b897f2 authored by Jilai Wang
Browse files

msm: npu: handle system shutdown/reboot event properly



When the system shuts down while work is still running on the NPU,
the NPUSS is shut down without notifying the kernel driver, which
causes IPC commands to time out and triggers SSR handling.
This change detects that case and handles it properly
to avoid SSR.

Change-Id: I4ba6d702c2d39aa8e1894eba2ec049787df14aa8
Signed-off-by: Jilai Wang <jilaiw@codeaurora.org>
parent c3a5f5cf
Loading
Loading
Loading
Loading
+119 −40
Original line number Original line Diff line number Diff line
@@ -15,6 +15,7 @@
#include "npu_common.h"
#include "npu_common.h"
#include <soc/qcom/subsystem_notif.h>
#include <soc/qcom/subsystem_notif.h>
#include <soc/qcom/subsystem_restart.h>
#include <soc/qcom/subsystem_restart.h>
#include <linux/reboot.h>


/* -------------------------------------------------------------------------
/* -------------------------------------------------------------------------
 * Defines
 * Defines
@@ -279,6 +280,50 @@ int load_fw(struct npu_device *npu_dev)
	return 0;
	return 0;
}
}


static void complete_pending_commands(struct npu_host_ctx *host_ctx)
{
	struct npu_network *network = NULL;
	struct npu_kevent kevt;
	struct npu_network_cmd *cmd;
	struct npu_misc_cmd *misc_cmd;
	int i;

	/* flush all pending npu cmds */
	for (i = 0; i < MAX_LOADED_NETWORK; i++) {
		network = &host_ctx->networks[i];
		if (!network->is_valid || !network->fw_error)
			continue;

		if (network->is_async) {
			NPU_DBG("async cmd, queue ssr event\n");
			kevt.evt.type = MSM_NPU_EVENT_TYPE_SSR;
			kevt.evt.u.ssr.network_hdl =
				network->network_hdl;
			if (npu_queue_event(network->client, &kevt))
				NPU_ERR("queue npu event failed\n");

			while (!list_empty(&network->cmd_list)) {
				cmd = list_first_entry(&network->cmd_list,
					struct npu_network_cmd, list);
				npu_dequeue_network_cmd(network, cmd);
				npu_free_network_cmd(host_ctx, cmd);
			}
		} else {
			list_for_each_entry(cmd, &network->cmd_list, list) {
				NPU_INFO("complete network %llx trans_id %d\n",
					network->id, cmd->trans_id);
				complete(&cmd->cmd_done);
			}
		}
	}

	list_for_each_entry(misc_cmd, &host_ctx->misc_cmd_list, list) {
		NPU_INFO("complete misc cmd trans_id %d\n",
			misc_cmd->trans_id);
		complete(&misc_cmd->cmd_done);
	}
}

int unload_fw(struct npu_device *npu_dev)
int unload_fw(struct npu_device *npu_dev)
{
{
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
@@ -294,7 +339,9 @@ int unload_fw(struct npu_device *npu_dev)
		mutex_unlock(&host_ctx->lock);
		mutex_unlock(&host_ctx->lock);
		return 0;
		return 0;
	} else if (host_ctx->fw_state == FW_ENABLED) {
	} else if (host_ctx->fw_state == FW_ENABLED) {
		NPU_ERR("fw is enabled now, can't be unloaded\n");
		NPU_ERR("fw is enabled now, device is shutting down?\n");
		host_ctx->dev_shuttingdown = true;
		complete_pending_commands(host_ctx);
		mutex_unlock(&host_ctx->lock);
		mutex_unlock(&host_ctx->lock);
		return -EBUSY;
		return -EBUSY;
	}
	}
@@ -315,6 +362,11 @@ static int enable_fw_nolock(struct npu_device *npu_dev)
	int ret = 0;
	int ret = 0;
	uint32_t reg_val;
	uint32_t reg_val;


	if (host_ctx->dev_shuttingdown) {
		NPU_ERR("device is shutting down, ignore enable request\n");
		return -EIO;
	}

	if (host_ctx->fw_state == FW_UNLOADED) {
	if (host_ctx->fw_state == FW_UNLOADED) {
		ret = load_fw_nolock(npu_dev,
		ret = load_fw_nolock(npu_dev,
			host_ctx->auto_pil_disable ? true : false);
			host_ctx->auto_pil_disable ? true : false);
@@ -470,6 +522,11 @@ static int disable_fw_nolock(struct npu_device *npu_dev)
	if (host_ctx->fw_ref_cnt > 0)
	if (host_ctx->fw_ref_cnt > 0)
		return ret;
		return ret;


	if (host_ctx->dev_shuttingdown) {
		NPU_ERR("device is shutting down, ignore disable request\n");
		return -EIO;
	}

	/* turn on auto ACK for warm shuts down */
	/* turn on auto ACK for warm shuts down */
	npu_cc_reg_write(npu_dev, NPU_CC_NPU_CPC_RSC_CTRL, 3);
	npu_cc_reg_write(npu_dev, NPU_CC_NPU_CPC_RSC_CTRL, 3);
	reinit_completion(&host_ctx->fw_shutdown_done);
	reinit_completion(&host_ctx->fw_shutdown_done);
@@ -712,6 +769,24 @@ static int npu_panic_handler(struct notifier_block *this,
	return NOTIFY_DONE;
	return NOTIFY_DONE;
}
}


static int npu_reboot_handler(struct notifier_block *this,
				unsigned long code, void *unused)
{
	struct npu_host_ctx *host_ctx =
		container_of(this, struct npu_host_ctx, reboot_nb);

	NPU_INFO("Device is rebooting with code %d\n", code);

	if ((code == NOTIFY_DONE) || (code == SYS_POWER_OFF)) {
		mutex_lock(&host_ctx->lock);
		host_ctx->dev_shuttingdown = true;
		complete_pending_commands(host_ctx);
		mutex_unlock(&host_ctx->lock);
	}

	return NOTIFY_DONE;
}

static void npu_update_pwr_work(struct work_struct *work)
static void npu_update_pwr_work(struct work_struct *work)
{
{
	int ret;
	int ret;
@@ -764,6 +839,13 @@ int npu_host_init(struct npu_device *npu_dev)
		goto fail;
		goto fail;
	}
	}


	host_ctx->reboot_nb.notifier_call = npu_reboot_handler;
	ret = register_reboot_notifier(&host_ctx->reboot_nb);
	if (ret) {
		NPU_ERR("register reboot notifier failed\n");
		goto fail;
	}

	host_ctx->panic_nb.notifier_call = npu_panic_handler;
	host_ctx->panic_nb.notifier_call = npu_panic_handler;
	ret = atomic_notifier_chain_register(&panic_notifier_list,
	ret = atomic_notifier_chain_register(&panic_notifier_list,
		&host_ctx->panic_nb);
		&host_ctx->panic_nb);
@@ -839,6 +921,7 @@ int npu_host_init(struct npu_device *npu_dev)
	if (host_ctx->notif_hdle)
	if (host_ctx->notif_hdle)
		subsys_notif_unregister_notifier(host_ctx->notif_hdle,
		subsys_notif_unregister_notifier(host_ctx->notif_hdle,
			&host_ctx->nb);
			&host_ctx->nb);
	unregister_reboot_notifier(&host_ctx->reboot_nb);
	mutex_destroy(&host_ctx->lock);
	mutex_destroy(&host_ctx->lock);
	return ret;
	return ret;
}
}
@@ -854,6 +937,7 @@ void npu_host_deinit(struct npu_device *npu_dev)
	destroy_workqueue(host_ctx->wq);
	destroy_workqueue(host_ctx->wq);
	destroy_workqueue(host_ctx->wq_pri);
	destroy_workqueue(host_ctx->wq_pri);
	subsys_notif_unregister_notifier(host_ctx->notif_hdle, &host_ctx->nb);
	subsys_notif_unregister_notifier(host_ctx->notif_hdle, &host_ctx->nb);
	unregister_reboot_notifier(&host_ctx->reboot_nb);
	mutex_destroy(&host_ctx->lock);
	mutex_destroy(&host_ctx->lock);
}
}


@@ -947,9 +1031,6 @@ static int host_error_hdlr(struct npu_device *npu_dev, bool force)
{
{
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
	struct npu_network *network = NULL;
	struct npu_network *network = NULL;
	struct npu_kevent kevt;
	struct npu_network_cmd *cmd;
	struct npu_misc_cmd *misc_cmd;
	bool fw_alive = true;
	bool fw_alive = true;
	int i, ret = 0;
	int i, ret = 0;


@@ -961,6 +1042,12 @@ static int host_error_hdlr(struct npu_device *npu_dev, bool force)
		return 0;
		return 0;
	}
	}


	if (host_ctx->dev_shuttingdown) {
		NPU_INFO("device is shutting down, igonre error handler\n");
		mutex_unlock(&host_ctx->lock);
		return -EIO;
	}

	if (host_ctx->wdg_irq_sts) {
	if (host_ctx->wdg_irq_sts) {
		NPU_INFO("watchdog irq triggered\n");
		NPU_INFO("watchdog irq triggered\n");
		fw_alive = false;
		fw_alive = false;
@@ -1070,41 +1157,8 @@ static int host_error_hdlr(struct npu_device *npu_dev, bool force)
	}
	}


	complete(&host_ctx->fw_deinit_done);
	complete(&host_ctx->fw_deinit_done);
	complete_pending_commands(host_ctx);


	/* flush all pending npu cmds */
	for (i = 0; i < MAX_LOADED_NETWORK; i++) {
		network = &host_ctx->networks[i];
		if (!network->is_valid || !network->fw_error)
			continue;

		if (network->is_async) {
			NPU_DBG("async cmd, queue ssr event\n");
			kevt.evt.type = MSM_NPU_EVENT_TYPE_SSR;
			kevt.evt.u.ssr.network_hdl =
				network->network_hdl;
			if (npu_queue_event(network->client, &kevt))
				NPU_ERR("queue npu event failed\n");

			while (!list_empty(&network->cmd_list)) {
				cmd = list_first_entry(&network->cmd_list,
					struct npu_network_cmd, list);
				npu_dequeue_network_cmd(network, cmd);
				npu_free_network_cmd(host_ctx, cmd);
			}
		} else {
			list_for_each_entry(cmd, &network->cmd_list, list) {
				NPU_DBG("complete network %llx trans_id %d\n",
					network->id, cmd->trans_id);
				complete(&cmd->cmd_done);
			}
		}
	}

	list_for_each_entry(misc_cmd, &host_ctx->misc_cmd_list, list) {
		NPU_DBG("complete misc cmd trans_id %d\n",
			misc_cmd->trans_id);
		complete(&misc_cmd->cmd_done);
	}
	mutex_unlock(&host_ctx->lock);
	mutex_unlock(&host_ctx->lock);


	return ret;
	return ret;
@@ -2086,6 +2140,7 @@ static int npu_send_network_cmd(struct npu_device *npu_dev,
	WARN_ON(!mutex_is_locked(&host_ctx->lock));
	WARN_ON(!mutex_is_locked(&host_ctx->lock));


	if (network->fw_error || host_ctx->fw_error ||
	if (network->fw_error || host_ctx->fw_error ||
		host_ctx->dev_shuttingdown ||
		(host_ctx->fw_state != FW_ENABLED)) {
		(host_ctx->fw_state != FW_ENABLED)) {
		NPU_ERR("fw is in error state or disabled\n");
		NPU_ERR("fw is in error state or disabled\n");
		ret = -EIO;
		ret = -EIO;
@@ -2111,7 +2166,8 @@ static int npu_send_misc_cmd(struct npu_device *npu_dev, uint32_t q_idx,


	WARN_ON(!mutex_is_locked(&host_ctx->lock));
	WARN_ON(!mutex_is_locked(&host_ctx->lock));


	if (host_ctx->fw_error || (host_ctx->fw_state != FW_ENABLED)) {
	if (host_ctx->fw_error || host_ctx->dev_shuttingdown ||
		(host_ctx->fw_state != FW_ENABLED)) {
		NPU_ERR("fw is in error state or disabled\n");
		NPU_ERR("fw is in error state or disabled\n");
		ret = -EIO;
		ret = -EIO;
	} else {
	} else {
@@ -2548,6 +2604,12 @@ int32_t npu_host_load_network_v2(struct npu_client *client,
		goto free_load_cmd;
		goto free_load_cmd;
	}
	}


	if (host_ctx->dev_shuttingdown) {
		ret = -EIO;
		NPU_ERR("device is shutting down\n");
		goto free_load_cmd;
	}

	if (!ret) {
	if (!ret) {
		NPU_ERR("npu: NPU_IPC_CMD_LOAD time out %lld:%d\n",
		NPU_ERR("npu: NPU_IPC_CMD_LOAD time out %lld:%d\n",
			network->id, load_cmd->trans_id);
			network->id, load_cmd->trans_id);
@@ -2633,6 +2695,11 @@ int32_t npu_host_unload_network(struct npu_client *client,
		goto free_network;
		goto free_network;
	}
	}


	if (host_ctx->dev_shuttingdown) {
		NPU_ERR("device is shutting down, skip unload network in fw\n");
		goto free_network;
	}

	NPU_DBG("Unload network %lld\n", network->id);
	NPU_DBG("Unload network %lld\n", network->id);
	/* prepare IPC packet for UNLOAD */
	/* prepare IPC packet for UNLOAD */
	unload_packet.header.cmd_type = NPU_IPC_CMD_UNLOAD;
	unload_packet.header.cmd_type = NPU_IPC_CMD_UNLOAD;
@@ -2686,7 +2753,7 @@ int32_t npu_host_unload_network(struct npu_client *client,


	mutex_lock(&host_ctx->lock);
	mutex_lock(&host_ctx->lock);


	if (network->fw_error) {
	if (network->fw_error || host_ctx->dev_shuttingdown) {
		ret = -EIO;
		ret = -EIO;
		NPU_ERR("fw is in error state during unload network\n");
		NPU_ERR("fw is in error state during unload network\n");
		goto free_network;
		goto free_network;
@@ -2779,6 +2846,12 @@ int32_t npu_host_exec_network_v2(struct npu_client *client,
		goto exec_v2_done;
		goto exec_v2_done;
	}
	}


	if (host_ctx->dev_shuttingdown) {
		NPU_ERR("device is shutting down\n");
		ret = -EIO;
		goto exec_v2_done;
	}

	if (network->is_async && !async_ioctl) {
	if (network->is_async && !async_ioctl) {
		NPU_ERR("network is in async mode\n");
		NPU_ERR("network is in async mode\n");
		ret = -EINVAL;
		ret = -EINVAL;
@@ -2869,6 +2942,12 @@ int32_t npu_host_exec_network_v2(struct npu_client *client,
		goto free_exec_cmd;
		goto free_exec_cmd;
	}
	}


	if (host_ctx->dev_shuttingdown) {
		ret = -EIO;
		NPU_ERR("device is shutting down during execute_v2 network\n");
		goto free_exec_cmd;
	}

	if (!ret) {
	if (!ret) {
		NPU_ERR("npu: %llx:%d NPU_IPC_CMD_EXECUTE_V2 time out\n",
		NPU_ERR("npu: %llx:%d NPU_IPC_CMD_EXECUTE_V2 time out\n",
			network->id, exec_cmd->trans_id);
			network->id, exec_cmd->trans_id);
+3 −1
Original line number Original line Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
/*
 * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved.
 * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
 */
 */


#ifndef _NPU_MGR_H
#ifndef _NPU_MGR_H
@@ -131,10 +131,12 @@ struct npu_host_ctx {
	uint32_t err_irq_sts;
	uint32_t err_irq_sts;
	uint32_t wdg_irq_sts;
	uint32_t wdg_irq_sts;
	bool fw_error;
	bool fw_error;
	bool dev_shuttingdown;
	bool cancel_work;
	bool cancel_work;
	bool app_crashed;
	bool app_crashed;
	struct notifier_block nb;
	struct notifier_block nb;
	struct notifier_block panic_nb;
	struct notifier_block panic_nb;
	struct notifier_block reboot_nb;
	void *notif_hdle;
	void *notif_hdle;
	spinlock_t bridge_mbox_lock;
	spinlock_t bridge_mbox_lock;
	bool bridge_mbox_pwr_on;
	bool bridge_mbox_pwr_on;