Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bd2d4557 authored by Jilai Wang's avatar Jilai Wang
Browse files

msm: npu: Add asynchronous ioctl support



To improve NPU performance, ioctl commands such as
execute_network need to run in asynchronous mode so that
user applications can run multiple operations in parallel.

Change-Id: I8cf4f20569539667a0eda6a0b8e3ec610ddde210
Signed-off-by: Jilai Wang <jilaiw@codeaurora.org>
parent de9f0c5e
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -208,6 +208,19 @@ struct npu_device {
	uint32_t execute_v2_flag;
};

/*
 * Kernel-side wrapper around one event queued for a client.
 *
 * Instances are heap-allocated by npu_queue_event(), linked on
 * npu_client.evt_list, and freed once delivered by npu_receive_event()
 * or discarded by npu_close().
 */
struct npu_kevent {
	/* linkage on npu_client.evt_list */
	struct list_head list;
	/* payload copied to userspace by MSM_NPU_RECEIVE_EVENT */
	struct msm_npu_event evt;
	/*
	 * Kernel-private scratch, never copied to userspace as part of evt.
	 * For MSM_NPU_EVENT_TYPE_EXEC_V2_DONE the producer stores the
	 * network's kernel stats buffer pointer in [0] and the user stats
	 * buffer pointer in [1]; no use of [2] and [3] is visible here.
	 */
	uint64_t reserved[4];
};

/*
 * Per-open-file state, allocated in npu_open() and stored in
 * file->private_data; released in npu_close().
 */
struct npu_client {
	/* device this file handle was opened on */
	struct npu_device *npu_dev;
	/* woken by npu_queue_event(); waited on through npu_poll() */
	wait_queue_head_t wait;

	/* taken around accesses to evt_list by the queue/receive/poll paths */
	struct mutex list_lock;
	/* list of struct npu_kevent not yet delivered to userspace */
	struct list_head evt_list;
};

/* -------------------------------------------------------------------------
 * Function Prototypes
+150 −31
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/of_platform.h>
#include <linux/poll.h>
#include <linux/regulator/consumer.h>
#include <linux/thermal.h>
#include <linux/soc/qcom/llcc-qcom.h>
@@ -75,16 +76,25 @@ static int npu_set_cur_state(struct thermal_cooling_device *cdev,
				unsigned long state);
static int npu_open(struct inode *inode, struct file *file);
static int npu_close(struct inode *inode, struct file *file);
static int npu_get_info(struct npu_device *npu_dev, unsigned long arg);
static int npu_map_buf(struct npu_device *npu_dev, unsigned long arg);
static int npu_unmap_buf(struct npu_device *npu_dev, unsigned long arg);
static int npu_load_network(struct npu_device *npu_dev, unsigned long arg);
static int npu_load_network_v2(struct npu_device *npu_dev, unsigned long arg);
static int npu_unload_network(struct npu_device *npu_dev, unsigned long arg);
static int npu_exec_network(struct npu_device *npu_dev, unsigned long arg);
static int npu_exec_network_v2(struct npu_device *npu_dev, unsigned long arg);
static int npu_get_info(struct npu_client *client, unsigned long arg);
static int npu_map_buf(struct npu_client *client, unsigned long arg);
static int npu_unmap_buf(struct npu_client *client,
	unsigned long arg);
static int npu_load_network(struct npu_client *client,
	unsigned long arg);
static int npu_load_network_v2(struct npu_client *client,
	unsigned long arg);
static int npu_unload_network(struct npu_client *client,
	unsigned long arg);
static int npu_exec_network(struct npu_client *client,
	unsigned long arg);
static int npu_exec_network_v2(struct npu_client *client,
	unsigned long arg);
static int npu_receive_event(struct npu_client *client,
	unsigned long arg);
static long npu_ioctl(struct file *file, unsigned int cmd,
					unsigned long arg);
static unsigned int npu_poll(struct file *filp, struct poll_table_struct *p);
static int npu_parse_dt_clock(struct npu_device *npu_dev);
static int npu_parse_dt_regulator(struct npu_device *npu_dev);
static int npu_of_parse_pwrlevels(struct npu_device *npu_dev,
@@ -207,6 +217,7 @@ static const struct file_operations npu_fops = {
#ifdef CONFIG_COMPAT
	 .compat_ioctl = npu_ioctl,
#endif
	.poll = npu_poll,
};

static const struct thermal_cooling_device_ops npu_cooling_ops = {
@@ -793,14 +804,35 @@ static int npu_open(struct inode *inode, struct file *file)
{
	struct npu_device *npu_dev = container_of(inode->i_cdev,
		struct npu_device, cdev);
	struct npu_client *client;

	file->private_data = npu_dev;
	client = kmalloc(sizeof(*client), GFP_KERNEL);
	if (!client)
		return -ENOMEM;

	client->npu_dev = npu_dev;
	init_waitqueue_head(&client->wait);
	mutex_init(&client->list_lock);
	INIT_LIST_HEAD(&client->evt_list);
	file->private_data = client;

	return 0;
}

/*
 * Release handler for the NPU device node.
 *
 * Frees every event still queued for this client, then destroys the list
 * mutex and frees the client itself. Always returns 0.
 *
 * NOTE(review): the list is drained without taking list_lock, and networks
 * loaded through this client keep a pointer to it (network->client). Confirm
 * that no event can be queued for the client after this point, otherwise
 * npu_queue_event() would touch freed memory.
 */
static int npu_close(struct inode *inode, struct file *file)
{
	struct npu_client *client = file->private_data;
	struct list_head *pending = &client->evt_list;
	struct npu_kevent *stale;

	/* Pop from the head until nothing is left. */
	for (; !list_empty(pending); ) {
		stale = list_first_entry(pending, struct npu_kevent, list);
		list_del(&stale->list);
		kfree(stale);
	}

	mutex_destroy(&client->list_lock);
	kfree(client);

	return 0;
}

@@ -808,8 +840,9 @@ static int npu_close(struct inode *inode, struct file *file)
 * IOCTL Implementations
 * -------------------------------------------------------------------------
 */
static int npu_get_info(struct npu_device *npu_dev, unsigned long arg)
static int npu_get_info(struct npu_client *client, unsigned long arg)
{
	struct npu_device *npu_dev = client->npu_dev;
	struct msm_npu_get_info_ioctl req;
	void __user *argp = (void __user *)arg;
	int ret = 0;
@@ -837,8 +870,9 @@ static int npu_get_info(struct npu_device *npu_dev, unsigned long arg)
	return 0;
}

static int npu_map_buf(struct npu_device *npu_dev, unsigned long arg)
static int npu_map_buf(struct npu_client *client, unsigned long arg)
{
	struct npu_device *npu_dev = client->npu_dev;
	struct msm_npu_map_buf_ioctl req;
	void __user *argp = (void __user *)arg;
	int ret = 0;
@@ -866,8 +900,9 @@ static int npu_map_buf(struct npu_device *npu_dev, unsigned long arg)
	return 0;
}

static int npu_unmap_buf(struct npu_device *npu_dev, unsigned long arg)
static int npu_unmap_buf(struct npu_client *client, unsigned long arg)
{
	struct npu_device *npu_dev = client->npu_dev;
	struct msm_npu_unmap_buf_ioctl req;
	void __user *argp = (void __user *)arg;
	int ret = 0;
@@ -895,7 +930,8 @@ static int npu_unmap_buf(struct npu_device *npu_dev, unsigned long arg)
	return 0;
}

static int npu_load_network(struct npu_device *npu_dev, unsigned long arg)
static int npu_load_network(struct npu_client *client,
	unsigned long arg)
{
	struct msm_npu_load_network_ioctl req;
	void __user *argp = (void __user *)arg;
@@ -910,7 +946,7 @@ static int npu_load_network(struct npu_device *npu_dev, unsigned long arg)

	pr_debug("network load with perf request %d\n", req.perf_mode);

	ret = npu_host_load_network(npu_dev, &req);
	ret = npu_host_load_network(client, &req);
	if (ret) {
		pr_err("network load failed: %d\n", ret);
		return -EFAULT;
@@ -924,7 +960,8 @@ static int npu_load_network(struct npu_device *npu_dev, unsigned long arg)
	return 0;
}

static int npu_load_network_v2(struct npu_device *npu_dev, unsigned long arg)
static int npu_load_network_v2(struct npu_client *client,
	unsigned long arg)
{
	struct msm_npu_load_network_ioctl_v2 req;
	void __user *argp = (void __user *)arg;
@@ -956,7 +993,7 @@ static int npu_load_network_v2(struct npu_device *npu_dev, unsigned long arg)

	pr_debug("network load with perf request %d\n", req.perf_mode);

	ret = npu_host_load_network_v2(npu_dev, &req, patch_info);
	ret = npu_host_load_network_v2(client, &req, patch_info);
	if (ret) {
		pr_err("network load failed: %d\n", ret);
	} else {
@@ -969,7 +1006,8 @@ static int npu_load_network_v2(struct npu_device *npu_dev, unsigned long arg)
	return ret;
}

static int npu_unload_network(struct npu_device *npu_dev, unsigned long arg)
static int npu_unload_network(struct npu_client *client,
	unsigned long arg)
{
	struct msm_npu_unload_network_ioctl req;
	void __user *argp = (void __user *)arg;
@@ -982,7 +1020,7 @@ static int npu_unload_network(struct npu_device *npu_dev, unsigned long arg)
		return -EFAULT;
	}

	ret = npu_host_unload_network(npu_dev, &req);
	ret = npu_host_unload_network(client, &req);

	if (ret) {
		pr_err("npu_host_unload_network failed\n");
@@ -998,7 +1036,8 @@ static int npu_unload_network(struct npu_device *npu_dev, unsigned long arg)
	return 0;
}

static int npu_exec_network(struct npu_device *npu_dev, unsigned long arg)
static int npu_exec_network(struct npu_client *client,
	unsigned long arg)
{
	struct msm_npu_exec_network_ioctl req;
	void __user *argp = (void __user *)arg;
@@ -1011,7 +1050,15 @@ static int npu_exec_network(struct npu_device *npu_dev, unsigned long arg)
		return -EFAULT;
	}

	ret = npu_host_exec_network(npu_dev, &req);
	if ((req.input_layer_num > MSM_NPU_MAX_INPUT_LAYER_NUM) ||
		(req.output_layer_num > MSM_NPU_MAX_OUTPUT_LAYER_NUM)) {
		pr_err("Invalid input/out layer num %d[max:%d] %d[max:%d]\n",
			req.input_layer_num, MSM_NPU_MAX_INPUT_LAYER_NUM,
			req.output_layer_num, MSM_NPU_MAX_OUTPUT_LAYER_NUM);
		return -EINVAL;
	}

	ret = npu_host_exec_network(client, &req);

	if (ret) {
		pr_err("npu_host_exec_network failed\n");
@@ -1027,7 +1074,8 @@ static int npu_exec_network(struct npu_device *npu_dev, unsigned long arg)
	return 0;
}

static int npu_exec_network_v2(struct npu_device *npu_dev, unsigned long arg)
static int npu_exec_network_v2(struct npu_client *client,
	unsigned long arg)
{
	struct msm_npu_exec_network_ioctl_v2 req;
	void __user *argp = (void __user *)arg;
@@ -1063,7 +1111,7 @@ static int npu_exec_network_v2(struct npu_device *npu_dev, unsigned long arg)
			req.patch_buf_info_num * sizeof(*patch_buf_info));
	}

	ret = npu_host_exec_network_v2(npu_dev, &req, patch_buf_info);
	ret = npu_host_exec_network_v2(client, &req, patch_buf_info);
	if (ret) {
		pr_err("npu_host_exec_network failed\n");
	} else {
@@ -1076,36 +1124,90 @@ static int npu_exec_network_v2(struct npu_device *npu_dev, unsigned long arg)
	return ret;
}

static int npu_process_kevent(struct npu_kevent *kevt)
{
	int ret = 0;

	switch (kevt->evt.type) {
	case MSM_NPU_EVENT_TYPE_EXEC_V2_DONE:
		ret = copy_to_user((void __user *)kevt->reserved[1],
			(void *)&kevt->reserved[0],
			kevt->evt.u.exec_v2_done.stats_buf_size);
		if (ret) {
			pr_err("fail to copy to user\n");
			kevt->evt.u.exec_v2_done.stats_buf_size = 0;
			ret = -EFAULT;
		}
		break;
	default:
		break;
	}

	return ret;
}

static int npu_receive_event(struct npu_client *client,
	unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct npu_kevent *kevt;
	int ret = 0;

	mutex_lock(&client->list_lock);
	if (list_empty(&client->evt_list)) {
		pr_err("event list is empty\n");
		ret = -EINVAL;
	} else {
		kevt = list_first_entry(&client->evt_list,
			struct npu_kevent, list);
		list_del(&kevt->list);
		npu_process_kevent(kevt);
		ret = copy_to_user(argp, &kevt->evt,
			sizeof(struct msm_npu_event));
		if (ret) {
			pr_err("fail to copy to user\n");
			ret = -EFAULT;
		}
		kfree(kevt);
	}
	mutex_unlock(&client->list_lock);

	return ret;
}

static long npu_ioctl(struct file *file, unsigned int cmd,
						 unsigned long arg)
{
	int ret = -ENOIOCTLCMD;
	struct npu_device *npu_dev = file->private_data;
	struct npu_client *client = file->private_data;

	switch (cmd) {
	case MSM_NPU_GET_INFO:
		ret = npu_get_info(npu_dev, arg);
		ret = npu_get_info(client, arg);
		break;
	case MSM_NPU_MAP_BUF:
		ret = npu_map_buf(npu_dev, arg);
		ret = npu_map_buf(client, arg);
		break;
	case MSM_NPU_UNMAP_BUF:
		ret = npu_unmap_buf(npu_dev, arg);
		ret = npu_unmap_buf(client, arg);
		break;
	case MSM_NPU_LOAD_NETWORK:
		ret = npu_load_network(npu_dev, arg);
		ret = npu_load_network(client, arg);
		break;
	case MSM_NPU_LOAD_NETWORK_V2:
		ret = npu_load_network_v2(npu_dev, arg);
		ret = npu_load_network_v2(client, arg);
		break;
	case MSM_NPU_UNLOAD_NETWORK:
		ret = npu_unload_network(npu_dev, arg);
		ret = npu_unload_network(client, arg);
		break;
	case MSM_NPU_EXEC_NETWORK:
		ret = npu_exec_network(npu_dev, arg);
		ret = npu_exec_network(client, arg);
		break;
	case MSM_NPU_EXEC_NETWORK_V2:
		ret = npu_exec_network_v2(npu_dev, arg);
		ret = npu_exec_network_v2(client, arg);
		break;
	case MSM_NPU_RECEIVE_EVENT:
		ret = npu_receive_event(client, arg);
		break;
	default:
		pr_err("unexpected IOCTL %x\n", cmd);
@@ -1114,6 +1216,23 @@ static long npu_ioctl(struct file *file, unsigned int cmd,
	return ret;
}

/*
 * poll() handler: registers the caller on the client's wait queue and
 * reports the file as readable (POLLIN | POLLRDNORM) when at least one
 * event is queued; otherwise returns 0.
 */
static unsigned int npu_poll(struct file *filp, struct poll_table_struct *p)
{
	struct npu_client *client = filp->private_data;
	int mask;

	poll_wait(filp, &client->wait, p);

	/* Sample the queue state under the same lock the producer uses. */
	mutex_lock(&client->list_lock);
	if (list_empty(&client->evt_list)) {
		mask = 0;
	} else {
		pr_debug("poll cmd done\n");
		mask = POLLIN | POLLRDNORM;
	}
	mutex_unlock(&client->list_lock);

	return mask;
}

/* -------------------------------------------------------------------------
 * Device Tree Parsing
 * -------------------------------------------------------------------------
+116 −50
Original line number Diff line number Diff line
@@ -42,7 +42,8 @@
static void host_irq_wq(struct work_struct *work);
static void turn_off_fw_logging(struct npu_device *npu_dev);
static int wait_for_fw_ready(struct npu_device *npu_dev, uint32_t status_bits);
static struct npu_network *alloc_network(struct npu_host_ctx *ctx);
static struct npu_network *alloc_network(struct npu_host_ctx *ctx,
	struct npu_client *client);
static struct npu_network *get_network_by_hdl(struct npu_host_ctx *ctx,
	uint32_t hdl);
static struct npu_network *get_network_by_id(struct npu_host_ctx *ctx,
@@ -54,9 +55,10 @@ static void host_session_msg_hdlr(struct npu_device *npu_dev);
static void host_session_log_hdlr(struct npu_device *npu_dev);
static int host_error_hdlr(struct npu_device *npu_dev);
static int npu_send_network_cmd(struct npu_device *npu_dev,
	struct npu_network *network, void *cmd_ptr);
	struct npu_network *network, void *cmd_ptr, bool async);
static int npu_send_misc_cmd(struct npu_device *npu_dev, uint32_t q_idx,
	void *cmd_ptr);
static int npu_queue_event(struct npu_client *client, struct npu_kevent *evt);

/* -------------------------------------------------------------------------
 * Function Definitions - Init / Deinit
@@ -298,6 +300,7 @@ static int host_error_hdlr(struct npu_device *npu_dev)
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
	struct npu_network *network = NULL;
	bool fw_alive = true;
	struct npu_kevent kevt;
	int i;

	if ((host_ctx->wdg_irq_sts == 0) && (host_ctx->err_irq_sts == 0))
@@ -317,10 +320,19 @@ static int host_error_hdlr(struct npu_device *npu_dev)
		network = &host_ctx->networks[i];
		if (network->is_valid && network->cmd_pending &&
			network->fw_error) {
			if (network->cmd_async) {
				pr_debug("async cmd, queue ssr event\n");
				kevt.evt.type = MSM_NPU_EVENT_TYPE_SSR;
				kevt.evt.u.ssr.network_hdl =
					network->network_hdl;
				if (npu_queue_event(network->client, &kevt))
					pr_err("queue npu event failed\n");
			} else {
				pr_debug("complete network %x\n", network->id);
				complete(&network->cmd_done);
			}
		}
	}
	complete_all(&host_ctx->loopback_done);

	return 1;
@@ -391,7 +403,8 @@ static int wait_for_fw_ready(struct npu_device *npu_dev, uint32_t status_bits)
 * Function Definitions - Network Management
 * -------------------------------------------------------------------------
 */
static struct npu_network *alloc_network(struct npu_host_ctx *ctx)
static struct npu_network *alloc_network(struct npu_host_ctx *ctx,
	struct npu_client *client)
{
	int32_t i;
	struct npu_network *network = ctx->networks;
@@ -416,6 +429,13 @@ static struct npu_network *alloc_network(struct npu_host_ctx *ctx)
		network->is_valid = true;
		network->fw_error = false;
		network->cmd_pending = false;
		network->client = client;
		network->stats_buf = kzalloc(MSM_NPU_MAX_STATS_BUF_SIZE,
			GFP_KERNEL);
		if (!network->stats_buf) {
			free_network(ctx, network->id);
			network = NULL;
		}
	}

	return network;
@@ -458,6 +478,7 @@ static void free_network(struct npu_host_ctx *ctx, int64_t id)
	struct npu_network *network = get_network_by_id(ctx, id);

	if (network) {
		kfree(network->stats_buf);
		mutex_lock(&ctx->lock);
		memset(network, 0, sizeof(struct npu_network));
		ctx->network_num--;
@@ -469,10 +490,28 @@ static void free_network(struct npu_host_ctx *ctx, int64_t id)
 * Function Definitions - IPC
 * -------------------------------------------------------------------------
 */
static int npu_queue_event(struct npu_client *client, struct npu_kevent *evt)
{
	struct npu_kevent *kevt = kmalloc(sizeof(*kevt), GFP_KERNEL);

	if (!kevt)
		return -ENOMEM;

	*kevt = *evt;
	INIT_LIST_HEAD(&kevt->list);
	mutex_lock(&client->list_lock);
	list_add_tail(&kevt->list, &client->evt_list);
	mutex_unlock(&client->list_lock);
	wake_up_interruptible(&client->wait);

	return 0;
}

static void app_msg_proc(struct npu_host_ctx *host_ctx, uint32_t *msg)
{
	uint32_t msg_id;
	struct npu_network *network = NULL;
	struct npu_kevent kevt;

	msg_id = msg[1];
	switch (msg_id) {
@@ -501,7 +540,20 @@ static void app_msg_proc(struct npu_host_ctx *host_ctx, uint32_t *msg)
			break;
		}
		network->cmd_pending = false;

		if (!network->cmd_async) {
			complete(&network->cmd_done);
		} else {
			pr_debug("async cmd, queue event\n");
			kevt.evt.type = MSM_NPU_EVENT_TYPE_EXEC_DONE;
			kevt.evt.u.exec_done.network_hdl =
				exe_rsp_pkt->network_hdl;
			kevt.evt.u.exec_done.exec_result =
				exe_rsp_pkt->header.status;
			if (npu_queue_event(network->client, &kevt))
				pr_err("queue npu event failed\n");
		}

		break;
	}
	case NPU_IPC_MSG_EXECUTE_V2_DONE:
@@ -527,13 +579,27 @@ static void app_msg_proc(struct npu_host_ctx *host_ctx, uint32_t *msg)
			stats_size);
		stats_size = stats_size < network->stats_buf_size ?
			stats_size : network->stats_buf_size;
		network->stats_buf_size = stats_size;
		if (stats_size)
			memcpy(network->stats_buf, exe_rsp_pkt->stats_data,
				stats_size);

		network->stats_buf_size = stats_size;
		network->cmd_pending = false;
		if (network->cmd_async) {
			pr_debug("async cmd, queue event\n");
			kevt.evt.type = MSM_NPU_EVENT_TYPE_EXEC_V2_DONE;
			kevt.evt.u.exec_v2_done.network_hdl =
				exe_rsp_pkt->network_hdl;
			kevt.evt.u.exec_v2_done.exec_result =
				exe_rsp_pkt->header.status;
			kevt.evt.u.exec_v2_done.stats_buf_size = stats_size;
			kevt.reserved[0] = (uint64_t)network->stats_buf;
			kevt.reserved[1] = (uint64_t)network->stats_buf_u;
			if (npu_queue_event(network->client, &kevt))
				pr_err("queue npu event failed\n");
		} else {
			complete(&network->cmd_done);
		}
		break;
	}
	case NPU_IPC_MSG_LOAD_DONE:
@@ -679,7 +745,7 @@ int32_t npu_host_unmap_buf(struct npu_device *npu_dev,
}

static int npu_send_network_cmd(struct npu_device *npu_dev,
	struct npu_network *network, void *cmd_ptr)
	struct npu_network *network, void *cmd_ptr, bool async)
{
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
	int ret = 0;
@@ -689,9 +755,13 @@ static int npu_send_network_cmd(struct npu_device *npu_dev,
		(host_ctx->fw_state == FW_DISABLED)) {
		pr_err("fw is in error state or disabled, can't send network cmd\n");
		ret = -EIO;
	} else if (network->cmd_pending) {
		pr_err("Another cmd is pending\n");
		ret = -EBUSY;
	} else {
		pr_debug("Send cmd %d\n",
			((struct ipc_cmd_header_pkt *)cmd_ptr)->cmd_type);
		network->cmd_async = async;
		ret = npu_host_ipc_send_cmd(npu_dev,
			IPC_QUEUE_APPS_EXEC, cmd_ptr);
		if (!ret)
@@ -783,10 +853,11 @@ static uint32_t find_networks_perf_mode(struct npu_host_ctx *host_ctx)
	return max_perf_mode;
}

int32_t npu_host_load_network(struct npu_device *npu_dev,
int32_t npu_host_load_network(struct npu_client *client,
			struct msm_npu_load_network_ioctl *load_ioctl)
{
	int ret = 0;
	struct npu_device *npu_dev = client->npu_dev;
	struct npu_network *network;
	struct ipc_cmd_load_pkt load_packet;
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
@@ -796,7 +867,7 @@ int32_t npu_host_load_network(struct npu_device *npu_dev,
	if (ret)
		return ret;

	network = alloc_network(host_ctx);
	network = alloc_network(host_ctx, client);
	if (!network) {
		ret = -ENOMEM;
		goto err_deinit_fw;
@@ -830,7 +901,7 @@ int32_t npu_host_load_network(struct npu_device *npu_dev,

	/* NPU_IPC_CMD_LOAD will go onto IPC_QUEUE_APPS_EXEC */
	reinit_completion(&network->cmd_done);
	ret = npu_send_network_cmd(npu_dev, network, &load_packet);
	ret = npu_send_network_cmd(npu_dev, network, &load_packet, false);
	if (ret) {
		pr_err("NPU_IPC_CMD_LOAD sent failed: %d\n", ret);
		goto error_free_network;
@@ -860,11 +931,12 @@ int32_t npu_host_load_network(struct npu_device *npu_dev,
	return ret;
}

int32_t npu_host_load_network_v2(struct npu_device *npu_dev,
int32_t npu_host_load_network_v2(struct npu_client *client,
			struct msm_npu_load_network_ioctl_v2 *load_ioctl,
			struct msm_npu_patch_info_v2 *patch_info)
{
	int ret = 0, i;
	struct npu_device *npu_dev = client->npu_dev;
	struct npu_network *network;
	struct ipc_cmd_load_pkt_v2 *load_packet = NULL;
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
@@ -875,7 +947,7 @@ int32_t npu_host_load_network_v2(struct npu_device *npu_dev,
	if (ret)
		return ret;

	network = alloc_network(host_ctx);
	network = alloc_network(host_ctx, client);
	if (!network) {
		ret = -ENOMEM;
		goto err_deinit_fw;
@@ -926,7 +998,7 @@ int32_t npu_host_load_network_v2(struct npu_device *npu_dev,

	/* NPU_IPC_CMD_LOAD_V2 will go onto IPC_QUEUE_APPS_EXEC */
	reinit_completion(&network->cmd_done);
	ret = npu_send_network_cmd(npu_dev, network, load_packet);
	ret = npu_send_network_cmd(npu_dev, network, load_packet, false);
	if (ret) {
		pr_debug("NPU_IPC_CMD_LOAD_V2 sent failed: %d\n", ret);
		goto error_free_network;
@@ -957,10 +1029,11 @@ int32_t npu_host_load_network_v2(struct npu_device *npu_dev,
	return ret;
}

int32_t npu_host_unload_network(struct npu_device *npu_dev,
int32_t npu_host_unload_network(struct npu_client *client,
			struct msm_npu_unload_network_ioctl *unload)
{
	int ret = 0;
	struct npu_device *npu_dev = client->npu_dev;
	struct ipc_cmd_unload_pkt unload_packet;
	struct npu_network *network;
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
@@ -985,7 +1058,7 @@ int32_t npu_host_unload_network(struct npu_device *npu_dev,

	/* NPU_IPC_CMD_UNLOAD will go onto IPC_QUEUE_APPS_EXEC */
	reinit_completion(&network->cmd_done);
	ret = npu_send_network_cmd(npu_dev, network, &unload_packet);
	ret = npu_send_network_cmd(npu_dev, network, &unload_packet, false);

	if (ret) {
		pr_err("NPU_IPC_CMD_UNLOAD sent failed: %d\n", ret);
@@ -1010,17 +1083,17 @@ int32_t npu_host_unload_network(struct npu_device *npu_dev,
	return ret;
}

int32_t npu_host_exec_network(struct npu_device *npu_dev,
int32_t npu_host_exec_network(struct npu_client *client,
			struct msm_npu_exec_network_ioctl *exec_ioctl)
{
	struct npu_device *npu_dev = client->npu_dev;
	struct ipc_cmd_execute_pkt exec_packet;
	/* npu mapped addr */
	uint64_t input_addr = 0, output_addr = 0;
	uint64_t input_off, output_off;
	int32_t ret;
	struct npu_network *network;
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
	int i = 0;
	bool async_ioctl = !!exec_ioctl->async;

	network = get_network_by_hdl(host_ctx, exec_ioctl->network_hdl);

@@ -1032,14 +1105,15 @@ int32_t npu_host_exec_network(struct npu_device *npu_dev,

	memset(&exec_packet, 0, sizeof(exec_packet));
	if (exec_ioctl->patching_required) {
		if (exec_ioctl->input_layer_num == 1)
			input_addr = exec_ioctl->input_layers[0].buf_phys_addr;
		if (exec_ioctl->output_layer_num == 1)
			output_addr =
				exec_ioctl->output_layers[0].buf_phys_addr;
		if ((exec_ioctl->input_layer_num != 1) ||
			(exec_ioctl->output_layer_num != 1)) {
			pr_err("Invalid input/output layer num\n");
			return -EINVAL;
		}

		input_off = exec_ioctl->input_layers[0].buf_phys_addr;
		output_off = exec_ioctl->output_layers[0].buf_phys_addr;
		exec_packet.patch_params.num_params = 2;
		input_off = (uint64_t)input_addr;
		output_off = (uint64_t)output_addr;
		host_copy_patch_data(&exec_packet.patch_params.param[0],
			(uint32_t)input_off, &exec_ioctl->input_layers[0]);
		host_copy_patch_data(&exec_packet.patch_params.param[1],
@@ -1057,10 +1131,12 @@ int32_t npu_host_exec_network(struct npu_device *npu_dev,

	/* Send it on the high priority queue */
	reinit_completion(&network->cmd_done);
	ret = npu_send_network_cmd(npu_dev, network, &exec_packet);
	ret = npu_send_network_cmd(npu_dev, network, &exec_packet, async_ioctl);

	if (ret) {
		pr_err("NPU_IPC_CMD_EXECUTE sent failed: %d\n", ret);
	} else if (async_ioctl) {
		pr_debug("Async ioctl, return now\n");
	} else if (!wait_for_completion_interruptible_timeout(
		&network->cmd_done,
		(host_ctx->fw_dbg_mode & FW_DBG_MODE_INC_TIMEOUT) ?
@@ -1072,28 +1148,22 @@ int32_t npu_host_exec_network(struct npu_device *npu_dev,
	} else if (network->fw_error) {
		ret = -EIO;
		pr_err("execute cmd returns with error\n");
	} else {
		/* Invalidate output buffers */
		for (i = 0; i < exec_ioctl->output_layer_num; i++) {
			if (exec_ioctl->output_layer_num == 1) {
				npu_mem_invalidate(npu_dev,
					exec_ioctl->output_layers[i].buf_hdl);
			}
		}
	}

	return ret;
}

int32_t npu_host_exec_network_v2(struct npu_device *npu_dev,
int32_t npu_host_exec_network_v2(struct npu_client *client,
	struct msm_npu_exec_network_ioctl_v2 *exec_ioctl,
	struct msm_npu_patch_buf_info *patch_buf_info)
{
	struct npu_device *npu_dev = client->npu_dev;
	struct ipc_cmd_execute_pkt_v2 *exec_packet;
	int32_t ret;
	struct npu_network *network;
	struct npu_host_ctx *host_ctx = &npu_dev->host_ctx;
	uint32_t num_patch_params, pkt_size;
	bool async_ioctl = !!exec_ioctl->async;
	int i;

	network = get_network_by_hdl(host_ctx, exec_ioctl->network_hdl);
@@ -1131,11 +1201,7 @@ int32_t npu_host_exec_network_v2(struct npu_device *npu_dev,
	exec_packet->network_hdl = network->network_hdl;
	exec_packet->num_patch_params = num_patch_params;

	/* allocate stats_buf to be filled after execution */
	network->stats_buf = kzalloc(exec_ioctl->stats_buf_size, GFP_KERNEL);
	if (!network->stats_buf)
		network->stats_buf_size = 0;
	else
	network->stats_buf_u = (void __user *)exec_ioctl->stats_buf_addr;
	network->stats_buf_size = exec_ioctl->stats_buf_size;

	pr_debug("Execute_v2 flags %x stats_buf_size %d\n",
@@ -1143,10 +1209,12 @@ int32_t npu_host_exec_network_v2(struct npu_device *npu_dev,

	/* Send it on the high priority queue */
	reinit_completion(&network->cmd_done);
	ret = npu_send_network_cmd(npu_dev, network, exec_packet);
	ret = npu_send_network_cmd(npu_dev, network, exec_packet, async_ioctl);

	if (ret) {
		pr_err("NPU_IPC_CMD_EXECUTE_V2 sent failed: %d\n", ret);
	} else if (async_ioctl) {
		pr_debug("Async ioctl, return now\n");
	} else if (!wait_for_completion_interruptible_timeout(
		&network->cmd_done,
		(host_ctx->fw_dbg_mode & FW_DBG_MODE_INC_TIMEOUT) ?
@@ -1160,15 +1228,13 @@ int32_t npu_host_exec_network_v2(struct npu_device *npu_dev,
		pr_err("execute cmd returns with error\n");
	} else {
		exec_ioctl->stats_buf_size = network->stats_buf_size;
		if (exec_ioctl->stats_buf_size) {
			if (copy_to_user(
				(void __user *)exec_ioctl->stats_buf_addr,
				network->stats_buf, exec_ioctl->stats_buf_size))
		if (copy_to_user((void __user *)exec_ioctl->stats_buf_addr,
			network->stats_buf, exec_ioctl->stats_buf_size)) {
			pr_err("copy stats to user failed\n");
			exec_ioctl->stats_buf_size = 0;
		}
	}

	kfree(network->stats_buf);
	kfree(exec_packet);
	return ret;
}
+9 −5
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
 */
#include <linux/spinlock.h>
#include "npu_hw_access.h"
#include "npu_common.h"

/* -------------------------------------------------------------------------
 * Defines
@@ -50,11 +51,14 @@ struct npu_network {
	uint32_t perf_mode;
	uint32_t num_layers;
	void *stats_buf;
	void __user *stats_buf_u;
	uint32_t stats_buf_size;
	bool is_valid;
	bool fw_error;
	bool cmd_pending;
	bool cmd_async;
	struct completion cmd_done;
	struct npu_client *client;
};

enum fw_state {
@@ -107,16 +111,16 @@ int32_t npu_host_map_buf(struct npu_device *npu_dev,
	struct msm_npu_map_buf_ioctl *map_ioctl);
int32_t npu_host_unmap_buf(struct npu_device *npu_dev,
	struct msm_npu_unmap_buf_ioctl *unmap_ioctl);
int32_t npu_host_load_network(struct npu_device *npu_dev,
int32_t npu_host_load_network(struct npu_client *client,
	struct msm_npu_load_network_ioctl *load_ioctl);
int32_t npu_host_load_network_v2(struct npu_device *npu_dev,
int32_t npu_host_load_network_v2(struct npu_client *client,
	struct msm_npu_load_network_ioctl_v2 *load_ioctl,
	struct msm_npu_patch_info_v2 *patch_info);
int32_t npu_host_unload_network(struct npu_device *npu_dev,
int32_t npu_host_unload_network(struct npu_client *client,
	struct msm_npu_unload_network_ioctl *unload);
int32_t npu_host_exec_network(struct npu_device *npu_dev,
int32_t npu_host_exec_network(struct npu_client *client,
	struct msm_npu_exec_network_ioctl *exec_ioctl);
int32_t npu_host_exec_network_v2(struct npu_device *npu_dev,
int32_t npu_host_exec_network_v2(struct npu_client *client,
	struct msm_npu_exec_network_ioctl_v2 *exec_ioctl,
	struct msm_npu_patch_buf_info *patch_buf_info);
int32_t npu_host_loopback_test(struct npu_device *npu_dev);
+36 −0
Original line number Diff line number Diff line
@@ -45,6 +45,15 @@
#define MSM_NPU_EXEC_NETWORK_V2 \
	_IOWR(MSM_NPU_IOCTL_MAGIC, 8, struct msm_npu_exec_network_ioctl_v2)

/* receive event */
#define MSM_NPU_RECEIVE_EVENT \
	_IOR(MSM_NPU_IOCTL_MAGIC, 9, struct msm_npu_event)

#define MSM_NPU_EVENT_TYPE_START 0x10000000
#define MSM_NPU_EVENT_TYPE_EXEC_DONE (MSM_NPU_EVENT_TYPE_START + 1)
#define MSM_NPU_EVENT_TYPE_EXEC_V2_DONE (MSM_NPU_EVENT_TYPE_START + 2)
#define MSM_NPU_EVENT_TYPE_SSR (MSM_NPU_EVENT_TYPE_START + 3)

#define MSM_NPU_MAX_INPUT_LAYER_NUM 8
#define MSM_NPU_MAX_OUTPUT_LAYER_NUM 4
#define MSM_NPU_MAX_PATCH_LAYER_NUM (MSM_NPU_MAX_INPUT_LAYER_NUM +\
@@ -219,4 +228,31 @@ struct msm_npu_exec_network_ioctl_v2 {
	uint32_t reserved;
};

/* Payload of an MSM_NPU_EVENT_TYPE_EXEC_DONE event */
struct msm_npu_event_execute_done {
	/* network handle taken from the firmware's execute response */
	uint32_t network_hdl;
	/* status field of the firmware's execute response header */
	int32_t exec_result;
};

/* Payload of an MSM_NPU_EVENT_TYPE_EXEC_V2_DONE event */
struct msm_npu_event_execute_v2_done {
	/* network handle taken from the firmware's execute response */
	uint32_t network_hdl;
	/* status field of the firmware's execute response header */
	int32_t exec_result;
	/*
	 * stats buf size filled; reset to 0 by the driver when copying the
	 * stats to the user buffer fails
	 */
	uint32_t stats_buf_size;
};

/*
 * Payload of an MSM_NPU_EVENT_TYPE_SSR event, queued by the driver's error
 * handler for a network with a pending asynchronous command.
 */
struct msm_npu_event_ssr {
	/* handle of the affected network */
	uint32_t network_hdl;
};

/* Event record returned to userspace by the MSM_NPU_RECEIVE_EVENT ioctl */
struct msm_npu_event {
	/* one of the MSM_NPU_EVENT_TYPE_* values; selects the member of u */
	uint32_t type;
	union {
		struct msm_npu_event_execute_done exec_done;
		struct msm_npu_event_execute_v2_done exec_v2_done;
		struct msm_npu_event_ssr ssr;
		/* raw view; keeps the union at least 128 bytes in size */
		uint8_t data[128];
	} u;
	/* not written by any code visible in this change */
	uint32_t reserved[4];
};

#endif /*_UAPI_MSM_NPU_H_*/