Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1f44fc79 authored by Arun Menon's avatar Arun Menon Committed by Vikash Garodia
Browse files

msm: vidc: recover from firmware fatal error



If the firmware goes into a bad state, the video driver
should try to restart the firmware so that subsequent
new video sessions can play without error.
Prior to this change, the video driver would
unload the firmware only after all open instances
had been closed by the client. If the client failed
to close any of the open instances, the firmware
would not be unloaded and would remain in the bad
state. This would fail all new video sessions and could
be overcome only by resetting the device.

Change-Id: Ifadb2ecf6e7021ed60ee3b0992e4d1950f8ca53d
Signed-off-by: Arun Menon <avmenon@codeaurora.org>
parent acf473c0
Loading
Loading
Loading
Loading
+126 −78
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <soc/qcom/subsystem_restart.h>
#include <asm/div64.h>
#include "msm_vidc_common.h"
@@ -909,95 +910,132 @@ static void handle_session_error(enum command_response cmd, void *data)

	msm_vidc_queue_v4l2_event(inst, V4L2_EVENT_MSM_VIDC_SYS_ERROR);
}
static void handle_sys_error(enum command_response cmd, void *data)

/*
 * Context for one deferred sys-error recovery. An instance is kzalloc()ed
 * in handle_sys_error() and kfree()d at the end of hw_sys_error_handler().
 */
struct sys_err_handler_data {
	struct msm_vidc_core *core;	/* core that reported the error */
	struct delayed_work work;	/* initialised to run hw_sys_error_handler() */
};


/*
 * hw_sys_error_handler() - delayed-work callback that tries to restart the
 * firmware after a sys error.
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers, so
 * lines of the previous handle_sys_error() body are interleaved with the new
 * function below; check it against the original commit before relying on it.
 *
 * Going by the lines that belong to the new handler: the
 * sys_err_handler_data wrapping @work is recovered with container_of().
 * With core->sync_lock held, if core->state is VIDC_CORE_INVALID and
 * hdev->resurrect_fw is set, the resurrect_fw op is called under core->lock
 * and core->state is then set to VIDC_CORE_LOADED. The handler is kfree()d
 * on every path.
 *
 * NOTE(review): core->state is set to VIDC_CORE_LOADED even when
 * resurrect_fw returns an error - confirm this is intended.
 */
void hw_sys_error_handler(struct work_struct *work)
{
	struct msm_vidc_cb_cmd_done *response = data;
	struct msm_vidc_inst *inst = NULL;
	struct msm_vidc_core *core = NULL;
	struct hfi_device *hdev = NULL;
	struct sys_err_handler_data *handler = NULL;
	int rc = 0;

	subsystem_crashed("venus");
	if (response) {
		core = get_vidc_core(response->device_id);
		dprintk(VIDC_WARN, "SYS_ERROR received for core %p\n", core);
		if (core) {
			mutex_lock(&core->lock);
			core->state = VIDC_CORE_INVALID;
			mutex_unlock(&core->lock);
			mutex_lock(&core->sync_lock);
			list_for_each_entry(inst, &core->instances,
					list) {
				mutex_lock(&inst->lock);
				inst->state = MSM_VIDC_CORE_INVALID;
				if (inst->core)
					hdev = inst->core->device;
				if (hdev && inst->session) {
					dprintk(VIDC_DBG,
					"cleaning up inst: 0x%p\n", inst);
					rc = call_hfi_op(hdev, session_clean,
						(void *) inst->session);
					if (rc)
						dprintk(VIDC_ERR,
							"Sess clean failed :%p\n",
							inst);
				}
				inst->session = NULL;
				mutex_unlock(&inst->lock);
				msm_vidc_queue_v4l2_event(inst,
						V4L2_EVENT_MSM_VIDC_SYS_ERROR);
	handler = container_of(work, struct sys_err_handler_data, work.work);
	if (!handler || !handler->core || !handler->core->device) {
		dprintk(VIDC_ERR, "%s - invalid work or core handle\n",
				__func__);
		goto exit;
	}
			mutex_unlock(&core->sync_lock);
		} else {

	core = handler->core;
	hdev = core->device;

	mutex_lock(&core->sync_lock);
	/*
	* Restart the firmware to bring out of bad state.
	*/
	if ((core->state == VIDC_CORE_INVALID) &&
		hdev->resurrect_fw) {
		mutex_lock(&core->lock);
		rc = call_hfi_op(hdev, resurrect_fw,
				hdev->hfi_device_data);
		if (rc) {
			dprintk(VIDC_ERR,
				"Got SYS_ERR but unable to identify core\n");
				"%s - resurrect_fw failed: %d\n",
				__func__, rc);
		}
		core->state = VIDC_CORE_LOADED;
		mutex_unlock(&core->lock);
	} else {
		dprintk(VIDC_ERR,
			"Failed to get valid response for sys error\n");
		dprintk(VIDC_DBG,
			"fw unloaded after sys error, no need to resurrect\n");
	}
	mutex_unlock(&core->sync_lock);

exit:
	/* free sys error handler, allocated in handle_sys_err */
	kfree(handler);
}

static void handle_sys_watchdog_timeout(enum command_response cmd, void *data)
/*
 * handle_sys_error() - response handler for a firmware sys error.
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers, so
 * lines of the removed handle_sys_watchdog_timeout() body are interleaved
 * with the new function below; check it against the original commit.
 *
 * Going by the lines that belong to the new handler: it calls
 * subsystem_crashed("venus"), returns early when @data is NULL or
 * get_vidc_core() finds no core, and marks core->state VIDC_CORE_INVALID
 * under core->lock. With core->sync_lock held it walks core->instances:
 * each instance is set to MSM_VIDC_CORE_INVALID, the session_clean op is
 * called when both hdev and inst->session are set, inst->session is cleared
 * and V4L2_EVENT_MSM_VIDC_SYS_ERROR is queued. Finally it allocates a
 * sys_err_handler_data and schedules hw_sys_error_handler() to run after
 * 5000 ms.
 *
 * NOTE(review): if the kzalloc() fails the function returns without
 * scheduling the recovery work - confirm this is acceptable.
 */
static void handle_sys_error(enum command_response cmd, void *data)
{
	struct msm_vidc_cb_cmd_done *response = data;
	struct msm_vidc_inst *inst;
	struct msm_vidc_core *core = NULL;
	struct sys_err_handler_data *handler = NULL;
	struct hfi_device *hdev = NULL;
	struct msm_vidc_inst *inst = NULL;
	int rc = 0;
	dprintk(VIDC_ERR, "Venus Subsystem crashed\n");

	subsystem_crashed("venus");
	if (!response) {
		dprintk(VIDC_ERR,
			"Failed to get valid response for sys error\n");
		return;
	}

	core = get_vidc_core(response->device_id);
	if (!core) {
		dprintk(VIDC_ERR, "Wrong device_id received\n");
		dprintk(VIDC_ERR,
				"Got SYS_ERR but unable to identify core\n");
		return;
	}
	subsystem_crashed("venus");

	dprintk(VIDC_WARN, "SYS_ERROR %d received for core %p\n", cmd, core);
	mutex_lock(&core->lock);
	core->state = VIDC_CORE_INVALID;
	mutex_unlock(&core->lock);


	/*
	* 1. Delete each instance session from hfi list
	* 2. Notify all clients about hardware error.
	*/
	mutex_lock(&core->sync_lock);
	list_for_each_entry(inst, &core->instances, list) {
		if (inst) {
			msm_vidc_queue_v4l2_event(inst,
					V4L2_EVENT_MSM_VIDC_SYS_ERROR);
	list_for_each_entry(inst, &core->instances,
			list) {
		mutex_lock(&inst->lock);
		inst->state = MSM_VIDC_CORE_INVALID;
		if (inst->core)
			hdev = inst->core->device;
		if (hdev && inst->session) {
			dprintk(VIDC_DBG,
			"cleaning up inst: 0x%p\n", inst);
			rc = call_hfi_op(hdev, session_clean,
				(void *) inst->session);
			if (rc)
				dprintk(VIDC_ERR,
					"Sess clean failed :%p\n",
					inst);

		}
		inst->session = NULL;
		mutex_unlock(&inst->lock);
		}
		msm_vidc_queue_v4l2_event(inst,
				V4L2_EVENT_MSM_VIDC_SYS_ERROR);
	}
	mutex_unlock(&core->sync_lock);


	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
	if (!handler) {
		dprintk(VIDC_ERR,
				"%s - failed to allocate sys error handler\n",
				__func__);
		return;
	}
	handler->core = core;
	INIT_DELAYED_WORK(&handler->work, hw_sys_error_handler);

	/*
	* Delay the recovery work by 5 sec to ensure venus has
	* completed any pending cache operations. Without this
	* delay, we see device reset when firmware is unloaded
	* after a sys error.
	*/
	schedule_delayed_work(&handler->work, msecs_to_jiffies(5000));
}

static void handle_session_close(enum command_response cmd, void *data)
@@ -1491,7 +1529,7 @@ void handle_cmd_response(enum command_response cmd, void *data)
		handle_seq_hdr_done(cmd, data);
		break;
	case SYS_WATCHDOG_TIMEOUT:
		handle_sys_watchdog_timeout(cmd, data);
		handle_sys_error(cmd, data);
		break;
	case SYS_ERROR:
		handle_sys_error(cmd, data);
@@ -1654,11 +1692,14 @@ static int msm_comm_init_core(struct msm_vidc_inst *inst)
		goto fail_vote_bus;
	}

	if (core->state < VIDC_CORE_LOADED) {
		rc = call_hfi_op(hdev, load_fw, hdev->hfi_device_data);
		if (rc) {
			dprintk(VIDC_ERR, "Failed to load video firmware\n");
			goto fail_load_fw;
		}
	}

	rc = msm_comm_scale_clocks(core);
	if (rc) {
		dprintk(VIDC_ERR, "Failed to scale clocks: %d\n", rc);
@@ -1707,26 +1748,33 @@ static int msm_vidc_deinit_core(struct msm_vidc_inst *inst)
				core->id, core->state);
		goto core_already_uninited;
	}

	msm_comm_scale_clocks_and_bus(inst);
	if (list_empty(&core->instances)) {
		if (core->state > VIDC_CORE_INIT) {
			if (core->resources.ocmem_size) {
				if (inst->state != MSM_VIDC_CORE_INVALID)
					msm_comm_unset_ocmem(core);
			call_hfi_op(hdev, free_ocmem, hdev->hfi_device_data);
				call_hfi_op(hdev, free_ocmem,
						hdev->hfi_device_data);
			}
			dprintk(VIDC_DBG, "Calling vidc_hal_core_release\n");
		rc = call_hfi_op(hdev, core_release, hdev->hfi_device_data);
			rc = call_hfi_op(hdev, core_release,
					hdev->hfi_device_data);
			if (rc) {
			dprintk(VIDC_ERR, "Failed to release core, id = %d\n",
				dprintk(VIDC_ERR,
					"Failed to release core, id = %d\n",
					core->id);
				goto exit;
			}
		}
		mutex_lock(&core->lock);
		core->state = VIDC_CORE_UNINIT;
		mutex_unlock(&core->lock);
		call_hfi_op(hdev, unload_fw, hdev->hfi_device_data);
		msm_comm_unvote_buses(core);
	}

core_already_uninited:
	change_inst_state(inst, MSM_VIDC_CORE_UNINIT);
exit:
+1 −0
Original line number Diff line number Diff line
@@ -66,6 +66,7 @@ enum vidc_ports {

enum vidc_core_state {
	VIDC_CORE_UNINIT = 0,
	VIDC_CORE_LOADED,
	VIDC_CORE_INIT,
	VIDC_CORE_INIT_DONE,
	VIDC_CORE_INVALID
+89 −0
Original line number Diff line number Diff line
@@ -76,6 +76,16 @@ struct tzbsp_video_set_state_req {
	u32 spare; /*reserved for future, should be zero*/
};

/*
 * VENUS_SET_STATE() - set (__device)->state with both write_lock and
 * read_lock held.
 *
 * The command-queue writer checks the state with write_lock held and the
 * message/debug queue readers check it under read_lock, so both locks are
 * taken to publish a new state to either side.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement and
 * stays safe in an unbraced if/else. @__state is parenthesized, and the
 * locks are released in the reverse order of acquisition.
 */
#define VENUS_SET_STATE(__device, __state) do {	\
		mutex_lock(&(__device)->write_lock);	\
		mutex_lock(&(__device)->read_lock);	\
		(__device)->state = (__state);		\
		mutex_unlock(&(__device)->read_lock);	\
		mutex_unlock(&(__device)->write_lock);	\
	} while (0)

/*
 * Evaluates to non-zero unless (__device)->state is VENUS_STATE_DEINIT;
 * any other value of the field counts as valid.
 */
#define IS_VENUS_IN_VALID_STATE(__device) \
	(VENUS_STATE_DEINIT != (__device)->state)

static int venus_hfi_power_enable(void *dev);

static inline int venus_hfi_clk_gating_off(struct venus_hfi_device *device);
@@ -1474,11 +1484,24 @@ static int venus_hfi_iface_cmdq_write_nolock(struct venus_hfi_device *device,
	}
	WARN(!mutex_is_locked(&device->write_lock),
			"Cmd queue write lock must be acquired");
	if (!IS_VENUS_IN_VALID_STATE(device)) {
		dprintk(VIDC_ERR, "%s - fw not in init state\n", __func__);
		result = -EINVAL;
		goto err_q_null;
	}

	q_info = &device->iface_queues[VIDC_IFACEQ_CMDQ_IDX];
	if (!q_info) {
		dprintk(VIDC_ERR, "cannot write to shared Q's\n");
		goto err_q_null;
	}

	if (!q_info->q_array.align_virtual_addr) {
		dprintk(VIDC_ERR, "cannot write to shared CMD Q's\n");
		result = -ENODATA;
		goto err_q_null;
	}

	venus_hfi_sim_modify_cmd_packet((u8 *)pkt, device);
	if (!venus_hfi_write_queue(q_info, (u8 *)pkt, &rx_req_is_set)) {
		WARN(!mutex_is_locked(&device->clk_pwr_lock),
@@ -1518,12 +1541,19 @@ static int venus_hfi_iface_msgq_read(struct venus_hfi_device *device, void *pkt)
		return -EINVAL;
	}
	mutex_lock(&device->read_lock);
	if (!IS_VENUS_IN_VALID_STATE(device)) {
		dprintk(VIDC_ERR, "%s - fw not in init state\n", __func__);
		rc = -EINVAL;
		goto read_error_null;
	}

	if (device->iface_queues[VIDC_IFACEQ_MSGQ_IDX].
		q_array.align_virtual_addr == 0) {
		dprintk(VIDC_ERR, "cannot read from shared MSG Q's\n");
		rc = -ENODATA;
		goto read_error_null;
	}

	q_info = &device->iface_queues[VIDC_IFACEQ_MSGQ_IDX];
	if (!venus_hfi_read_queue(q_info, (u8 *)pkt, &tx_req_is_set)) {
		venus_hfi_hal_sim_modify_msg_packet((u8 *)pkt, device);
@@ -1562,6 +1592,11 @@ static int venus_hfi_iface_dbgq_read(struct venus_hfi_device *device, void *pkt)
		return -EINVAL;
	}
	mutex_lock(&device->read_lock);
	if (!IS_VENUS_IN_VALID_STATE(device)) {
		dprintk(VIDC_ERR, "%s - fw not in init state\n", __func__);
		rc = -EINVAL;
		goto dbg_error_null;
	}
	if (device->iface_queues[VIDC_IFACEQ_DBGQ_IDX].
		q_array.align_virtual_addr == 0) {
		dprintk(VIDC_ERR, "cannot read from shared DBG Q's\n");
@@ -1991,6 +2026,8 @@ static int venus_hfi_core_init(void *device)
		return -ENODEV;
	}

	VENUS_SET_STATE(dev, VENUS_STATE_INIT);

	dev->intr_status = 0;
	INIT_LIST_HEAD(&dev->sess_head);
	venus_hfi_set_registers(dev);
@@ -2042,6 +2079,7 @@ static int venus_hfi_core_init(void *device)

	return rc;
err_core_init:
	VENUS_SET_STATE(dev, VENUS_STATE_DEINIT);
	disable_irq_nosync(dev->hal_data->irq);
	return rc;
}
@@ -2056,6 +2094,7 @@ static int venus_hfi_core_release(void *device)
		dprintk(VIDC_ERR, "invalid device\n");
		return -ENODEV;
	}

	if (dev->hal_client) {
		mutex_lock(&dev->clk_pwr_lock);
		rc = venus_hfi_clk_gating_off(device);
@@ -2071,6 +2110,8 @@ static int venus_hfi_core_release(void *device)
		dev->intr_status = 0;
		mutex_unlock(&dev->clk_pwr_lock);
	}
	VENUS_SET_STATE(dev, VENUS_STATE_DEINIT);

	dprintk(VIDC_INFO, "HAL exited\n");
	return 0;
}
@@ -3804,6 +3845,53 @@ static void venus_hfi_unload_fw(void *dev)
	}
}

/*
 * venus_hfi_resurrect_fw() - tear the firmware down and load it again.
 * @dev: the struct venus_hfi_device to restart, passed as void *.
 *
 * Steps, in order: free ocmem (a failure is only warned about), release the
 * core, unload the firmware, re-vote the buses with the device's recorded
 * bus_load, then load the firmware again.
 *
 * Return: 0 on success, -EINVAL if @dev is NULL, otherwise the error code of
 * the first of core release, bus vote or firmware load that failed.
 */
static int venus_hfi_resurrect_fw(void *dev)
{
	struct venus_hfi_device *hfi = dev;
	int err;

	if (!hfi) {
		dprintk(VIDC_ERR, "%s Invalid paramter: %p\n",
			__func__, hfi);
		return -EINVAL;
	}

	/* Best effort: a failed ocmem free does not stop the restart. */
	if (venus_hfi_free_ocmem(hfi))
		dprintk(VIDC_WARN, "%s - failed to free ocmem\n", __func__);

	err = venus_hfi_core_release(hfi);
	if (err) {
		dprintk(VIDC_ERR, "%s - failed to release venus core rc = %d\n",
				__func__, err);
		return err;
	}

	dprintk(VIDC_ERR, "praying for firmware resurrection\n");

	venus_hfi_unload_fw(hfi);

	err = venus_hfi_vote_buses(hfi, hfi->bus_load,
			hfi->res->bus_set.count);
	if (err) {
		dprintk(VIDC_ERR, "Failed to scale buses\n");
		return err;
	}

	err = venus_hfi_load_fw(hfi);
	if (err) {
		dprintk(VIDC_ERR, "%s - failed to load venus fw rc = %d\n",
				__func__, err);
		return err;
	}

	dprintk(VIDC_ERR, "Hurray!! firmware has restarted\n");
	return 0;
}

static int venus_hfi_get_fw_info(void *dev, enum fw_info info)
{
	int rc = 0;
@@ -4066,6 +4154,7 @@ static void venus_init_hfi_callbacks(struct hfi_device *hdev)
	hdev->iommu_get_domain_partition = venus_hfi_iommu_get_domain_partition;
	hdev->load_fw = venus_hfi_load_fw;
	hdev->unload_fw = venus_hfi_unload_fw;
	hdev->resurrect_fw = venus_hfi_resurrect_fw;
	hdev->get_fw_info = venus_hfi_get_fw_info;
	hdev->get_stride_scanline = venus_hfi_get_stride_scanline;
	hdev->capability_check = venus_hfi_capability_check;
+6 −0
Original line number Diff line number Diff line
@@ -159,6 +159,11 @@ struct venus_resources {
	struct on_chip_mem ocmem;
};

/*
 * State values for the `state` field of struct venus_hfi_device.
 * Numbering starts at 1, so 0 is not a named state.
 */
enum venus_hfi_state {
	VENUS_STATE_DEINIT = 1,
	VENUS_STATE_INIT = 2,
};

struct venus_hfi_device {
	struct list_head list;
	struct list_head sess_head;
@@ -186,6 +191,7 @@ struct venus_hfi_device {
	int reg_count;
	struct venus_resources resources;
	struct msm_vidc_platform_resources *res;
	enum venus_hfi_state state;
};

void venus_hfi_delete_device(void *device);
+1 −0
Original line number Diff line number Diff line
@@ -1274,6 +1274,7 @@ struct hfi_device {
			int *domain_num, int *partition_num);
	int (*load_fw)(void *dev);
	void (*unload_fw)(void *dev);
	int (*resurrect_fw)(void *dev);
	int (*get_fw_info)(void *dev, enum fw_info info);
	int (*get_stride_scanline)(int color_fmt, int width,
		int height,	int *stride, int *scanlines);