Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9d146fe0 authored by Harsh Shah
Browse files

msm: camera: isp: Dump last active request on HW error



This change adds a dump of the packet request received from the user
driver when a HW error is encountered. This is useful for debugging
certain HW errors such as violation/overflow. This change also adds
a notification from the ISP context to CRM on error.

Change-Id: I95e2b2ff424c888b7a00d3186ad8b6418c6952c4
Signed-off-by: Harsh Shah <harshs@codeaurora.org>
parent 5c5b688f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ ccflags-y += -Idrivers/media/platform/msm/camera/cam_req_mgr
ccflags-y += -Idrivers/media/platform/msm/camera/cam_smmu/
ccflags-y += -Idrivers/media/platform/msm/camera/cam_sync
ccflags-y += -Idrivers/media/platform/msm/camera/cam_utils
ccflags-y += -Idrivers/media/platform/msm/camera/cam_cdm/

obj-$(CONFIG_SPECTRA_CAMERA) += isp_hw_mgr/
obj-$(CONFIG_SPECTRA_CAMERA) += cam_isp_dev.o cam_isp_context.o
+138 −42
Original line number Diff line number Diff line
@@ -16,7 +16,6 @@
#include <linux/uaccess.h>
#include <linux/ratelimit.h>

#include "cam_isp_context.h"
#include "cam_mem_mgr.h"
#include "cam_sync_api.h"
#include "cam_req_mgr_dev.h"
@@ -24,6 +23,8 @@
#include "cam_debug_util.h"
#include "cam_packet_util.h"
#include "cam_context_utils.h"
#include "cam_cdm_util.h"
#include "cam_isp_context.h"

static const char isp_dev_name[] = "isp";

@@ -121,6 +122,36 @@ static void __cam_isp_ctx_dump_state_monitor_array(
	}
}

/*
 * cam_isp_ctx_dump_req() - Dump the config command buffers of a request.
 *
 * @req_isp: ISP request whose cfg[] entries (num_cfg of them) are dumped.
 *
 * For every cfg entry the backing buffer is looked up with
 * cam_packet_util_get_cmd_mem_addr() and the byte window
 * [offset, offset + len) inside it is handed to
 * cam_cdm_util_dump_cmd_buf(). An entry is logged and skipped when the
 * lookup fails or when its window does not lie fully inside the buffer.
 */
static void cam_isp_ctx_dump_req(struct cam_isp_ctx_req *req_isp)
{
	int i = 0, rc = 0;
	size_t len = 0;
	size_t cfg_offset, cfg_len;
	uint32_t *buf_addr;
	uint32_t *buf_start, *buf_end;

	for (i = 0; i < req_isp->num_cfg; i++) {
		rc = cam_packet_util_get_cmd_mem_addr(
			req_isp->cfg[i].handle, &buf_addr, &len);
		if (rc) {
			CAM_ERR_RATE_LIMIT(CAM_ISP,
				"Failed to get_cmd_mem_addr, rc=%d",
				rc);
			continue;
		}

		cfg_offset = (size_t)req_isp->cfg[i].offset;
		cfg_len = (size_t)req_isp->cfg[i].len;

		/*
		 * Validate in bytes BEFORE forming any pointer. The buffer
		 * length is in bytes, so comparing it against a difference
		 * of uint32_t pointers (which counts 32-bit words) would
		 * accept windows that run past the end of the buffer. The
		 * subtraction form also avoids overflow of offset + len.
		 * A zero length is rejected because it would place buf_end
		 * in front of buf_start.
		 */
		if (!cfg_len || cfg_offset >= len ||
			cfg_len > (len - cfg_offset)) {
			CAM_ERR(CAM_ISP,
				"Invalid cfg[%d]: buf len %zu offset %zu len %zu",
				i, len, cfg_offset, cfg_len);
			continue;
		}

		/* buf_end addresses the last byte of the window (inclusive) */
		buf_start = (uint32_t *)((uint8_t *) buf_addr + cfg_offset);
		buf_end = (uint32_t *)((uint8_t *) buf_start + cfg_len - 1);
		cam_cdm_util_dump_cmd_buf(buf_start, buf_end);
	}
}

static int __cam_isp_ctx_enqueue_request_in_order(
	struct cam_context *ctx, struct cam_ctx_request *req)
{
@@ -974,8 +1005,11 @@ static int __cam_isp_ctx_handle_error(struct cam_isp_context *ctx_isp,
	uint32_t                         i = 0;
	bool                             found = 0;
	struct cam_ctx_request          *req = NULL;
	struct cam_ctx_request          *req_to_report = NULL;
	struct cam_ctx_request          *req_to_dump = NULL;
	struct cam_ctx_request          *req_temp;
	struct cam_isp_ctx_req          *req_isp = NULL;
	struct cam_isp_ctx_req          *req_isp_to_report = NULL;
	struct cam_req_mgr_error_notify  notify;
	uint64_t                         error_request_id;
	struct cam_hw_fence_map_entry   *fence_map_out = NULL;
@@ -992,33 +1026,47 @@ static int __cam_isp_ctx_handle_error(struct cam_isp_context *ctx_isp,
		notify.error = CRM_KMD_ERR_OVERFLOW;

	/*
	 * Need to check the active req
	 * move all of them to the pending request list
	 * Note this funciton need revisit!
	 * The error is likely caused by first request on the active list.
	 * If active list is empty check wait list (maybe error hit as soon
	 * as RUP and we handle error before RUP).
	 */

	if (list_empty(&ctx->active_req_list)) {
		CAM_ERR_RATE_LIMIT(CAM_ISP,
		CAM_DBG(CAM_ISP,
			"handling error with no active request");
		if (list_empty(&ctx->wait_req_list)) {
			CAM_ERR_RATE_LIMIT(CAM_ISP,
				"Error with no active/wait request");
			goto end;
		} else {
			req_to_dump = list_first_entry(&ctx->wait_req_list,
				struct cam_ctx_request, list);
		}
	} else {
		req_to_dump = list_first_entry(&ctx->active_req_list,
			struct cam_ctx_request, list);
	}

	req_isp = (struct cam_isp_ctx_req *) req_to_dump->req_priv;
	cam_isp_ctx_dump_req(req_isp);

	__cam_isp_ctx_update_state_monitor_array(ctx_isp,
		CAM_ISP_STATE_CHANGE_TRIGGER_ERROR, req_to_dump->request_id);

	list_for_each_entry_safe(req, req_temp,
		&ctx->active_req_list, list) {
		req_isp = (struct cam_isp_ctx_req *) req->req_priv;
		if (!req_isp->bubble_report) {
				for (i = 0; i < req_isp->num_fence_map_out;
					i++) {
			for (i = 0; i < req_isp->num_fence_map_out; i++) {
				fence_map_out =
					&req_isp->fence_map_out[i];
				CAM_ERR(CAM_ISP, "req %llu, Sync fd %x",
					req->request_id,
					req_isp->fence_map_out[i].sync_id);
					if (req_isp->fence_map_out[i].sync_id
						!= -1) {
				if (req_isp->fence_map_out[i].sync_id != -1) {
					rc = cam_sync_signal(
						fence_map_out->sync_id,
						CAM_SYNC_STATE_SIGNALED_ERROR);
						fence_map_out->sync_id =
						-1;
					fence_map_out->sync_id = -1;
				}
			}
			list_del_init(&req->list);
@@ -1029,8 +1077,47 @@ static int __cam_isp_ctx_handle_error(struct cam_isp_context *ctx_isp,
			break;
		}
	}

	if (found)
		goto move_to_pending;

	list_for_each_entry_safe(req, req_temp,
		&ctx->wait_req_list, list) {
		req_isp = (struct cam_isp_ctx_req *) req->req_priv;
		if (!req_isp->bubble_report) {
			for (i = 0; i < req_isp->num_fence_map_out; i++) {
				fence_map_out =
					&req_isp->fence_map_out[i];
				CAM_ERR(CAM_ISP, "req %llu, Sync fd %x",
					req->request_id,
					req_isp->fence_map_out[i].sync_id);
				if (req_isp->fence_map_out[i].sync_id != -1) {
					rc = cam_sync_signal(
						fence_map_out->sync_id,
						CAM_SYNC_STATE_SIGNALED_ERROR);
					fence_map_out->sync_id = -1;
				}
			}
			list_del_init(&req->list);
			list_add_tail(&req->list, &ctx->free_req_list);
			ctx_isp->active_req_cnt--;
		} else {
			found = 1;
			break;
		}
	}

move_to_pending:
	/*
	 * If bubble recovery is enabled on any request we need to move that
	 * request and all the subsequent requests to the pending list.
	 * Note:
	 * We need to traverse the active list in reverse order and add
	 * to head of pending list.
	 * e.g. pending current state: 10, 11 | active current state: 8, 9
	 * intermittent for loop iteration- pending: 9, 10, 11 | active: 8
	 * final state - pending: 8, 9, 10, 11 | active: NULL
	 */
	if (found) {
		list_for_each_entry_safe_reverse(req, req_temp,
			&ctx->active_req_list, list) {
@@ -1039,6 +1126,13 @@ static int __cam_isp_ctx_handle_error(struct cam_isp_context *ctx_isp,
			list_add(&req->list, &ctx->pending_req_list);
			ctx_isp->active_req_cnt--;
		}
		list_for_each_entry_safe_reverse(req, req_temp,
			&ctx->wait_req_list, list) {
			req_isp = (struct cam_isp_ctx_req *) req->req_priv;
			list_del_init(&req->list);
			list_add(&req->list, &ctx->pending_req_list);
			ctx_isp->active_req_cnt--;
		}
	}

	do {
@@ -1052,8 +1146,11 @@ static int __cam_isp_ctx_handle_error(struct cam_isp_context *ctx_isp,
		req_isp = (struct cam_isp_ctx_req *) req->req_priv;
		error_request_id = ctx_isp->last_applied_req_id;

		if (req_isp->bubble_report)
		if (req_isp->bubble_report) {
			req_to_report = req;
			req_isp_to_report = req_to_report->req_priv;
			break;
		}

		for (i = 0; i < req_isp->num_fence_map_out; i++) {
			if (req_isp->fence_map_out[i].sync_id != -1)
@@ -1067,14 +1164,18 @@ static int __cam_isp_ctx_handle_error(struct cam_isp_context *ctx_isp,

	} while (req->request_id < ctx_isp->last_applied_req_id);


end:
	if (ctx->ctx_crm_intf && ctx->ctx_crm_intf->notify_err) {
		notify.link_hdl = ctx->link_hdl;
		notify.dev_hdl = ctx->dev_hdl;
		notify.req_id = error_request_id;

		if (req_isp && req_isp->bubble_report)
		if (req_isp_to_report && req_isp_to_report->bubble_report) {
			if (error_event_data->recovery_enabled)
				notify.error = CRM_KMD_ERR_BUBBLE;
		} else {
			notify.error = CRM_KMD_ERR_FATAL;
		}

		CAM_WARN(CAM_ISP, "Notify CRM: req %lld, frame %lld\n",
			error_request_id, ctx_isp->frame_id);
@@ -1086,15 +1187,8 @@ static int __cam_isp_ctx_handle_error(struct cam_isp_context *ctx_isp,
		rc = -EFAULT;
	}


	list_del_init(&req->list);
	list_add(&req->list, &ctx->pending_req_list);
	/* might need to check if active list is empty */
	if (req != NULL) {
		__cam_isp_ctx_update_state_monitor_array(ctx_isp,
			CAM_ISP_STATE_CHANGE_TRIGGER_ERROR, req->request_id);
	}
	CAM_DBG(CAM_ISP, "Exit");

	return rc;
}

@@ -1197,13 +1291,14 @@ static int __cam_isp_ctx_apply_req_in_activated_state(
	 *
	 */
	ctx_isp = (struct cam_isp_context *) ctx->ctx_priv;

	spin_lock_bh(&ctx->lock);
	req = list_first_entry(&ctx->pending_req_list, struct cam_ctx_request,
		list);
	spin_unlock_bh(&ctx->lock);

	/*
	 * Check whehter the request id is matching the tip, if not, this means
	 * Check whether the request id is matching the tip, if not, this means
	 * we are in the middle of the error handling. Need to reject this apply
	 */
	if (req->request_id != apply->request_id) {
@@ -2966,7 +3061,7 @@ static int __cam_isp_ctx_handle_irq_in_activated(void *context,
	struct cam_isp_context *ctx_isp =
		(struct cam_isp_context *)ctx->ctx_priv;

	spin_lock_bh(&ctx->lock);
	spin_lock(&ctx->lock);

	trace_cam_isp_activated_irq(ctx, ctx_isp->substate_activated, evt_id,
		__cam_isp_ctx_get_event_ts(evt_id, evt_data));
@@ -2981,9 +3076,10 @@ static int __cam_isp_ctx_handle_irq_in_activated(void *context,
			ctx_isp->substate_activated);
		__cam_isp_ctx_dump_state_monitor_array(ctx_isp);
	}

	CAM_DBG(CAM_ISP, "Exit: State %d Substate %d",
		 ctx->state, ctx_isp->substate_activated);
	spin_unlock_bh(&ctx->lock);
	spin_unlock(&ctx->lock);
	return rc;
}

+51 −53
Original line number Diff line number Diff line
@@ -3562,19 +3562,18 @@ static int cam_ife_hw_mgr_do_error_recovery(
 * is associated with this context. if YES
 *  a. It fills the other cores associated with this context in
 *      affected_core[]
 *  b. Return 0 i.e.SUCCESS
 *  b. Return 1 if ctx is affected, 0 otherwise
 */
static int cam_ife_hw_mgr_is_ctx_affected(
	struct cam_ife_hw_mgr_ctx   *ife_hwr_mgr_ctx,
	uint32_t *affected_core, uint32_t size)
{

	int32_t rc = -EPERM;
	int32_t rc = 0;
	uint32_t i = 0, j = 0;
	uint32_t max_idx =  ife_hwr_mgr_ctx->num_base;
	uint32_t ctx_affected_core_idx[CAM_IFE_HW_NUM_MAX] = {0};

	CAM_DBG(CAM_ISP, "Enter:max_idx = %d", max_idx);
	CAM_DBG(CAM_ISP, "max_idx = %d", max_idx);

	if ((max_idx >= CAM_IFE_HW_NUM_MAX) ||
		(size > CAM_IFE_HW_NUM_MAX)) {
@@ -3584,33 +3583,34 @@ static int cam_ife_hw_mgr_is_ctx_affected(

	for (i = 0; i < max_idx; i++) {
		if (affected_core[ife_hwr_mgr_ctx->base[i].idx])
			rc = 0;
			rc = 1;
		else {
			ctx_affected_core_idx[j] = ife_hwr_mgr_ctx->base[i].idx;
			CAM_DBG(CAM_ISP, "Add affected IFE %d for recovery",
				ctx_affected_core_idx[j]);
			j = j + 1;
		}
	}

	if (rc == 0) {
	if (rc == 1) {
		while (j) {
			if (affected_core[ctx_affected_core_idx[j-1]] != 1)
				affected_core[ctx_affected_core_idx[j-1]] = 1;
			j = j - 1;
		}
	}
	CAM_DBG(CAM_ISP, "Exit");

	return rc;
}

/*
 *  Loop through each context
 * For any dual VFE context, if non-affected VFE is also serving
 * another context, then that context should also be notified with fatal error
 * So Loop through each context and -
 *   a. match core_idx
 *  b. For each context from ctx_list Stop the acquired resources
 *  c. Notify CRM with fatal error for the affected isp context
 *  d. For any dual VFE context, if copanion VFE is also serving
 *     other context it should also notify the CRM with fatal error
 *   b. Notify CTX with fatal error
 */
static int  cam_ife_hw_mgr_process_overflow(
static int  cam_ife_hw_mgr_find_affected_ctx(
	struct cam_ife_hw_mgr_ctx             *curr_ife_hwr_mgr_ctx,
	struct cam_isp_hw_error_event_data    *error_event_data,
	uint32_t                               curr_core_idx,
@@ -3618,44 +3618,36 @@ static int cam_ife_hw_mgr_process_overflow(
{
	uint32_t affected_core[CAM_IFE_HW_NUM_MAX] = {0};
	struct cam_ife_hw_mgr_ctx   *ife_hwr_mgr_ctx = NULL;
	cam_hw_event_cb_func	         ife_hwr_irq_err_cb;
	cam_hw_event_cb_func         notify_err_cb;
	struct cam_ife_hw_mgr       *ife_hwr_mgr = NULL;
	struct cam_hw_stop_args          stop_args;
	enum cam_isp_hw_event_type   event_type = CAM_ISP_HW_EVENT_ERROR;
	uint32_t i = 0;

	CAM_DBG(CAM_ISP, "Enter");

	if (!recovery_data) {
		CAM_ERR(CAM_ISP, "recovery_data parameter is NULL");
		return -EINVAL;
	}

	recovery_data->no_of_context = 0;
	/* affected_core is indexed by core_idx*/
	affected_core[curr_core_idx] = 1;

	ife_hwr_mgr = curr_ife_hwr_mgr_ctx->hw_mgr;

	list_for_each_entry(ife_hwr_mgr_ctx,
		&ife_hwr_mgr->used_ctx_list, list) {

		/*
		 * Check if current core_idx matches the HW associated
		 * with this context
		 */
		CAM_DBG(CAM_ISP, "Calling match Hw idx");
		if (cam_ife_hw_mgr_is_ctx_affected(ife_hwr_mgr_ctx,
		if (!cam_ife_hw_mgr_is_ctx_affected(ife_hwr_mgr_ctx,
			affected_core, CAM_IFE_HW_NUM_MAX))
			continue;

		atomic_set(&ife_hwr_mgr_ctx->overflow_pending, 1);

		ife_hwr_irq_err_cb =
		ife_hwr_mgr_ctx->common.event_cb[CAM_ISP_HW_EVENT_ERROR];

		stop_args.ctxt_to_hw_map = ife_hwr_mgr_ctx;
		notify_err_cb = ife_hwr_mgr_ctx->common.event_cb[event_type];

		/* Add affected_context in list of recovery data */
		CAM_DBG(CAM_ISP, "Add new entry in affected_ctx_list");
		CAM_DBG(CAM_ISP, "Add affected ctx %d to list",
			ife_hwr_mgr_ctx->ctx_index);
		if (recovery_data->no_of_context < CAM_CTX_MAX)
			recovery_data->affected_ctx[
				recovery_data->no_of_context++] =
@@ -3665,18 +3657,17 @@ static int cam_ife_hw_mgr_process_overflow(
		 * In the call back function corresponding ISP context
		 * will update CRM about fatal Error
		 */

		ife_hwr_irq_err_cb(ife_hwr_mgr_ctx->common.cb_priv,
		notify_err_cb(ife_hwr_mgr_ctx->common.cb_priv,
			CAM_ISP_HW_EVENT_ERROR, error_event_data);

	}

	/* fill the affected_core in recovery data */
	for (i = 0; i < CAM_IFE_HW_NUM_MAX; i++) {
		recovery_data->affected_core[i] = affected_core[i];
		CAM_DBG(CAM_ISP, "Vfe core %d is affected (%d)",
			 i, recovery_data->affected_core[i]);
	}
	CAM_DBG(CAM_ISP, "Exit");

	return 0;
}

@@ -3764,6 +3755,7 @@ static int cam_ife_hw_mgr_handle_camif_error(
	struct cam_vfe_top_irq_evt_payload      *evt_payload;
	struct cam_isp_hw_error_event_data       error_event_data = {0};
	struct cam_hw_event_recovery_data        recovery_data = {0};
	int rc = 0;

	ife_hwr_mgr_ctx = handler_priv;
	evt_payload = payload;
@@ -3780,24 +3772,35 @@ static int cam_ife_hw_mgr_handle_camif_error(
	case CAM_ISP_HW_ERROR_P2I_ERROR:
	case CAM_ISP_HW_ERROR_VIOLATION:
		CAM_ERR(CAM_ISP, "Enter: error_type (%d)", error_status);
		rc = -EFAULT;

		if (g_ife_hw_mgr.debug_cfg.enable_recovery)
			error_event_data.recovery_enabled = true;

		error_event_data.error_type =
				CAM_ISP_HW_ERROR_OVERFLOW;

		cam_ife_hw_mgr_process_overflow(ife_hwr_mgr_ctx,
		cam_ife_hw_mgr_find_affected_ctx(ife_hwr_mgr_ctx,
			&error_event_data,
			core_idx,
			&recovery_data);

		if (!g_ife_hw_mgr.debug_cfg.enable_recovery) {
			CAM_DBG(CAM_ISP, "recovery is not enabled");
			break;
		}

		CAM_DBG(CAM_ISP, "IFE Mgr recovery is enabled");
		/* Trigger for recovery */
		recovery_data.error_type = CAM_ISP_HW_ERROR_OVERFLOW;
		cam_ife_hw_mgr_do_error_recovery(&recovery_data);
		break;
	default:
		CAM_DBG(CAM_ISP, "None error (%d)", error_status);
		CAM_DBG(CAM_ISP, "No error (%d)", error_status);
		break;
	}

	return 0;
	return rc;
}

/*
@@ -4542,7 +4545,7 @@ static int cam_ife_hw_mgr_handle_buf_done_for_hw_res(
	 * the affected context and any successful buf_done event is not
	 * reported.
	 */
	rc = cam_ife_hw_mgr_process_overflow(ife_hwr_mgr_ctx,
	rc = cam_ife_hw_mgr_find_affected_ctx(ife_hwr_mgr_ctx,
		&error_event_data, evt_payload->core_index,
		&recovery_data);

@@ -4614,17 +4617,12 @@ int cam_ife_mgr_do_tasklet(void *handler_priv, void *evt_payload_priv)
	 * for this context it needs to be handled remaining
	 * interrupts are ignored.
	 */
	if (g_ife_hw_mgr.debug_cfg.enable_recovery) {
		CAM_DBG(CAM_ISP, "IFE Mgr recovery is enabled");
	rc = cam_ife_hw_mgr_handle_camif_error(ife_hwr_mgr_ctx,
		evt_payload_priv);
	} else {
		CAM_DBG(CAM_ISP, "recovery is not enabled");
		rc = 0;
	}

	if (rc) {
		CAM_ERR(CAM_ISP, "Encountered Error (%d), ignoring other irqs",
		CAM_ERR_RATE_LIMIT(CAM_ISP,
			"Encountered Error (%d), ignoring other irqs",
			rc);
		goto put_payload;
	}
+3 −1
Original line number Diff line number Diff line
@@ -184,11 +184,13 @@ struct cam_isp_hw_eof_event_data {
 *
 * @error_type:            Error type for the error event
 * @timestamp:             Timestamp for the error event
 *
 * @recovery_enabled:      Identifies if the context needs to recover & reapply
 *                         this request
 */
struct cam_isp_hw_error_event_data {
	/* Error type for the error event */
	uint32_t             error_type;
	/* Timestamp for the error event */
	uint64_t             timestamp;
	/* True if the context needs to recover and reapply this request */
	bool                 recovery_enabled;
};

/* enum cam_isp_hw_mgr_command - Hardware manager command type */
+37 −1
Original line number Diff line number Diff line
@@ -378,6 +378,42 @@ static int cam_vfe_camif_reg_dump(
	return rc;
}

/*
 * cam_vfe_camif_reg_dump_bh() - Log a fixed set of registers via CAM_INFO.
 *
 * @camif_priv: CAMIF private data; only its soc_info is used for the reads.
 *
 * Every register is read with cam_soc_util_r() and printed as an
 * offset/value pair, in this order:
 *   - second argument 0: offsets 0x0 up to, but not including, 0x1000
 *   - second argument 0: offsets 0x2000 through 0x20B8 inclusive
 *   - second argument 0: 24 windows spaced 0x100 apart starting at 0x2200,
 *     each covering offsets [base, base + 0x78)
 *   - second argument 1: offsets 0x420 and 0x820, labelled as CAMNOC
 *     MaxWR_LOW values in the log text
 * NOTE(review): the second argument is presumably the register-base
 * index inside soc_info - confirm against cam_soc_util_r().
 *
 * Return: always 0.
 */
static int cam_vfe_camif_reg_dump_bh(struct cam_vfe_mux_camif_data *camif_priv)
{
	uint32_t reg, reg_val, wm, wm_end;

	reg = 0x0;
	while (reg < 0x1000) {
		reg_val = cam_soc_util_r(camif_priv->soc_info, 0, reg);
		CAM_INFO(CAM_ISP, "offset 0x%x value 0x%x", reg, reg_val);
		reg += 0x4;
	}

	reg = 0x2000;
	while (reg <= 0x20B8) {
		reg_val = cam_soc_util_r(camif_priv->soc_info, 0, reg);
		CAM_INFO(CAM_ISP, "offset 0x%x value 0x%x", reg, reg_val);
		reg += 0x4;
	}

	/* One 0x78-byte window per index, windows are 0x100 apart */
	for (wm = 0; wm < 24; wm++) {
		reg = 0x2200 + 0x100 * wm;
		wm_end = 0x2278 + 0x100 * wm;

		for (; reg < wm_end; reg += 0x4) {
			reg_val = cam_soc_util_r(camif_priv->soc_info, 0, reg);
			CAM_INFO(CAM_ISP,
				"offset 0x%x value 0x%x", reg, reg_val);
		}
	}

	reg = 0x420;
	reg_val = cam_soc_util_r(camif_priv->soc_info, 1, reg);
	CAM_INFO(CAM_ISP, "CAMNOC IFE02 MaxWR_LOW offset 0x%x value 0x%x",
		reg, reg_val);

	reg = 0x820;
	reg_val = cam_soc_util_r(camif_priv->soc_info, 1, reg);
	CAM_INFO(CAM_ISP, "CAMNOC IFE13 MaxWR_LOW offset 0x%x value 0x%x",
		reg, reg_val);

	return 0;
}

static int cam_vfe_camif_resource_stop(
	struct cam_isp_resource_node        *camif_res)
{
@@ -548,7 +584,7 @@ static int cam_vfe_camif_handle_irq_bottom_half(void *handler_priv,
		if (irq_status1 & camif_priv->reg_data->error_irq_mask1) {
			CAM_DBG(CAM_ISP, "Received ERROR\n");
			ret = CAM_ISP_HW_ERROR_OVERFLOW;
			cam_vfe_camif_reg_dump(camif_node);
			cam_vfe_camif_reg_dump_bh(camif_node->res_priv);
		} else {
			ret = CAM_ISP_HW_ERROR_NONE;
		}