Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dd812cfe authored by Vijay Viswanath's avatar Vijay Viswanath
Browse files

mmc: block: Error handling for WP violation in cmdq



When CQ encounters Write Protect(WP) violation, the current code resets
the card and requeue all requests. But the request which caused the error
should not be requeued as it will again cause same error and this leads to
infinite loop. So the specific request shouldn't be requeued and the
block layer informed of its failure. So added two new functions:

1)cmdq_err_check which can be used by higher layers to check if a CQ RED
error occurred.
2)cmdq_err_handle which will do error handling for CQ related errors. As
of now it handles only WP violation errors.

WP violation can be detected in following cases :
1) When a Write task being executed caused a WP violation and a command
sent in cmd line detects the error before Host Controller (HC) completes
the Write task.
2) When CMD45 of a task causes WP violation, it still gets an ok
response from card and Device Pending register sets the bit
corresponding to the task. Whichever command sent next( CMD44/46/47 and
not CMD45) will get a response error. Since we cannot always find which
task had sent the last CMD45, fail all tasks in Device Pending register.
3) If a Write task causes WP violation in card and if no commands other
than SQS commands are sent since violation occurred, the HC will
consider that write task as completed and raise completion notification.
Whichever command sent next will get response error. So fail all tasks
which are in CQ Completion Notification register. This case will not
happend if a write task is in execution while we get RED error.
4) An erase command tried to erase a WP area. In this case, remove the
DCMD command.

Change-Id: I6efaf22605081a23b7acf6baf127dff69b3be27b
Signed-off-by: default avatarVijay Viswanath <vviswana@codeaurora.org>
parent 6943048a
Loading
Loading
Loading
Loading
+217 −10
Original line number Diff line number Diff line
@@ -2983,8 +2983,6 @@ static void mmc_blk_cmdq_reset_all(struct mmc_host *host, int err)
			mmc_hostname(host), __func__,
			ctx_info->active_reqs, host->clk_requests);

	mmc_blk_cmdq_reset(host, false);

	for_each_set_bit(itag, &ctx_info->active_reqs,
			host->num_cq_slots) {
		ret = is_cmdq_dcmd_req(q, itag);
@@ -3096,6 +3094,215 @@ static enum blk_eh_timer_return mmc_blk_cmdq_req_timed_out(struct request *req)
	return BLK_EH_HANDLED;
}

static void mmc_blk_cmdq_print_mrq_info(struct mmc_request *mrq, u32 tag,
		int err)
{
	if (!mrq)
		return;

	if (mrq->cmd)
		pr_err("%s: Task: %d cmd: %u flags: %u failed: %d\n",
				mmc_hostname(mrq->host), tag, mrq->cmd->opcode,
				mrq->cmd->flags, err);

	if (mrq->data) {
		pr_err("%s: Task: %d, Blk Addr:0x%u,", mmc_hostname(mrq->host),
				tag, mrq->cmdq_req->blk_addr);
		pr_err(" Blk cnt:0x%u, Blk sz:%u failed: %d\n",
				mrq->cmdq_req->data.blocks,
				mrq->cmdq_req->data.blksz, err);
	}
}

static struct mmc_request *mmc_blk_cmdq_wp_handle(
		struct mmc_cmdq_err_info *err_info, struct mmc_host *host)
{
	u32 cmd = 0, dat = 0;
	struct mmc_request *err_mrq = NULL;

	cmd = err_info->cmd;
	dat = err_info->data_cmd;

	err_info->remove_task = false;
	err_info->fail_dev_pend = false;
	err_info->fail_comp_task = true;

	/*
	 * If a write task is not going on in the controller, then the task
	 * which caused error can be among the completed task list.
	 *
	 * For WP violations detected during DCMD commands,
	 * like ERASE command.
	 */
	if (err_info->cmd_tag == err_info->dcmd_slot &&
		(cmd == MMC_ERASE || cmd == MMC_SEND_STATUS) &&
			err_info->cmd_valid) {
		err_mrq = host->cmdq_ops->get_mrq_by_tag(host, err_info->tag);
		clear_bit(err_info->cmd_tag, &err_info->dev_pend);
		clear_bit(err_info->cmd_tag, &err_info->comp_status);
		if (!err_mrq)
			goto out;
		err_info->remove_task = true;
		err_info->tag = err_mrq->req->tag;
		return err_mrq;
	}

	/*
	 * If a write task was in execution when WP RED error was detected,
	 * the WP violation may have been caused by it. So remove it. In this
	 * case the completed tasks are innocent.
	 */
	if (dat == MMC_CMDQ_WRITE_TASK && err_info->data_valid) {
		err_mrq = host->cmdq_ops->get_mrq_by_tag(host, err_info->tag);
		clear_bit(err_info->data_tag, &err_info->dev_pend);
		clear_bit(err_info->data_tag, &err_info->comp_status);
		if (!err_mrq)
			goto out;
		err_info->remove_task = true;
		err_info->fail_comp_task = false;
	}
	/*
	 * Unless CMD45 is in CMD field of CQTERRI, the last executed CMD45 may
	 * have caused the WP violation.
	 * NOTE: We cannot get last executed CMD45 correctly always. So pick
	 * last set task in CQ_DEV_PEND to avoid infinite loop
	 */
	if (cmd != MMC_CMDQ_TASK_ADDR)
		err_info->fail_dev_pend = true;

	if (err_info->tag < 0 && err_info->fail_comp_task) {
		err_info->tag = 0;
		err_info->remove_task = false;
	}
	pr_err("%s: Write protect violation detected\n",
			mmc_hostname(host));
	return err_mrq;
out:
	err_info->tag = -1;
	return err_mrq;
}

static void mmc_blk_cmdq_remove_task(struct mmc_host *host,
		struct mmc_cmdq_err_info *err_info, struct mmc_request *mrq,
		int err, bool print_info) {

	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
	struct request *rq = mrq->req;
	int tag = mrq->cmdq_req->tag;

	if (!test_and_clear_bit(mrq->cmdq_req->tag, &ctx_info->active_reqs))
		return;

	if (print_info)
		mmc_blk_cmdq_print_mrq_info(mrq, tag, err);

	if (mrq->cmdq_req->cmdq_req_flags & DCMD) {
		clear_bit(CMDQ_STATE_DCMD_ACTIVE,
				&ctx_info->curr_state);
		blk_end_request_all(rq, err);
	} else {
		WARN_ON(!test_and_clear_bit(mrq->cmdq_req->tag,
					&ctx_info->data_active_reqs));
		mmc_cmdq_post_req(host, mrq->cmdq_req->tag, err);
		blk_end_request_all(rq, err);
	}
	mmc_host_clk_release(host);
	mmc_put_card(host->card);
	mmc_cmdq_clk_scaling_stop_busy(host, true, tag == err_info->dcmd_slot);

}

static void mmc_blk_cmdq_fail_mult_tasks(struct mmc_host *host,
		unsigned long task_list, struct mmc_cmdq_err_info *err_info,
		int err, bool print_info) {

	struct mmc_request *err_mrq = NULL;
	int tag;

	for_each_set_bit(tag, &task_list, err_info->max_slot) {
		err_mrq = host->cmdq_ops->get_mrq_by_tag(host, tag);
		if (!err_mrq || !err_mrq->req)
			continue;
		mmc_blk_cmdq_remove_task(host, err_info,
				err_mrq, err, print_info);
	}
};

/*
 * Error handler for cmdq if CQ enabled. Returns 0 if error is successfully
 * handled. Returns negative value if fails.
 */
static void mmc_blk_cmdq_err_handle(struct mmc_host *host, u32 status, int err)
{
	struct mmc_request *mrq = host->err_mrq;
	struct mmc_cmdq_err_info err_info;
	u32 red_err_info = 0;
	struct mmc_request *err_mrq = NULL;

	if (host->cmdq_ops->err_info && host->cmdq_ops->get_mrq_by_tag)
		host->cmdq_ops->err_info(host, &err_info, mrq);
	else
		return;

	if (mrq->cmdq_req->resp_arg & CQ_RED) {
		red_err_info = mrq->cmdq_req->resp_arg;
	} else if (status & CQ_RED) {
		red_err_info = status;
		err_info.timedout = true;
	}

	if (red_err_info & CQ_RED) {
		/*
		 * The request which caused RED error may need to be
		 * removed before resetting card and requeueing requests
		 */
		if (red_err_info & CQ_WP_RED) {
			err_mrq = mmc_blk_cmdq_wp_handle(&err_info, host);
			if (err_info.tag < 0)
				return;
			err = -EPERM;
		} else {
			pr_err("%s: unhandled RED error detected!\n",
					mmc_hostname(host));
			err_mrq = mrq;
		}
	}

	if (err_mrq)
		mrq = err_mrq;

	/* Remove the request if marked for removal */
	if (err_info.remove_task)
		mmc_blk_cmdq_remove_task(host, &err_info, mrq, err, true);

	/*
	 * If there is a chance that one of the completed task caused error,
	 * tell block layer that all completed tasks failed. Here priority is
	 * given to removing the errored task than on saving innocent task. This
	 * is to avoid data corruption at higher layers.
	 */
	if (err_info.fail_comp_task)
		mmc_blk_cmdq_fail_mult_tasks(host, err_info.comp_status,
				&err_info, err, true);
	else
		mmc_blk_cmdq_fail_mult_tasks(host, err_info.comp_status,
				&err_info, 0, false);
	/*
	 * If there is a chance that one of the Dev Pending task caused error,
	 * tell block layer that all Dev Pending tasks failed. Here priority is
	 * given to aviod looping with an error causing task than on saving
	 * innocent task.
	 */
	if (err_info.fail_dev_pend) {
		pr_err("%s: %s: Failing all tasks in Device Pending : 0x%lx\n",
				mmc_hostname(host), __func__,
				err_info.dev_pend);
		mmc_blk_cmdq_fail_mult_tasks(host, err_info.dev_pend,
				&err_info, err, true);
	}

}

/*
 * mmc_blk_cmdq_err: error handling of cmdq error requests.
 * Function should be called in context of error out request
@@ -3130,10 +3337,9 @@ static void mmc_blk_cmdq_err(struct mmc_queue *mq)
	}

	/* RED error - Fatal: requires reset */
	if (mrq->cmdq_req->resp_err) {
	if (mrq->cmdq_req->resp_err)
		err = mrq->cmdq_req->resp_err;
		goto reset;
	}


	/*
	 * TIMEOUT errrors can happen because of execution error
@@ -3164,8 +3370,10 @@ static void mmc_blk_cmdq_err(struct mmc_queue *mq)
		/* DCMD commands */
		err = mrq->cmd->error;
	}

reset:
	mmc_blk_cmdq_reset(host, false);
	mmc_blk_cmdq_err_handle(host, status, err);

	mmc_blk_cmdq_reset_all(host, err);
	if (mrq->cmdq_req->resp_err)
		mrq->cmdq_req->resp_err = false;
@@ -3214,11 +3422,10 @@ void mmc_blk_cmdq_complete_rq(struct request *rq)
	 * or disable state so cannot receive any completion of
	 * other requests.
	 */
	BUG_ON(test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
	WARN_ON(test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));

	/* clear pending request */
	BUG_ON(!test_and_clear_bit(cmdq_req->tag,
				   &ctx_info->active_reqs));
	BUG_ON(!test_and_clear_bit(cmdq_req->tag, &ctx_info->active_reqs));
	if (cmdq_req->cmdq_req_flags & DCMD)
		is_dcmd = true;
	else
@@ -3237,7 +3444,7 @@ void mmc_blk_cmdq_complete_rq(struct request *rq)
out:

	mmc_cmdq_clk_scaling_stop_busy(host, true, is_dcmd);
	if (!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
	if (!(err || cmdq_req->resp_err)) {
		mmc_host_clk_release(host);
		wake_up(&ctx_info->wait);
		mmc_put_card(host->card);