Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 77dacd5b authored by Ritesh Harjani
Browse files

mmc: block: Error handling fixes and support of reset_all



This does the following:

1. Adds support for handling multiple cmdq software request timeouts.
   Currently we schedule error handling work for the first timed-out
   request. We requeue all pending tasks, knock off all tasks from the
   device queue and reset the card and controller as part of this; then,
   for any other software request timeout that arrives while the error
   work is executing, we return BLK_EH_NOT_HANDLED.
   This fixes a BUG_ON hit when multiple requests time out together.

2. The current code resets the CQE, power cycles the card and requeues
   all the requests in case of any error.

3. The mmc_blk_cmdq_err work takes care of freeing up all resource
   allocations, such as clk and rpm holds, in case of reset_all.

4. Previously we never cleared the CMDQ_STATE_ERR error state after an
   error. This patch fixes that bug.

Change-Id: I83d483c11fe2d7f2e462086cc3c0932057304c0d
Signed-off-by: Ritesh Harjani <riteshh@codeaurora.org>
parent 4457076d
Loading
Loading
Loading
Loading
+170 −88
Original line number Diff line number Diff line
@@ -2893,24 +2893,120 @@ EXPORT_SYMBOL(mmc_blk_cmdq_issue_flush_rq);

/*
 * mmc_blk_cmdq_reset - halt the command queue if needed, then reset it.
 * @host:	mmc_host pointer.
 * @clear_all:	when true, mmc_cmdq_discard_queue(host, 0) is called before
 *		the reset, unless the preceding halt attempt failed.
 *
 * NOTE(review): this listing appears to be a rendered diff in which removed
 * and added lines are shown together (the inner "if" is not indented, and
 * "int err" is declared after an early return). Confirm the statement
 * sequence against the real tree before relying on it.
 */
static void mmc_blk_cmdq_reset(struct mmc_host *host, bool clear_all)
{
	/* Nothing to do when the host provides no cmdq reset operation. */
	if (!host->cmdq_ops->reset)
		return;
	int err = 0;

	/* Only request a halt when the HALT state bit is not already set. */
	if (!test_bit(CMDQ_STATE_HALT, &host->cmdq_ctx.curr_state)) {
	if (mmc_cmdq_halt(host, true)) {
		pr_err("%s: halt failed\n", mmc_hostname(host));
		/* Halt failed: skip the queue discard and go straight to reset. */
		goto reset;
	}
	}

	if (clear_all)
		mmc_cmdq_discard_queue(host, 0);
reset:
	mmc_hw_reset(host);
	/* The cmdq_ops calls below are bracketed by a host clock hold/release. */
	mmc_host_clk_hold(host);
	host->cmdq_ops->reset(host, true);
	host->cmdq_ops->disable(host, true);
	mmc_host_clk_release(host);
	err = mmc_cmdq_hw_reset(host);
	/* -EOPNOTSUPP is not treated as a failure here. */
	if (err && err != -EOPNOTSUPP) {
		pr_err("%s: failed to cmdq_hw_reset err = %d\n",
				mmc_hostname(host), err);
		/* On failure, re-enable the CQE under a clock hold and unhalt it. */
		mmc_host_clk_hold(host);
		host->cmdq_ops->enable(host);
		mmc_host_clk_release(host);
		mmc_cmdq_halt(host, false);
		goto out;
	}
	/*
	 * CMDQ HW reset would have already made CQE
	 * in unhalted state, but reflect the same
	 * in software state of cmdq_ctx.
	 */
	mmc_host_clr_halt(host);
out:
	return;
}

/**
 * is_cmdq_dcmd_req - Checks if tag belongs to DCMD request.
 * @q:		request_queue pointer.
 * @tag:	tag number of request to check.
 *
 * This function looks up the request with tag number "tag" and tests
 * the DCMD bit in its cmdq_req_flags.
 *
 * The return type is int rather than bool: a bool return would convert
 * -ENOENT to true, so a failed lookup would be indistinguishable from a
 * DCMD request and a caller comparing the result against -ENOENT could
 * never match.
 *
 * returns 1 if DCMD req, 0 if not, and -ENOENT if no request is found
 * for the tag or the request carries no mmc_queue_req.
 */
static int is_cmdq_dcmd_req(struct request_queue *q, int tag)
{
	struct request *req;
	struct mmc_queue_req *mq_rq;

	req = blk_queue_find_tag(q, tag);
	if (WARN_ON(!req))
		return -ENOENT;

	mq_rq = req->special;
	if (WARN_ON(!mq_rq))
		return -ENOENT;

	/* Normalize the flag test to 0/1 so it never collides with -ENOENT. */
	return !!(mq_rq->cmdq_req.cmdq_req_flags & DCMD);
}

/**
 * mmc_blk_cmdq_reset_all - Reset everything for CMDQ block request.
 * @host:	mmc_host pointer.
 * @err:	error for which reset is performed; passed to
 *		mmc_cmdq_post_req() for every active tag.
 *
 * This function implements reset_all functionality for
 * cmdq. It resets the controller, power cycles the card,
 * and invalidates all busy tags (requeues all requests back to
 * the elevator).
 *
 * It returns early (with a warning) when host->err_mrq is NULL, and
 * warns if CMDQ_STATE_ERR is not set on entry.
 */
static void mmc_blk_cmdq_reset_all(struct mmc_host *host, int err)
{
	struct mmc_request *mrq = host->err_mrq;
	struct mmc_card *card = host->card;
	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
	struct request_queue *q;
	int itag = 0;
	int ret = 0;

	if (WARN_ON(!mrq))
		return;

	/* The request queue of the errored request is used for all tag lookups. */
	q = mrq->req->q;
	WARN_ON(!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));

	pr_debug("%s: %s: active_reqs = %lu, clk_requests = %d\n",
			mmc_hostname(host), __func__,
			ctx_info->active_reqs, host->clk_requests);

	/* Reset without asking for the device queue to be discarded. */
	mmc_blk_cmdq_reset(host, false);

	/* Walk every tag still marked active and undo its bookkeeping. */
	for_each_set_bit(itag, &ctx_info->active_reqs,
			host->num_cq_slots) {
		ret = is_cmdq_dcmd_req(q, itag);
		/* Skip tags for which the lookup is reported as -ENOENT. */
		if (WARN_ON(ret == -ENOENT))
			continue;
		if (!ret) {
			/* Data request: its data_active_reqs bit is expected to be set. */
			WARN_ON(!test_and_clear_bit(itag,
				 &ctx_info->data_active_reqs));
		} else {
			/* DCMD request: clear the DCMD-active state instead. */
			clear_bit(CMDQ_STATE_DCMD_ACTIVE,
					&ctx_info->curr_state);
		}
		mmc_cmdq_post_req(host, itag, err);
		WARN_ON(!test_and_clear_bit(itag,
					&ctx_info->active_reqs));
		/*
		 * One clock release and one card put per active tag;
		 * presumably each in-flight request took one of each when it
		 * was issued - TODO confirm against the issue path.
		 */
		mmc_host_clk_release(host);
	/*
	 * NOTE(review): the un-indented statement below looks like a removed
	 * line left over from the diff rendering - confirm whether it belongs
	 * in this loop at all.
	 */
	clear_bit(CMDQ_STATE_HALT, &host->cmdq_ctx.curr_state);
		mmc_put_card(card);
	}

	/* Tag invalidation is done while holding the queue lock. */
	spin_lock_irq(q->queue_lock);
	blk_queue_invalidate_tags(q);
	spin_unlock_irq(q->queue_lock);
}

static void mmc_blk_cmdq_shutdown(struct mmc_queue *mq)
@@ -2953,6 +3049,7 @@ static enum blk_eh_timer_return mmc_blk_cmdq_req_timed_out(struct request *req)
	struct mmc_queue_req *mq_rq = req->special;
	struct mmc_request *mrq;
	struct mmc_cmdq_req *cmdq_req;
	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;

	BUG_ON(!host);

@@ -2978,32 +3075,40 @@ static enum blk_eh_timer_return mmc_blk_cmdq_req_timed_out(struct request *req)
	else
		mrq->data->error = -ETIMEDOUT;

	BUG_ON(host->err_mrq != NULL);
	host->err_mrq = mrq;
	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state) ||
		test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
		return BLK_EH_NOT_HANDLED;

	mmc_host_clk_release(mrq->host);
	set_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
	return BLK_EH_HANDLED;
}

/*
 * mmc_blk_cmdq_err: error handling of cmdq error requests.
 * Function should be called in context of error out request
 * which has claim_host and rpm acquired.
 * This may be called with CQ engine halted. Make sure to
 * unhalt it after error recovery.
 *
 * TODO: Currently the cmdq error handler does reset_all in case
 * of any error. Need to optimize error handling.
 */
static void mmc_blk_cmdq_err(struct mmc_queue *mq)
{
	int err;
	int retry = 0;
	int gen_err;
	u32 status;

	struct mmc_host *host = mq->card->host;
	struct mmc_request *mrq = host->err_mrq;
	struct mmc_card *card = mq->card;
	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
	struct request_queue *q = mrq->req->q;
	int tag = mrq->req->tag;
	struct request_queue *q;
	int err;

	pm_runtime_get_sync(&card->dev);
	mmc_host_clk_hold(host);
	host->cmdq_ops->dumpstate(host);
	mmc_host_clk_release(host);

	if (WARN_ON(!mrq))
		return;

	q = mrq->req->q;
	err = mmc_cmdq_halt(host, true);
	if (err) {
		pr_err("halt: failed: %d\n", err);
@@ -3012,74 +3117,44 @@ static void mmc_blk_cmdq_err(struct mmc_queue *mq)

	/* RED error - Fatal: requires reset */
	if (mrq->cmdq_req->resp_err) {
		err = mrq->cmdq_req->resp_err;
		pr_crit("%s: Response error detected: Device in bad state\n",
			mmc_hostname(host));
		blk_end_request_all(mrq->req, -EIO);
		goto reset;
	}

	if (mrq->data && mrq->data->error) {
		blk_end_request_all(mrq->req, mrq->data->error);
		for (; retry < MAX_RETRIES; retry++) {
			err = get_card_status(card, &status, 0);
			if (!err)
				break;
		}

		if (err) {
			pr_err("%s: No response from card !!!\n",
			       mmc_hostname(host));
			goto reset;
		}

		if (R1_CURRENT_STATE(status) == R1_STATE_DATA ||
		    R1_CURRENT_STATE(status) == R1_STATE_RCV) {
			err =  send_stop(card, MMC_CMDQ_STOP_TIMEOUT_MS,
					 mrq->req, &gen_err, &status);
			if (err) {
				pr_err("%s: error %d sending stop (%d) command\n",
					mmc_hostname(host),
					err, status);
				goto reset;
			}
		}

		if (mmc_cmdq_discard_queue(host, tag))
			goto reset;
		else
			goto unhalt;
	}

	/*
	 * In case of software request time-out, we schedule err work only for
	 * the first error out request and handles all other request in flight
	 * here.
	 */
	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state)) {
		err = -ETIMEDOUT;
	} else if (mrq->data && mrq->data->error) {
		err = mrq->data->error;
	} else if (mrq->cmd && mrq->cmd->error) {
		/* DCMD commands */
	if (mrq->cmd && mrq->cmd->error) {
		err = mrq->cmd->error;

		/*
		 * Notify completion for non flush commands like discard
		 * that wait for DCMD finish.
		 */
		if (!(mrq->req->cmd_flags & REQ_FLUSH)) {
			complete(&mrq->completion);
			goto reset;
		}
		clear_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx_info->curr_state);
		blk_end_request_all(mrq->req, mrq->cmd->error);
	}

reset:
	spin_lock_irq(mq->queue->queue_lock);
	blk_queue_invalidate_tags(q);
	spin_unlock_irq(mq->queue->queue_lock);
	mmc_blk_cmdq_reset(host, true);
	goto out;

unhalt:
	mmc_blk_cmdq_reset_all(host, err);
	if (mrq->cmdq_req->resp_err)
		mrq->cmdq_req->resp_err = false;
	mmc_cmdq_halt(host, false);

out:
	host->err_mrq = NULL;
	pm_runtime_mark_last_busy(&card->dev);
	clear_bit(CMDQ_STATE_ERR, &ctx_info->curr_state);
	clear_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
	WARN_ON(!test_and_clear_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
	wake_up(&ctx_info->wait);
	__mmc_put_card(card);
}

/* invoked by block layer in softirq context */
@@ -3099,31 +3174,38 @@ void mmc_blk_cmdq_complete_rq(struct request *rq)
	else if (mrq->data && mrq->data->error)
		err = mrq->data->error;

	/* clear pending request */
	BUG_ON(!test_and_clear_bit(cmdq_req->tag,
				   &ctx_info->active_reqs));

	if (cmdq_req->cmdq_req_flags & DCMD)
		is_dcmd = true;
	else
		BUG_ON(!test_and_clear_bit(cmdq_req->tag,
			 &ctx_info->data_active_reqs));

	mmc_cmdq_post_req(host, cmdq_req->tag, err);
	if (err) {
		pr_err("%s: %s: txfr error: %d\n", mmc_hostname(mrq->host),
		       __func__, err);
	if (err || cmdq_req->resp_err) {
		pr_err("%s: %s: txfr error(%d)/resp_err(%d)\n",
				mmc_hostname(mrq->host), __func__, err,
				cmdq_req->resp_err);
		if (test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
			pr_err("%s: CQ in error state, ending current req: %d\n",
				__func__, err);
			blk_end_request_all(rq, err);
		} else {
			set_bit(CMDQ_STATE_ERR, &ctx_info->curr_state);
			BUG_ON(host->err_mrq != NULL);
			host->err_mrq = mrq;
			schedule_work(&mq->cmdq_err_work);
		}
		goto out;
	}
	/*
	 * In case of error, CMDQ is expected to be either in the halted
	 * or the disabled state, so it cannot receive any completion of
	 * other requests.
	 */
	BUG_ON(test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));

	/* clear pending request */
	BUG_ON(!test_and_clear_bit(cmdq_req->tag,
				   &ctx_info->active_reqs));
	if (cmdq_req->cmdq_req_flags & DCMD)
		is_dcmd = true;
	else
		BUG_ON(!test_and_clear_bit(cmdq_req->tag,
					 &ctx_info->data_active_reqs));

	mmc_cmdq_post_req(host, cmdq_req->tag, err);
	if (cmdq_req->cmdq_req_flags & DCMD) {
		clear_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx_info->curr_state);
		blk_end_request_all(rq, err);
@@ -3135,10 +3217,11 @@ void mmc_blk_cmdq_complete_rq(struct request *rq)
out:

	mmc_cmdq_clk_scaling_stop_busy(host, true, is_dcmd);
	if (!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
	if (!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
		wake_up(&ctx_info->wait);

		mmc_put_card(host->card);
	}

	if (!ctx_info->active_reqs)
		wake_up_interruptible(&host->cmdq_ctx.queue_empty_wq);

@@ -3153,7 +3236,6 @@ void mmc_blk_cmdq_req_done(struct mmc_request *mrq)
{
	struct request *req = mrq->req;

	mmc_host_clk_release(mrq->host);
	blk_complete_request(req);
}
EXPORT_SYMBOL(mmc_blk_cmdq_req_done);
+0 −1
Original line number Diff line number Diff line
@@ -882,7 +882,6 @@ skip_cqterri:
			mrq->cmdq_req->resp_arg = cmdq_readl(cq_host, CQCRA);
		}

		mmc->err_mrq = mrq;
		cmdq_finish_data(mmc, tag);
	}

+1 −0
Original line number Diff line number Diff line
@@ -256,6 +256,7 @@ struct mmc_cmdq_context_info {
#define	CMDQ_STATE_DCMD_ACTIVE 1
#define	CMDQ_STATE_HALT 2
#define	CMDQ_STATE_CQ_DISABLE 3
#define	CMDQ_STATE_REQ_TIMED_OUT 4
	wait_queue_head_t	queue_empty_wq;
	wait_queue_head_t	wait;
	int active_small_sector_read_reqs;