
Commit 1e8e55b6 authored by Adrian Hunter, committed by Ulf Hansson

mmc: block: Add CQE support



Add CQE support to the block driver, including:
    - optionally using DCMD for flush requests
    - "manually" issuing discard requests
    - issuing read / write requests to the CQE
    - supporting block-layer timeouts
    - handling recovery
    - supporting re-tuning

CQE offers 25%-50% better random multi-threaded I/O.  There is a slight
(about 2%) drop in sequential read speed but no observable change to
sequential write speed.

CQE automatically sends the commands to complete requests.  However, it only
supports reads / writes and so-called "direct commands" (DCMD).  Furthermore,
DCMD is limited to one command at a time, but discards require 3 commands.
That makes issuing discards through CQE very awkward, and some CQEs don't
support DCMD anyway.  So for discards, the existing non-CQE approach is
taken, where the mmc core code issues the 3 commands one at a time, i.e.
mmc_erase().  DCMD, where supported, is used only for issuing flushes.
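
For context, a host controller driver opts in roughly as follows (a minimal
sketch around a hypothetical foo_cqe_host_setup(); only cqe_enabled and
MMC_CAP2_CQE_DCMD are taken from this patch):

	/* Hypothetical host driver setup (sketch): opting in to CQE. */
	static int foo_cqe_host_setup(struct mmc_host *host)
	{
		host->cqe_enabled = true;	  /* mmc_init_queue() then sets mq->use_cqe */
		host->caps2 |= MMC_CAP2_CQE_DCMD; /* optional: issue flushes as DCMD */
		return 0;
	}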

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Linus Walleij <linus.walleij@linaro.org>
parent 81196976
drivers/mmc/core/block.c +148 −2
@@ -112,6 +112,7 @@ struct mmc_blk_data {
#define MMC_BLK_WRITE		BIT(1)
#define MMC_BLK_DISCARD		BIT(2)
#define MMC_BLK_SECDISCARD	BIT(3)
#define MMC_BLK_CQE_RECOVERY	BIT(4)

	/*
	 * Only set in main mmc_blk_data associated
@@ -1730,6 +1731,138 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq,
		*do_data_tag_p = do_data_tag;
}

#define MMC_CQE_RETRIES 2

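/*
 * Complete a CQE request: errors are retried up to MMC_CQE_RETRIES times
 * by requeueing; a partial data transfer is requeued too.  The card
 * reference is dropped once nothing is left in flight.
 */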
static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
{
	struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
	struct mmc_request *mrq = &mqrq->brq.mrq;
	struct request_queue *q = req->q;
	struct mmc_host *host = mq->card->host;
	unsigned long flags;
	bool put_card;
	int err;

	mmc_cqe_post_req(host, mrq);

	if (mrq->cmd && mrq->cmd->error)
		err = mrq->cmd->error;
	else if (mrq->data && mrq->data->error)
		err = mrq->data->error;
	else
		err = 0;

	if (err) {
		if (mqrq->retries++ < MMC_CQE_RETRIES)
			blk_mq_requeue_request(req, true);
		else
			blk_mq_end_request(req, BLK_STS_IOERR);
	} else if (mrq->data) {
		if (blk_update_request(req, BLK_STS_OK, mrq->data->bytes_xfered))
			blk_mq_requeue_request(req, true);
		else
			__blk_mq_end_request(req, BLK_STS_OK);
	} else {
		blk_mq_end_request(req, BLK_STS_OK);
	}

	spin_lock_irqsave(q->queue_lock, flags);

	mq->in_flight[mmc_issue_type(mq, req)] -= 1;

	put_card = (mmc_tot_in_flight(mq) == 0);

	mmc_cqe_check_busy(mq);

	spin_unlock_irqrestore(q->queue_lock, flags);

	if (!mq->cqe_busy)
		blk_mq_run_hw_queues(q, true);

	if (put_card)
		mmc_put_card(mq->card, &mq->ctx);
}

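/*
 * Run from the recovery worker: ask the host to recover the CQE, falling
 * back to a full mmc_blk_reset() if that fails.
 */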
void mmc_blk_cqe_recovery(struct mmc_queue *mq)
{
	struct mmc_card *card = mq->card;
	struct mmc_host *host = card->host;
	int err;

	pr_debug("%s: CQE recovery start\n", mmc_hostname(host));

	err = mmc_cqe_recovery(host);
	if (err)
		mmc_blk_reset(mq->blkdata, host, MMC_BLK_CQE_RECOVERY);
	else
		mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY);

	pr_debug("%s: CQE recovery done\n", mmc_hostname(host));
}

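/*
 * mrq->done handler: normal completions go through blk_mq_complete_request()
 * and end up in mmc_blk_mq_complete() below.
 */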
static void mmc_blk_cqe_req_done(struct mmc_request *mrq)
{
	struct mmc_queue_req *mqrq = container_of(mrq, struct mmc_queue_req,
						  brq.mrq);
	struct request *req = mmc_queue_req_to_req(mqrq);
	struct request_queue *q = req->q;
	struct mmc_queue *mq = q->queuedata;

	/*
	 * Block layer timeouts race with completions which means the normal
	 * completion path cannot be used during recovery.
	 */
	if (mq->in_recovery)
		mmc_blk_cqe_complete_rq(mq, req);
	else
		blk_mq_complete_request(req);
}

static int mmc_blk_cqe_start_req(struct mmc_host *host, struct mmc_request *mrq)
{
	mrq->done		= mmc_blk_cqe_req_done;
	mrq->recovery_notifier	= mmc_cqe_recovery_notifier;

	return mmc_cqe_start_req(host, mrq);
}

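/*
 * A DCMD carries a command but no data; the block-layer tag is passed
 * along so the host can track it alongside queued read / write tasks.
 */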
static struct mmc_request *mmc_blk_cqe_prep_dcmd(struct mmc_queue_req *mqrq,
						 struct request *req)
{
	struct mmc_blk_request *brq = &mqrq->brq;

	memset(brq, 0, sizeof(*brq));

	brq->mrq.cmd = &brq->cmd;
	brq->mrq.tag = req->tag;

	return &brq->mrq;
}

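/*
 * A flush is a SWITCH command writing the EXT_CSD FLUSH_CACHE byte:
 * access mode in bits 24-25, byte index in bits 16-23, value in bits 8-15.
 */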
static int mmc_blk_cqe_issue_flush(struct mmc_queue *mq, struct request *req)
{
	struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
	struct mmc_request *mrq = mmc_blk_cqe_prep_dcmd(mqrq, req);

	mrq->cmd->opcode = MMC_SWITCH;
	mrq->cmd->arg = (MMC_SWITCH_MODE_WRITE_BYTE << 24) |
			(EXT_CSD_FLUSH_CACHE << 16) |
			(1 << 8) |
			EXT_CSD_CMD_SET_NORMAL;
	mrq->cmd->flags = MMC_CMD_AC | MMC_RSP_R1B;

	return mmc_blk_cqe_start_req(mq->card->host, mrq);
}

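/*
 * Reads / writes need no explicit command: mmc_blk_data_prep() fills in
 * the data portion and the CQE host builds the task from that.
 */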
static int mmc_blk_cqe_issue_rw_rq(struct mmc_queue *mq, struct request *req)
{
	struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);

	mmc_blk_data_prep(mq, mqrq, 0, NULL, NULL);

	return mmc_blk_cqe_start_req(mq->card->host, &mqrq->brq.mrq);
}

static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
			       struct mmc_card *card,
			       int disable_multi,
@@ -2038,6 +2171,9 @@ void mmc_blk_mq_complete(struct request *req)
{
	struct mmc_queue *mq = req->q->queuedata;

	if (mq->use_cqe)
		mmc_blk_cqe_complete_rq(mq, req);
	else
		mmc_blk_mq_complete_rq(mq, req);
}

@@ -2212,6 +2348,9 @@ static int mmc_blk_mq_issue_rw_rq(struct mmc_queue *mq,

static int mmc_blk_wait_for_idle(struct mmc_queue *mq, struct mmc_host *host)
{
	if (mq->use_cqe)
		return host->cqe_ops->cqe_wait_for_idle(host);

	return mmc_blk_rw_wait(mq, NULL);
}

@@ -2250,10 +2389,17 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req)
			return MMC_REQ_FAILED_TO_START;
		}
		return MMC_REQ_FINISHED;
	case MMC_ISSUE_DCMD:
	case MMC_ISSUE_ASYNC:
		switch (req_op(req)) {
		case REQ_OP_FLUSH:
			ret = mmc_blk_cqe_issue_flush(mq, req);
			break;
		case REQ_OP_READ:
		case REQ_OP_WRITE:
			if (mq->use_cqe)
				ret = mmc_blk_cqe_issue_rw_rq(mq, req);
			else
				ret = mmc_blk_mq_issue_rw_rq(mq, req);
			break;
		default:
drivers/mmc/core/block.h +2 −0
@@ -7,6 +7,8 @@ struct request;

void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req);

void mmc_blk_cqe_recovery(struct mmc_queue *mq);

enum mmc_issued;

enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req);
drivers/mmc/core/queue.c +158 −4
@@ -40,18 +40,142 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
	return BLKPREP_OK;
}

static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq)
{
	/* Allow only 1 DCMD at a time */
	return mq->in_flight[MMC_ISSUE_DCMD];
}

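/*
 * Called on completion: a request has just finished, so the queue is no
 * longer full and the single DCMD slot may have freed up.
 */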
void mmc_cqe_check_busy(struct mmc_queue *mq)
{
	if ((mq->cqe_busy & MMC_CQE_DCMD_BUSY) && !mmc_cqe_dcmd_busy(mq))
		mq->cqe_busy &= ~MMC_CQE_DCMD_BUSY;

	mq->cqe_busy &= ~MMC_CQE_QUEUE_FULL;
}

static inline bool mmc_cqe_can_dcmd(struct mmc_host *host)
{
	return host->caps2 & MMC_CAP2_CQE_DCMD;
}

enum mmc_issue_type mmc_cqe_issue_type(struct mmc_host *host,
				       struct request *req)
{
	switch (req_op(req)) {
	case REQ_OP_DRV_IN:
	case REQ_OP_DRV_OUT:
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
		return MMC_ISSUE_SYNC;
	case REQ_OP_FLUSH:
		return mmc_cqe_can_dcmd(host) ? MMC_ISSUE_DCMD : MMC_ISSUE_SYNC;
	default:
		return MMC_ISSUE_ASYNC;
	}
}

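/* Non-CQE hosts keep the existing behaviour: only reads / writes are async. */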
enum mmc_issue_type mmc_issue_type(struct mmc_queue *mq, struct request *req)
{
	struct mmc_host *host = mq->card->host;

	if (mq->use_cqe)
		return mmc_cqe_issue_type(host, req);

	if (req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_WRITE)
		return MMC_ISSUE_ASYNC;

	return MMC_ISSUE_SYNC;
}

static void __mmc_cqe_recovery_notifier(struct mmc_queue *mq)
{
	if (!mq->recovery_needed) {
		mq->recovery_needed = true;
		schedule_work(&mq->recovery_work);
	}
}

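/*
 * Called by the host controller driver when a request needs recovery;
 * may run in interrupt context, hence the irqsave locking.
 */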
void mmc_cqe_recovery_notifier(struct mmc_request *mrq)
{
	struct mmc_queue_req *mqrq = container_of(mrq, struct mmc_queue_req,
						  brq.mrq);
	struct request *req = mmc_queue_req_to_req(mqrq);
	struct request_queue *q = req->q;
	struct mmc_queue *mq = q->queuedata;
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__mmc_cqe_recovery_notifier(mq);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

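/*
 * The host reports whether the request really timed out: if so, kick
 * recovery if needed and rearm the block-layer timer so recovery can
 * finish the request; if not, it has already completed.
 */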
static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
{
	struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
	struct mmc_request *mrq = &mqrq->brq.mrq;
	struct mmc_queue *mq = req->q->queuedata;
	struct mmc_host *host = mq->card->host;
	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
	bool recovery_needed = false;

	switch (issue_type) {
	case MMC_ISSUE_ASYNC:
	case MMC_ISSUE_DCMD:
		if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) {
			if (recovery_needed)
				__mmc_cqe_recovery_notifier(mq);
			return BLK_EH_RESET_TIMER;
		}
		/* No timeout */
		return BLK_EH_HANDLED;
	default:
		/* Timeout is handled by mmc core */
		return BLK_EH_RESET_TIMER;
	}
}

static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
						 bool reserved)
{
	struct request_queue *q = req->q;
	struct mmc_queue *mq = q->queuedata;
	unsigned long flags;
	int ret;

	spin_lock_irqsave(q->queue_lock, flags);

	if (mq->recovery_needed || !mq->use_cqe)
		ret = BLK_EH_RESET_TIMER;
	else
		ret = mmc_cqe_timed_out(req);

	spin_unlock_irqrestore(q->queue_lock, flags);

	return ret;
}

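/*
 * Recovery runs from a workqueue so it can sleep; in_recovery makes
 * completions bypass blk_mq_complete_request() (see mmc_blk_cqe_req_done()).
 */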
static void mmc_mq_recovery_handler(struct work_struct *work)
{
	struct mmc_queue *mq = container_of(work, struct mmc_queue,
					    recovery_work);
	struct request_queue *q = mq->queue;

	mmc_get_card(mq->card, &mq->ctx);

	mq->in_recovery = true;

	mmc_blk_cqe_recovery(mq);

	mq->in_recovery = false;

	spin_lock_irq(q->queue_lock);
	mq->recovery_needed = false;
	spin_unlock_irq(q->queue_lock);

	mmc_put_card(mq->card, &mq->ctx);

	blk_mq_run_hw_queues(q, true);
}

static int mmc_queue_thread(void *d)
@@ -223,9 +347,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
	struct request_queue *q = req->q;
	struct mmc_queue *mq = q->queuedata;
	struct mmc_card *card = mq->card;
	struct mmc_host *host = card->host;
	enum mmc_issue_type issue_type;
	enum mmc_issued issued;
	bool get_card, cqe_retune_ok;
	int ret;

	if (mmc_card_removed(mq->card)) {
@@ -237,7 +362,19 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,

	spin_lock_irq(q->queue_lock);

	if (mq->recovery_needed) {
		spin_unlock_irq(q->queue_lock);
		return BLK_STS_RESOURCE;
	}

	switch (issue_type) {
	case MMC_ISSUE_DCMD:
		if (mmc_cqe_dcmd_busy(mq)) {
			mq->cqe_busy |= MMC_CQE_DCMD_BUSY;
			spin_unlock_irq(q->queue_lock);
			return BLK_STS_RESOURCE;
		}
		break;
	case MMC_ISSUE_ASYNC:
		break;
	default:
@@ -254,6 +391,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,

	mq->in_flight[issue_type] += 1;
	get_card = (mmc_tot_in_flight(mq) == 1);
	cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1);

	spin_unlock_irq(q->queue_lock);

@@ -265,6 +403,11 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
	if (get_card)
		mmc_get_card(card, &mq->ctx);

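	/*
	 * Re-tuning must not cut across queued tasks, so only allow it when
	 * this request is the sole one the CQE will see (cqe_retune_ok).
	 */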
	if (mq->use_cqe) {
		host->retune_now = host->need_retune && cqe_retune_ok &&
				   !host->hold_retune;
	}

	blk_mq_start_request(req);

	issued = mmc_blk_mq_issue_rq(mq, req);
@@ -326,6 +469,7 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
	/* Initialize thread_sem even if it is not used */
	sema_init(&mq->thread_sem, 1);

	INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler);
	INIT_WORK(&mq->complete_work, mmc_blk_mq_complete_work);

	mutex_init(&mq->complete_lock);
@@ -375,9 +519,17 @@ static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth,
static int mmc_mq_init(struct mmc_queue *mq, struct mmc_card *card,
			 spinlock_t *lock)
{
	struct mmc_host *host = card->host;
	int q_depth;
	int ret;

	/*
	 * The queue depth for CQE must match the hardware because the request
	 * tag is used to index the hardware queue.
	 */
	if (mq->use_cqe)
		q_depth = min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth);
	else
		q_depth = MMC_QUEUE_DEPTH;

	ret = mmc_mq_init_queue(mq, q_depth, &mmc_mq_ops, lock);
@@ -408,7 +560,9 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,

	mq->card = card;

	mq->use_cqe = host->cqe_enabled;

	if (mq->use_cqe || mmc_host_use_blk_mq(host))
		return mmc_mq_init(mq, card, lock);

	mq->queue = blk_alloc_queue(GFP_KERNEL);
drivers/mmc/core/queue.h +18 −0
@@ -17,6 +17,7 @@ enum mmc_issued {

enum mmc_issue_type {
	MMC_ISSUE_SYNC,
	MMC_ISSUE_DCMD,
	MMC_ISSUE_ASYNC,
	MMC_ISSUE_MAX,
};
@@ -92,8 +93,15 @@ struct mmc_queue {
	int			qcnt;

	int			in_flight[MMC_ISSUE_MAX];
	unsigned int		cqe_busy;
#define MMC_CQE_DCMD_BUSY	BIT(0)
#define MMC_CQE_QUEUE_FULL	BIT(1)
	bool			use_cqe;
	bool			recovery_needed;
	bool			in_recovery;
	bool			rw_wait;
	bool			waiting;
	struct work_struct	recovery_work;
	wait_queue_head_t	wait;
	struct request		*complete_req;
	struct mutex		complete_lock;
@@ -108,11 +116,21 @@ extern void mmc_queue_resume(struct mmc_queue *);
extern unsigned int mmc_queue_map_sg(struct mmc_queue *,
				     struct mmc_queue_req *);

void mmc_cqe_check_busy(struct mmc_queue *mq);
void mmc_cqe_recovery_notifier(struct mmc_request *mrq);

enum mmc_issue_type mmc_issue_type(struct mmc_queue *mq, struct request *req);

static inline int mmc_tot_in_flight(struct mmc_queue *mq)
{
	return mq->in_flight[MMC_ISSUE_SYNC] +
	       mq->in_flight[MMC_ISSUE_DCMD] +
	       mq->in_flight[MMC_ISSUE_ASYNC];
}

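/*
 * Number of requests the CQE is processing (sync requests never reach
 * the CQE); used to decide when re-tuning is allowed.
 */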
static inline int mmc_cqe_qcnt(struct mmc_queue *mq)
{
	return mq->in_flight[MMC_ISSUE_DCMD] +
	       mq->in_flight[MMC_ISSUE_ASYNC];
}