Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f906a6a0 authored by Jens Axboe's avatar Jens Axboe
Browse files

blk-mq: improve tag waiting setup for non-shared tags



If we run out of driver tags, we currently treat shared and non-shared
tags the same - both cases hook into the tag waitqueue. This is a bit
more costly than it needs to be on unshared tags, since we have to both
grab the hctx lock, and the waitqueue lock (and disable interrupts).
For the non-shared case, we can simply mark the queue as needing a
restart.

Split blk_mq_dispatch_wait_add() to account for both cases, and
rename it to blk_mq_mark_tag_wait() to better reflect what it
does now.

Without this patch, shared and non-shared performance is about the same
with 4 fio thread hammering on a single null_blk device (~410K, at 75%
sys). With the patch, the shared case is the same, but the non-shared
tags case runs at 431K at 71% sys.

Reviewed-by: default avatarOmar Sandoval <osandov@fb.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 15f7b41f
Loading
Loading
Loading
Loading
+55 −26
Original line number Diff line number Diff line
@@ -1006,13 +1006,26 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
	return 1;
}

static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx **hctx,
/*
 * Mark us waiting for a tag. For shared tags, this involves hooking us into
 * the tag wakeups. For non-shared tags, we can simply mark us nedeing a
 * restart. For both caes, take care to check the condition again after
 * marking us as waiting.
 */
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
				 struct request *rq)
{
	struct blk_mq_hw_ctx *this_hctx = *hctx;
	wait_queue_entry_t *wait = &this_hctx->dispatch_wait;
	bool shared_tags = (this_hctx->flags & BLK_MQ_F_TAG_SHARED) != 0;
	struct sbq_wait_state *ws;
	wait_queue_entry_t *wait;
	bool ret;

	if (!shared_tags) {
		if (!test_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state))
			set_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state);
	} else {
		wait = &this_hctx->dispatch_wait;
		if (!list_empty_careful(&wait->entry))
			return false;

@@ -1024,13 +1037,23 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx **hctx,

		ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
		add_wait_queue(&ws->wait, wait);
	}

	/*
	 * It's possible that a tag was freed in the window between the
	 * allocation failure and adding the hardware queue to the wait
	 * queue.
	 */
	if (!blk_mq_get_driver_tag(rq, hctx, false)) {
	ret = blk_mq_get_driver_tag(rq, hctx, false);

	if (!shared_tags) {
		/*
		 * Don't clear RESTART here, someone else could have set it.
		 * At most this will cost an extra queue run.
		 */
		return ret;
	} else {
		if (!ret) {
			spin_unlock(&this_hctx->lock);
			return false;
		}
@@ -1045,6 +1068,7 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx **hctx,
		spin_unlock(&this_hctx->lock);
		return true;
	}
}

bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
			     bool got_budget)
@@ -1076,9 +1100,14 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
			 * before we add this entry back on the dispatch list,
			 * we'll re-run it below.
			 */
			if (!blk_mq_dispatch_wait_add(&hctx, rq)) {
			if (!blk_mq_mark_tag_wait(&hctx, rq)) {
				if (got_budget)
					blk_mq_put_dispatch_budget(hctx);
				/*
				 * For non-shared tags, the RESTART check
				 * will suffice.
				 */
				if (hctx->flags & BLK_MQ_F_TAG_SHARED)
					no_tag = true;
				break;
			}