
Commit e3a2b3f9 authored by Jens Axboe

blk-mq: allow changing of queue depth through sysfs



For request_fn based devices, the block layer exports a 'nr_requests'
file through sysfs to allow adjusting of queue depth on the fly.
Currently this returns -EINVAL for blk-mq, since it's not wired up.
Wire this up for blk-mq, so that it now also allows dynamic
adjustment of the allowed queue depth for any given block device
managed by blk-mq.

Signed-off-by: Jens Axboe <axboe@fb.com>
parent 64b14519
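
For context, this knob is exercised from user space by writing the nr_requests attribute under /sys/block/<dev>/queue/. A minimal sketch of doing so in C; the device name sda and the depth 64 below are placeholder assumptions, not taken from this commit:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Placeholder path: substitute your own block device for sda. */
	const char *path = "/sys/block/sda/queue/nr_requests";
	const char *val = "64\n";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* With this commit, a blk-mq device accepts the write (up to the
	 * tag set's queue_depth) instead of always returning -EINVAL. */
	if (write(fd, val, strlen(val)) < 0)
		perror("write");
	close(fd);
	return 0;
}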
block/blk-core.c +41 −0
@@ -848,6 +848,47 @@ static void freed_request(struct request_list *rl, unsigned int flags)
 		__freed_request(rl, sync ^ 1);
 }
 
+int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+	struct request_list *rl;
+
+	spin_lock_irq(q->queue_lock);
+	q->nr_requests = nr;
+	blk_queue_congestion_threshold(q);
+
+	/* congestion isn't cgroup aware and follows root blkcg for now */
+	rl = &q->root_rl;
+
+	if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
+		blk_set_queue_congested(q, BLK_RW_SYNC);
+	else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
+		blk_clear_queue_congested(q, BLK_RW_SYNC);
+
+	if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
+		blk_set_queue_congested(q, BLK_RW_ASYNC);
+	else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
+		blk_clear_queue_congested(q, BLK_RW_ASYNC);
+
+	blk_queue_for_each_rl(rl, q) {
+		if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
+			blk_set_rl_full(rl, BLK_RW_SYNC);
+		} else {
+			blk_clear_rl_full(rl, BLK_RW_SYNC);
+			wake_up(&rl->wait[BLK_RW_SYNC]);
+		}
+
+		if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
+			blk_set_rl_full(rl, BLK_RW_ASYNC);
+		} else {
+			blk_clear_rl_full(rl, BLK_RW_ASYNC);
+			wake_up(&rl->wait[BLK_RW_ASYNC]);
+		}
+	}
+
+	spin_unlock_irq(q->queue_lock);
+	return 0;
+}
+
 /*
  * Determine if elevator data should be initialized when allocating the
  * request associated with @bio.
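
blk_update_nr_requests() recomputes the congestion thresholds before re-checking both directions, because changing nr_requests moves the watermarks that queue_congestion_on_threshold() and queue_congestion_off_threshold() report. As a rough illustration of that hysteresis, the following standalone sketch mirrors the arithmetic of blk_queue_congestion_threshold() in blk-core.c of this period; treat the exact constants as illustrative:

#include <stdio.h>

/* Hysteresis around nr_requests, modeled on the kernel's
 * blk_queue_congestion_threshold(); constants are illustrative. */
static void congestion_thresholds(int nr_requests, int *on, int *off)
{
	int nr;

	/* Mark congested once allocations pass roughly 7/8 of the depth. */
	nr = nr_requests - (nr_requests / 8) + 1;
	if (nr > nr_requests)
		nr = nr_requests;
	*on = nr;

	/* Clear congestion only after dropping a further ~1/16 below that,
	 * so the state does not flap around a single threshold. */
	nr = nr_requests - (nr_requests / 8) - (nr_requests / 16) - 1;
	if (nr < 1)
		nr = 1;
	*off = nr;
}

int main(void)
{
	int on, off;

	congestion_thresholds(128, &on, &off);
	printf("nr_requests=128: on=%d off=%d\n", on, off); /* 113 / 103 */
	return 0;
}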
block/blk-mq-tag.c +57 −23
@@ -57,23 +57,13 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 }
 
 /*
- * If a previously busy queue goes inactive, potential waiters could now
- * be allowed to queue. Wake them up and check.
+ * Wakeup all potentially sleeping on normal (non-reserved) tags
  */
-void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 {
-	struct blk_mq_tags *tags = hctx->tags;
 	struct blk_mq_bitmap_tags *bt;
 	int i, wake_index;
 
-	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-		return;
-
-	atomic_dec(&tags->active_queues);
-
-	/*
-	 * Will only throttle depth on non-reserved tags
-	 */
 	bt = &tags->bitmap_tags;
 	wake_index = bt->wake_index;
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
@@ -86,6 +76,22 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+/*
+ * If a previously busy queue goes inactive, potential waiters could now
+ * be allowed to queue. Wake them up and check.
+ */
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+
+	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return;
+
+	atomic_dec(&tags->active_queues);
+
+	blk_mq_tag_wakeup_all(tags);
+}
+
 /*
  * For shared tag users, we track the number of currently active users
  * and attempt to provide a fair share of the tag depth for each of them.
@@ -408,6 +414,28 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
 	return bt->depth - used;
 }
 
+static void bt_update_count(struct blk_mq_bitmap_tags *bt,
+			    unsigned int depth)
+{
+	unsigned int tags_per_word = 1U << bt->bits_per_word;
+	unsigned int map_depth = depth;
+
+	if (depth) {
+		int i;
+
+		for (i = 0; i < bt->map_nr; i++) {
+			bt->map[i].depth = min(map_depth, tags_per_word);
+			map_depth -= bt->map[i].depth;
+		}
+	}
+
+	bt->wake_cnt = BT_WAIT_BATCH;
+	if (bt->wake_cnt > depth / 4)
+		bt->wake_cnt = max(1U, depth / 4);
+
+	bt->depth = depth;
+}
+
 static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 			int node, bool reserved)
 {
@@ -420,7 +448,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 	 * condition.
 	 */
 	if (depth) {
-		unsigned int nr, i, map_depth, tags_per_word;
+		unsigned int nr, tags_per_word;
 
 		tags_per_word = (1 << bt->bits_per_word);
 
@@ -444,11 +472,6 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 			return -ENOMEM;
 
 		bt->map_nr = nr;
-		map_depth = depth;
-		for (i = 0; i < nr; i++) {
-			bt->map[i].depth = min(map_depth, tags_per_word);
-			map_depth -= tags_per_word;
-		}
 	}
 
 	bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
@@ -460,11 +483,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 	for (i = 0; i < BT_WAIT_QUEUES; i++)
 		init_waitqueue_head(&bt->bs[i].wait);
 
-	bt->wake_cnt = BT_WAIT_BATCH;
-	if (bt->wake_cnt > depth / 4)
-		bt->wake_cnt = max(1U, depth / 4);
-
-	bt->depth = depth;
+	bt_update_count(bt, depth);
 	return 0;
 }
 
@@ -525,6 +544,21 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag)
 	*tag = prandom_u32() % depth;
 }
 
+int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
+{
+	tdepth -= tags->nr_reserved_tags;
+	if (tdepth > tags->nr_tags)
+		return -EINVAL;
+
+	/*
+	 * Don't need (or can't) update reserved tags here, they remain
+	 * static and should never need resizing.
+	 */
+	bt_update_count(&tags->bitmap_tags, tdepth);
+	blk_mq_tag_wakeup_all(tags);
+	return 0;
+}
+
 ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
 {
 	char *orig_page = page;
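
bt_update_count() spreads the requested depth across the bitmap words, capping each word at tags_per_word and giving the last word the remainder; note it subtracts the depth actually assigned, unlike the loop it replaces in bt_alloc(), which subtracted a full tags_per_word even on a partial last word. A standalone user-space sketch of that arithmetic; the names only mirror the kernel's:

#include <stdio.h>

#define BT_WAIT_BATCH 8U

struct bt_word { unsigned int depth; };

/* Distribute 'depth' usable tags across 'map_nr' bitmap words,
 * at most tags_per_word per word, as bt_update_count() does. */
static void distribute(struct bt_word *map, unsigned int map_nr,
		       unsigned int bits_per_word, unsigned int depth)
{
	unsigned int tags_per_word = 1U << bits_per_word;
	unsigned int map_depth = depth;
	unsigned int i;

	for (i = 0; i < map_nr; i++) {
		map[i].depth = map_depth < tags_per_word ? map_depth : tags_per_word;
		map_depth -= map[i].depth;
	}
}

int main(void)
{
	struct bt_word map[2];
	unsigned int i;

	/* depth = 42 with 32 tags per word: word 0 gets 32, word 1 gets 10 */
	distribute(map, 2, 5, 42);
	for (i = 0; i < 2; i++)
		printf("word %u: depth %u\n", i, map[i].depth);

	/* wake_cnt clamp as in bt_update_count():
	 * min(BT_WAIT_BATCH, max(1, depth / 4)) */
	unsigned int depth = 42, wake_cnt = BT_WAIT_BATCH;
	if (wake_cnt > depth / 4)
		wake_cnt = depth / 4 > 1 ? depth / 4 : 1;
	printf("wake_cnt: %u\n", wake_cnt);
	return 0;
}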
block/blk-mq-tag.h +1 −0
@@ -55,6 +55,7 @@ extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
 extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
+extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
 
 enum {
 	BLK_MQ_TAG_CACHE_MIN	= 1,
block/blk-mq.c +22 −0
@@ -1789,6 +1789,28 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
 
+int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+	struct blk_mq_tag_set *set = q->tag_set;
+	struct blk_mq_hw_ctx *hctx;
+	int i, ret;
+
+	if (!set || nr > set->queue_depth)
+		return -EINVAL;
+
+	ret = 0;
+	queue_for_each_hw_ctx(q, hctx, i) {
+		ret = blk_mq_tag_update_depth(hctx->tags, nr);
+		if (ret)
+			break;
+	}
+
+	if (!ret)
+		q->nr_requests = nr;
+
+	return ret;
+}
+
 void blk_mq_disable_hotplug(void)
 {
 	mutex_lock(&all_q_mutex);
block/blk-mq.h +1 −0
@@ -32,6 +32,7 @@ void blk_mq_drain_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
 		struct request *orig_rq);
+int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 
 /*
  * CPU hotplug helpers