Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5d2ee712 authored by Jens Axboe
Browse files

sbitmap: optimize wakeup check



Even if we have no waiters on any of the sbitmap_queue wait states, we
still have to loop over every entry to check. We do this for every IO,
so the cost adds up.

Shift a bit of the cost to the slow path, when we actually have waiters.
Wrap prepare_to_wait_exclusive() and finish_wait(), so we can maintain
an internal count of how many are currently active. Then we can simply
check this count in sbq_wake_ptr() and not have to loop if we don't
have any sleepers.

Convert the two users of sbitmap with waiting, blk-mq-tag and iSCSI.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ea86ea2c
Loading
Loading
Loading
Loading
+5 −6
Original line number Diff line number Diff line
@@ -110,7 +110,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
	struct sbitmap_queue *bt;
	struct sbq_wait_state *ws;
	DEFINE_WAIT(wait);
	DEFINE_SBQ_WAIT(wait);
	unsigned int tag_offset;
	bool drop_ctx;
	int tag;
@@ -154,8 +154,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
		if (tag != -1)
			break;

		prepare_to_wait_exclusive(&ws->wait, &wait,
						TASK_UNINTERRUPTIBLE);
		sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);

		tag = __blk_mq_get_tag(data, bt);
		if (tag != -1)
@@ -167,6 +166,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
		bt_prev = bt;
		io_schedule();

		sbitmap_finish_wait(bt, ws, &wait);

		data->ctx = blk_mq_get_ctx(data->q);
		data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
						data->ctx->cpu);
@@ -176,8 +177,6 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
		else
			bt = &tags->bitmap_tags;

		finish_wait(&ws->wait, &wait);

		/*
		 * If destination hw queue is changed, fake wake up on
		 * previous queue for compensating the wake up miss, so
@@ -192,7 +191,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
	if (drop_ctx && data->ctx)
		blk_mq_put_ctx(data->ctx);

	finish_wait(&ws->wait, &wait);
	sbitmap_finish_wait(bt, ws, &wait);

found_tag:
	return tag + tag_offset;
+7 −5
Original line number Diff line number Diff line
@@ -150,24 +150,26 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd)
static int iscsit_wait_for_tag(struct se_session *se_sess, int state, int *cpup)
{
	int tag = -1;
	DEFINE_WAIT(wait);
	DEFINE_SBQ_WAIT(wait);
	struct sbq_wait_state *ws;
	struct sbitmap_queue *sbq;

	if (state == TASK_RUNNING)
		return tag;

	ws = &se_sess->sess_tag_pool.ws[0];
	sbq = &se_sess->sess_tag_pool;
	ws = &sbq->ws[0];
	for (;;) {
		prepare_to_wait_exclusive(&ws->wait, &wait, state);
		sbitmap_prepare_to_wait(sbq, ws, &wait, state);
		if (signal_pending_state(state, current))
			break;
		tag = sbitmap_queue_get(&se_sess->sess_tag_pool, cpup);
		tag = sbitmap_queue_get(sbq, cpup);
		if (tag >= 0)
			break;
		schedule();
	}

	finish_wait(&ws->wait, &wait);
	sbitmap_finish_wait(sbq, ws, &wait);
	return tag;
}

+34 −0
Original line number Diff line number Diff line
@@ -135,6 +135,11 @@ struct sbitmap_queue {
	 */
	struct sbq_wait_state *ws;

	/*
	 * @ws_active: count of currently active ws waitqueues
	 */
	atomic_t ws_active;

	/**
	 * @round_robin: Allocate bits in strict round-robin order.
	 */
@@ -552,4 +557,33 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
 */
void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);

/*
 * Wait entry for use with sbitmap_prepare_to_wait() and
 * sbitmap_finish_wait(). Declare one with DEFINE_SBQ_WAIT().
 */
struct sbq_wait {
	/*
	 * Non-zero while this waiter has been counted in the owning
	 * sbitmap_queue's ws_active; set by sbitmap_prepare_to_wait() and
	 * cleared by sbitmap_finish_wait().
	 */
	int accounted;
	/* Entry handed to prepare_to_wait_exclusive() / finish_wait(). */
	struct wait_queue_entry wait;
};

/*
 * Declare a struct sbq_wait called @name that is not yet accounted and whose
 * embedded wait entry belongs to the current task, uses
 * autoremove_wake_function() as its wake callback, and has a list entry
 * initialised to point at itself.
 */
#define DEFINE_SBQ_WAIT(name)							\
	struct sbq_wait name = {						\
		.accounted = 0,							\
		.wait = {							\
			.private	= current,				\
			.func		= autoremove_wake_function,		\
			.entry		= LIST_HEAD_INIT((name).wait.entry),	\
		}								\
	}

/*
 * Wrapper around prepare_to_wait_exclusive(). In addition to queueing
 * @sbq_wait on @ws in task state @state, it increments @sbq->ws_active the
 * first time it is called for a given @sbq_wait, and marks the entry as
 * accounted so that repeated calls do not count the same waiter twice.
 */
void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
				struct sbq_wait_state *ws,
				struct sbq_wait *sbq_wait, int state);

/*
 * Must be paired with sbitmap_prepare_to_wait(). Wrapper around
 * finish_wait() that also decrements @sbq->ws_active if @sbq_wait is
 * currently marked as accounted, and clears that mark.
 */
void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
				struct sbq_wait *sbq_wait);

#endif /* __LINUX_SCALE_BITMAP_H */
+28 −0
Original line number Diff line number Diff line
@@ -394,6 +394,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
	sbq->min_shallow_depth = UINT_MAX;
	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
	atomic_set(&sbq->wake_index, 0);
	atomic_set(&sbq->ws_active, 0);

	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
	if (!sbq->ws) {
@@ -509,6 +510,9 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
	int i, wake_index;

	if (!atomic_read(&sbq->ws_active))
		return NULL;

	wake_index = atomic_read(&sbq->wake_index);
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[wake_index];
@@ -634,6 +638,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)

	seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
	seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
	seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active));

	seq_puts(m, "ws={\n");
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
@@ -649,3 +654,26 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
	seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_show);

/*
 * Queue @sbq_wait exclusively on @ws->wait in task state @state.
 *
 * The first call for a given @sbq_wait also increments @sbq->ws_active and
 * marks the entry as accounted; later calls made before
 * sbitmap_finish_wait() leave the counter alone.
 */
void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
			     struct sbq_wait_state *ws,
			     struct sbq_wait *sbq_wait, int state)
{
	struct wait_queue_entry *entry = &sbq_wait->wait;

	/* Count each waiter once, however many times the caller loops. */
	if (sbq_wait->accounted == 0) {
		atomic_inc(&sbq->ws_active);
		sbq_wait->accounted = 1;
	}

	prepare_to_wait_exclusive(&ws->wait, entry, state);
}
EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);

/*
 * Remove @sbq_wait from @ws->wait via finish_wait(), then release the
 * @sbq->ws_active count held for it, if there is one.
 */
void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
			 struct sbq_wait *sbq_wait)
{
	struct wait_queue_entry *entry = &sbq_wait->wait;

	finish_wait(&ws->wait, entry);

	/* Nothing to release when this waiter was never counted. */
	if (!sbq_wait->accounted)
		return;

	atomic_dec(&sbq->ws_active);
	sbq_wait->accounted = 0;
}
EXPORT_SYMBOL_GPL(sbitmap_finish_wait);