Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d48ece20 authored by Jianchao Wang's avatar Jianchao Wang Committed by Jens Axboe
Browse files

blk-mq: init hctx sched after update ctx and hctx mapping



Currently, when update nr_hw_queues, IO scheduler's init_hctx will
be invoked before the mapping between ctx and hctx is adapted
correctly by blk_mq_map_swqueue. The IO scheduler init_hctx (kyber)
may depend on this mapping and get wrong result and panic finally.
A simply way to fix this is that switch the IO scheduler to 'none'
before update the nr_hw_queues, and then switch it back after
update nr_hw_queues. blk_mq_sched_init_/exit_hctx are removed due
to nobody use them any more.

Signed-off-by: default avatarJianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent fcedba42
Loading
Loading
Loading
Loading
+0 −44
Original line number Diff line number Diff line
@@ -462,50 +462,6 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
		blk_mq_sched_free_tags(set, hctx, i);
}

int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			   unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;
	int ret;

	if (!e)
		return 0;

	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
	if (ret)
		return ret;

	if (e->type->ops.mq.init_hctx) {
		ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
		if (ret) {
			blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
			return ret;
		}
	}

	blk_mq_debugfs_register_sched_hctx(q, hctx);

	return 0;
}

void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			    unsigned int hctx_idx)
{
	struct elevator_queue *e = q->elevator;

	if (!e)
		return;

	blk_mq_debugfs_unregister_sched_hctx(hctx);

	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
		e->type->ops.mq.exit_hctx(hctx, hctx_idx);
		hctx->sched_data = NULL;
	}

	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
}

int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
	struct blk_mq_hw_ctx *hctx;
+0 −5
Original line number Diff line number Diff line
@@ -28,11 +28,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);

int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			   unsigned int hctx_idx);
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
			    unsigned int hctx_idx);

static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
+84 −8
Original line number Diff line number Diff line
@@ -2147,8 +2147,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
	if (set->ops->exit_request)
		set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);

	blk_mq_sched_exit_hctx(q, hctx, hctx_idx);

	if (set->ops->exit_hctx)
		set->ops->exit_hctx(hctx, hctx_idx);

@@ -2216,12 +2214,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
		goto free_bitmap;

	if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
		goto exit_hctx;

	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
	if (!hctx->fq)
		goto sched_exit_hctx;
		goto exit_hctx;

	if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
		goto free_fq;
@@ -2235,8 +2230,6 @@ static int blk_mq_init_hctx(struct request_queue *q,

 free_fq:
	kfree(hctx->fq);
 sched_exit_hctx:
	blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
 exit_hctx:
	if (set->ops->exit_hctx)
		set->ops->exit_hctx(hctx, hctx_idx);
@@ -2898,10 +2891,81 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
	return ret;
}

/*
 * request_queue and elevator_type pair.
 * It is just used by __blk_mq_update_nr_hw_queues to cache
 * the elevator_type associated with a request_queue.
 */
struct blk_mq_qe_pair {
	struct list_head node;
	struct request_queue *q;
	struct elevator_type *type;
};

/*
 * Cache the elevator_type in qe pair list and switch the
 * io scheduler to 'none'
 */
static bool blk_mq_elv_switch_none(struct list_head *head,
		struct request_queue *q)
{
	struct blk_mq_qe_pair *qe;

	if (!q->elevator)
		return true;

	qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
	if (!qe)
		return false;

	INIT_LIST_HEAD(&qe->node);
	qe->q = q;
	qe->type = q->elevator->type;
	list_add(&qe->node, head);

	mutex_lock(&q->sysfs_lock);
	/*
	 * After elevator_switch_mq, the previous elevator_queue will be
	 * released by elevator_release. The reference of the io scheduler
	 * module get by elevator_get will also be put. So we need to get
	 * a reference of the io scheduler module here to prevent it to be
	 * removed.
	 */
	__module_get(qe->type->elevator_owner);
	elevator_switch_mq(q, NULL);
	mutex_unlock(&q->sysfs_lock);

	return true;
}

static void blk_mq_elv_switch_back(struct list_head *head,
		struct request_queue *q)
{
	struct blk_mq_qe_pair *qe;
	struct elevator_type *t = NULL;

	list_for_each_entry(qe, head, node)
		if (qe->q == q) {
			t = qe->type;
			break;
		}

	if (!t)
		return;

	list_del(&qe->node);
	kfree(qe);

	mutex_lock(&q->sysfs_lock);
	elevator_switch_mq(q, t);
	mutex_unlock(&q->sysfs_lock);
}

static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
							int nr_hw_queues)
{
	struct request_queue *q;
	LIST_HEAD(head);

	lockdep_assert_held(&set->tag_list_lock);

@@ -2912,6 +2976,14 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_freeze_queue(q);
	/*
	 * Switch IO scheduler to 'none', cleaning up the data associated
	 * with the previous scheduler. We will switch back once we are done
	 * updating the new sw to hw queue mappings.
	 */
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		if (!blk_mq_elv_switch_none(&head, q))
			goto switch_back;

	set->nr_hw_queues = nr_hw_queues;
	blk_mq_update_queue_map(set);
@@ -2920,6 +2992,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
		blk_mq_queue_reinit(q);
	}

switch_back:
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_elv_switch_back(&head, q);

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_unfreeze_queue(q);
}
+2 −0
Original line number Diff line number Diff line
@@ -234,6 +234,8 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq

int elevator_init(struct request_queue *);
int elevator_init_mq(struct request_queue *q);
int elevator_switch_mq(struct request_queue *q,
			      struct elevator_type *new_e);
void elevator_exit(struct request_queue *, struct elevator_queue *);
int elv_register_queue(struct request_queue *q);
void elv_unregister_queue(struct request_queue *q);
+12 −8
Original line number Diff line number Diff line
@@ -933,16 +933,13 @@ void elv_unregister(struct elevator_type *e)
}
EXPORT_SYMBOL_GPL(elv_unregister);

static int elevator_switch_mq(struct request_queue *q,
int elevator_switch_mq(struct request_queue *q,
			      struct elevator_type *new_e)
{
	int ret;

	lockdep_assert_held(&q->sysfs_lock);

	blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	if (q->elevator) {
		if (q->elevator->registered)
			elv_unregister_queue(q);
@@ -968,8 +965,6 @@ static int elevator_switch_mq(struct request_queue *q,
		blk_add_trace_msg(q, "elv switch: none");

out:
	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);
	return ret;
}

@@ -1021,8 +1016,17 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)

	lockdep_assert_held(&q->sysfs_lock);

	if (q->mq_ops)
		return elevator_switch_mq(q, new_e);
	if (q->mq_ops) {
		blk_mq_freeze_queue(q);
		blk_mq_quiesce_queue(q);

		err = elevator_switch_mq(q, new_e);

		blk_mq_unquiesce_queue(q);
		blk_mq_unfreeze_queue(q);

		return err;
	}

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data.