Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e4043dcf authored by Jens Axboe's avatar Jens Axboe
Browse files

blk-mq: ensure that hardware queues are always run on the mapped CPUs



Instead of providing soft mappings with no guarantees on hardware
queues always being run on the right CPU, switch to a hard mapping
guarantee that ensure that we always run the hardware queue on
(one of, if more) the mapped CPU.

Signed-off-by: default avatarJens Axboe <axboe@fb.com>
parent 8ab14595
Loading
Loading
Loading
Loading
+51 −15
Original line number Diff line number Diff line
@@ -209,11 +209,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
			break;
		}

		if (gfp & __GFP_WAIT) {
			__blk_mq_run_hw_queue(hctx);
			blk_mq_put_ctx(ctx);
		} else {
			blk_mq_put_ctx(ctx);
		if (!(gfp & __GFP_WAIT))
			break;
		}

		__blk_mq_run_hw_queue(hctx);
		blk_mq_wait_for_tags(hctx->tags);
	} while (1);

@@ -514,6 +517,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
	LIST_HEAD(rq_list);
	int bit, queued;

	WARN_ON(!preempt_count());

	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
		return;

@@ -606,10 +611,22 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
		return;

	if (!async)
	if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask))
		__blk_mq_run_hw_queue(hctx);
	else
	else if (hctx->queue->nr_hw_queues == 1)
		kblockd_schedule_delayed_work(&hctx->delayed_work, 0);
	else {
		unsigned int cpu;

		/*
		 * It'd be great if the workqueue API had a way to pass
		 * in a mask and had some smarts for more clever placement
		 * than the first CPU. Or we could round-robin here. For now,
		 * just queue on the first CPU.
		 */
		cpu = cpumask_first(hctx->cpumask);
		kblockd_schedule_delayed_work_on(cpu, &hctx->delayed_work, 0);
	}
}

void blk_mq_run_queues(struct request_queue *q, bool async)
@@ -623,7 +640,9 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
			continue;

		preempt_disable();
		blk_mq_run_hw_queue(hctx, async);
		preempt_enable();
	}
}
EXPORT_SYMBOL(blk_mq_run_queues);
@@ -648,7 +667,10 @@ EXPORT_SYMBOL(blk_mq_stop_hw_queues);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
{
	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);

	preempt_disable();
	__blk_mq_run_hw_queue(hctx);
	preempt_enable();
}
EXPORT_SYMBOL(blk_mq_start_hw_queue);

@@ -662,7 +684,9 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q)
			continue;

		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
		preempt_disable();
		blk_mq_run_hw_queue(hctx, true);
		preempt_enable();
	}
}
EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
@@ -672,7 +696,10 @@ static void blk_mq_work_fn(struct work_struct *work)
	struct blk_mq_hw_ctx *hctx;

	hctx = container_of(work, struct blk_mq_hw_ctx, delayed_work.work);

	preempt_disable();
	__blk_mq_run_hw_queue(hctx);
	preempt_enable();
}

static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
@@ -716,10 +743,10 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
		spin_unlock(&ctx->lock);
	}

	blk_mq_put_ctx(current_ctx);

	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);

	blk_mq_put_ctx(current_ctx);
}

static void blk_mq_insert_requests(struct request_queue *q,
@@ -755,9 +782,8 @@ static void blk_mq_insert_requests(struct request_queue *q,
	}
	spin_unlock(&ctx->lock);

	blk_mq_put_ctx(current_ctx);

	blk_mq_run_hw_queue(hctx, from_schedule);
	blk_mq_put_ctx(current_ctx);
}

static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -876,7 +902,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)

	if (unlikely(is_flush_fua)) {
		blk_mq_bio_to_request(rq, bio);
		blk_mq_put_ctx(ctx);
		blk_insert_flush(rq);
		goto run_queue;
	}
@@ -914,7 +939,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
	}

	spin_unlock(&ctx->lock);
	blk_mq_put_ctx(ctx);

	/*
	 * For a SYNC request, send it to the hardware immediately. For an
@@ -923,6 +947,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
	 */
run_queue:
	blk_mq_run_hw_queue(hctx, !is_sync || is_flush_fua);
	blk_mq_put_ctx(ctx);
}

/*
@@ -990,9 +1015,9 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
	blk_mq_hctx_mark_pending(hctx, ctx);

	spin_unlock(&ctx->lock);
	blk_mq_put_ctx(ctx);

	blk_mq_run_hw_queue(hctx, true);
	blk_mq_put_ctx(ctx);
}

static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx,
@@ -1255,12 +1280,13 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
		__ctx->queue = q;

		/* If the cpu isn't online, the cpu is mapped to first hctx */
		hctx = q->mq_ops->map_queue(q, i);
		hctx->nr_ctx++;

		if (!cpu_online(i))
			continue;

		hctx = q->mq_ops->map_queue(q, i);
		cpumask_set_cpu(i, hctx->cpumask);
		hctx->nr_ctx++;

		/*
		 * Set local node, IFF we have more than one hw queue. If
		 * not, we remain on the home node of the device
@@ -1277,6 +1303,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
	struct blk_mq_ctx *ctx;

	queue_for_each_hw_ctx(q, hctx, i) {
		cpumask_clear(hctx->cpumask);
		hctx->nr_ctx = 0;
	}

@@ -1285,7 +1312,11 @@ static void blk_mq_map_swqueue(struct request_queue *q)
	 */
	queue_for_each_ctx(q, ctx, i) {
		/* If the cpu isn't online, the cpu is mapped to first hctx */
		if (!cpu_online(i))
			continue;

		hctx = q->mq_ops->map_queue(q, i);
		cpumask_set_cpu(i, hctx->cpumask);
		ctx->index_hw = hctx->nr_ctx;
		hctx->ctxs[hctx->nr_ctx++] = ctx;
	}
@@ -1329,6 +1360,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
		if (!hctxs[i])
			goto err_hctxs;

		if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL))
			goto err_hctxs;

		hctxs[i]->numa_node = NUMA_NO_NODE;
		hctxs[i]->queue_num = i;
	}
@@ -1392,6 +1426,7 @@ err_hctxs:
	for (i = 0; i < reg->nr_hw_queues; i++) {
		if (!hctxs[i])
			break;
		free_cpumask_var(hctxs[i]->cpumask);
		reg->ops->free_hctx(hctxs[i], i);
	}
	kfree(hctxs);
@@ -1413,6 +1448,7 @@ void blk_mq_free_queue(struct request_queue *q)
		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
		if (q->mq_ops->exit_hctx)
			q->mq_ops->exit_hctx(hctx, i);
		free_cpumask_var(hctx->cpumask);
		q->mq_ops->free_hctx(hctx, i);
	}

+1 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ struct blk_mq_hw_ctx {

	unsigned long		state;		/* BLK_MQ_S_* flags */
	struct delayed_work	delayed_work;
	cpumask_var_t		cpumask;

	unsigned long		flags;		/* BLK_MQ_F_* flags */