
Commit 1cf7e9c6 authored by Jens Axboe

virtio_blk: blk-mq support



Switch virtio-blk from its dual support for old-style requests and bios
over to the block multiqueue (blk-mq) interface.
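
In outline, the conversion replaces the driver's request_fn and
make_request_fn paths with a blk-mq registration. Condensed from the
hunks below (error handling elided):

	static struct blk_mq_ops virtio_mq_ops = {
		.queue_rq	= virtio_queue_rq,	/* submit one request */
		.map_queue	= blk_mq_map_queue,
		.alloc_hctx	= blk_mq_alloc_single_hw_queue,
		.free_hctx	= blk_mq_free_single_hw_queue,
	};

	static struct blk_mq_reg virtio_mq_reg = {
		.ops		= &virtio_mq_ops,
		.nr_hw_queues	= 1,	/* one hw queue for the single virtqueue */
		.queue_depth	= 64,
		.numa_node	= NUMA_NO_NODE,
		.flags		= BLK_MQ_F_SHOULD_MERGE,
	};

	/* cmd_size makes blk-mq preallocate per-request driver data
	 * (struct virtblk_req plus its scatterlist), replacing the
	 * old mempool and bio-path allocations: */
	virtio_mq_reg.cmd_size = sizeof(struct virtblk_req) +
				 sizeof(struct scatterlist) * sg_elems;
	q = blk_mq_init_queue(&virtio_mq_reg, vblk);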

Acked-by: Asias He <asias@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 1355b37f
drivers/block/virtio_blk.c +65 −257
@@ -11,12 +11,11 @@
 #include <linux/string_helpers.h>
 #include <scsi/scsi_cmnd.h>
 #include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include <linux/numa.h>
 
 #define PART_BITS 4
 
-static bool use_bio;
-module_param(use_bio, bool, S_IRUGO);
-
 static int major;
 static DEFINE_IDA(vd_index_ida);
 
@@ -26,13 +25,11 @@ struct virtio_blk
 {
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
-	wait_queue_head_t queue_wait;
+	spinlock_t vq_lock;
 
 	/* The disk structure for the kernel. */
 	struct gendisk *disk;
 
-	mempool_t *pool;
-
 	/* Process context for config space updates */
 	struct work_struct config_work;
 
@@ -47,31 +44,17 @@ struct virtio_blk
 
 	/* Ida index - used to track minor number allocations. */
 	int index;
-
-	/* Scatterlist: can be too big for stack. */
-	struct scatterlist sg[/*sg_elems*/];
 };
 
 struct virtblk_req
 {
 	struct request *req;
-	struct bio *bio;
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
-	struct work_struct work;
-	struct virtio_blk *vblk;
-	int flags;
 	u8 status;
 	struct scatterlist sg[];
 };
 
-enum {
-	VBLK_IS_FLUSH		= 1,
-	VBLK_REQ_FLUSH		= 2,
-	VBLK_REQ_DATA		= 4,
-	VBLK_REQ_FUA		= 8,
-};
-
 static inline int virtblk_result(struct virtblk_req *vbr)
 {
 	switch (vbr->status) {
@@ -84,22 +67,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
 	}
 }
 
-static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
-						    gfp_t gfp_mask)
-{
-	struct virtblk_req *vbr;
-
-	vbr = mempool_alloc(vblk->pool, gfp_mask);
-	if (!vbr)
-		return NULL;
-
-	vbr->vblk = vblk;
-	if (use_bio)
-		sg_init_table(vbr->sg, vblk->sg_elems);
-
-	return vbr;
-}
-
 static int __virtblk_add_req(struct virtqueue *vq,
 			     struct virtblk_req *vbr,
 			     struct scatterlist *data_sg,
@@ -143,83 +110,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
-static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-	DEFINE_WAIT(wait);
-	int ret;
-
-	spin_lock_irq(vblk->disk->queue->queue_lock);
-	while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
-						 have_data)) < 0)) {
-		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
-					  TASK_UNINTERRUPTIBLE);
-
-		spin_unlock_irq(vblk->disk->queue->queue_lock);
-		io_schedule();
-		spin_lock_irq(vblk->disk->queue->queue_lock);
-
-		finish_wait(&vblk->queue_wait, &wait);
-	}
-
-	virtqueue_kick(vblk->vq);
-	spin_unlock_irq(vblk->disk->queue->queue_lock);
-}
-
-static void virtblk_bio_send_flush(struct virtblk_req *vbr)
-{
-	vbr->flags |= VBLK_IS_FLUSH;
-	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
-	vbr->out_hdr.sector = 0;
-	vbr->out_hdr.ioprio = 0;
-
-	virtblk_add_req(vbr, false);
-}
-
-static void virtblk_bio_send_data(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-	struct bio *bio = vbr->bio;
-	bool have_data;
-
-	vbr->flags &= ~VBLK_IS_FLUSH;
-	vbr->out_hdr.type = 0;
-	vbr->out_hdr.sector = bio->bi_sector;
-	vbr->out_hdr.ioprio = bio_prio(bio);
-
-	if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
-		have_data = true;
-		if (bio->bi_rw & REQ_WRITE)
-			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
-		else
-			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
-	} else
-		have_data = false;
-
-	virtblk_add_req(vbr, have_data);
-}
-
-static void virtblk_bio_send_data_work(struct work_struct *work)
-{
-	struct virtblk_req *vbr;
-
-	vbr = container_of(work, struct virtblk_req, work);
-
-	virtblk_bio_send_data(vbr);
-}
-
-static void virtblk_bio_send_flush_work(struct work_struct *work)
-{
-	struct virtblk_req *vbr;
-
-	vbr = container_of(work, struct virtblk_req, work);
-
-	virtblk_bio_send_flush(vbr);
-}
-
 static inline void virtblk_request_done(struct virtblk_req *vbr)
 {
-	struct virtio_blk *vblk = vbr->vblk;
 	struct request *req = vbr->req;
 	int error = virtblk_result(vbr);
 
@@ -231,90 +123,43 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
 		req->errors = (error != 0);
 	}
 
-	__blk_end_request_all(req, error);
-	mempool_free(vbr, vblk->pool);
-}
-
-static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-
-	if (vbr->flags & VBLK_REQ_DATA) {
-		/* Send out the actual write data */
-		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
-		queue_work(virtblk_wq, &vbr->work);
-	} else {
-		bio_endio(vbr->bio, virtblk_result(vbr));
-		mempool_free(vbr, vblk->pool);
-	}
-}
-
-static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-
-	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
-		/* Send out a flush before end the bio */
-		vbr->flags &= ~VBLK_REQ_DATA;
-		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
-		queue_work(virtblk_wq, &vbr->work);
-	} else {
-		bio_endio(vbr->bio, virtblk_result(vbr));
-		mempool_free(vbr, vblk->pool);
-	}
-}
-
-static inline void virtblk_bio_done(struct virtblk_req *vbr)
-{
-	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
-		virtblk_bio_flush_done(vbr);
-	else
-		virtblk_bio_data_done(vbr);
+	blk_mq_end_io(req, error);
 }
 
 static void virtblk_done(struct virtqueue *vq)
 {
 	struct virtio_blk *vblk = vq->vdev->priv;
-	bool bio_done = false, req_done = false;
+	bool req_done = false;
 	struct virtblk_req *vbr;
 	unsigned long flags;
 	unsigned int len;
 
-	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
+	spin_lock_irqsave(&vblk->vq_lock, flags);
 	do {
 		virtqueue_disable_cb(vq);
 		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
-			if (vbr->bio) {
-				virtblk_bio_done(vbr);
-				bio_done = true;
-			} else {
-				virtblk_request_done(vbr);
-				req_done = true;
-			}
+			virtblk_request_done(vbr);
+			req_done = true;
 		}
 	} while (!virtqueue_enable_cb(vq));
+	spin_unlock_irqrestore(&vblk->vq_lock, flags);
 
 	/* In case queue is stopped waiting for more buffers. */
 	if (req_done)
-		blk_start_queue(vblk->disk->queue);
-	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
-
-	if (bio_done)
-		wake_up(&vblk->queue_wait);
+		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
 }
 
-static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
-		   struct request *req)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 {
+	struct virtio_blk *vblk = hctx->queue->queuedata;
+	struct virtblk_req *vbr = req->special;
+	unsigned long flags;
 	unsigned int num;
-	struct virtblk_req *vbr;
+	const bool last = (req->cmd_flags & REQ_END) != 0;
 
-	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
-	if (!vbr)
-		/* When another request finishes we'll try again. */
-		return false;
+	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
 	vbr->req = req;
-	vbr->bio = NULL;
 	if (req->cmd_flags & REQ_FLUSH) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
 		vbr->out_hdr.sector = 0;
@@ -342,7 +187,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		}
 	}
 
-	num = blk_rq_map_sg(q, vbr->req, vblk->sg);
+	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(vbr->req) == WRITE)
 			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
@@ -350,63 +195,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
 	}
 
-	if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
-		mempool_free(vbr, vblk->pool);
-		return false;
-	}
-
-	return true;
-}
-
-static void virtblk_request(struct request_queue *q)
-{
-	struct virtio_blk *vblk = q->queuedata;
-	struct request *req;
-	unsigned int issued = 0;
-
-	while ((req = blk_peek_request(q)) != NULL) {
-		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
-
-		/* If this request fails, stop queue and wait for something to
-		   finish to restart it. */
-		if (!do_req(q, vblk, req)) {
-			blk_stop_queue(q);
-			break;
-		}
-		blk_start_request(req);
-		issued++;
-	}
-
-	if (issued)
-		virtqueue_kick(vblk->vq);
-}
-
-static void virtblk_make_request(struct request_queue *q, struct bio *bio)
-{
-	struct virtio_blk *vblk = q->queuedata;
-	struct virtblk_req *vbr;
-
-	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
-
-	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
-	if (!vbr) {
-		bio_endio(bio, -ENOMEM);
-		return;
-	}
-
-	vbr->bio = bio;
-	vbr->flags = 0;
-	if (bio->bi_rw & REQ_FLUSH)
-		vbr->flags |= VBLK_REQ_FLUSH;
-	if (bio->bi_rw & REQ_FUA)
-		vbr->flags |= VBLK_REQ_FUA;
-	if (bio->bi_size)
-		vbr->flags |= VBLK_REQ_DATA;
-
-	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
-		virtblk_bio_send_flush(vbr);
-	else
-		virtblk_bio_send_data(vbr);
+	spin_lock_irqsave(&vblk->vq_lock, flags);
+	if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
+		spin_unlock_irqrestore(&vblk->vq_lock, flags);
+		blk_mq_stop_hw_queue(hctx);
+		virtqueue_kick(vblk->vq);
+		return BLK_MQ_RQ_QUEUE_BUSY;
+	}
+	spin_unlock_irqrestore(&vblk->vq_lock, flags);
+
+	if (last)
+		virtqueue_kick(vblk->vq);
+	return BLK_MQ_RQ_QUEUE_OK;
 }
 
 /* return id (s/n) string for *disk to *id_str
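
Note on the hunk above: the failure path of virtio_queue_rq is the blk-mq
backpressure handshake. An annotated condensation, using only names from
this diff:

	spin_lock_irqsave(&vblk->vq_lock, flags);
	if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
		/* virtqueue full: stop this hw queue; virtblk_done()
		 * restarts it once completions free ring slots */
		spin_unlock_irqrestore(&vblk->vq_lock, flags);
		blk_mq_stop_hw_queue(hctx);
		virtqueue_kick(vblk->vq);	/* flush what is already queued */
		return BLK_MQ_RQ_QUEUE_BUSY;	/* blk-mq will requeue this request */
	}

Kicks are batched as well: on success the virtqueue is only kicked for the
request carrying REQ_END, i.e. once per submission batch.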
@@ -680,12 +480,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
 	__ATTR(cache_type, S_IRUGO|S_IWUSR,
 	       virtblk_cache_type_show, virtblk_cache_type_store);
 
+static struct blk_mq_ops virtio_mq_ops = {
+	.queue_rq	= virtio_queue_rq,
+	.map_queue	= blk_mq_map_queue,
+	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
+	.free_hctx	= blk_mq_free_single_hw_queue,
+};
+
+static struct blk_mq_reg virtio_mq_reg = {
+	.ops		= &virtio_mq_ops,
+	.nr_hw_queues	= 1,
+	.queue_depth	= 64,
+	.numa_node	= NUMA_NO_NODE,
+	.flags		= BLK_MQ_F_SHOULD_MERGE,
+};
+
+static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
+			     struct request *rq, unsigned int nr)
+{
+	struct virtio_blk *vblk = data;
+	struct virtblk_req *vbr = rq->special;
+
+	sg_init_table(vbr->sg, vblk->sg_elems);
+}
+
 static int virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
 	struct request_queue *q;
 	int err, index;
-	int pool_size;
 
 	u64 cap;
 	u32 v, blk_size, sg_elems, opt_io_size;
@@ -709,17 +532,14 @@ static int virtblk_probe(struct virtio_device *vdev)
 
 	/* We need an extra sg elements at head and tail. */
 	sg_elems += 2;
-	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
-				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
+	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
 	if (!vblk) {
 		err = -ENOMEM;
 		goto out_free_index;
 	}
 
-	init_waitqueue_head(&vblk->queue_wait);
 	vblk->vdev = vdev;
 	vblk->sg_elems = sg_elems;
-	sg_init_table(vblk->sg, vblk->sg_elems);
 	mutex_init(&vblk->config_lock);
 
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
@@ -728,31 +548,27 @@ static int virtblk_probe(struct virtio_device *vdev)
 	err = init_vq(vblk);
 	if (err)
 		goto out_free_vblk;
-
-	pool_size = sizeof(struct virtblk_req);
-	if (use_bio)
-		pool_size += sizeof(struct scatterlist) * sg_elems;
-	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
-	if (!vblk->pool) {
-		err = -ENOMEM;
-		goto out_free_vq;
-	}
+	spin_lock_init(&vblk->vq_lock);
 
 	/* FIXME: How many partitions?  How long is a piece of string? */
 	vblk->disk = alloc_disk(1 << PART_BITS);
 	if (!vblk->disk) {
 		err = -ENOMEM;
-		goto out_mempool;
+		goto out_free_vq;
 	}
 
-	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
+	virtio_mq_reg.cmd_size =
+		sizeof(struct virtblk_req) +
+		sizeof(struct scatterlist) * sg_elems;
+
+	q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
 	if (!q) {
 		err = -ENOMEM;
 		goto out_put_disk;
 	}
 
-	if (use_bio)
-		blk_queue_make_request(q, virtblk_make_request);
+	blk_mq_init_commands(q, virtblk_init_vbr, vblk);
+
 	q->queuedata = vblk;
 
 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -857,8 +673,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 	blk_cleanup_queue(vblk->disk->queue);
 out_put_disk:
 	put_disk(vblk->disk);
-out_mempool:
-	mempool_destroy(vblk->pool);
 out_free_vq:
 	vdev->config->del_vqs(vdev);
 out_free_vblk:
@@ -890,7 +704,6 @@ static void virtblk_remove(struct virtio_device *vdev)
 
 	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
 	put_disk(vblk->disk);
-	mempool_destroy(vblk->pool);
 	vdev->config->del_vqs(vdev);
 	kfree(vblk);
 
@@ -914,10 +727,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
 
 	flush_work(&vblk->config_work);
 
-	spin_lock_irq(vblk->disk->queue->queue_lock);
-	blk_stop_queue(vblk->disk->queue);
-	spin_unlock_irq(vblk->disk->queue->queue_lock);
-	blk_sync_queue(vblk->disk->queue);
+	blk_mq_stop_hw_queues(vblk->disk->queue);
 
 	vdev->config->del_vqs(vdev);
 	return 0;
@@ -930,11 +740,9 @@ static int virtblk_restore(struct virtio_device *vdev)
 
 	vblk->config_enable = true;
 	ret = init_vq(vdev->priv);
-	if (!ret) {
-		spin_lock_irq(vblk->disk->queue->queue_lock);
-		blk_start_queue(vblk->disk->queue);
-		spin_unlock_irq(vblk->disk->queue->queue_lock);
-	}
+	if (!ret)
+		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
+
 	return ret;
 }
 #endif
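
Note: the completion side, condensed from the hunks above for reference
(locking as in the diff; details elided):

	static void virtblk_done(struct virtqueue *vq)
	{
		...
		spin_lock_irqsave(&vblk->vq_lock, flags);
		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
			virtblk_request_done(vbr);	/* -> blk_mq_end_io() */
			req_done = true;
		}
		spin_unlock_irqrestore(&vblk->vq_lock, flags);

		if (req_done)	/* restart hw queues stopped on a full virtqueue */
			blk_mq_start_stopped_hw_queues(vblk->disk->queue);
	}

With blk-mq ending requests directly via blk_mq_end_io(), the old bio path's
workqueue bouncing disappears; flush/FUA ordering is left to the block
layer's flush machinery rather than driver-private VBLK_REQ_* flags.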