Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bc07c10a authored by Ming Lei, committed by Jens Axboe
Browse files

block: loop: support DIO & AIO

There are at least three advantages to using direct I/O and AIO for
reads and writes on the loop device's backing file:

1) double caching is avoided, so memory usage decreases
significantly

2) unlike user-space direct I/O, there is no cost of
pinning pages

3) context switches are avoided while still obtaining good throughput
- with buffered file reads, the best random I/O throughput is often
obtained only when requests are submitted concurrently from many tasks;
for sequential I/O, however, most requests hit the page cache, so
concurrent submission often introduces unnecessary context switches
and does not improve throughput much. There was a discussion[1]
about using non-blocking I/O to mitigate this problem for applications.
- with direct I/O and AIO, concurrent submission can be avoided
without hurting random read throughput

xfstests (-g auto, ext4) basically passes when run with
direct I/O (aio); the one exception is generic/232, but it also fails
with loop buffered I/O (4.2-rc6-next-20150814).

The fio test results below show the performance impact:
	4-job fio test inside an ext4 file system on top of a loop block device

1) How to run
	- KVM: 4 VCPUs, 2G RAM
	- linux kernel: 4.2-rc6-next-20150814(base) with the patchset
	- the loop block is over one image on SSD.
	- linux psync, 4 jobs, size 1500M, ext4 over loop block
	- test result: IOPS from fio output

2) Throughput(IOPS) becomes a bit better with direct I/O(aio)
        -------------------------------------------------------------
        test cases          |randread   |read   |randwrite  |write  |
        -------------------------------------------------------------
        base                |8015       |113811 |67442      |106978
        -------------------------------------------------------------
        base+loop aio       |8136       |125040 |67811      |111376
        -------------------------------------------------------------

- this is probably because more page cache is available to the
application, or because one extra page copy is avoided with direct I/O

3) context switch
        - context switches decreased by ~50% with loop direct I/O (aio)
	compared with loop buffered I/O (4.2-rc6-next-20150814)

4) memory usage from /proc/meminfo
        -------------------------------------------------------------
                                   | Buffers       | Cached
        -------------------------------------------------------------
        base                       | > 760MB       | ~950MB
        -------------------------------------------------------------
        base+loop direct I/O(aio)  | < 5MB         | ~1.6GB
        -------------------------------------------------------------

- so much more page cache is available to applications with
direct I/O

[1] https://lwn.net/Articles/612483/



Signed-off-by: Ming Lei <ming.lei@canonical.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
parent ab1cb278
Loading
Loading
Loading
Loading
+95 −3
Original line number Diff line number Diff line
@@ -445,6 +445,90 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq)
	return ret;
}

static inline void handle_partial_read(struct loop_cmd *cmd, long bytes)
{
	if (bytes < 0 || (cmd->rq->cmd_flags & REQ_WRITE))
		return;

	if (unlikely(bytes < blk_rq_bytes(cmd->rq))) {
		struct bio *bio = cmd->rq->bio;

		bio_advance(bio, bytes);
		zero_fill_bio(bio);
	}
}

/*
 * Completion callback installed in cmd->iocb.ki_complete by lo_rw_aio().
 *
 * @iocb: the kiocb embedded in the loop command being completed
 * @ret:  I/O result: bytes transferred, or a negative value on error
 * @ret2: unused
 *
 * Short reads are zero-padded first.  The result is then reduced to the
 * request's error field: any negative result is reported as -EIO, anything
 * else as success (0).  Finally the request is handed to
 * blk_mq_complete_request().
 */
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
{
	struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
	struct request *req = cmd->rq;

	handle_partial_read(cmd, ret);

	/* The specific errno is dropped; failures are all reported as -EIO. */
	req->errors = (ret < 0) ? -EIO : 0;
	blk_mq_complete_request(req);
}

/*
 * Submit one request to the backing file through the direct-I/O kiocb
 * interface.
 *
 * @lo:  loop device; supplies the backing file
 * @cmd: loop command wrapping the request; its embedded kiocb is used
 * @pos: byte offset in the backing file
 * @rw:  WRITE to call ->write_iter(), anything else calls ->read_iter()
 *
 * The whole request is described by a single bvec iterator built from the
 * request's first bio, so the request is expected to consist of exactly
 * one bio.
 *
 * Always returns 0.  The outcome of the I/O is delivered through
 * lo_rw_aio_complete(): it is called directly from here unless the file
 * operation returned -EIOCBQUEUED, in which case completion is left to
 * the ki_complete callback stored in the kiocb.
 */
static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
		     loff_t pos, bool rw)
{
	struct request *req = cmd->rq;
	struct bio *bio = req->bio;
	struct file *backing = lo->lo_backing_file;
	struct kiocb *iocb = &cmd->iocb;
	struct iov_iter iter;
	struct bio_vec *bvec;
	int ret;

	/* Merging is off for the loop queue, so expect a single bio. */
	WARN_ON(req->bio != req->biotail);

	bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
	iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
		      bio_segments(bio), blk_rq_bytes(req));

	iocb->ki_pos = pos;
	iocb->ki_filp = backing;
	iocb->ki_complete = lo_rw_aio_complete;
	iocb->ki_flags = IOCB_DIRECT;

	if (rw == WRITE)
		ret = backing->f_op->write_iter(iocb, &iter);
	else
		ret = backing->f_op->read_iter(iocb, &iter);

	/* Not queued: the result is final, so complete it right away. */
	if (ret != -EIOCBQUEUED)
		iocb->ki_complete(iocb, ret, 0);

	return 0;
}


/*
 * Service a request that needs no transfer function.
 *
 * @lo:  loop device the request targets
 * @rq:  the block request
 * @pos: byte offset in the backing file
 * @rw:  WRITE for a write, anything else is handled as a read
 *
 * Requests flagged use_aio go through lo_rw_aio(); everything else uses
 * the lo_write_simple()/lo_read_simple() helpers.  Returns whatever the
 * chosen helper returns.
 */
static inline int lo_rw_simple(struct loop_device *lo,
		struct request *rq, loff_t pos, bool rw)
{
	struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);

	if (!cmd->use_aio) {
		/*
		 * Ideally lo_write_simple() and lo_read_simple() would be
		 * replaced by a submit-style function such as lo_rw_aio().
		 * The blocker is that lo_read_simple() has to call
		 * flush_dcache_page() after the kernel has written to each
		 * page, which is hard to do in a function that submits all
		 * segments of the request at once.  Direct read I/O does
		 * not need flush_dcache_page(), so it is not affected.
		 */
		return (rw == WRITE) ? lo_write_simple(lo, rq, pos)
				     : lo_read_simple(lo, rq, pos);
	}

	return lo_rw_aio(lo, cmd, pos, rw);
}

static int do_req_filebacked(struct loop_device *lo, struct request *rq)
{
	loff_t pos;
@@ -460,13 +544,13 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
		else if (lo->transfer)
			ret = lo_write_transfer(lo, rq, pos);
		else
			ret = lo_write_simple(lo, rq, pos);
			ret = lo_rw_simple(lo, rq, pos, WRITE);

	} else {
		if (lo->transfer)
			ret = lo_read_transfer(lo, rq, pos);
		else
			ret = lo_read_simple(lo, rq, pos);
			ret = lo_rw_simple(lo, rq, pos, READ);
	}

	return ret;
@@ -1570,6 +1654,12 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
	if (lo->lo_state != Lo_bound)
		return -EIO;

	if (lo->use_dio && !(cmd->rq->cmd_flags & (REQ_FLUSH |
					REQ_DISCARD)))
		cmd->use_aio = true;
	else
		cmd->use_aio = false;

	queue_kthread_work(&lo->worker, &cmd->work);

	return BLK_MQ_RQ_QUEUE_OK;
@@ -1589,6 +1679,8 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 failed:
	if (ret)
		cmd->rq->errors = -EIO;
	/* complete non-aio request */
	if (!cmd->use_aio || ret)
		blk_mq_complete_request(cmd->rq);
}

+2 −0
Original line number Diff line number Diff line
@@ -69,6 +69,8 @@ struct loop_cmd {
	struct kthread_work work;
	struct request *rq;
	struct list_head list;
	bool use_aio;           /* use AIO interface to handle I/O */
	struct kiocb iocb;
};

/* Support for loadable transfer modules */