Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 18772641 authored by Andi Kleen's avatar Andi Kleen Committed by root
Browse files

direct-io: separate map_bh from dio



Only a single b_private field in the map_bh buffer head is needed after
the submission path. Move map_bh separately to avoid storing
this information in the long term slab.

This avoids the weird 104 byte hole in struct dio_submit which also needed
to be memseted early.

Signed-off-by: default avatarAndi Kleen <ak@linux.intel.com>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
parent 6e8267f5
Loading
Loading
Loading
Loading
+37 −29
Original line number Diff line number Diff line
@@ -119,6 +119,7 @@ struct dio {
	loff_t i_size;			/* i_size when submitted */
	dio_iodone_t *end_io;		/* IO completion function */

	void *private;			/* copy from map_bh.b_private */

	/* BIO completion state */
	spinlock_t bio_lock;		/* protects BIO fields below */
@@ -133,7 +134,6 @@ struct dio {
	struct kiocb *iocb;		/* kiocb */
	ssize_t result;                 /* IO result */

	struct buffer_head map_bh;	/* last get_block() result */
	/*
	 * pages[] (and any fields placed after it) are not zeroed out at
	 * allocation time.  Don't add new fields after pages[] unless you
@@ -301,7 +301,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is

	if (dio->end_io && dio->result) {
		dio->end_io(dio->iocb, offset, transferred,
			    dio->map_bh.b_private, ret, is_async);
			    dio->private, ret, is_async);
	} else {
		if (is_async)
			aio_complete(dio->iocb, ret, 0);
@@ -574,10 +574,10 @@ static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
 * buffer_mapped().  However the direct-io code will only process holes one
 * block at a time - it will repeatedly call get_block() as it walks the hole.
 */
static int get_more_blocks(struct dio *dio, struct dio_submit *sdio)
static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
			   struct buffer_head *map_bh)
{
	int ret;
	struct buffer_head *map_bh = &dio->map_bh;
	sector_t fs_startblk;	/* Into file, in filesystem-sized blocks */
	unsigned long fs_count;	/* Number of filesystem-sized blocks */
	unsigned long dio_count;/* Number of dio_block-sized blocks */
@@ -621,6 +621,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio)

		ret = (*sdio->get_block)(dio->inode, fs_startblk,
						map_bh, create);

		/* Store for completion */
		dio->private = map_bh->b_private;
	}
	return ret;
}
@@ -629,7 +632,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio)
 * There is no bio.  Make one now.
 */
static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
		       sector_t start_sector)
		       sector_t start_sector, struct buffer_head *map_bh)
{
	sector_t sector;
	int ret, nr_pages;
@@ -638,10 +641,10 @@ static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
	if (ret)
		goto out;
	sector = start_sector << (sdio->blkbits - 9);
	nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev));
	nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev));
	nr_pages = min(nr_pages, BIO_MAX_PAGES);
	BUG_ON(nr_pages <= 0);
	dio_bio_alloc(dio, sdio, dio->map_bh.b_bdev, sector, nr_pages);
	dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
	sdio->boundary = 0;
out:
	return ret;
@@ -686,7 +689,8 @@ static int dio_bio_add_page(struct dio_submit *sdio)
 * The caller of this function is responsible for removing cur_page from the
 * dio, and for dropping the refcount which came from that presence.
 */
static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio)
static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
			     struct buffer_head *map_bh)
{
	int ret = 0;

@@ -721,14 +725,14 @@ static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio)
	}

	if (sdio->bio == NULL) {
		ret = dio_new_bio(dio, sdio, sdio->cur_page_block);
		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
		if (ret)
			goto out;
	}

	if (dio_bio_add_page(sdio) != 0) {
		dio_bio_submit(dio, sdio);
		ret = dio_new_bio(dio, sdio, sdio->cur_page_block);
		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
		if (ret == 0) {
			ret = dio_bio_add_page(sdio);
			BUG_ON(ret != 0);
@@ -757,7 +761,8 @@ static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio)
 */
static int
submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
		unsigned offset, unsigned len, sector_t blocknr)
		    unsigned offset, unsigned len, sector_t blocknr,
		    struct buffer_head *map_bh)
{
	int ret = 0;

@@ -782,7 +787,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
		 * avoid metadata seeks.
		 */
		if (sdio->boundary) {
			ret = dio_send_cur_page(dio, sdio);
			ret = dio_send_cur_page(dio, sdio, map_bh);
			page_cache_release(sdio->cur_page);
			sdio->cur_page = NULL;
		}
@@ -793,7 +798,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
	 * If there's a deferred page already there then send it.
	 */
	if (sdio->cur_page) {
		ret = dio_send_cur_page(dio, sdio);
		ret = dio_send_cur_page(dio, sdio, map_bh);
		page_cache_release(sdio->cur_page);
		sdio->cur_page = NULL;
		if (ret)
@@ -815,16 +820,16 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 * file blocks.  Only called for S_ISREG files - blockdevs do not set
 * buffer_new
 */
static void clean_blockdev_aliases(struct dio *dio)
static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
{
	unsigned i;
	unsigned nblocks;

	nblocks = dio->map_bh.b_size >> dio->inode->i_blkbits;
	nblocks = map_bh->b_size >> dio->inode->i_blkbits;

	for (i = 0; i < nblocks; i++) {
		unmap_underlying_metadata(dio->map_bh.b_bdev,
					dio->map_bh.b_blocknr + i);
		unmap_underlying_metadata(map_bh->b_bdev,
					  map_bh->b_blocknr + i);
	}
}

@@ -837,7 +842,8 @@ static void clean_blockdev_aliases(struct dio *dio)
 * `end' is zero if we're doing the start of the IO, 1 at the end of the
 * IO.
 */
static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end,
			   struct buffer_head *map_bh)
{
	unsigned dio_blocks_per_fs_block;
	unsigned this_chunk_blocks;	/* In dio_blocks */
@@ -845,7 +851,7 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
	struct page *page;

	sdio->start_zero_done = 1;
	if (!sdio->blkfactor || !buffer_new(&dio->map_bh))
	if (!sdio->blkfactor || !buffer_new(map_bh))
		return;

	dio_blocks_per_fs_block = 1 << sdio->blkfactor;
@@ -865,7 +871,7 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)

	page = ZERO_PAGE(0);
	if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
				sdio->next_block_for_io))
				sdio->next_block_for_io, map_bh))
		return;

	sdio->next_block_for_io += this_chunk_blocks;
@@ -887,13 +893,13 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
 * it should set b_size to PAGE_SIZE or more inside get_block().  This gives
 * fine alignment but still allows this function to work in PAGE_SIZE units.
 */
static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
			struct buffer_head *map_bh)
{
	const unsigned blkbits = sdio->blkbits;
	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
	struct page *page;
	unsigned block_in_page;
	struct buffer_head *map_bh = &dio->map_bh;
	int ret = 0;

	/* The I/O can start at any block offset within the first page */
@@ -919,7 +925,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
				unsigned long blkmask;
				unsigned long dio_remainder;

				ret = get_more_blocks(dio, sdio);
				ret = get_more_blocks(dio, sdio, map_bh);
				if (ret) {
					page_cache_release(page);
					goto out;
@@ -932,7 +938,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
				sdio->next_block_for_io =
					map_bh->b_blocknr << sdio->blkfactor;
				if (buffer_new(map_bh))
					clean_blockdev_aliases(dio);
					clean_blockdev_aliases(dio, map_bh);

				if (!sdio->blkfactor)
					goto do_holes;
@@ -991,7 +997,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
			 * we must zero out the start of this block.
			 */
			if (unlikely(sdio->blkfactor && !sdio->start_zero_done))
				dio_zero_block(dio, sdio, 0);
				dio_zero_block(dio, sdio, 0, map_bh);

			/*
			 * Work out, in this_chunk_blocks, how much disk we
@@ -1011,7 +1017,8 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
			ret = submit_page_section(dio, sdio, page,
						  offset_in_page,
						  this_chunk_bytes,
						  sdio->next_block_for_io);
						  sdio->next_block_for_io,
						  map_bh);
			if (ret) {
				page_cache_release(page);
				goto out;
@@ -1047,6 +1054,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
	ssize_t ret = 0;
	ssize_t ret2;
	size_t bytes;
	struct buffer_head map_bh = { 0, };

	dio->inode = inode;
	dio->rw = rw;
@@ -1101,7 +1109,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
		sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
		sdio->curr_user_address = user_addr;
	
		ret = do_direct_IO(dio, sdio);
		ret = do_direct_IO(dio, sdio, &map_bh);

		dio->result += iov[seg].iov_len -
			((sdio->final_block_in_request - sdio->block_in_file) <<
@@ -1124,10 +1132,10 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
	 * There may be some unwritten disk at the end of a part-written
	 * fs-block-sized block.  Go zero that now.
	 */
	dio_zero_block(dio, sdio, 1);
	dio_zero_block(dio, sdio, 1, &map_bh);

	if (sdio->cur_page) {
		ret2 = dio_send_cur_page(dio, sdio);
		ret2 = dio_send_cur_page(dio, sdio, &map_bh);
		if (ret == 0)
			ret = ret2;
		page_cache_release(sdio->cur_page);