Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a549984b authored by Theodore Ts'o's avatar Theodore Ts'o
Browse files

ext4: revert "ext4: use io_end for multiple bios"



This reverts commit 4eec708d.

Multiple users have reported crashes which is apparently caused by
this commit.  Thanks to Dmitry Monakhov for bisecting it.

Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
Cc: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: Jan Kara <jack@suse.cz>
parent e6155736
Loading
Loading
Loading
Loading
+2 −6
Original line number Diff line number Diff line
@@ -209,7 +209,6 @@ typedef struct ext4_io_end {
	ssize_t			size;		/* size of the extent */
	struct kiocb		*iocb;		/* iocb struct for AIO */
	int			result;		/* error value for AIO */
	atomic_t		count;		/* reference counter */
} ext4_io_end_t;

struct ext4_io_submit {
@@ -2651,14 +2650,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,

/* page-io.c */
extern int __init ext4_init_pageio(void);
extern void ext4_add_complete_io(ext4_io_end_t *io_end);
extern void ext4_exit_pageio(void);
extern void ext4_ioend_shutdown(struct inode *);
extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
extern int ext4_put_io_end(ext4_io_end_t *io_end);
extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
extern void ext4_io_submit_init(struct ext4_io_submit *io,
				struct writeback_control *wbc);
extern void ext4_end_io_work(struct work_struct *work);
extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
+38 −47
Original line number Diff line number Diff line
@@ -1487,10 +1487,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
	struct ext4_io_submit io_submit;

	BUG_ON(mpd->next_page <= mpd->first_page);
	ext4_io_submit_init(&io_submit, mpd->wbc);
	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
	if (!io_submit.io_end)
		return -ENOMEM;
	memset(&io_submit, 0, sizeof(io_submit));
	/*
	 * We need to start from the first_page to the next_page - 1
	 * to make sure we also write the mapped dirty buffer_heads.
@@ -1578,8 +1575,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
		pagevec_release(&pvec);
	}
	ext4_io_submit(&io_submit);
	/* Drop io_end reference we got from init */
	ext4_put_io_end_defer(io_submit.io_end);
	return ret;
}

@@ -2238,16 +2233,9 @@ static int ext4_writepage(struct page *page,
		 */
		return __ext4_journalled_writepage(page, len);

	ext4_io_submit_init(&io_submit, wbc);
	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
	if (!io_submit.io_end) {
		redirty_page_for_writepage(wbc, page);
		return -ENOMEM;
	}
	memset(&io_submit, 0, sizeof(io_submit));
	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
	ext4_io_submit(&io_submit);
	/* Drop io_end reference we got from init */
	ext4_put_io_end_defer(io_submit.io_end);
	return ret;
}

@@ -3078,13 +3066,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
	struct inode *inode = file_inode(iocb->ki_filp);
        ext4_io_end_t *io_end = iocb->private;

	/* if not async direct IO just return */
	if (!io_end) {
		inode_dio_done(inode);
		if (is_async)
			aio_complete(iocb, ret, 0);
		return;
	}
	/* if not async direct IO or dio with 0 bytes write, just return */
	if (!io_end || !size)
		goto out;

	ext_debug("ext4_end_io_dio(): io_end 0x%p "
		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3092,13 +3076,25 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
		  size);

	iocb->private = NULL;

	/* if not aio dio with unwritten extents, just free io and return */
	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
		ext4_free_io_end(io_end);
out:
		inode_dio_done(inode);
		if (is_async)
			aio_complete(iocb, ret, 0);
		return;
	}

	io_end->offset = offset;
	io_end->size = size;
	if (is_async) {
		io_end->iocb = iocb;
		io_end->result = ret;
	}
	ext4_put_io_end_defer(io_end);

	ext4_add_complete_io(io_end);
}

/*
@@ -3132,7 +3128,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
	get_block_t *get_block_func = NULL;
	int dio_flags = 0;
	loff_t final_size = offset + count;
	ext4_io_end_t *io_end = NULL;

	/* Use the old path for reads and writes beyond i_size. */
	if (rw != WRITE || final_size > inode->i_size)
@@ -3171,16 +3166,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
	iocb->private = NULL;
	ext4_inode_aio_set(inode, NULL);
	if (!is_sync_kiocb(iocb)) {
		io_end = ext4_init_io_end(inode, GFP_NOFS);
		ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
		if (!io_end) {
			ret = -ENOMEM;
			goto retake_lock;
		}
		io_end->flag |= EXT4_IO_END_DIRECT;
		/*
		 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
		 */
		iocb->private = ext4_get_io_end(io_end);
		iocb->private = io_end;
		/*
		 * we save the io structure for current async direct
		 * IO, so that later ext4_map_blocks() could flag the
@@ -3204,27 +3196,26 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
				   NULL,
				   dio_flags);

	/*
	 * Put our reference to io_end. This can free the io_end structure e.g.
	 * in sync IO case or in case of error. It can even perform extent
	 * conversion if all bios we submitted finished before we got here.
	 * Note that in that case iocb->private can be already set to NULL
	 * here.
	 */
	if (io_end) {
	if (iocb->private)
		ext4_inode_aio_set(inode, NULL);
		ext4_put_io_end(io_end);
	/*
		 * In case of error or no write ext4_end_io_dio() was not
		 * called so we have to put iocb's reference.
	 * The io_end structure takes a reference to the inode, that
	 * structure needs to be destroyed and the reference to the
	 * inode need to be dropped, when IO is complete, even with 0
	 * byte write, or failed.
	 *
	 * In the successful AIO DIO case, the io_end structure will
	 * be destroyed and the reference to the inode will be dropped
	 * after the end_io call back function is called.
	 *
	 * In the case there is 0 byte write, or error case, since VFS
	 * direct IO won't invoke the end_io call back function, we
	 * need to free the end_io structure here.
	 */
		if (ret <= 0 && ret != -EIOCBQUEUED) {
			WARN_ON(iocb->private != io_end);
			ext4_put_io_end(io_end);
	if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
		ext4_free_io_end(iocb->private);
		iocb->private = NULL;
		}
	}
	if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
	} else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
						EXT4_STATE_DIO_UNWRITTEN)) {
		int err;
		/*
+45 −76
Original line number Diff line number Diff line
@@ -61,28 +61,15 @@ void ext4_ioend_shutdown(struct inode *inode)
		cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
}

static void ext4_release_io_end(ext4_io_end_t *io_end)
void ext4_free_io_end(ext4_io_end_t *io)
{
	BUG_ON(!list_empty(&io_end->list));
	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
	BUG_ON(!io);
	BUG_ON(!list_empty(&io->list));
	BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);

	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
		wake_up_all(ext4_ioend_wq(io_end->inode));
	if (io_end->flag & EXT4_IO_END_DIRECT)
		inode_dio_done(io_end->inode);
	if (io_end->iocb)
		aio_complete(io_end->iocb, io_end->result, 0);
	kmem_cache_free(io_end_cachep, io_end);
}

static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
{
	struct inode *inode = io_end->inode;

	io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
	/* Wake up anyone waiting on unwritten extent conversion */
	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
		wake_up_all(ext4_ioend_wq(inode));
	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
		wake_up_all(ext4_ioend_wq(io->inode));
	kmem_cache_free(io_end_cachep, io);
}

/* check a range of space and convert unwritten extents to written. */
@@ -105,8 +92,13 @@ static int ext4_end_io(ext4_io_end_t *io)
			 "(inode %lu, offset %llu, size %zd, error %d)",
			 inode->i_ino, offset, size, ret);
	}
	ext4_clear_io_unwritten_flag(io);
	ext4_release_io_end(io);
	/* Wake up anyone waiting on unwritten extent conversion */
	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
		wake_up_all(ext4_ioend_wq(inode));
	if (io->flag & EXT4_IO_END_DIRECT)
		inode_dio_done(inode);
	if (io->iocb)
		aio_complete(io->iocb, io->result, 0);
	return ret;
}

@@ -137,7 +129,7 @@ static void dump_completed_IO(struct inode *inode)
}

/* Add the io_end to per-inode completed end_io list. */
static void ext4_add_complete_io(ext4_io_end_t *io_end)
void ext4_add_complete_io(ext4_io_end_t *io_end)
{
	struct ext4_inode_info *ei = EXT4_I(io_end->inode);
	struct workqueue_struct *wq;
@@ -174,6 +166,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
		err = ext4_end_io(io);
		if (unlikely(!ret && err))
			ret = err;
		io->flag &= ~EXT4_IO_END_UNWRITTEN;
		ext4_free_io_end(io);
	}
	return ret;
}
@@ -205,43 +199,10 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
		atomic_inc(&EXT4_I(inode)->i_ioend_count);
		io->inode = inode;
		INIT_LIST_HEAD(&io->list);
		atomic_set(&io->count, 1);
	}
	return io;
}

void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
	if (atomic_dec_and_test(&io_end->count)) {
		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
			ext4_release_io_end(io_end);
			return;
		}
		ext4_add_complete_io(io_end);
	}
}

int ext4_put_io_end(ext4_io_end_t *io_end)
{
	int err = 0;

	if (atomic_dec_and_test(&io_end->count)) {
		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
			err = ext4_convert_unwritten_extents(io_end->inode,
						io_end->offset, io_end->size);
			ext4_clear_io_unwritten_flag(io_end);
		}
		ext4_release_io_end(io_end);
	}
	return err;
}

ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
{
	atomic_inc(&io_end->count);
	return io_end;
}

/*
 * Print an buffer I/O error compatible with the fs/buffer.c.  This
 * provides compatibility with dmesg scrapers that look for a specific
@@ -324,7 +285,12 @@ static void ext4_end_bio(struct bio *bio, int error)
			     bi_sector >> (inode->i_blkbits - 9));
	}

	ext4_put_io_end_defer(io_end);
	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
		ext4_free_io_end(io_end);
		return;
	}

	ext4_add_complete_io(io_end);
}

void ext4_io_submit(struct ext4_io_submit *io)
@@ -338,37 +304,40 @@ void ext4_io_submit(struct ext4_io_submit *io)
		bio_put(io->io_bio);
	}
	io->io_bio = NULL;
}

void ext4_io_submit_init(struct ext4_io_submit *io,
			 struct writeback_control *wbc)
{
	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
	io->io_bio = NULL;
	io->io_op = 0;
	io->io_end = NULL;
}

static int io_submit_init_bio(struct ext4_io_submit *io,
static int io_submit_init(struct ext4_io_submit *io,
			  struct inode *inode,
			  struct writeback_control *wbc,
			  struct buffer_head *bh)
{
	ext4_io_end_t *io_end;
	struct page *page = bh->b_page;
	int nvecs = bio_get_nr_vecs(bh->b_bdev);
	struct bio *bio;

	io_end = ext4_init_io_end(inode, GFP_NOFS);
	if (!io_end)
		return -ENOMEM;
	bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio->bi_private = io->io_end = io_end;
	bio->bi_end_io = ext4_end_bio;
	bio->bi_private = ext4_get_io_end(io->io_end);
	if (!io->io_end->size)
		io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
				     + bh_offset(bh);

	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);

	io->io_bio = bio;
	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
	io->io_next_block = bh->b_blocknr;
	return 0;
}

static int io_submit_add_bh(struct ext4_io_submit *io,
			    struct inode *inode,
			    struct writeback_control *wbc,
			    struct buffer_head *bh)
{
	ext4_io_end_t *io_end;
@@ -379,18 +348,18 @@ submit_and_retry:
		ext4_io_submit(io);
	}
	if (io->io_bio == NULL) {
		ret = io_submit_init_bio(io, bh);
		ret = io_submit_init(io, inode, wbc, bh);
		if (ret)
			return ret;
	}
	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
	if (ret != bh->b_size)
		goto submit_and_retry;
	io_end = io->io_end;
	if (test_clear_buffer_uninit(bh))
		ext4_set_io_unwritten_flag(inode, io_end);
	io_end->size += bh->b_size;
	io->io_end->size += bh->b_size;
	io->io_next_block++;
	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
	if (ret != bh->b_size)
		goto submit_and_retry;
	return 0;
}

@@ -462,7 +431,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
	do {
		if (!buffer_async_write(bh))
			continue;
		ret = io_submit_add_bh(io, inode, bh);
		ret = io_submit_add_bh(io, inode, wbc, bh);
		if (ret) {
			/*
			 * We only get here on ENOMEM.  Not much else