Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0bd2d5ec authored by Jan Kara, committed by Theodore Ts'o
Browse files

ext4: rip out DAX handling from direct IO path



Reads and writes for DAX inodes should no longer end up in direct IO
code. Rip out the support and add a warning.

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent e2ae766c
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -2452,8 +2452,6 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
			     struct buffer_head *bh_result, int create);
int ext4_dax_get_block(struct inode *inode, sector_t iblock,
		       struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
		   struct buffer_head *bh_result, int create);
int ext4_dio_get_block(struct inode *inode, sector_t iblock,
+15 −82
Original line number Diff line number Diff line
@@ -3280,46 +3280,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
}

#ifdef CONFIG_FS_DAX
/*
 * Get block function for DAX IO and mmap faults. It takes care of converting
 * unwritten extents to written ones and initializes new / converted blocks
 * to zeros.
 *
 * @inode:     inode the mapping request is for
 * @iblock:    block index handed unchanged to the mapping helpers
 * @bh_result: buffer head passed to the helpers; its unwritten state is
 *             inspected and its BH_New bit is cleared before returning
 * @create:    zero for a plain lookup, non-zero to go through the
 *             allocate / convert path below
 *
 * Returns whatever _ext4_get_block() returns when @create is zero.
 * Otherwise returns 0 on success, or the negative value reported by
 * ext4_get_block_trans() if either of its calls fails.
 */
int ext4_dax_get_block(struct inode *inode, sector_t iblock,
		       struct buffer_head *bh_result, int create)
{
	int ret;

	ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create);
	/* Nothing to create: defer entirely to _ext4_get_block(). */
	if (!create)
		return _ext4_get_block(inode, iblock, bh_result, 0);

	/*
	 * First pass: request the mapping with PRE_IO | CREATE_ZERO. A
	 * negative result is propagated to the caller unchanged.
	 */
	ret = ext4_get_block_trans(inode, iblock, bh_result,
				   EXT4_GET_BLOCKS_PRE_IO |
				   EXT4_GET_BLOCKS_CREATE_ZERO);
	if (ret < 0)
		return ret;

	/* Second pass is needed only if the result is still unwritten. */
	if (buffer_unwritten(bh_result)) {
		/*
		 * We are protected by i_mmap_sem or i_mutex so we know block
		 * cannot go away from under us even though we dropped
		 * i_data_sem. Convert extent to written and write zeros there.
		 */
		ret = ext4_get_block_trans(inode, iblock, bh_result,
					   EXT4_GET_BLOCKS_CONVERT |
					   EXT4_GET_BLOCKS_CREATE_ZERO);
		if (ret < 0)
			return ret;
	}
	/*
	 * At least for now we have to clear BH_New so that DAX code
	 * doesn't attempt to zero blocks again in a racy way.
	 */
	clear_buffer_new(bh_result);
	/* Non-negative helper results are collapsed to plain success. */
	return 0;
}

static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
			    unsigned flags, struct iomap *iomap)
{
@@ -3473,14 +3433,6 @@ struct iomap_ops ext4_iomap_ops = {
	.iomap_end		= ext4_iomap_end,
};

#else
/*
 * Fallback definition for the #else branch, i.e. when the DAX variant of
 * this function is not compiled in. It only exists so the symbol is
 * defined; it is never expected to be called, and reaching it hits BUG().
 * All four parameters are ignored.
 */
int ext4_dax_get_block(struct inode *inode, sector_t iblock,
		       struct buffer_head *bh_result, int create)
{
	BUG();
	/* Presumably present only to satisfy the int return type. */
	return 0;
}
#endif

static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@ -3602,19 +3554,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
	iocb->private = NULL;
	if (overwrite)
		get_block_func = ext4_dio_get_block_overwrite;
	else if (IS_DAX(inode)) {
		/*
		 * We can avoid zeroing for aligned DAX writes beyond EOF. Other
		 * writes need zeroing either because they can race with page
		 * faults or because they use partial blocks.
		 */
		if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
		    ext4_aligned_io(inode, offset, count))
			get_block_func = ext4_dio_get_block;
		else
			get_block_func = ext4_dax_get_block;
		dio_flags = DIO_LOCKING;
	} else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
	else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
		   round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
		get_block_func = ext4_dio_get_block;
		dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
@@ -3628,14 +3568,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
#ifdef CONFIG_EXT4_FS_ENCRYPTION
	BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
#endif
	if (IS_DAX(inode)) {
		ret = dax_do_io(iocb, inode, iter, get_block_func,
				ext4_end_io_dio, dio_flags);
	} else
		ret = __blockdev_direct_IO(iocb, inode,
					   inode->i_sb->s_bdev, iter,
					   get_block_func,
					   ext4_end_io_dio, NULL, dio_flags);
	ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
				   get_block_func, ext4_end_io_dio, NULL,
				   dio_flags);

	if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
						EXT4_STATE_DIO_UNWRITTEN)) {
@@ -3704,6 +3639,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	size_t count = iov_iter_count(iter);
	ssize_t ret;

	/*
@@ -3712,19 +3648,12 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
	 * we are protected against page writeback as well.
	 */
	inode_lock_shared(inode);
	if (IS_DAX(inode)) {
		ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
	} else {
		size_t count = iov_iter_count(iter);

	ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
					   iocb->ki_pos + count);
	if (ret)
		goto out_unlock;
	ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
					   iter, ext4_dio_get_block,
					   NULL, NULL, 0);
	}
				   iter, ext4_dio_get_block, NULL, NULL, 0);
out_unlock:
	inode_unlock_shared(inode);
	return ret;
@@ -3753,6 +3682,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
	if (ext4_has_inline_data(inode))
		return 0;

	/* DAX uses iomap path now */
	if (WARN_ON_ONCE(IS_DAX(inode)))
		return 0;

	trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
	if (iov_iter_rw(iter) == READ)
		ret = ext4_direct_IO_read(iocb, iter);