Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b47ec80b authored by Dave Chinner's avatar Dave Chinner
Browse files

Merge branch 'xfs-4.8-split-dax-dio' into for-next

parents bbfeb614 16d4d435
Loading
Loading
Loading
Loading
+5 −19
Original line number Diff line number Diff line
@@ -1303,7 +1303,7 @@ xfs_get_blocks_dax_fault(
 * whereas if we have flags set we will always be called in task context
 * (i.e. from a workqueue).
 */
STATIC int
int
xfs_end_io_direct_write(
	struct kiocb		*iocb,
	loff_t			offset,
@@ -1374,24 +1374,10 @@ xfs_vm_direct_IO(
	struct kiocb		*iocb,
	struct iov_iter		*iter)
{
	struct inode		*inode = iocb->ki_filp->f_mapping->host;
	dio_iodone_t		*endio = NULL;
	int			flags = 0;
	struct block_device	*bdev;

	if (iov_iter_rw(iter) == WRITE) {
		endio = xfs_end_io_direct_write;
		flags = DIO_ASYNC_EXTEND;
	}

	if (IS_DAX(inode)) {
		return dax_do_io(iocb, inode, iter,
				 xfs_get_blocks_direct, endio, 0);
	}

	bdev = xfs_find_bdev_for_inode(inode);
	return  __blockdev_direct_IO(iocb, inode, bdev, iter,
			xfs_get_blocks_direct, endio, NULL, flags);
	/*
	 * We just need the method present so that open/fcntl allow direct I/O.
	 */
	return -EINVAL;
}

STATIC sector_t
+3 −0
Original line number Diff line number Diff line
@@ -60,6 +60,9 @@ int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
int	xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
			         struct buffer_head *map_bh, int create);

int	xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
		ssize_t size, void *private);

extern void xfs_count_page_state(struct page *, int *, int *);
extern struct block_device *xfs_find_bdev_for_inode(struct inode *);

+176 −56
Original line number Diff line number Diff line
@@ -239,48 +239,35 @@ xfs_file_fsync(
}

STATIC ssize_t
xfs_file_read_iter(
xfs_file_dio_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct address_space	*mapping = iocb->ki_filp->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	size_t			size = iov_iter_count(to);
	loff_t			isize = i_size_read(inode);
	size_t			count = iov_iter_count(to);
	struct iov_iter		data;
	struct xfs_buftarg	*target;
	ssize_t			ret = 0;
	int			ioflags = 0;
	xfs_fsize_t		n;
	loff_t			pos = iocb->ki_pos;

	XFS_STATS_INC(mp, xs_read_calls);
	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);

	if (unlikely(iocb->ki_flags & IOCB_DIRECT))
		ioflags |= XFS_IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= XFS_IO_INVIS;
	if (!count)
		return 0; /* skip atime */

	if (XFS_IS_REALTIME_INODE(ip))
		target = ip->i_mount->m_rtdev_targp;
	else
		target = ip->i_mount->m_ddev_targp;

	if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
	/* DIO must be aligned to device logical sector size */
		if ((pos | size) & target->bt_logical_sectormask) {
			if (pos == i_size_read(inode))
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
		if (iocb->ki_pos == isize)
			return 0;
		return -EINVAL;
	}
	}

	n = mp->m_super->s_maxbytes - pos;
	if (n <= 0 || size == 0)
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Locking is a bit tricky here. If we take an exclusive lock for direct
@@ -293,7 +280,7 @@ xfs_file_read_iter(
	 * serialisation.
	 */
	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
	if (mapping->nrpages) {
		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

@@ -308,8 +295,8 @@ xfs_file_read_iter(
		 * flush and reduce the chances of repeated iolock cycles going
		 * forward.
		 */
		if (inode->i_mapping->nrpages) {
			ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
		if (mapping->nrpages) {
			ret = filemap_write_and_wait(mapping);
			if (ret) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return ret;
@@ -320,20 +307,95 @@ xfs_file_read_iter(
			 * we fail to invalidate a page, but this should never
			 * happen on XFS. Warn if it does fail.
			 */
			ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
			ret = invalidate_inode_pages2(mapping);
			WARN_ON_ONCE(ret);
			ret = 0;
		}
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
	}

	trace_xfs_file_read(ip, size, pos, ioflags);
	data = *to;
	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
			xfs_get_blocks_direct, NULL, NULL, 0);
	if (ret > 0) {
		iocb->ki_pos += ret;
		iov_iter_advance(to, ret);
	}
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);

	file_accessed(iocb->ki_filp);
	return ret;
}

/*
 * Read path for DAX inodes.
 *
 * Emits the dax-read tracepoint, then returns 0 straight away for a
 * zero-length request (before any lock is taken and without touching
 * atime).  Otherwise the I/O is issued through dax_do_io() on a private
 * copy of the iterator while holding the IO lock shared; the caller's
 * iterator and iocb->ki_pos are only advanced when a positive byte count
 * comes back.
 *
 * Returns whatever dax_do_io() returned.
 */
STATIC ssize_t
xfs_file_dax_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct file		*filp = iocb->ki_filp;
	struct inode		*inode = filp->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	size_t			count = iov_iter_count(to);
	struct iov_iter		data;
	ssize_t			nread;

	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);

	if (count == 0)
		return 0; /* skip atime */

	/* Work on a copy so @to is untouched unless bytes were transferred. */
	data = *to;

	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	nread = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
	if (nread > 0) {
		iov_iter_advance(to, nread);
		iocb->ki_pos += nread;
	}
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);

	file_accessed(filp);
	return nread;
}

/*
 * Buffered read path.
 *
 * Emits the buffered-read tracepoint and then calls
 * generic_file_read_iter() with the IO lock held shared for the duration
 * of the call.  The result of generic_file_read_iter() is returned
 * unchanged.
 */
STATIC ssize_t
xfs_file_buffered_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			nread;

	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);

	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	nread = generic_file_read_iter(iocb, to);
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);

	return nread;
}

/*
 * Top-level read entry point: account the call, reject I/O on a
 * shut-down filesystem, then dispatch to the DAX, direct or buffered read
 * helper.
 *
 * DAX is checked first, so a DAX inode always takes the DAX path
 * regardless of IOCB_DIRECT.  A positive return value is added to the
 * xs_read_bytes statistic.
 *
 * No IO lock is taken or released here: this function never acquires one,
 * and the DAX and buffered helpers lock and unlock around their own I/O.
 *
 * Returns -EIO if the filesystem has been forcibly shut down, otherwise
 * the value returned by the selected helper.
 */
STATIC ssize_t
xfs_file_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
	ssize_t			ret = 0;

	XFS_STATS_INC(mp, xs_read_calls);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	if (IS_DAX(inode))
		ret = xfs_file_dax_read(iocb, to);
	else if (iocb->ki_flags & IOCB_DIRECT)
		ret = xfs_file_dio_aio_read(iocb, to);
	else
		ret = xfs_file_buffered_aio_read(iocb, to);

	if (ret > 0)
		XFS_STATS_ADD(mp, xs_read_bytes, ret);

	return ret;
}

@@ -346,18 +408,14 @@ xfs_file_splice_read(
	unsigned int		flags)
{
	struct xfs_inode	*ip = XFS_I(infilp->f_mapping->host);
	int			ioflags = 0;
	ssize_t			ret;

	XFS_STATS_INC(ip->i_mount, xs_read_calls);

	if (infilp->f_mode & FMODE_NOCMTIME)
		ioflags |= XFS_IO_INVIS;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
	trace_xfs_file_splice_read(ip, count, *ppos);

	/*
	 * DAX inodes cannot use the page cache for splice, so we have to push
@@ -553,8 +611,7 @@ xfs_file_dio_aio_write(
					mp->m_rtdev_targp : mp->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if (!IS_DAX(inode) &&
	    ((iocb->ki_pos | count) & target->bt_logical_sectormask))
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
		return -EINVAL;

	/* "unaligned" here means not aligned to a filesystem block */
@@ -593,7 +650,7 @@ xfs_file_dio_aio_write(
	end = iocb->ki_pos + count - 1;

	/*
	 * See xfs_file_read_iter() for why we do a full-file flush here.
	 * See xfs_file_dio_aio_read() for why we do a full-file flush here.
	 */
	if (mapping->nrpages) {
		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
@@ -620,10 +677,12 @@ xfs_file_dio_aio_write(
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);

	data = *from;
	ret = mapping->a_ops->direct_IO(iocb, &data);
	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
			xfs_get_blocks_direct, xfs_end_io_direct_write,
			NULL, DIO_ASYNC_EXTEND);

	/* see generic_file_direct_write() for why this is necessary */
	if (mapping->nrpages) {
@@ -640,10 +699,70 @@ xfs_file_dio_aio_write(
	xfs_rw_iunlock(ip, iolock);

	/*
	 * No fallback to buffered IO on errors for XFS. DAX can result in
	 * partial writes, but direct IO will either complete fully or fail.
	 * No fallback to buffered IO on errors for XFS, direct IO will either
	 * complete fully or fail.
	 */
	ASSERT(ret < 0 || ret == count);
	return ret;
}

/*
 * Write path for DAX inodes.
 *
 * Lock mode: the IO lock is taken exclusive when the write is not aligned
 * to a filesystem block (start or end offset has bits set in
 * mp->m_blockmask) or when the mapping still has page cache pages;
 * otherwise it is taken shared.  xfs_file_aio_write_checks() is handed a
 * pointer to the lock mode, and whatever mode it leaves in @iolock is the
 * one released on exit.
 *
 * Once any page cache has been invalidated, an exclusive lock that was not
 * needed for an unaligned write is demoted to shared before the I/O is
 * issued.  The I/O runs through dax_do_io() on a private copy of the
 * iterator; the caller's iterator and iocb->ki_pos are advanced only when
 * a positive byte count comes back.
 *
 * Returns the non-zero result of xfs_file_aio_write_checks() if it
 * reports one, otherwise the value returned by dax_do_io().
 */
STATIC ssize_t
xfs_file_dax_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct address_space	*mapping = iocb->ki_filp->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	struct iov_iter		data;

	/* "unaligned" here means not aligned to a filesystem block */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
		unaligned_io = 1;
		iolock = XFS_IOLOCK_EXCL;
	} else if (mapping->nrpages) {
		iolock = XFS_IOLOCK_EXCL;
	} else {
		iolock = XFS_IOLOCK_SHARED;
	}
	xfs_rw_ilock(ip, iolock);

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	/*
	 * Yes, even DAX files can have page cache attached to them:  A zeroed
	 * page is inserted into the pagecache when we have to serve a write
	 * fault on a hole.  It should never be dirtied and can simply be
	 * dropped from the pagecache once we get real data for the page.
	 */
	if (mapping->nrpages) {
		/* ret is overwritten by dax_do_io() below; failure only warns. */
		ret = invalidate_inode_pages2(mapping);
		WARN_ON_ONCE(ret);
	}

	/* Exclusive was only needed for the invalidation above; relax it. */
	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);

	/* Work on a copy so @from is untouched unless bytes were written. */
	data = *from;
	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
			xfs_end_io_direct_write, 0);
	if (ret > 0) {
		iocb->ki_pos += ret;
		iov_iter_advance(from, ret);
	}
out:
	xfs_rw_iunlock(ip, iolock);
	return ret;
}

@@ -670,8 +789,7 @@ xfs_file_buffered_aio_write(
	current->backing_dev_info = inode_to_bdi(inode);

write_retry:
	trace_xfs_file_buffered_write(ip, iov_iter_count(from),
				      iocb->ki_pos, 0);
	trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
	if (likely(ret >= 0))
		iocb->ki_pos += ret;
@@ -726,7 +844,9 @@ xfs_file_write_iter(
	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
	if (IS_DAX(inode))
		ret = xfs_file_dax_write(iocb, from);
	else if (iocb->ki_flags & IOCB_DIRECT)
		ret = xfs_file_dio_aio_write(iocb, from);
	else
		ret = xfs_file_buffered_aio_write(iocb, from);
+0 −10
Original line number Diff line number Diff line
@@ -473,14 +473,4 @@ do { \

extern struct kmem_zone	*xfs_inode_zone;

/*
 * Flags for read/write calls
 */
#define XFS_IO_ISDIRECT	0x00001		/* bypass page cache */
#define XFS_IO_INVIS	0x00002		/* don't update inode timestamps */

#define XFS_IO_FLAGS \
	{ XFS_IO_ISDIRECT,	"DIRECT" }, \
	{ XFS_IO_INVIS,		"INVIS"}

#endif	/* __XFS_INODE_H__ */
+8 −14
Original line number Diff line number Diff line
@@ -595,13 +595,12 @@ xfs_attrmulti_by_handle(

int
xfs_ioc_space(
	struct xfs_inode	*ip,
	struct inode		*inode,
	struct file		*filp,
	int			ioflags,
	unsigned int		cmd,
	xfs_flock64_t		*bf)
{
	struct inode		*inode = file_inode(filp);
	struct xfs_inode	*ip = XFS_I(inode);
	struct iattr		iattr;
	enum xfs_prealloc_flags	flags = 0;
	uint			iolock = XFS_IOLOCK_EXCL;
@@ -626,7 +625,7 @@ xfs_ioc_space(

	if (filp->f_flags & O_DSYNC)
		flags |= XFS_PREALLOC_SYNC;
	if (ioflags & XFS_IO_INVIS)
	if (filp->f_mode & FMODE_NOCMTIME)
		flags |= XFS_PREALLOC_INVISIBLE;

	error = mnt_want_write_file(filp);
@@ -1464,8 +1463,7 @@ xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)

STATIC int
xfs_ioc_getbmap(
	struct xfs_inode	*ip,
	int			ioflags,
	struct file		*file,
	unsigned int		cmd,
	void			__user *arg)
{
@@ -1479,10 +1477,10 @@ xfs_ioc_getbmap(
		return -EINVAL;

	bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
	if (ioflags & XFS_IO_INVIS)
	if (file->f_mode & FMODE_NOCMTIME)
		bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;

	error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
	error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, xfs_getbmap_format,
			    (__force struct getbmap *)arg+1);
	if (error)
		return error;
@@ -1630,12 +1628,8 @@ xfs_file_ioctl(
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	void			__user *arg = (void __user *)p;
	int			ioflags = 0;
	int			error;

	if (filp->f_mode & FMODE_NOCMTIME)
		ioflags |= XFS_IO_INVIS;

	trace_xfs_file_ioctl(ip);

	switch (cmd) {
@@ -1654,7 +1648,7 @@ xfs_file_ioctl(

		if (copy_from_user(&bf, arg, sizeof(bf)))
			return -EFAULT;
		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
		return xfs_ioc_space(filp, cmd, &bf);
	}
	case XFS_IOC_DIOINFO: {
		struct dioattr	da;
@@ -1713,7 +1707,7 @@ xfs_file_ioctl(

	case XFS_IOC_GETBMAP:
	case XFS_IOC_GETBMAPA:
		return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
		return xfs_ioc_getbmap(filp, cmd, arg);

	case XFS_IOC_GETBMAPX:
		return xfs_ioc_getbmapx(ip, arg);
Loading