Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cc714660 authored by Darrick J. Wong
Browse files

xfs: add dedupe range vfs function



Define a VFS function which allows userspace to request that the
kernel reflink a range of blocks between two files if the ranges'
contents match.  The function fits the new VFS ioctl that standardizes
the checking for the btrfs EXTENT SAME ioctl.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 9fe26045
Loading
Loading
Loading
Loading
+44 −4
Original line number Original line Diff line number Diff line
@@ -1010,7 +1010,8 @@ xfs_file_share_range(
	loff_t		pos_in,
	loff_t		pos_in,
	struct file	*file_out,
	struct file	*file_out,
	loff_t		pos_out,
	loff_t		pos_out,
	u64		len)
	u64		len,
	bool		is_dedupe)
{
{
	struct inode	*inode_in;
	struct inode	*inode_in;
	struct inode	*inode_out;
	struct inode	*inode_out;
@@ -1019,6 +1020,7 @@ xfs_file_share_range(
	loff_t		isize;
	loff_t		isize;
	int		same_inode;
	int		same_inode;
	loff_t		blen;
	loff_t		blen;
	unsigned int	flags = 0;


	inode_in = file_inode(file_in);
	inode_in = file_inode(file_in);
	inode_out = file_inode(file_out);
	inode_out = file_inode(file_out);
@@ -1056,6 +1058,15 @@ xfs_file_share_range(
	    pos_in + len > isize)
	    pos_in + len > isize)
		return -EINVAL;
		return -EINVAL;


	/* Don't allow dedupe past EOF in the dest file */
	if (is_dedupe) {
		loff_t	disize;

		disize = i_size_read(inode_out);
		if (pos_out >= disize || pos_out + len > disize)
			return -EINVAL;
	}

	/* If we're linking to EOF, continue to the block boundary. */
	/* If we're linking to EOF, continue to the block boundary. */
	if (pos_in + len == isize)
	if (pos_in + len == isize)
		blen = ALIGN(isize, bs) - pos_in;
		blen = ALIGN(isize, bs) - pos_in;
@@ -1079,8 +1090,10 @@ xfs_file_share_range(
	if (ret)
	if (ret)
		goto out_unlock;
		goto out_unlock;


	if (is_dedupe)
		flags |= XFS_REFLINK_DEDUPE;
	ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
	ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
			pos_out, len);
			pos_out, len, flags);
	if (ret < 0)
	if (ret < 0)
		goto out_unlock;
		goto out_unlock;


@@ -1100,7 +1113,7 @@ xfs_file_copy_range(
	int		error;
	int		error;


	error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
	error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
				     len);
				     len, false);
	if (error)
	if (error)
		return error;
		return error;
	return len;
	return len;
@@ -1115,7 +1128,33 @@ xfs_file_clone_range(
	u64		len)
	u64		len)
{
{
	return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
	return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
				     len);
				     len, false);
}

#define XFS_MAX_DEDUPE_LEN	(16 * 1024 * 1024)

/*
 * Dedupe entry point wired into the file operations table.
 *
 * Clamps the requested length to XFS_MAX_DEDUPE_LEN so that a single call
 * does a bounded amount of work, then asks xfs_file_share_range() to share
 * the range in dedupe mode (is_dedupe == true).
 *
 * Returns the (possibly clamped) length on success; any nonzero result from
 * xfs_file_share_range() is passed back to the caller unchanged.
 */
STATIC ssize_t
xfs_file_dedupe_range(
	struct file	*src_file,
	u64		loff,
	u64		len,
	struct file	*dst_file,
	u64		dst_loff)
{
	u64		count = len;
	int		ret;

	/* Never process more than the per-call cap in one go. */
	if (count > XFS_MAX_DEDUPE_LEN)
		count = XFS_MAX_DEDUPE_LEN;

	ret = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
				   count, true);
	if (ret != 0)
		return ret;

	return count;
}
}


STATIC int
STATIC int
@@ -1779,6 +1818,7 @@ const struct file_operations xfs_file_operations = {
	.fallocate	= xfs_file_fallocate,
	.fallocate	= xfs_file_fallocate,
	.copy_file_range = xfs_file_copy_range,
	.copy_file_range = xfs_file_copy_range,
	.clone_file_range = xfs_file_clone_range,
	.clone_file_range = xfs_file_clone_range,
	.dedupe_file_range = xfs_file_dedupe_range,
};
};


const struct file_operations xfs_dir_file_operations = {
const struct file_operations xfs_dir_file_operations = {
+126 −1
Original line number Original line Diff line number Diff line
@@ -1149,6 +1149,111 @@ xfs_reflink_remap_blocks(
	return error;
	return error;
}
}


/*
 * Read a page's worth of file data into the page cache.  Return the page
 * locked.
 */
static struct page *
xfs_get_page(
	struct inode	*inode,
	xfs_off_t	offset)
{
	struct address_space	*mapping;
	struct page		*page;
	pgoff_t			n;

	n = offset >> PAGE_SHIFT;
	mapping = inode->i_mapping;
	page = read_mapping_page(mapping, n, NULL);
	if (IS_ERR(page))
		return page;
	if (!PageUptodate(page)) {
		put_page(page);
		return ERR_PTR(-EIO);
	}
	lock_page(page);
	return page;
}

/*
 * Compare extents of two files to see if they are the same.
 */
static int
xfs_compare_extents(
	struct inode	*src,
	xfs_off_t	srcoff,
	struct inode	*dest,
	xfs_off_t	destoff,
	xfs_off_t	len,
	bool		*is_same)
{
	xfs_off_t	src_poff;
	xfs_off_t	dest_poff;
	void		*src_addr;
	void		*dest_addr;
	struct page	*src_page;
	struct page	*dest_page;
	xfs_off_t	cmp_len;
	bool		same;
	int		error;

	error = -EINVAL;
	same = true;
	while (len) {
		src_poff = srcoff & (PAGE_SIZE - 1);
		dest_poff = destoff & (PAGE_SIZE - 1);
		cmp_len = min(PAGE_SIZE - src_poff,
			      PAGE_SIZE - dest_poff);
		cmp_len = min(cmp_len, len);
		ASSERT(cmp_len > 0);

		trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len,
				XFS_I(dest), destoff);

		src_page = xfs_get_page(src, srcoff);
		if (IS_ERR(src_page)) {
			error = PTR_ERR(src_page);
			goto out_error;
		}
		dest_page = xfs_get_page(dest, destoff);
		if (IS_ERR(dest_page)) {
			error = PTR_ERR(dest_page);
			unlock_page(src_page);
			put_page(src_page);
			goto out_error;
		}
		src_addr = kmap_atomic(src_page);
		dest_addr = kmap_atomic(dest_page);

		flush_dcache_page(src_page);
		flush_dcache_page(dest_page);

		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
			same = false;

		kunmap_atomic(dest_addr);
		kunmap_atomic(src_addr);
		unlock_page(dest_page);
		unlock_page(src_page);
		put_page(dest_page);
		put_page(src_page);

		if (!same)
			break;

		srcoff += cmp_len;
		destoff += cmp_len;
		len -= cmp_len;
	}

	*is_same = same;
	return 0;

out_error:
	trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_);
	return error;
}

/*
/*
 * Link a range of blocks from one file to another.
 * Link a range of blocks from one file to another.
 */
 */
@@ -1158,12 +1263,14 @@ xfs_reflink_remap_range(
	xfs_off_t		srcoff,
	xfs_off_t		srcoff,
	struct xfs_inode	*dest,
	struct xfs_inode	*dest,
	xfs_off_t		destoff,
	xfs_off_t		destoff,
	xfs_off_t		len)
	xfs_off_t		len,
	unsigned int		flags)
{
{
	struct xfs_mount	*mp = src->i_mount;
	struct xfs_mount	*mp = src->i_mount;
	xfs_fileoff_t		sfsbno, dfsbno;
	xfs_fileoff_t		sfsbno, dfsbno;
	xfs_filblks_t		fsblen;
	xfs_filblks_t		fsblen;
	int			error;
	int			error;
	bool			is_same;


	if (!xfs_sb_version_hasreflink(&mp->m_sb))
	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return -EOPNOTSUPP;
		return -EOPNOTSUPP;
@@ -1175,6 +1282,9 @@ xfs_reflink_remap_range(
	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
		return -EINVAL;
		return -EINVAL;


	if (flags & ~XFS_REFLINK_ALL)
		return -EINVAL;

	trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
	trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);


	/* Lock both files against IO */
	/* Lock both files against IO */
@@ -1186,6 +1296,21 @@ xfs_reflink_remap_range(
		xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
		xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
	}
	}


	/*
	 * Check that the extents are the same.
	 */
	if (flags & XFS_REFLINK_DEDUPE) {
		is_same = false;
		error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
				destoff, len, &is_same);
		if (error)
			goto out_error;
		if (!is_same) {
			error = -EBADE;
			goto out_error;
		}
	}

	error = xfs_reflink_set_inode_flag(src, dest);
	error = xfs_reflink_set_inode_flag(src, dest);
	if (error)
	if (error)
		goto out_error;
		goto out_error;
+4 −1
Original line number Original line Diff line number Diff line
@@ -43,7 +43,10 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
		xfs_off_t count);
		xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
#define XFS_REFLINK_DEDUPE	1	/* only reflink if contents match */
#define XFS_REFLINK_ALL		(XFS_REFLINK_DEDUPE)
extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
		struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len);
		struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
		unsigned int flags);


#endif /* __XFS_REFLINK_H */
#endif /* __XFS_REFLINK_H */