Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c2aa1a44 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'xfs-4.20-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull vfs dedup fixes from Dave Chinner:
 "This reworks the vfs data cloning infrastructure.

  We discovered many issues with these interfaces late in the 4.19 cycle
  - the worst of them (data corruption, setuid stripping) were fixed for
  XFS in 4.19-rc8, but a larger rework of the infrastructure fixing all
  the problems was needed. That rework is the contents of this pull
  request.

  Rework the vfs_clone_file_range and vfs_dedupe_file_range
  infrastructure to use a common .remap_file_range method and supply
  generic bounds and sanity checking functions that are shared with the
  data write path. The current VFS infrastructure has problems with
  rlimit, LFS file sizes, file time stamps, maximum filesystem file
  sizes, stripping setuid bits, etc and so they are addressed in these
  commits.

  We also introduce the ability for the ->remap_file_range methods to
  return short clones so that clones for vfs_copy_file_range() don't get
  rejected if the entire range can't be cloned. It also allows
  filesystems to silently skip deduplication of partial EOF blocks if
  they are not capable of doing so without requiring errors to be thrown
  to userspace.

  Existing filesystems are converted to use the new remap_file_range
  method, and both XFS and ocfs2 are modified to make use of the new
  generic checking infrastructure"

* tag 'xfs-4.20-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (28 commits)
  xfs: remove [cm]time update from reflink calls
  xfs: remove xfs_reflink_remap_range
  xfs: remove redundant remap partial EOF block checks
  xfs: support returning partial reflink results
  xfs: clean up xfs_reflink_remap_blocks call site
  xfs: fix pagecache truncation prior to reflink
  ocfs2: remove ocfs2_reflink_remap_range
  ocfs2: support partial clone range and dedupe range
  ocfs2: fix pagecache truncation prior to reflink
  ocfs2: truncate page cache for clone destination file before remapping
  vfs: clean up generic_remap_file_range_prep return value
  vfs: hide file range comparison function
  vfs: enable remap callers that can handle short operations
  vfs: plumb remap flags through the vfs dedupe functions
  vfs: plumb remap flags through the vfs clone functions
  vfs: make remap_file_range functions take and return bytes completed
  vfs: remap helper should update destination inode metadata
  vfs: pass remap flags to generic_remap_checks
  vfs: pass remap flags to generic_remap_file_range_prep
  vfs: combine the clone and dedupe into a single remap_file_range
  ...
parents b69f9e17 bf4a1fcf
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -623,6 +623,11 @@ in your dentry operations instead.
	On success you get a new struct file sharing the mount/dentry with the
	original, on failure - ERR_PTR().
--
[mandatory]
	->clone_file_range() and ->dedupe_file_range() have been replaced with
	->remap_file_range().  See Documentation/filesystems/vfs.txt for more
	information.
--
[recommended]
	->lookup() instances doing an equivalent of
		if (IS_ERR(inode))
+15 −7
Original line number Diff line number Diff line
@@ -883,8 +883,9 @@ struct file_operations {
	unsigned (*mmap_capabilities)(struct file *);
#endif
	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
};

@@ -960,11 +961,18 @@ otherwise noted.

  copy_file_range: called by the copy_file_range(2) system call.

  clone_file_range: called by the ioctl(2) system call for FICLONERANGE and
	FICLONE commands.

  dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
	command.
  remap_file_range: called by the ioctl(2) system call for the FICLONERANGE,
	FICLONE and FIDEDUPERANGE commands to remap file ranges.  An
	implementation should remap len bytes at pos_in of the source file into
	the dest file at pos_out.  Implementations must handle callers passing
	in len == 0; this means "remap to the end of the source file".  The
	return value should be the number of bytes remapped, or the usual
	negative error code if errors occurred before any bytes were remapped.
	The remap_flags parameter accepts REMAP_FILE_* flags.  If
	REMAP_FILE_DEDUP is set then the implementation must only remap if the
	requested file ranges have identical contents.  If REMAP_FILE_CAN_SHORTEN is
	set, the caller is ok with the implementation shortening the request
	length to satisfy alignment or EOF requirements (or any other reason).

  fadvise: possibly called by the fadvise64() system call.

+3 −5
Original line number Diff line number Diff line
@@ -3201,9 +3201,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
				struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
			       struct btrfs_ioctl_balance_args *bargs);
int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
			    struct file *dst_file, loff_t dst_loff,
			    u64 olen);

/* file.c */
int __init btrfs_auto_defrag_init(void);
@@ -3233,8 +3230,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
		      size_t num_pages, loff_t pos, size_t write_bytes,
		      struct extent_state **cached);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
			   struct file *file_out, loff_t pos_out, u64 len);
loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
			      struct file *file_out, loff_t pos_out,
			      loff_t len, unsigned int remap_flags);

/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
+1 −2
Original line number Diff line number Diff line
@@ -3298,8 +3298,7 @@ const struct file_operations btrfs_file_operations = {
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.clone_file_range = btrfs_clone_file_range,
	.dedupe_file_range = btrfs_dedupe_file_range,
	.remap_file_range = btrfs_remap_file_range,
};

void __cold btrfs_auto_defrag_exit(void)
+27 −23
Original line number Diff line number Diff line
@@ -3629,26 +3629,6 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
	return ret;
}

/*
 * Dedupe entry point: forwards a (source file, offset) / (destination file,
 * offset) pair and a length to btrfs_extent_same().
 *
 * @src_file: file providing the source range
 * @src_loff: offset of the range in the source file
 * @dst_file: file providing the destination range
 * @dst_loff: offset of the range in the destination file
 * @olen:     length passed through to btrfs_extent_same()
 *
 * Returns -EINVAL if the source filesystem's block size is smaller than
 * PAGE_SIZE; otherwise returns whatever btrfs_extent_same() returns.
 */
int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
			    struct file *dst_file, loff_t dst_loff,
			    u64 olen)
{
	struct inode *src = file_inode(src_file);
	struct inode *dst = file_inode(dst_file);
	/* Block size is read from the superblock of the source inode's fs. */
	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;

	/* Unsupported configuration: warn once, then refuse the request. */
	if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
		/*
		 * Btrfs does not support blocksize < page_size. As a
		 * result, btrfs_cmp_data() won't correctly handle
		 * this situation without an update.
		 */
		return -EINVAL;
	}

	/* Note the argument order: the length sits between the two inodes. */
	return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
}

static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
				     struct inode *inode,
				     u64 endoff,
@@ -4350,10 +4330,34 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
	return ret;
}

int btrfs_clone_file_range(struct file *src_file, loff_t off,
		struct file *dst_file, loff_t destoff, u64 len)
loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
		struct file *dst_file, loff_t destoff, loff_t len,
		unsigned int remap_flags)
{
	return btrfs_clone_files(dst_file, src_file, off, len, destoff);
	int ret;

	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
		return -EINVAL;

	if (remap_flags & REMAP_FILE_DEDUP) {
		struct inode *src = file_inode(src_file);
		struct inode *dst = file_inode(dst_file);
		u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;

		if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
			/*
			 * Btrfs does not support blocksize < page_size. As a
			 * result, btrfs_cmp_data() won't correctly handle
			 * this situation without an update.
			 */
			return -EINVAL;
		}

		ret = btrfs_extent_same(src, off, len, dst, destoff);
	} else {
		ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
	}
	return ret < 0 ? ret : len;
}

static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
Loading