Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cdd1fedf authored by Dan Fuhry's avatar Dan Fuhry Committed by Filipe Manana
Browse files

btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT



Two new flags, RENAME_EXCHANGE and RENAME_WHITEOUT, provide for new
behavior in the renameat2() syscall. This behavior is primarily used by
overlayfs. This patch adds support for these flags to btrfs, enabling it to
be used as a fully functional upper layer for overlayfs.

RENAME_EXCHANGE support was written by Davide Italiano originally
submitted on 2 April 2015.

Signed-off-by: default avatarDavide Italiano <dccitaliano@gmail.com>
Signed-off-by: default avatarDan Fuhry <dfuhry@datto.com>
[ remove unlikely ]
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarFilipe Manana <fdmanana@suse.com>
parent c4aba954
Loading
Loading
Loading
Loading
+257 −7
Original line number Diff line number Diff line
@@ -9394,8 +9394,244 @@ static int btrfs_getattr(struct vfsmount *mnt,
	return 0;
}

static int btrfs_rename_exchange(struct inode *old_dir,
			      struct dentry *old_dentry,
			      struct inode *new_dir,
			      struct dentry *new_dentry)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = new_dentry->d_inode;
	struct inode *old_inode = old_dentry->d_inode;
	struct timespec ctime = CURRENT_TIME;
	struct dentry *parent;
	u64 old_ino = btrfs_ino(old_inode);
	u64 new_ino = btrfs_ino(new_inode);
	u64 old_idx = 0;
	u64 new_idx = 0;
	u64 root_objectid;
	int ret;

	/* we only allow rename subvolume link between subvolumes */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	/* close the race window with snapshot create/destroy ioctl */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&root->fs_info->subvol_sem);
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&dest->fs_info->subvol_sem);

	/*
	 * We want to reserve the absolute worst case amount of items.  So if
	 * both inodes are subvols and we need to unlink them then that would
	 * require 4 item modifications, but if they are both normal inodes it
	 * would require 5 item modifications, so we'll assume their normal
	 * inodes.  So 5 * 2 is 10, plus 2 for the new links, so 12 total items
	 * should cover the worst case number of items we'll modify.
	 */
	trans = btrfs_start_transaction(root, 12);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	/*
	 * We need to find a free sequence number both in the source and
	 * in the destination directory for the exchange.
	 */
	ret = btrfs_set_inode_index(new_dir, &old_idx);
	if (ret)
		goto out_fail;
	ret = btrfs_set_inode_index(old_dir, &new_idx);
	if (ret)
		goto out_fail;

	BTRFS_I(old_inode)->dir_index = 0ULL;
	BTRFS_I(new_inode)->dir_index = 0ULL;

	/* Reference for the source. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* force full log commit if subvolume involved. */
		btrfs_set_log_full_commit(root->fs_info, trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(new_dir), old_idx);
		if (ret)
			goto out_fail;
		btrfs_pin_log_trans(root);
	}

	/* And now for the dest. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* force full log commit if subvolume involved. */
		btrfs_set_log_full_commit(dest->fs_info, trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, root,
					     old_dentry->d_name.name,
					     old_dentry->d_name.len,
					     new_ino,
					     btrfs_ino(old_dir), new_idx);
		if (ret)
			goto out_fail;
		btrfs_pin_log_trans(dest);
	}

	/* Update inode version and ctime/mtime. */
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	inode_inc_iversion(new_inode);
	old_dir->i_ctime = old_dir->i_mtime = ctime;
	new_dir->i_ctime = new_dir->i_mtime = ctime;
	old_inode->i_ctime = ctime;
	new_inode->i_ctime = ctime;

	if (old_dentry->d_parent != new_dentry->d_parent) {
		btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
		btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
	}

	/* src is a subvolume */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, root, old_dir,
					  root_objectid,
					  old_dentry->d_name.name,
					  old_dentry->d_name.len);
	} else { /* src is an inode */
		ret = __btrfs_unlink_inode(trans, root, old_dir,
					   old_dentry->d_inode,
					   old_dentry->d_name.name,
					   old_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, root, old_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_fail;
	}

	/* dest is a subvolume */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, dest, new_dir,
					  root_objectid,
					  new_dentry->d_name.name,
					  new_dentry->d_name.len);
	} else { /* dest is an inode */
		ret = __btrfs_unlink_inode(trans, dest, new_dir,
					   new_dentry->d_inode,
					   new_dentry->d_name.name,
					   new_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, dest, new_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_fail;
	}

	ret = btrfs_add_link(trans, new_dir, old_inode,
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, old_idx);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_fail;
	}

	ret = btrfs_add_link(trans, old_dir, new_inode,
			     old_dentry->d_name.name,
			     old_dentry->d_name.len, 0, new_idx);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_fail;
	}

	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = old_idx;
	if (new_inode->i_nlink == 1)
		BTRFS_I(new_inode)->dir_index = new_idx;

	if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
		parent = new_dentry->d_parent;
		btrfs_log_new_name(trans, old_inode, old_dir, parent);
		btrfs_end_log_trans(root);
	}
	if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
		parent = old_dentry->d_parent;
		btrfs_log_new_name(trans, new_inode, new_dir, parent);
		btrfs_end_log_trans(dest);
	}
out_fail:
	ret = btrfs_end_transaction(trans, root);
out_notrans:
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&dest->fs_info->subvol_sem);
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&root->fs_info->subvol_sem);

	return ret;
}

static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct inode *dir,
				     struct dentry *dentry)
{
	int ret;
	struct inode *inode;
	u64 objectid;
	u64 index;

	ret = btrfs_find_free_ino(root, &objectid);
	if (ret)
		return ret;

	inode = btrfs_new_inode(trans, root, dir,
				dentry->d_name.name,
				dentry->d_name.len,
				btrfs_ino(dir),
				objectid,
				S_IFCHR | WHITEOUT_MODE,
				&index);

	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		return ret;
	}

	inode->i_op = &btrfs_special_inode_operations;
	init_special_inode(inode, inode->i_mode,
		WHITEOUT_DEV);

	ret = btrfs_init_inode_security(trans, inode, dir,
				&dentry->d_name);
	if (ret)
		return ret;

	ret = btrfs_add_nondir(trans, dir, dentry,
				inode, 0, index);
	if (ret)
		return ret;

	ret = btrfs_update_inode(trans, root, inode);
	if (ret)
		return ret;

	unlock_new_inode(inode);
	iput(inode);

	return 0;
}

static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
			   struct inode *new_dir, struct dentry *new_dentry)
			   struct inode *new_dir, struct dentry *new_dentry,
			   unsigned int flags)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
@@ -9457,7 +9693,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
	 * We want to reserve the absolute worst case amount of items.  So if
	 * both inodes are subvols and we need to unlink them then that would
	 * require 4 item modifications, but if they are both normal inodes it
	 * would require 5 item modifications, so we'll assume their normal
	 * would require 5 item modifications, so we'll assume they are normal
	 * inodes.  So 5 * 2 is 10, plus 1 for the new link, so 11 total items
	 * should cover the worst case number of items we'll modify.
	 */
@@ -9561,6 +9797,16 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
		btrfs_end_log_trans(root);
		log_pinned = false;
	}

	if (flags & RENAME_WHITEOUT) {
		ret = btrfs_whiteout_for_rename(trans, root, old_dir,
						old_dentry);

		if (ret) {
			btrfs_abort_transaction(trans, root, ret);
			goto out_fail;
		}
	}
out_fail:
	/*
	 * If we have pinned the log and an error happened, we unpin tasks
@@ -9596,10 +9842,14 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry,
			 unsigned int flags)
{
	if (flags & ~RENAME_NOREPLACE)
	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
		return -EINVAL;

	return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
	if (flags & RENAME_EXCHANGE)
		return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
					  new_dentry);

	return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}

static void btrfs_run_delalloc_work(struct btrfs_work *work)