Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f959492f authored by Filipe Manana's avatar Filipe Manana Committed by Chris Mason
Browse files

Btrfs: send, fix more issues related to directory renames



This is a continuation of the previous changes titled:

   Btrfs: fix incremental send's decision to delay a dir move/rename
   Btrfs: part 2, fix incremental send's decision to delay a dir move/rename

There's a few more cases where a directory rename/move must be delayed which was
previously overlooked. If our immediate ancestor has a lower inode number than
ours and it doesn't have a delayed rename/move operation associated to it, it
doesn't mean there isn't any non-direct ancestor of our current inode that needs
to be renamed/moved before our current inode (i.e. with a higher inode number
than ours).

So we can't stop the search if our immediate ancestor has a lower inode number than
ours, we need to navigate the directory hierarchy upwards until we hit the root or:

1) find an ancestor with an higher inode number that was renamed/moved in the send
   root too (or already has a pending rename/move registered);
2) find an ancestor that is a new directory (higher inode number than ours and
   exists only in the send root).

Reproducer for case 1)

    $ mkfs.btrfs -f /dev/sdd
    $ mount /dev/sdd /mnt

    $ mkdir -p /mnt/a/b
    $ mkdir -p /mnt/a/c/d
    $ mkdir /mnt/a/b/e
    $ mkdir /mnt/a/c/d/f
    $ mv /mnt/a/b /mnt/a/c/d/2b
    $ mkdir /mnt/a/x
    $ mkdir /mnt/a/y

    $ btrfs subvolume snapshot -r /mnt /mnt/snap1
    $ btrfs send /mnt/snap1 -f /tmp/base.send

    $ mv /mnt/a/x /mnt/a/y
    $ mv /mnt/a/c/d/2b/e /mnt/a/c/d/2b/2e
    $ mv /mnt/a/c/d /mnt/a/h/2d
    $ mv /mnt/a/c /mnt/a/h/2d/2b/2c

    $ btrfs subvolume snapshot -r /mnt /mnt/snap2
    $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/incremental.send

Simple reproducer for case 2)

    $ mkfs.btrfs -f /dev/sdd
    $ mount /dev/sdd /mnt

    $ mkdir -p /mnt/a/b
    $ mkdir /mnt/a/c
    $ mv /mnt/a/b /mnt/a/c/b2
    $ mkdir /mnt/a/e

    $ btrfs subvolume snapshot -r /mnt /mnt/snap1
    $ btrfs send /mnt/snap1 -f /tmp/base.send

    $ mv /mnt/a/c/b2 /mnt/a/e/b3
    $ mkdir /mnt/a/e/b3/f
    $ mkdir /mnt/a/h
    $ mv /mnt/a/c /mnt/a/e/b3/f/c2
    $ mv /mnt/a/e /mnt/a/h/e2

    $ btrfs subvolume snapshot -r /mnt /mnt/snap2
    $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/incremental.send

Another simple reproducer for case 2)

    $ mkfs.btrfs -f /dev/sdd
    $ mount /dev/sdd /mnt

    $ mkdir -p /mnt/a/b
    $ mkdir /mnt/a/c
    $ mkdir /mnt/a/b/d
    $ mkdir /mnt/a/c/e

    $ btrfs subvolume snapshot -r /mnt /mnt/snap1
    $ btrfs send /mnt/snap1 -f /tmp/base.send

    $ mkdir /mnt/a/b/d/f
    $ mkdir /mnt/a/b/g
    $ mv /mnt/a/c/e /mnt/a/b/g/e2
    $ mv /mnt/a/c /mnt/a/b/d/f/c2
    $ mv /mnt/a/b/d/f /mnt/a/b/g/e2/f2

    $ btrfs subvolume snapshot -r /mnt /mnt/snap2
    $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/incremental.send

More complex reproducer for case 2)

    $ mkfs.btrfs -f /dev/sdd
    $ mount /dev/sdd /mnt

    $ mkdir -p /mnt/a/b
    $ mkdir -p /mnt/a/c/d
    $ mkdir /mnt/a/b/e
    $ mkdir /mnt/a/c/d/f
    $ mv /mnt/a/b /mnt/a/c/d/2b
    $ mkdir /mnt/a/x
    $ mkdir /mnt/a/y

    $ btrfs subvolume snapshot -r /mnt /mnt/snap1
    $ btrfs send /mnt/snap1 -f /tmp/base.send

    $ mv /mnt/a/x /mnt/a/y
    $ mv /mnt/a/c/d/2b/e /mnt/a/c/d/2b/2e
    $ mv /mnt/a/c/d /mnt/a/h/2d
    $ mv /mnt/a/c /mnt/a/h/2d/2b/2c

    $ btrfs subvolume snapshot -r /mnt /mnt/snap2
    $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/incremental.send

For both cases the incremental send would enter an infinite loop when building
path strings.

While solving these cases, this change also re-implements the code to detect
when directory moves/renames should be delayed. Instead of dealing with several
specific cases separately, it's now more generic handling all cases with a simple
detection algorithm and if when applying a delayed move/rename there's a path loop
detected, it further delays the move/rename registering a new ancestor inode as
the dependency inode (so our rename happens after that ancestor is renamed).

Tests for these cases is being added to xfstests too.

Signed-off-by: default avatarFilipe David Borba Manana <fdmanana@gmail.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent a10c4076
Loading
Loading
Loading
Loading
+96 −94
Original line number Original line Diff line number Diff line
@@ -2940,7 +2940,9 @@ static void free_waiting_dir_move(struct send_ctx *sctx,
static int add_pending_dir_move(struct send_ctx *sctx,
static int add_pending_dir_move(struct send_ctx *sctx,
				u64 ino,
				u64 ino,
				u64 ino_gen,
				u64 ino_gen,
				u64 parent_ino)
				u64 parent_ino,
				struct list_head *new_refs,
				struct list_head *deleted_refs)
{
{
	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
	struct rb_node *parent = NULL;
	struct rb_node *parent = NULL;
@@ -2972,12 +2974,12 @@ static int add_pending_dir_move(struct send_ctx *sctx,
		}
		}
	}
	}


	list_for_each_entry(cur, &sctx->deleted_refs, list) {
	list_for_each_entry(cur, deleted_refs, list) {
		ret = dup_ref(cur, &pm->update_refs);
		ret = dup_ref(cur, &pm->update_refs);
		if (ret < 0)
		if (ret < 0)
			goto out;
			goto out;
	}
	}
	list_for_each_entry(cur, &sctx->new_refs, list) {
	list_for_each_entry(cur, new_refs, list) {
		ret = dup_ref(cur, &pm->update_refs);
		ret = dup_ref(cur, &pm->update_refs);
		if (ret < 0)
		if (ret < 0)
			goto out;
			goto out;
@@ -3020,6 +3022,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
	return NULL;
	return NULL;
}
}


static int path_loop(struct send_ctx *sctx, struct fs_path *name,
		     u64 ino, u64 gen, u64 *ancestor_ino)
{
	int ret = 0;
	u64 parent_inode = 0;
	u64 parent_gen = 0;
	u64 start_ino = ino;

	*ancestor_ino = 0;
	while (ino != BTRFS_FIRST_FREE_OBJECTID) {
		fs_path_reset(name);

		if (is_waiting_for_rm(sctx, ino))
			break;
		if (is_waiting_for_move(sctx, ino)) {
			if (*ancestor_ino == 0)
				*ancestor_ino = ino;
			ret = get_first_ref(sctx->parent_root, ino,
					    &parent_inode, &parent_gen, name);
		} else {
			ret = __get_cur_name_and_parent(sctx, ino, gen,
							&parent_inode,
							&parent_gen, name);
			if (ret > 0) {
				ret = 0;
				break;
			}
		}
		if (ret < 0)
			break;
		if (parent_inode == start_ino) {
			ret = 1;
			if (*ancestor_ino == 0)
				*ancestor_ino = ino;
			break;
		}
		ino = parent_inode;
		gen = parent_gen;
	}
	return ret;
}

static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
{
{
	struct fs_path *from_path = NULL;
	struct fs_path *from_path = NULL;
@@ -3031,6 +3075,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
	struct waiting_dir_move *dm = NULL;
	struct waiting_dir_move *dm = NULL;
	u64 rmdir_ino = 0;
	u64 rmdir_ino = 0;
	int ret;
	int ret;
	u64 ancestor = 0;


	name = fs_path_alloc();
	name = fs_path_alloc();
	from_path = fs_path_alloc();
	from_path = fs_path_alloc();
@@ -3057,11 +3102,25 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
	if (ret < 0)
	if (ret < 0)
		goto out;
		goto out;


	sctx->send_progress = sctx->cur_ino + 1;
	ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
	if (ret) {
		LIST_HEAD(deleted_refs);
		ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
		ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
					   &pm->update_refs, &deleted_refs);
		if (ret < 0)
			goto out;
		if (rmdir_ino) {
			dm = get_waiting_dir_move(sctx, pm->ino);
			ASSERT(dm);
			dm->rmdir_ino = rmdir_ino;
		}
		goto out;
	}
	fs_path_reset(name);
	fs_path_reset(name);
	to_path = name;
	to_path = name;
	name = NULL;
	name = NULL;

	sctx->send_progress = sctx->cur_ino + 1;
	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
	if (ret < 0)
	if (ret < 0)
		goto out;
		goto out;
@@ -3185,127 +3244,74 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
static int wait_for_parent_move(struct send_ctx *sctx,
static int wait_for_parent_move(struct send_ctx *sctx,
				struct recorded_ref *parent_ref)
				struct recorded_ref *parent_ref)
{
{
	int ret;
	int ret = 0;
	u64 ino = parent_ref->dir;
	u64 ino = parent_ref->dir;
	u64 parent_ino_before, parent_ino_after;
	u64 parent_ino_before, parent_ino_after;
	u64 old_gen;
	struct fs_path *path_before = NULL;
	struct fs_path *path_before = NULL;
	struct fs_path *path_after = NULL;
	struct fs_path *path_after = NULL;
	int len1, len2;
	int len1, len2;
	int register_upper_dirs;
	u64 gen;

	if (is_waiting_for_move(sctx, ino))
		return 1;

	if (parent_ref->dir <= sctx->cur_ino)
		return 0;

	ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen,
			     NULL, NULL, NULL, NULL);
	if (ret == -ENOENT)
		return 0;
	else if (ret < 0)
		return ret;

	if (parent_ref->dir_gen != old_gen)
		return 0;

	path_before = fs_path_alloc();
	if (!path_before)
		return -ENOMEM;

	ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
			    NULL, path_before);
	if (ret == -ENOENT) {
		ret = 0;
		goto out;
	} else if (ret < 0) {
		goto out;
	}


	path_after = fs_path_alloc();
	path_after = fs_path_alloc();
	if (!path_after) {
	path_before = fs_path_alloc();
	if (!path_after || !path_before) {
		ret = -ENOMEM;
		ret = -ENOMEM;
		goto out;
		goto out;
	}
	}


	ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
			    &gen, path_after);
	if (ret == -ENOENT) {
		ret = 0;
		goto out;
	} else if (ret < 0) {
		goto out;
	}

	len1 = fs_path_len(path_before);
	len2 = fs_path_len(path_after);
	if (parent_ino_before != parent_ino_after || len1 != len2 ||
	     memcmp(path_before->start, path_after->start, len1)) {
		ret = 1;
		goto out;
	}
	ret = 0;

	/*
	/*
	 * Ok, our new most direct ancestor has a higher inode number but
	 * Our current directory inode may not yet be renamed/moved because some
	 * wasn't moved/renamed. So maybe some of the new ancestors higher in
	 * ancestor (immediate or not) has to be renamed/moved first. So find if
	 * the hierarchy have an higher inode number too *and* were renamed
	 * such ancestor exists and make sure our own rename/move happens after
	 * or moved - in this case we need to wait for the ancestor's rename
	 * that ancestor is processed.
	 * or move operation before we can do the move/rename for the current
	 * inode.
	 */
	 */
	register_upper_dirs = 0;
	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
	ino = parent_ino_after;
		if (is_waiting_for_move(sctx, ino)) {
again:
			ret = 1;
	while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) {
			break;
		u64 parent_gen;
		}


		fs_path_reset(path_before);
		fs_path_reset(path_before);
		fs_path_reset(path_after);
		fs_path_reset(path_after);


		ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
		ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
				    &parent_gen, path_after);
				    NULL, path_after);
		if (ret < 0)
		if (ret < 0)
			goto out;
			goto out;
		ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
		ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
				    NULL, path_before);
				    NULL, path_before);
		if (ret == -ENOENT) {
		if (ret < 0 && ret != -ENOENT) {
			ret = 0;
			break;
		} else if (ret < 0) {
			goto out;
			goto out;
		} else if (ret == -ENOENT) {
			ret = 1;
			break;
		}
		}


		len1 = fs_path_len(path_before);
		len1 = fs_path_len(path_before);
		len2 = fs_path_len(path_after);
		len2 = fs_path_len(path_after);
		if (parent_ino_before != parent_ino_after || len1 != len2 ||
		if (ino > sctx->cur_ino &&
		    memcmp(path_before->start, path_after->start, len1)) {
		    (parent_ino_before != parent_ino_after || len1 != len2 ||
		     memcmp(path_before->start, path_after->start, len1))) {
			ret = 1;
			ret = 1;
			if (register_upper_dirs) {
			break;
			break;
			} else {
				register_upper_dirs = 1;
				ino = parent_ref->dir;
				gen = parent_ref->dir_gen;
				goto again;
		}
		}
		} else if (register_upper_dirs) {
			ret = add_pending_dir_move(sctx, ino, gen,
						   parent_ino_after);
			if (ret < 0 && ret != -EEXIST)
				goto out;
		}

		ino = parent_ino_after;
		ino = parent_ino_after;
		gen = parent_gen;
	}
	}


out:
out:
	fs_path_free(path_before);
	fs_path_free(path_before);
	fs_path_free(path_after);
	fs_path_free(path_after);


	if (ret == 1) {
		ret = add_pending_dir_move(sctx,
					   sctx->cur_ino,
					   sctx->cur_inode_gen,
					   ino,
					   &sctx->new_refs,
					   &sctx->deleted_refs);
		if (!ret)
			ret = 1;
	}

	return ret;
	return ret;
}
}


@@ -3466,10 +3472,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
				if (ret < 0)
				if (ret < 0)
					goto out;
					goto out;
				if (ret) {
				if (ret) {
					ret = add_pending_dir_move(sctx,
							   sctx->cur_ino,
							   sctx->cur_inode_gen,
							   cur->dir);
					*pending_move = 1;
					*pending_move = 1;
				} else {
				} else {
					ret = send_rename(sctx, valid_path,
					ret = send_rename(sctx, valid_path,