Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4f764e51 authored by Filipe Manana's avatar Filipe Manana Committed by Chris Mason
Browse files

Btrfs: remove deleted xattrs on fsync log replay



If we deleted xattrs from a file and fsynced the file, after a log replay
the xattrs would remain associated to the file. This was an unexpected
behaviour and differs from what other filesystems do, such as for example
xfs and ext3/4.

Fix this by, on fsync log replay, check if every xattr in the fs/subvol
tree (that belongs to a logged inode) has a matching xattr in the log,
and if it does not, delete it from the fs/subvol tree. This is a similar
approach to what we do for dentries when we replay a directory from the
fsync log.

This issue is trivial to reproduce, and the following excerpt from my
test for xfstests triggers the issue:

  _crash_and_mount()
  {
       # Simulate a crash/power loss.
       _load_flakey_table $FLAKEY_DROP_WRITES
       _unmount_flakey
       _load_flakey_table $FLAKEY_ALLOW_WRITES
       _mount_flakey
  }

  rm -f $seqres.full

  _scratch_mkfs >> $seqres.full 2>&1
  _init_flakey
  _mount_flakey

  # Create out test file and add 3 xattrs to it.
  touch $SCRATCH_MNT/foobar
  $SETFATTR_PROG -n user.attr1 -v val1 $SCRATCH_MNT/foobar
  $SETFATTR_PROG -n user.attr2 -v val2 $SCRATCH_MNT/foobar
  $SETFATTR_PROG -n user.attr3 -v val3 $SCRATCH_MNT/foobar

  # Make sure everything is durably persisted.
  sync

  # Now delete the second xattr and fsync the inode.
  $SETFATTR_PROG -x user.attr2 $SCRATCH_MNT/foobar
  $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar

  _crash_and_mount

  # After the fsync log is replayed, the file should have only 2 xattrs, the ones
  # named user.attr1 and user.attr3. The btrfs fsync log replay bug left the file
  # with the 3 xattrs that we had before deleting the second one and fsyncing the
  # file.
  echo "xattr names and values after first fsync log replay:"
  $GETFATTR_PROG --absolute-names --dump $SCRATCH_MNT/foobar | _filter_scratch

  # Now write some data to our file, fsync it, remove the first xattr, add a new
  # hard link to our file and commit the fsync log by fsyncing some other new
  # file. This is to verify that after log replay our first xattr does not exist
  # anymore.
  echo "hello world!" >> $SCRATCH_MNT/foobar
  $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar
  $SETFATTR_PROG -x user.attr1 $SCRATCH_MNT/foobar
  ln $SCRATCH_MNT/foobar $SCRATCH_MNT/foobar_link
  touch $SCRATCH_MNT/qwerty
  $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/qwerty

  _crash_and_mount

  # Now only the xattr with name user.attr3 should be set in our file.
  echo "xattr names and values after second fsync log replay:"
  $GETFATTR_PROG --absolute-names --dump $SCRATCH_MNT/foobar | _filter_scratch

  status=0
  exit

The expected golden output, which is produced with this patch applied or
when testing against xfs or ext3/4, is:

  xattr names and values after first fsync log replay:
  # file: SCRATCH_MNT/foobar
  user.attr1="val1"
  user.attr3="val3"

  xattr names and values after second fsync log replay:
  # file: SCRATCH_MNT/foobar
  user.attr3="val3"

Without this patch applied, the output is:

  xattr names and values after first fsync log replay:
  # file: SCRATCH_MNT/foobar
  user.attr1="val1"
  user.attr2="val2"
  user.attr3="val3"

  xattr names and values after second fsync log replay:
  # file: SCRATCH_MNT/foobar
  user.attr1="val1"
  user.attr2="val2"
  user.attr3="val3"

A patch with a test case for xfstests follows soon.

Signed-off-by: default avatarFilipe Manana <fdmanana@suse.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent 9b305632
Loading
Loading
Loading
Loading
+109 −14
Original line number Original line Diff line number Diff line
@@ -1951,6 +1951,104 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
	return ret;
	return ret;
}
}


static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct btrfs_root *log,
			      struct btrfs_path *path,
			      const u64 ino)
{
	struct btrfs_key search_key;
	struct btrfs_path *log_path;
	int i;
	int nritems;
	int ret;

	log_path = btrfs_alloc_path();
	if (!log_path)
		return -ENOMEM;

	search_key.objectid = ino;
	search_key.type = BTRFS_XATTR_ITEM_KEY;
	search_key.offset = 0;
again:
	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
	if (ret < 0)
		goto out;
process_leaf:
	nritems = btrfs_header_nritems(path->nodes[0]);
	for (i = path->slots[0]; i < nritems; i++) {
		struct btrfs_key key;
		struct btrfs_dir_item *di;
		struct btrfs_dir_item *log_di;
		u32 total_size;
		u32 cur;

		btrfs_item_key_to_cpu(path->nodes[0], &key, i);
		if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
			ret = 0;
			goto out;
		}

		di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
		total_size = btrfs_item_size_nr(path->nodes[0], i);
		cur = 0;
		while (cur < total_size) {
			u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
			u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
			u32 this_len = sizeof(*di) + name_len + data_len;
			char *name;

			name = kmalloc(name_len, GFP_NOFS);
			if (!name) {
				ret = -ENOMEM;
				goto out;
			}
			read_extent_buffer(path->nodes[0], name,
					   (unsigned long)(di + 1), name_len);

			log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
						    name, name_len, 0);
			btrfs_release_path(log_path);
			if (!log_di) {
				/* Doesn't exist in log tree, so delete it. */
				btrfs_release_path(path);
				di = btrfs_lookup_xattr(trans, root, path, ino,
							name, name_len, -1);
				kfree(name);
				if (IS_ERR(di)) {
					ret = PTR_ERR(di);
					goto out;
				}
				ASSERT(di);
				ret = btrfs_delete_one_dir_name(trans, root,
								path, di);
				if (ret)
					goto out;
				btrfs_release_path(path);
				search_key = key;
				goto again;
			}
			kfree(name);
			if (IS_ERR(log_di)) {
				ret = PTR_ERR(log_di);
				goto out;
			}
			cur += this_len;
			di = (struct btrfs_dir_item *)((char *)di + this_len);
		}
	}
	ret = btrfs_next_leaf(root, path);
	if (ret > 0)
		ret = 0;
	else if (ret == 0)
		goto process_leaf;
out:
	btrfs_free_path(log_path);
	btrfs_release_path(path);
	return ret;
}


/*
/*
 * deletion replay happens before we copy any new directory items
 * deletion replay happens before we copy any new directory items
 * out of the log or out of backreferences from inodes.  It
 * out of the log or out of backreferences from inodes.  It
@@ -2104,6 +2202,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,


			inode_item = btrfs_item_ptr(eb, i,
			inode_item = btrfs_item_ptr(eb, i,
					    struct btrfs_inode_item);
					    struct btrfs_inode_item);
			ret = replay_xattr_deletes(wc->trans, root, log,
						   path, key.objectid);
			if (ret)
				break;
			mode = btrfs_inode_mode(eb, inode_item);
			mode = btrfs_inode_mode(eb, inode_item);
			if (S_ISDIR(mode)) {
			if (S_ISDIR(mode)) {
				ret = replay_dir_deletes(wc->trans,
				ret = replay_dir_deletes(wc->trans,
@@ -4072,10 +4174,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
	if (S_ISDIR(inode->i_mode)) {
	if (S_ISDIR(inode->i_mode)) {
		int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
		int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;


		if (inode_only == LOG_INODE_EXISTS) {
		if (inode_only == LOG_INODE_EXISTS)
			max_key_type = BTRFS_INODE_EXTREF_KEY;
			max_key_type = BTRFS_XATTR_ITEM_KEY;
			max_key.type = max_key_type;
		}
		ret = drop_objectid_items(trans, log, path, ino, max_key_type);
		ret = drop_objectid_items(trans, log, path, ino, max_key_type);
	} else {
	} else {
		if (inode_only == LOG_INODE_EXISTS) {
		if (inode_only == LOG_INODE_EXISTS) {
@@ -4100,7 +4200,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
		if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
		if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			     &BTRFS_I(inode)->runtime_flags)) {
			     &BTRFS_I(inode)->runtime_flags)) {
			if (inode_only == LOG_INODE_EXISTS) {
			if (inode_only == LOG_INODE_EXISTS) {
				max_key.type = BTRFS_INODE_EXTREF_KEY;
				max_key.type = BTRFS_XATTR_ITEM_KEY;
				ret = drop_objectid_items(trans, log, path, ino,
				ret = drop_objectid_items(trans, log, path, ino,
							  max_key.type);
							  max_key.type);
			} else {
			} else {
@@ -4111,17 +4211,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
				ret = btrfs_truncate_inode_items(trans, log,
				ret = btrfs_truncate_inode_items(trans, log,
								 inode, 0, 0);
								 inode, 0, 0);
			}
			}
		} else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
		} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
					      &BTRFS_I(inode)->runtime_flags) ||
					      &BTRFS_I(inode)->runtime_flags) ||
			   inode_only == LOG_INODE_EXISTS) {
			   inode_only == LOG_INODE_EXISTS) {
			if (inode_only == LOG_INODE_ALL) {
			if (inode_only == LOG_INODE_ALL)
				clear_bit(BTRFS_INODE_COPY_EVERYTHING,
					  &BTRFS_I(inode)->runtime_flags);
				fast_search = true;
				fast_search = true;
			max_key.type = BTRFS_XATTR_ITEM_KEY;
			max_key.type = BTRFS_XATTR_ITEM_KEY;
			} else {
				max_key.type = BTRFS_INODE_EXTREF_KEY;
			}
			ret = drop_objectid_items(trans, log, path, ino,
			ret = drop_objectid_items(trans, log, path, ino,
						  max_key.type);
						  max_key.type);
		} else {
		} else {