Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 17d217fe authored by Yan Zheng's avatar Yan Zheng Committed by Chris Mason
Browse files

Btrfs: fix nodatasum handling in balancing code



Checksums on data can be disabled by mount option, so it's
possible some data extents don't have checksums or have
invalid checksums. This causes trouble for data relocation.
This patch contains the following changes to make data relocation
work.

1) Make the nodatasum/nodatacow mount options affect only new
files. Checksums and COW on data are controlled only by the
inode flags.

2) Check for the existence of checksums in the nodatacow checker.
If checksums exist, force COW of the data extent. This ensures that
the checksum for a given block is either valid or does not exist.

3) Update the data relocation code to properly handle the case
of missing checksums.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
parent e4404d6e
Loading
Loading
Loading
Loading
+3 −6
Original line number Diff line number Diff line
@@ -124,8 +124,7 @@ static int check_compressed_csum(struct inode *inode,
	u32 csum;
	u32 *cb_sum = &cb->sums;

	if (btrfs_test_opt(root, NODATASUM) ||
	    btrfs_test_flag(inode, NODATASUM))
	if (btrfs_test_flag(inode, NODATASUM))
		return 0;

	for (i = 0; i < cb->nr_pages; i++) {
@@ -671,8 +670,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
			 */
			atomic_inc(&cb->pending_bios);

			if (!btrfs_test_opt(root, NODATASUM) &&
			    !btrfs_test_flag(inode, NODATASUM)) {
			if (!btrfs_test_flag(inode, NODATASUM)) {
				btrfs_lookup_bio_sums(root, inode, comp_bio,
						      sums);
			}
@@ -699,8 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
	BUG_ON(ret);

	if (!btrfs_test_opt(root, NODATASUM) &&
	    !btrfs_test_flag(inode, NODATASUM)) {
	if (!btrfs_test_flag(inode, NODATASUM)) {
		btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
	}

+4 −1
Original line number Diff line number Diff line
@@ -1702,7 +1702,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
			struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, u64 bytenr);
			  struct btrfs_root *root, u64 objectid, u64 bytenr);
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root);
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
@@ -1789,6 +1789,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
			       struct extent_buffer *buf, u64 orig_start);
int btrfs_add_dead_reloc_root(struct btrfs_root *root);
int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
/* ctree.c */
int btrfs_previous_item(struct btrfs_root *root,
@@ -1994,6 +1995,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
			struct btrfs_root *root, struct btrfs_path *path,
			u64 isize);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start,
			     u64 end, struct list_head *list);
/* inode.c */

/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
+46 −4
Original line number Diff line number Diff line
@@ -1359,7 +1359,7 @@ int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
}

int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, u64 bytenr)
			  struct btrfs_root *root, u64 objectid, u64 bytenr)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path *path;
@@ -1418,8 +1418,9 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
		ref_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_extent_ref);
		ref_root = btrfs_ref_root(leaf, ref_item);
		if (ref_root != root->root_key.objectid &&
		    ref_root != BTRFS_TREE_LOG_OBJECTID) {
		if ((ref_root != root->root_key.objectid &&
		     ref_root != BTRFS_TREE_LOG_OBJECTID) ||
		     objectid != btrfs_ref_objectid(leaf, ref_item)) {
			ret = 1;
			goto out;
		}
@@ -5367,7 +5368,6 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root,
				if (ret)
					goto out;
			}
			btrfs_record_root_in_trans(found_root);
			ret = replace_one_extent(trans, found_root,
						path, extent_key,
						&first_key, ref_path,
@@ -5534,6 +5534,7 @@ static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
	} else {
		BUG_ON(1);
	}
	BTRFS_I(inode)->index_cnt = group->key.objectid;

	err = btrfs_orphan_add(trans, inode);
out:
@@ -5546,6 +5547,47 @@ static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
	return inode;
}

/*
 * Clone the data checksums of an extent that is being relocated onto the
 * ordered extent that describes its new location.
 *
 * @inode:    inode whose ordered extent receives the checksums; its
 *            BTRFS_I(inode)->index_cnt is added to @file_pos to obtain the
 *            disk bytenr at which the existing checksums are looked up
 * @file_pos: file offset of the ordered extent (must match exactly)
 * @len:      length of the ordered extent (must match exactly)
 *
 * Every checksum record found for the old disk range is re-based so that it
 * refers to ordered->start instead of the old disk bytenr and is then handed
 * to btrfs_add_ordered_sum().
 *
 * Returns 0 on success, or the negative error reported by
 * btrfs_lookup_csums_range().
 */
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
{
	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct list_head list;
	size_t offset;
	int ret;
	u64 disk_bytenr;

	INIT_LIST_HEAD(&list);

	/*
	 * The ordered extent must exist and cover exactly [file_pos,
	 * file_pos + len).  NOTE(review): the lookup presumably returns NULL
	 * when nothing is found -- check that explicitly instead of
	 * dereferencing a NULL pointer inside the BUG_ON condition.
	 */
	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
	BUG_ON(!ordered ||
	       ordered->file_offset != file_pos || ordered->len != len);

	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
	ret = btrfs_lookup_csums_range(root, disk_bytenr,
				       disk_bytenr + len - 1, &list);

	/*
	 * Drain the list even when the lookup failed: entries gathered before
	 * the failure are still queued on it and would otherwise be dropped
	 * on the floor when this function returns.
	 */
	while (!list_empty(&list)) {
		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
		list_del_init(&sums->list);

		sector_sum = sums->sums;
		sums->bytenr = ordered->start;

		/* shift every per-sector bytenr from the old to the new location */
		offset = 0;
		while (offset < sums->len) {
			sector_sum->bytenr += ordered->start - disk_bytenr;
			sector_sum++;
			offset += root->sectorsize;
		}

		btrfs_add_ordered_sum(inode, ordered, sums);
	}
	btrfs_put_ordered_extent(ordered);

	/* propagate lookup errors instead of unconditionally reporting success */
	return ret;
}

int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
{
	struct btrfs_trans_handle *trans;
+1 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
#define EXTENT_ORDERED (1 << 9)
#define EXTENT_ORDERED_METADATA (1 << 10)
#define EXTENT_BOUNDARY (1 << 11)
/*
 * No data checksum was found for this range; set by btrfs_lookup_bio_sums()
 * for inodes that belong to the data relocation tree.
 */
#define EXTENT_NODATASUM (1 << 12)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)

/* flags for bio submission */
+111 −3
Original line number Diff line number Diff line
@@ -140,6 +140,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
	return ret;
}


int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
			  struct bio *bio, u32 *dst)
{
@@ -185,9 +186,16 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
				if (ret == -ENOENT || ret == -EFBIG)
					ret = 0;
				sum = 0;
				printk("no csum found for inode %lu start "
				       "%llu\n", inode->i_ino,
				if (BTRFS_I(inode)->root->root_key.objectid ==
				    BTRFS_DATA_RELOC_TREE_OBJECTID) {
					set_extent_bits(io_tree, offset,
						offset + bvec->bv_len - 1,
						EXTENT_NODATASUM, GFP_NOFS);
				} else {
					printk("no csum found for inode %lu "
					       "start %llu\n", inode->i_ino,
					       (unsigned long long)offset);
				}
				item = NULL;
				btrfs_release_path(root, path);
				goto found;
@@ -228,6 +236,106 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
	return 0;
}

/*
 * Collect the data checksums stored in the csum tree for the disk byte range
 * [start, end] (both inclusive).
 *
 * @root:  any root of the filesystem; only root->fs_info and
 *         root->sectorsize are used
 * @start: first disk byte of the range
 * @end:   last disk byte of the range
 * @list:  list to which one newly allocated struct btrfs_ordered_sum is
 *         appended for every csum item (or part of one) that overlaps the
 *         range
 *
 * Parts of the range that have no checksum are skipped silently, so the list
 * may cover less than the requested range or stay empty.
 *
 * Returns 0 on success or a negative error from the tree search.  On failure
 * the entries appended so far are left on @list; the caller must still
 * dispose of them.
 */
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
			     struct list_head *list)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_csum_item *item;
	unsigned long offset;
	int ret;
	size_t size;
	u64 csum_end;
	u64 stop;
	u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.offset = start;
	key.type = BTRFS_EXTENT_CSUM_KEY;

	ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto fail;
	if (ret > 0 && path->slots[0] > 0) {
		/*
		 * No item starts exactly at @start.  The previous item may
		 * still contain the checksum for @start somewhere in its
		 * middle; if so, begin with that item.
		 */
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY) {
			offset = (start - key.offset) >>
				 root->fs_info->sb->s_blocksize_bits;
			if (offset * csum_size <
			    btrfs_item_size_nr(leaf, path->slots[0] - 1))
				path->slots[0]--;
		}
	}

	while (start <= end) {
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->csum_root, path);
			if (ret < 0)
				goto fail;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		/* decode the key once; it is used for all checks below */
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY)
			break;

		if (key.offset > end)
			break;

		/* skip over the hole in front of this item, if any */
		if (key.offset > start)
			start = key.offset;

		/* first disk byte that is no longer covered by this item */
		size = btrfs_item_size_nr(leaf, path->slots[0]);
		csum_end = key.offset + (size / csum_size) * root->sectorsize;

		/* clamp the part taken from this item to the requested range */
		stop = min(csum_end, end + 1);
		size = stop - start;
		sums = kzalloc(btrfs_ordered_sum_size(root, size), GFP_NOFS);
		BUG_ON(!sums);

		sector_sum = sums->sums;
		sums->bytenr = start;
		sums->len = size;

		/* byte offset of the checksum for @start inside the item */
		offset = (start - key.offset) >>
			 root->fs_info->sb->s_blocksize_bits;
		offset *= csum_size;

		item = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_csum_item);
		/*
		 * Walk by position instead of counting an unsigned byte count
		 * down to zero: the countdown wraps around if the clamped
		 * range is not a multiple of the sector size.
		 * NOTE(review): callers are presumably expected to pass
		 * sector-aligned ranges, for which both forms iterate the
		 * same number of times -- confirm.
		 */
		while (start < stop) {
			read_extent_buffer(leaf, &sector_sum->sum,
					   ((unsigned long)item) + offset,
					   csum_size);
			sector_sum->bytenr = start;

			start += root->sectorsize;
			offset += csum_size;
			sector_sum++;
		}
		list_add_tail(&sums->list, list);

		path->slots[0]++;
	}
	ret = 0;
fail:
	btrfs_free_path(path);
	return ret;
}

int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
		       struct bio *bio, u64 file_start, int contig)
{
Loading