Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1259ab75 authored by Chris Mason's avatar Chris Mason
Browse files

Btrfs: Handle write errors on raid1 and raid10



When duplicate copies exist, writes are allowed to fail to one of those
copies.  This changeset includes a few changes that allow the FS to
continue even when some IOs fail.

It also adds verification of the parent generation number for btree blocks.
This generation is stored in the pointer to a block, and it ensures
that missed writes to are detected.

Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent ca7a79ad
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -379,7 +379,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,

		cur = btrfs_find_tree_block(root, blocknr, blocksize);
		if (cur)
			uptodate = btrfs_buffer_uptodate(cur);
			uptodate = btrfs_buffer_uptodate(cur, gen);
		else
			uptodate = 0;
		if (!cur || !uptodate) {
+51 −5
Original line number Diff line number Diff line
@@ -205,6 +205,33 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
	return 0;
}

static int verify_parent_transid(struct extent_io_tree *io_tree,
				 struct extent_buffer *eb, u64 parent_transid)
{
	int ret;

	if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
		return 0;

	lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS);
	if (extent_buffer_uptodate(io_tree, eb) &&
	    btrfs_header_generation(eb) == parent_transid) {
		ret = 0;
		goto out;
	}
	printk("parent transid verify failed on %llu wanted %llu found %llu\n",
	       (unsigned long long)eb->start,
	       (unsigned long long)parent_transid,
	       (unsigned long long)btrfs_header_generation(eb));
	ret = 1;
out:
	clear_extent_buffer_uptodate(io_tree, eb);
	unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
		      GFP_NOFS);
	return ret;

}

static int btree_read_extent_buffer_pages(struct btrfs_root *root,
					  struct extent_buffer *eb,
					  u64 start, u64 parent_transid)
@@ -218,7 +245,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
	while (1) {
		ret = read_extent_buffer_pages(io_tree, eb, start, 1,
					       btree_get_extent, mirror_num);
		if (!ret)
		if (!ret &&
		    !verify_parent_transid(io_tree, eb, parent_transid))
			return ret;

		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
@@ -330,6 +358,13 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
		ret = -EIO;
		goto err;
	}
	if (memcmp_extent_buffer(eb, root->fs_info->fsid,
				 (unsigned long)btrfs_header_fsid(eb),
				 BTRFS_FSID_SIZE)) {
		printk("bad fsid on block %Lu\n", eb->start);
		ret = -EIO;
		goto err;
	}
	found_level = btrfs_header_level(eb);

	ret = csum_tree_block(root, eb, 1);
@@ -1363,7 +1398,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
					"I/O error on %s\n",
				       bdevname(bh->b_bdev, b));
		}
		set_buffer_write_io_error(bh);
		/* note, we dont' set_buffer_write_io_error because we have
		 * our own ways of dealing with the IO errors
		 */
		clear_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
@@ -1459,7 +1496,8 @@ int write_all_supers(struct btrfs_root *root)
				ret = submit_bh(WRITE, bh);
				BUG_ON(ret);
				wait_on_buffer(bh);
				BUG_ON(!buffer_uptodate(bh));
				if (!buffer_uptodate(bh))
					total_errors++;
			} else {
				total_errors++;
			}
@@ -1607,10 +1645,18 @@ int close_ctree(struct btrfs_root *root)
	return 0;
}

int btrfs_buffer_uptodate(struct extent_buffer *buf)
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
{
	int ret;
	struct inode *btree_inode = buf->first_page->mapping->host;
	return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf);

	ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf);
	if (!ret)
		return ret;

	ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf,
				    parent_transid);
	return !ret;
}

int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
+1 −1
Original line number Diff line number Diff line
@@ -56,7 +56,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root,
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
int wait_on_tree_block_writeback(struct btrfs_root *root,
				 struct extent_buffer *buf);
+2 −2
Original line number Diff line number Diff line
@@ -1366,7 +1366,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
	if (!pending) {
		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
		if (buf) {
			if (btrfs_buffer_uptodate(buf)) {
			if (btrfs_buffer_uptodate(buf, 0)) {
				u64 transid =
				    root->fs_info->running_transaction->transid;
				u64 header_transid =
@@ -2151,7 +2151,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
			continue;
		}
		next = btrfs_find_tree_block(root, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next)) {
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);

+41 −8
Original line number Diff line number Diff line
@@ -1366,7 +1366,7 @@ static int end_bio_extent_writepage(struct bio *bio,
				   unsigned int bytes_done, int err)
#endif
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	int uptodate = err == 0;
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_state *state = bio->bi_private;
	struct extent_io_tree *tree = state->tree;
@@ -1375,6 +1375,7 @@ static int end_bio_extent_writepage(struct bio *bio,
	u64 end;
	u64 cur;
	int whole_page;
	int ret;
	unsigned long flags;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
@@ -1395,17 +1396,30 @@ static int end_bio_extent_writepage(struct bio *bio,
		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (tree->ops && tree->ops->writepage_end_io_hook) {
			ret = tree->ops->writepage_end_io_hook(page, start,
						       end, state);
			if (ret)
				uptodate = 0;
		}

		if (!uptodate && tree->ops &&
		    tree->ops->writepage_io_failed_hook) {
			ret = tree->ops->writepage_io_failed_hook(bio, page,
							 start, end, state);
			if (ret == 0) {
				state = NULL;
				uptodate = (err == 0);
				continue;
			}
		}

		if (!uptodate) {
			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
			ClearPageUptodate(page);
			SetPageError(page);
		}

		if (tree->ops && tree->ops->writepage_end_io_hook) {
			tree->ops->writepage_end_io_hook(page, start, end,
							 state);
		}

		/*
		 * bios can get merged in funny ways, and so we need to
		 * be careful with the state variable.  We know the
@@ -2073,9 +2087,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		} else {
			ret = 0;
		}
		if (ret)
		if (ret) {
			SetPageError(page);
		else {
		} else {
			unsigned long max_nr = end_index + 1;
			set_range_writeback(tree, cur, cur + iosize - 1);
			if (!PageWriteback(page)) {
@@ -2948,6 +2962,25 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
}
EXPORT_SYMBOL(set_extent_buffer_dirty);

int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
				struct extent_buffer *eb)
{
	unsigned long i;
	struct page *page;
	unsigned long num_pages;

	num_pages = num_extent_pages(eb->start, eb->len);
	eb->flags &= ~EXTENT_UPTODATE;

	clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
			      GFP_NOFS);
	for (i = 0; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		ClearPageUptodate(page);
	}
	return 0;
}

int set_extent_buffer_uptodate(struct extent_io_tree *tree,
				struct extent_buffer *eb)
{
Loading