Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a512bbf8 authored by Yan Zheng's avatar Yan Zheng Committed by Chris Mason
Browse files

Btrfs: superblock duplication



This patch implements superblock duplication. Superblocks
are stored at offsets 64K, 64M and 256G on every device.
The space used by superblocks is reserved by the allocator,
which uses a reverse mapping function to find the logical
addresses that correspond to superblocks. Thank you,

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
parent d20f7043
Loading
Loading
Loading
Loading
+143 −65
Original line number Diff line number Diff line
@@ -1595,8 +1595,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
		     fs_info, BTRFS_ROOT_TREE_OBJECTID);


	bh = __bread(fs_devices->latest_bdev,
		     BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
	bh = btrfs_read_dev_super(fs_devices->latest_bdev);
	if (!bh)
		goto fail_iput;

@@ -1710,7 +1709,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	}

	mutex_lock(&fs_info->chunk_mutex);
	ret = btrfs_read_sys_array(tree_root);
	ret = btrfs_read_sys_array(tree_root, btrfs_super_bytenr(disk_super));
	mutex_unlock(&fs_info->chunk_mutex);
	if (ret) {
		printk("btrfs: failed to read the system array on %s\n",
@@ -1905,19 +1904,147 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
	put_bh(bh);
}

static int write_all_supers(struct btrfs_root *root)
/*
 * Read the super block of @bdev and hand back the buffer head that holds it.
 *
 * A copy is accepted only when the bytenr recorded inside it equals the
 * offset it was read from and its magic matches BTRFS_MAGIC.  Of the
 * accepted copies the one with the highest generation wins.
 *
 * Returns the winning buffer head, or NULL when no copy was accepted.  The
 * buffer returned by __bread() is passed through as-is; releasing it is left
 * to the caller.
 */
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
{
	struct buffer_head *best = NULL;
	u64 best_gen = 0;
	int mirror;

	/*
	 * Only the first copy is looked at for now.  Checking every copy
	 * would let a btrfs mount succeed after a mkfs from a different FS,
	 * so scanning the later supers (looping up to BTRFS_SUPER_MIRROR_MAX
	 * instead of 1) needs a special mount option first.
	 */
	for (mirror = 0; mirror < 1; mirror++) {
		struct buffer_head *candidate;
		struct btrfs_super_block *disk_sb;
		u64 gen;
		u64 offset = btrfs_sb_offset(mirror);

		/* this copy, and every later one, lies past the device end */
		if (offset + 4096 >= i_size_read(bdev->bd_inode))
			break;

		candidate = __bread(bdev, offset / 4096, 4096);
		if (!candidate)
			continue;

		disk_sb = (struct btrfs_super_block *)candidate->b_data;

		/* the super must claim to live where we found it */
		if (btrfs_super_bytenr(disk_sb) != offset) {
			brelse(candidate);
			continue;
		}

		if (strncmp((char *)(&disk_sb->magic), BTRFS_MAGIC,
			    sizeof(disk_sb->magic)) != 0) {
			brelse(candidate);
			continue;
		}

		gen = btrfs_super_generation(disk_sb);
		if (best && gen <= best_gen) {
			/* not newer than what we already hold */
			brelse(candidate);
			continue;
		}

		/* first valid copy, or a newer one: swap it in */
		brelse(best);
		best = candidate;
		best_gen = gen;
	}

	return best;
}

/*
 * Write, or wait for, every super block copy that fits on @device.
 *
 * The function is meant to be called twice per device:
 *
 *   wait == 0: stamp @sb with each copy's bytenr, checksum it, copy it into
 *              the block device buffer and submit the write.  The reference
 *              obtained from __getblk() is kept so the second pass can find
 *              the buffer again.
 *   wait != 0: look each buffer up again, wait for the write to finish and
 *              drop the reference left by the first pass.  A buffer that is
 *              not uptodate after the wait is resubmitted once and waited
 *              for.
 *
 * @device:      device to write to
 * @sb:          in-memory super block; its bytenr and csum fields are
 *               rewritten for every copy during the wait == 0 pass
 * @do_barriers: when set (and device->barriers is set) the last copy that
 *               fits on the device is submitted with WRITE_BARRIER
 * @wait:        selects the pass, see above
 * @max_mirrors: number of copies to handle, 0 means BTRFS_SUPER_MIRROR_MAX
 *
 * Returns 0 when fewer copies failed than were attempted, -1 otherwise
 * (which includes the case where not even the first copy fits).
 */
static int write_dev_supers(struct btrfs_device *device,
			    struct btrfs_super_block *sb,
			    int do_barriers, int wait, int max_mirrors)
{
	struct buffer_head *bh;
	int i;
	int ret;
	int errors = 0;
	u32 crc;
	u64 bytenr;
	int last_barrier = 0;

	if (max_mirrors == 0)
		max_mirrors = BTRFS_SUPER_MIRROR_MAX;

	/* make sure only the last submit_bh does a barrier */
	if (do_barriers) {
		for (i = 0; i < max_mirrors; i++) {
			bytenr = btrfs_sb_offset(i);
			if (bytenr + BTRFS_SUPER_INFO_SIZE >=
			    device->total_bytes)
				break;
			last_barrier = i;
		}
	}

	for (i = 0; i < max_mirrors; i++) {
		bytenr = btrfs_sb_offset(i);
		/* copies are at increasing offsets, nothing further fits */
		if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
			break;

		if (wait) {
			bh = __find_get_block(device->bdev, bytenr / 4096,
					      BTRFS_SUPER_INFO_SIZE);
			BUG_ON(!bh);
			wait_on_buffer(bh);

			/*
			 * drop the lookup reference, the one left over from
			 * the wait == 0 pass still pins the buffer
			 */
			brelse(bh);
			if (buffer_uptodate(bh)) {
				/* write succeeded, drop the wait == 0 ref */
				brelse(bh);
				continue;
			}

			/*
			 * The first write failed and the buffer is submitted
			 * again below.  Prepare it the same way every other
			 * submit in this function does: mark it uptodate,
			 * take the reference that the end_io handler puts,
			 * and lock it.  Without this the resubmit would hand
			 * an unlocked buffer to submit_bh() and the handler
			 * would drop a reference nobody took.
			 */
			set_buffer_uptodate(bh);
			get_bh(bh);
			lock_buffer(bh);
		} else {
			btrfs_set_super_bytenr(sb, bytenr);

			/* the checksum covers everything after the csum field */
			crc = ~(u32)0;
			crc = btrfs_csum_data(NULL, (char *)sb +
					      BTRFS_CSUM_SIZE, crc,
					      BTRFS_SUPER_INFO_SIZE -
					      BTRFS_CSUM_SIZE);
			btrfs_csum_final(crc, sb->csum);

			bh = __getblk(device->bdev, bytenr / 4096,
				      BTRFS_SUPER_INFO_SIZE);
			memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);

			set_buffer_uptodate(bh);
			/* extra reference for the end_io handler to put */
			get_bh(bh);
			lock_buffer(bh);
			bh->b_end_io = btrfs_end_buffer_write_sync;
		}

		if (i == last_barrier && do_barriers && device->barriers) {
			ret = submit_bh(WRITE_BARRIER, bh);
			if (ret == -EOPNOTSUPP) {
				/* fall back to a plain write from now on */
				printk("btrfs: disabling barriers on dev %s\n",
				       device->name);
				set_buffer_uptodate(bh);
				device->barriers = 0;
				get_bh(bh);
				lock_buffer(bh);
				ret = submit_bh(WRITE, bh);
			}
		} else {
			ret = submit_bh(WRITE, bh);
		}

		if (!ret && wait) {
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				errors++;
		} else if (ret) {
			errors++;
		}
		/* done with this copy, drop the wait == 0 reference */
		if (wait)
			brelse(bh);
	}
	return errors < i ? 0 : -1;
}

int write_all_supers(struct btrfs_root *root, int max_mirrors)
{
	struct list_head *cur;
	struct list_head *head = &root->fs_info->fs_devices->devices;
	struct btrfs_device *dev;
	struct btrfs_super_block *sb;
	struct btrfs_dev_item *dev_item;
	struct buffer_head *bh;
	int ret;
	int do_barriers;
	int max_errors;
	int total_errors = 0;
	u32 crc;
	u64 flags;

	max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
@@ -1944,40 +2071,11 @@ static int write_all_supers(struct btrfs_root *root)
		btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
		memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
		memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);

		flags = btrfs_super_flags(sb);
		btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);


		crc = ~(u32)0;
		crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc,
				      BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
		btrfs_csum_final(crc, sb->csum);

		bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096,
			      BTRFS_SUPER_INFO_SIZE);

		memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
		dev->pending_io = bh;

		get_bh(bh);
		set_buffer_uptodate(bh);
		lock_buffer(bh);
		bh->b_end_io = btrfs_end_buffer_write_sync;

		if (do_barriers && dev->barriers) {
			ret = submit_bh(WRITE_BARRIER, bh);
			if (ret == -EOPNOTSUPP) {
				printk("btrfs: disabling barriers on dev %s\n",
				       dev->name);
				set_buffer_uptodate(bh);
				dev->barriers = 0;
				get_bh(bh);
				lock_buffer(bh);
				ret = submit_bh(WRITE, bh);
			}
		} else {
			ret = submit_bh(WRITE, bh);
		}
		ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);
		if (ret)
			total_errors++;
	}
@@ -1985,8 +2083,8 @@ static int write_all_supers(struct btrfs_root *root)
		printk("btrfs: %d errors while writing supers\n", total_errors);
		BUG();
	}
	total_errors = 0;

	total_errors = 0;
	list_for_each(cur, head) {
		dev = list_entry(cur, struct btrfs_device, dev_list);
		if (!dev->bdev)
@@ -1994,30 +2092,10 @@ static int write_all_supers(struct btrfs_root *root)
		if (!dev->in_fs_metadata || !dev->writeable)
			continue;

		BUG_ON(!dev->pending_io);
		bh = dev->pending_io;
		wait_on_buffer(bh);
		if (!buffer_uptodate(dev->pending_io)) {
			if (do_barriers && dev->barriers) {
				printk("btrfs: disabling barriers on dev %s\n",
				       dev->name);
				set_buffer_uptodate(bh);
				get_bh(bh);
				lock_buffer(bh);
				dev->barriers = 0;
				ret = submit_bh(WRITE, bh);
				BUG_ON(ret);
				wait_on_buffer(bh);
				if (!buffer_uptodate(bh))
					total_errors++;
			} else {
		ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
		if (ret)
			total_errors++;
	}

		}
		dev->pending_io = NULL;
		brelse(bh);
	}
	if (total_errors > max_errors) {
		printk("btrfs: %d errors while writing supers\n", total_errors);
		BUG();
@@ -2025,12 +2103,12 @@ static int write_all_supers(struct btrfs_root *root)
	return 0;
}

int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root)
int write_ctree_super(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, int max_mirrors)
{
	int ret;

	ret = write_all_supers(root);
	ret = write_all_supers(root, max_mirrors);
	return ret;
}

@@ -2116,7 +2194,7 @@ int btrfs_commit_super(struct btrfs_root *root)
	ret = btrfs_write_and_wait_transaction(NULL, root);
	BUG_ON(ret);

	ret = write_ctree_super(NULL, root);
	ret = write_ctree_super(NULL, root, 0);
	return ret;
}

+15 −2
Original line number Diff line number Diff line
@@ -19,8 +19,20 @@
#ifndef __DISKIO__
#define __DISKIO__

#define BTRFS_SUPER_INFO_OFFSET (16 * 1024)
#define BTRFS_SUPER_INFO_OFFSET (64 * 1024)
#define BTRFS_SUPER_INFO_SIZE 4096

#define BTRFS_SUPER_MIRROR_MAX	 3
#define BTRFS_SUPER_MIRROR_SHIFT 12

/*
 * Byte offset on a device of super block copy number @mirror.
 *
 * Copy 0 sits at BTRFS_SUPER_INFO_OFFSET.  Every later copy is found by
 * shifting a 16K base left by BTRFS_SUPER_MIRROR_SHIFT bits per mirror.
 */
static inline u64 btrfs_sb_offset(int mirror)
{
	const u64 base = 16 * 1024;

	if (mirror == 0)
		return BTRFS_SUPER_INFO_OFFSET;

	return base << (mirror * BTRFS_SUPER_MIRROR_SHIFT);
}

struct btrfs_device;
struct btrfs_fs_devices;

@@ -37,7 +49,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
			      char *options);
int close_ctree(struct btrfs_root *root);
int write_ctree_super(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root);
		      struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
					    u64 bytenr, u32 blocksize);
+28 −26
Original line number Diff line number Diff line
@@ -189,6 +189,29 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
	return 0;
}

/*
 * Take the ranges that hold super block copies out of a block group's
 * free space.
 *
 * For each of the BTRFS_SUPER_MIRROR_MAX super block offsets,
 * btrfs_rmap_block() is asked which logical addresses of the block group
 * starting at cache->key.objectid correspond to that offset.  stripe_len
 * bytes at every address it reports are then removed from @cache.
 *
 * A failing reverse mapping hits BUG_ON(); otherwise 0 is returned.
 */
static int remove_sb_from_cache(struct btrfs_root *root,
				struct btrfs_block_group_cache *cache)
{
	int mirror;

	for (mirror = 0; mirror < BTRFS_SUPER_MIRROR_MAX; mirror++) {
		u64 *logical;
		int stripe_len;
		int nr;
		int ret;
		u64 sb_bytenr = btrfs_sb_offset(mirror);

		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, sb_bytenr, 0,
				       &logical, &nr, &stripe_len);
		BUG_ON(ret);

		/* walk the reported addresses from the last one down */
		while (nr != 0) {
			nr--;
			btrfs_remove_free_space(cache, logical[nr],
						stripe_len);
		}

		/* the address array filled in by btrfs_rmap_block() is ours */
		kfree(logical);
	}

	return 0;
}

static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
@@ -197,9 +220,7 @@ static int cache_block_group(struct btrfs_root *root,
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last = 0;
	u64 first_free;
	int found = 0;
	u64 last = block_group->key.objectid;

	if (!block_group)
		return 0;
@@ -220,23 +241,13 @@ static int cache_block_group(struct btrfs_root *root,
	 * skip the locking here
	 */
	path->skip_locking = 1;
	first_free = max_t(u64, block_group->key.objectid,
			   BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
	key.objectid = block_group->key.objectid;
	key.objectid = last;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;
	ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
	if (ret < 0)
		goto err;
	if (ret == 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid + key.offset > first_free)
			first_free = key.objectid + key.offset;
	}

	while(1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
@@ -258,11 +269,6 @@ static int cache_block_group(struct btrfs_root *root,
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			if (!found) {
				last = first_free;
				found = 1;
			}

			add_new_free_space(block_group, root->fs_info, last,
					   key.objectid);

@@ -272,13 +278,11 @@ static int cache_block_group(struct btrfs_root *root,
		path->slots[0]++;
	}

	if (!found)
		last = first_free;

	add_new_free_space(block_group, root->fs_info, last,
			   block_group->key.objectid +
			   block_group->key.offset);

	remove_sb_from_cache(root, block_group);
	block_group->cached = 1;
	ret = 0;
err:
@@ -1974,10 +1978,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
		if (alloc) {
			old_val += num_bytes;
			cache->space_info->bytes_used += num_bytes;
			if (cache->ro) {
			if (cache->ro)
				cache->space_info->bytes_readonly -= num_bytes;
				WARN_ON(1);
			}
			btrfs_set_block_group_used(&cache->item, old_val);
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
+0 −1
Original line number Diff line number Diff line
@@ -290,7 +290,6 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			ret = -EINVAL;
			goto out;
		}

		unlink_free_space(block_group, info);

		if (info->bytes == bytes) {
+1 −1
Original line number Diff line number Diff line
@@ -1038,7 +1038,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
	mutex_unlock(&root->fs_info->trans_mutex);
	ret = btrfs_write_and_wait_transaction(trans, root);
	BUG_ON(ret);
	write_ctree_super(trans, root);
	write_ctree_super(trans, root, 0);

	/*
	 * the super is written, we can safely allow the tree-loggers
Loading