Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e4404d6e authored by Yan Zheng, committed by Chris Mason
Browse files

Btrfs: shared seed device



This patch makes it possible for a seed device to be shared by
multiple mounted file systems. The sharing is achieved
by cloning the seed device's btrfs_fs_devices structure.
Thank you,

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
parent d2fb3437
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1711,7 +1711,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	}

	mutex_lock(&fs_info->chunk_mutex);
	ret = btrfs_read_sys_array(tree_root, btrfs_super_bytenr(disk_super));
	ret = btrfs_read_sys_array(tree_root);
	mutex_unlock(&fs_info->chunk_mutex);
	if (ret) {
		printk("btrfs: failed to read the system array on %s\n",
+18 −19
Original line number Diff line number Diff line
@@ -218,7 +218,7 @@ static int cache_block_group(struct btrfs_root *root,
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last = block_group->key.objectid;
	u64 last;

	if (!block_group)
		return 0;
@@ -239,7 +239,8 @@ static int cache_block_group(struct btrfs_root *root,
	 * skip the locking here
	 */
	path->skip_locking = 1;
	key.objectid = max_t(u64, last, BTRFS_SUPER_INFO_OFFSET);
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
	key.objectid = last;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -5335,8 +5336,20 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root,
			prev_block = block_start;
		}

		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		    pass >= 2) {
		btrfs_record_root_in_trans(found_root);
		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/*
			 * try to update data extent references while
			 * keeping metadata shared between snapshots.
			 */
			if (pass == 1) {
				ret = relocate_one_path(trans, found_root,
						path, &first_key, ref_path,
						group, reloc_inode);
				if (ret < 0)
					goto out;
				continue;
			}
			/*
			 * use fallback method to process the remaining
			 * references.
@@ -5359,23 +5372,9 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root,
						path, extent_key,
						&first_key, ref_path,
						new_extents, nr_extents);
			if (ret < 0)
				goto out;
			continue;
		}

		btrfs_record_root_in_trans(found_root);
		if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
		} else {
			ret = relocate_tree_block(trans, found_root, path,
						  &first_key, ref_path);
		} else {
			/*
			 * try to update data extent references while
			 * keeping metadata shared between snapshots.
			 */
			ret = relocate_one_path(trans, found_root, path,
						&first_key, ref_path,
						group, reloc_inode);
		}
		if (ret < 0)
			goto out;
+5 −3
Original line number Diff line number Diff line
@@ -58,14 +58,15 @@ static struct super_operations btrfs_super_ops;
static void btrfs_put_super (struct super_block * sb)
{
	struct btrfs_root *root = btrfs_sb(sb);
	struct btrfs_fs_info *fs = root->fs_info;
	int ret;

	ret = close_ctree(root);
	if (ret) {
		printk("close ctree returns %d\n", ret);
	}
	btrfs_sysfs_del_super(fs);
#if 0
	btrfs_sysfs_del_super(root->fs_info);
#endif
	sb->s_fs_info = NULL;
}

@@ -349,11 +350,12 @@ static int btrfs_fill_super(struct super_block * sb,
		err = -ENOMEM;
		goto fail_close;
	}

#if 0
	/* this does the super kobj at the same time */
	err = btrfs_sysfs_add_super(tree_root->fs_info);
	if (err)
		goto fail_close;
#endif

	sb->s_root = root_dentry;

+131 −109
Original line number Diff line number Diff line
@@ -47,7 +47,6 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
				struct btrfs_device *device);
static int btrfs_relocate_sys_chunks(struct btrfs_root *root);


#define map_lookup_size(n) (sizeof(struct map_lookup) + \
			    (sizeof(struct btrfs_bio_stripe) * (n)))

@@ -74,34 +73,29 @@ static void unlock_chunks(struct btrfs_root *root)
	mutex_unlock(&root->fs_info->chunk_mutex);
}

/*
 * Release a btrfs_fs_devices structure and every btrfs_device still
 * linked on its ->devices list.
 *
 * Each device is unlinked from the list, then its name string and the
 * device structure itself are freed; finally @fs_devices is freed.
 * A warning is emitted if @fs_devices->opened is still non-zero.
 */
static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *dev;

	WARN_ON(fs_devices->opened);

	for (;;) {
		if (list_empty(&fs_devices->devices))
			break;

		/* always take the current head of the list */
		dev = list_entry(fs_devices->devices.next,
				 struct btrfs_device, dev_list);
		list_del(&dev->dev_list);
		kfree(dev->name);
		kfree(dev);
	}

	kfree(fs_devices);
}

int btrfs_cleanup_fs_uuids(void)
{
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_device *dev;

	while (!list_empty(&fs_uuids)) {
		fs_devices = list_entry(fs_uuids.next,
					struct btrfs_fs_devices, list);
		list_del(&fs_devices->list);
		while(!list_empty(&fs_devices->devices)) {
			dev = list_entry(fs_devices->devices.next,
					 struct btrfs_device, dev_list);
			if (dev->bdev) {
				close_bdev_exclusive(dev->bdev, dev->mode);
				fs_devices->open_devices--;
			}
			fs_devices->num_devices--;
			if (dev->writeable)
				fs_devices->rw_devices--;
			list_del(&dev->dev_list);
			list_del(&dev->dev_alloc_list);
			kfree(dev->name);
			kfree(dev);
		}
		WARN_ON(fs_devices->num_devices);
		WARN_ON(fs_devices->open_devices);
		WARN_ON(fs_devices->rw_devices);
		kfree(fs_devices);
		free_fs_devices(fs_devices);
	}
	return 0;
}
@@ -304,12 +298,55 @@ static noinline int device_list_add(const char *path,
	return 0;
}

/*
 * Create a private copy of @orig.
 *
 * The new btrfs_fs_devices gets @orig's latest_devid, latest_trans and
 * fsid.  For every device on @orig->devices a new btrfs_device is
 * allocated carrying a duplicate of the name plus the same devid and
 * uuid; it is linked onto the clone's ->devices list and counted in
 * ->num_devices.  All other fields of the clone and of its devices are
 * left as zeroed by kzalloc() or as initialised below.
 *
 * Returns the new structure, or ERR_PTR(-ENOMEM) if any allocation
 * fails (everything allocated so far is released in that case).
 */
static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
{
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_device *device;
	struct btrfs_device *orig_dev;

	fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
	if (!fs_devices)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&fs_devices->devices);
	INIT_LIST_HEAD(&fs_devices->alloc_list);
	INIT_LIST_HEAD(&fs_devices->list);
	fs_devices->latest_devid = orig->latest_devid;
	fs_devices->latest_trans = orig->latest_trans;
	memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));

	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device)
			goto error;

		device->name = kstrdup(orig_dev->name, GFP_NOFS);
		if (!device->name) {
			/*
			 * The device is not on fs_devices->devices yet, so
			 * free_fs_devices() would not find it; free it here
			 * to avoid leaking it.
			 */
			kfree(device);
			goto error;
		}

		device->devid = orig_dev->devid;
		device->work.func = pending_bios_fn;
		memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
		device->barriers = 1;
		spin_lock_init(&device->io_lock);
		INIT_LIST_HEAD(&device->dev_list);
		INIT_LIST_HEAD(&device->dev_alloc_list);

		list_add(&device->dev_list, &fs_devices->devices);
		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	}
	return fs_devices;
error:
	/* frees the clone and every device already linked onto it */
	free_fs_devices(fs_devices);
	return ERR_PTR(-ENOMEM);
}

int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
{
	struct list_head *tmp;
	struct list_head *cur;
	struct btrfs_device *device;
	int seed_devices = 0;

	mutex_lock(&uuid_mutex);
again:
@@ -328,17 +365,14 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
			device->writeable = 0;
			fs_devices->rw_devices--;
		}
		if (!seed_devices) {
		list_del_init(&device->dev_list);
		fs_devices->num_devices--;
		kfree(device->name);
		kfree(device);
	}
	}

	if (fs_devices->seed) {
		fs_devices = fs_devices->seed;
		seed_devices = 1;
		goto again;
	}

@@ -348,10 +382,9 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)

static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_fs_devices *seed_devices;
	struct list_head *cur;
	struct btrfs_device *device;
again:

	if (--fs_devices->opened > 0)
		return 0;

@@ -370,30 +403,37 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
		device->writeable = 0;
		device->in_fs_metadata = 0;
	}
	WARN_ON(fs_devices->open_devices);
	WARN_ON(fs_devices->rw_devices);
	fs_devices->opened = 0;
	fs_devices->seeding = 0;
	fs_devices->sprouted = 0;

	seed_devices = fs_devices->seed;
	fs_devices->seed = NULL;
	if (seed_devices) {
		fs_devices = seed_devices;
		goto again;
	}
	return 0;
}

/*
 * Close @fs_devices and, once its open count has dropped to zero, tear
 * down the chain of fs_devices structures reachable through ->seed.
 *
 * The close of @fs_devices and the detaching of its ->seed pointer are
 * done with uuid_mutex held.  Each detached seed structure is then
 * closed with __btrfs_close_devices() and released with
 * free_fs_devices() after the mutex has been dropped.
 *
 * Returns the result of __btrfs_close_devices() on @fs_devices.
 */
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_fs_devices *seed_chain = NULL;
	struct btrfs_fs_devices *cur;
	int ret;

	mutex_lock(&uuid_mutex);
	ret = __btrfs_close_devices(fs_devices);
	if (fs_devices->opened == 0) {
		/* last user is gone: take ownership of the seed chain */
		seed_chain = fs_devices->seed;
		fs_devices->seed = NULL;
	}
	mutex_unlock(&uuid_mutex);

	for (cur = seed_chain; cur; cur = seed_chain) {
		/* fetch the successor before cur is freed */
		seed_chain = cur->seed;
		__btrfs_close_devices(cur);
		free_fs_devices(cur);
	}
	return ret;
}

int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
				fmode_t flags, void *holder)
{
	struct block_device *bdev;
@@ -490,12 +530,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,

	mutex_lock(&uuid_mutex);
	if (fs_devices->opened) {
		if (fs_devices->sprouted) {
			ret = -EBUSY;
		} else {
		fs_devices->opened++;
		ret = 0;
		}
	} else {
		ret = __btrfs_open_devices(fs_devices, flags, holder);
	}
@@ -1043,12 +1079,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
		goto error_brelse;

	device->in_fs_metadata = 0;
	if (device->fs_devices == root->fs_info->fs_devices) {
	list_del_init(&device->dev_list);
		root->fs_info->fs_devices->num_devices--;
		if (device->bdev)
			device->fs_devices->open_devices--;
	}
	device->fs_devices->num_devices--;

	next_device = list_entry(root->fs_info->fs_devices->devices.next,
				 struct btrfs_device, dev_list);
@@ -1057,20 +1089,15 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
	if (device->bdev == root->fs_info->fs_devices->latest_bdev)
		root->fs_info->fs_devices->latest_bdev = next_device->bdev;

	num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
	btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);

	if (device->fs_devices != root->fs_info->fs_devices) {
		BUG_ON(device->writeable);
		brelse(bh);
		if (bdev)
			close_bdev_exclusive(bdev, FMODE_READ);

	if (device->bdev) {
		close_bdev_exclusive(device->bdev, device->mode);
		device->bdev = NULL;
		device->fs_devices->open_devices--;
	}

	num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
	btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);

	if (device->fs_devices->open_devices == 0) {
		struct btrfs_fs_devices *fs_devices;
		fs_devices = root->fs_info->fs_devices;
@@ -1082,9 +1109,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
		fs_devices->seed = device->fs_devices->seed;
		device->fs_devices->seed = NULL;
		__btrfs_close_devices(device->fs_devices);
		}
		ret = 0;
		goto out;
		free_fs_devices(device->fs_devices);
	}

	/*
@@ -1099,20 +1124,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
		set_buffer_dirty(bh);
		sync_dirty_buffer(bh);
	}
	brelse(bh);

	if (device->bdev) {
		/* one close for the device struct or super_block */
		close_bdev_exclusive(device->bdev, device->mode);
	}
	if (bdev) {
		/* one close for us */
		close_bdev_exclusive(bdev, FMODE_READ);
	}
	kfree(device->name);
	kfree(device);
	ret = 0;
	goto out;

error_brelse:
	brelse(bh);
@@ -1133,34 +1148,41 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
{
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
	struct btrfs_fs_devices *old_devices;
	struct btrfs_fs_devices *seed_devices;
	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
	struct btrfs_device *device;
	u64 super_flags;

	BUG_ON(!mutex_is_locked(&uuid_mutex));
	if (!fs_devices->seeding || fs_devices->opened != 1)
	if (!fs_devices->seeding)
		return -EINVAL;

	old_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
	if (!old_devices)
	seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
	if (!seed_devices)
		return -ENOMEM;

	memcpy(old_devices, fs_devices, sizeof(*old_devices));
	old_devices->opened = 1;
	old_devices->sprouted = 1;
	INIT_LIST_HEAD(&old_devices->devices);
	INIT_LIST_HEAD(&old_devices->alloc_list);
	list_splice_init(&fs_devices->devices, &old_devices->devices);
	list_splice_init(&fs_devices->alloc_list, &old_devices->alloc_list);
	list_for_each_entry(device, &old_devices->devices, dev_list) {
		device->fs_devices = old_devices;
	old_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(old_devices)) {
		kfree(seed_devices);
		return PTR_ERR(old_devices);
	}

	list_add(&old_devices->list, &fs_uuids);

	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
	seed_devices->opened = 1;
	INIT_LIST_HEAD(&seed_devices->devices);
	INIT_LIST_HEAD(&seed_devices->alloc_list);
	list_splice_init(&fs_devices->devices, &seed_devices->devices);
	list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
	list_for_each_entry(device, &seed_devices->devices, dev_list) {
		device->fs_devices = seed_devices;
	}

	fs_devices->seeding = 0;
	fs_devices->num_devices = 0;
	fs_devices->open_devices = 0;
	fs_devices->seed = old_devices;
	fs_devices->seed = seed_devices;

	generate_random_uuid(fs_devices->fsid);
	memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
@@ -2642,7 +2664,6 @@ int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
				 NULL, 0, page);
}


static void end_bio_multi_stripe(struct bio *bio, int err)
{
	struct btrfs_multi_bio *multi = bio->bi_private;
@@ -2840,6 +2861,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
	device->dev_root = root->fs_info->dev_root;
	device->devid = devid;
	device->work.func = pending_bios_fn;
	device->fs_devices = fs_devices;
	fs_devices->num_devices++;
	spin_lock_init(&device->io_lock);
	INIT_LIST_HEAD(&device->dev_alloc_list);
@@ -2980,8 +3002,10 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
		ret = -ENOENT;
		goto out;
	}
	if (fs_devices->opened) {
		ret = -EBUSY;

	fs_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(fs_devices)) {
		ret = PTR_ERR(fs_devices);
		goto out;
	}

@@ -2992,13 +3016,13 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)

	if (!fs_devices->seeding) {
		__btrfs_close_devices(fs_devices);
		free_fs_devices(fs_devices);
		ret = -EINVAL;
		goto out;
	}

	fs_devices->seed = root->fs_info->fs_devices->seed;
	root->fs_info->fs_devices->seed = fs_devices;
	fs_devices->sprouted = 1;
out:
	mutex_unlock(&uuid_mutex);
	return ret;
@@ -3011,7 +3035,6 @@ static int read_one_dev(struct btrfs_root *root,
	struct btrfs_device *device;
	u64 devid;
	int ret;
	int seed_devices = 0;
	u8 fs_uuid[BTRFS_UUID_SIZE];
	u8 dev_uuid[BTRFS_UUID_SIZE];

@@ -3025,14 +3048,13 @@ static int read_one_dev(struct btrfs_root *root,

	if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
		ret = open_seed_devices(root, fs_uuid);
		if (ret)
		if (ret && !btrfs_test_opt(root, DEGRADED))
			return ret;
		seed_devices = 1;
	}

	device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
	if (!device || !device->bdev) {
		if (!btrfs_test_opt(root, DEGRADED) || seed_devices)
		if (!btrfs_test_opt(root, DEGRADED))
			return -EIO;

		if (!device) {
@@ -3074,7 +3096,7 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
	return read_one_dev(root, buf, dev_item);
}

int btrfs_read_sys_array(struct btrfs_root *root, u64 sb_bytenr)
int btrfs_read_sys_array(struct btrfs_root *root)
{
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	struct extent_buffer *sb;
@@ -3089,7 +3111,7 @@ int btrfs_read_sys_array(struct btrfs_root *root, u64 sb_bytenr)
	u32 cur;
	struct btrfs_key key;

	sb = btrfs_find_create_tree_block(root, sb_bytenr,
	sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
					  BTRFS_SUPER_INFO_SIZE);
	if (!sb)
		return -ENOMEM;
+1 −2
Original line number Diff line number Diff line
@@ -93,7 +93,6 @@ struct btrfs_fs_devices {

	struct btrfs_fs_devices *seed;
	int seeding;
	int sprouted;

	int opened;
};
@@ -127,7 +126,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
		     u64 chunk_start, u64 physical, u64 devid,
		     u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_root *root, u64 sb_bytenr);
int btrfs_read_sys_array(struct btrfs_root *root);
int btrfs_read_chunk_tree(struct btrfs_root *root);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
		      struct btrfs_root *extent_root, u64 type);