
Commit 75cb379d authored by Jeff Mahoney, committed by David Sterba

btrfs: defer adding raid type kobject until after chunk relocation



Any time the first block group of a new type is created, we add a new
kobject to sysfs to hold the attributes for that type.  Kobject-internal
allocations always use GFP_KERNEL, making them prone to fs-reclaim races.
While it appears as if this can occur any time a block group is created,
the only times the first block group of a new type can be created in
memory are at mount and when we create the first new block group during
raid conversion.
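
For context, the "first block group of a type" case is what link_block_group() detects before creating the kobject: the per-raid-type list inside the space_info goes from empty to non-empty. A minimal sketch of that check (hypothetical helper name; in the real code the check is inline in link_block_group()):

static bool is_first_block_group_of_type(struct btrfs_space_info *space_info,
					 struct btrfs_block_group_cache *cache,
					 int index)
{
	bool first;

	/* groups_sem serializes changes to the per-raid-type lists */
	down_write(&space_info->groups_sem);
	first = list_empty(&space_info->block_groups[index]);
	list_add_tail(&cache->list, &space_info->block_groups[index]);
	up_write(&space_info->groups_sem);

	return first;
}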

This patch adds a new list to track pending kobject additions and then
handles them after we do chunk relocation.  Between relocating the
target chunk (or forcing allocation of a new chunk in the case of data)
and removing the old chunk, we're in a safe place for fs-reclaim to
occur.  We're holding the volume mutex, which is already held across
page faults, and the delete_unused_bgs_mutex, which will only stall
the cleaner thread.
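
The result is a two-phase hand-off: link_block_group() only allocates the raid_kobject with GFP_NOFS and queues it, while btrfs_add_raid_kobjects() later splices the pending list and performs the GFP_KERNEL kobject_add() from a reclaim-safe caller. A stripped-down, self-contained sketch of the idiom (hypothetical names, not the btrfs code itself; in the patch the queue lives in fs_info as pending_raid_kobjs/pending_raid_kobjs_lock):

#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct pending_kobj {
	struct kobject kobj;
	struct list_head list;
};

static LIST_HEAD(pending_kobjs);
static DEFINE_SPINLOCK(pending_kobjs_lock);

/* Reclaim-unsafe context: only a GFP_NOFS allocation plus list work. */
static int queue_kobject(struct kobj_type *ktype)
{
	struct pending_kobj *p = kzalloc(sizeof(*p), GFP_NOFS);

	if (!p)
		return -ENOMEM;
	kobject_init(&p->kobj, ktype);	/* ktype->release must free p */
	spin_lock(&pending_kobjs_lock);
	list_add_tail(&p->list, &pending_kobjs);
	spin_unlock(&pending_kobjs_lock);
	return 0;
}

/* Reclaim-safe context: kobject_add() may allocate with GFP_KERNEL here. */
static void flush_pending_kobjects(struct kobject *parent)
{
	struct pending_kobj *p;
	LIST_HEAD(list);

	spin_lock(&pending_kobjs_lock);
	list_splice_init(&pending_kobjs, &list);
	spin_unlock(&pending_kobjs_lock);

	list_for_each_entry(p, &list, list) {
		if (kobject_add(&p->kobj, parent, "pending-%p", &p->kobj)) {
			/* entry may be freed by ->release; stop iterating */
			kobject_put(&p->kobj);
			break;
		}
	}
}

In the patch itself the flush runs from btrfs_read_block_groups() at mount and from the relocation paths in btrfs_relocate_chunk() and btrfs_may_alloc_data_chunk(), as shown in the hunks below.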

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent dc2d3005
fs/btrfs/ctree.h +5 −1
@@ -385,8 +385,9 @@ struct btrfs_dev_replace {

/* For raid type sysfs entries */
struct raid_kobject {
-	int raid_type;
+	u64 flags;
	struct kobject kobj;
+	struct list_head list;
};

struct btrfs_space_info {
@@ -940,6 +941,8 @@ struct btrfs_fs_info {
	u32 thread_pool_size;

	struct kobject *space_info_kobj;
+	struct list_head pending_raid_kobjs;
+	spinlock_t pending_raid_kobjs_lock; /* uncontended */

	u64 total_pinned;

@@ -2700,6 +2703,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   struct btrfs_fs_info *fs_info, u64 bytes_used,
			   u64 type, u64 chunk_offset, u64 size);
+void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
				struct btrfs_fs_info *fs_info,
				const u64 chunk_offset);
fs/btrfs/disk-io.c +2 −0
@@ -2431,6 +2431,8 @@ int open_ctree(struct super_block *sb,
	INIT_LIST_HEAD(&fs_info->delayed_iputs);
	INIT_LIST_HEAD(&fs_info->delalloc_roots);
	INIT_LIST_HEAD(&fs_info->caching_block_groups);
+	INIT_LIST_HEAD(&fs_info->pending_raid_kobjs);
+	spin_lock_init(&fs_info->pending_raid_kobjs_lock);
	spin_lock_init(&fs_info->delalloc_root_lock);
	spin_lock_init(&fs_info->trans_lock);
	spin_lock_init(&fs_info->fs_roots_radix_lock);
fs/btrfs/extent-tree.c +42 −18
@@ -9918,9 +9918,39 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
	return 0;
}

+/* link_block_group will queue up kobjects to add when we're reclaim-safe */
+void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_space_info *space_info;
+	struct raid_kobject *rkobj;
+	LIST_HEAD(list);
+	int index;
+	int ret = 0;
+
+	spin_lock(&fs_info->pending_raid_kobjs_lock);
+	list_splice_init(&fs_info->pending_raid_kobjs, &list);
+	spin_unlock(&fs_info->pending_raid_kobjs_lock);
+
+	list_for_each_entry(rkobj, &list, list) {
+		space_info = __find_space_info(fs_info, rkobj->flags);
+		index = btrfs_bg_flags_to_raid_index(rkobj->flags);
+
+		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
+				  "%s", get_raid_name(index));
+		if (ret) {
+			kobject_put(&rkobj->kobj);
+			break;
+		}
+	}
+	if (ret)
+		btrfs_warn(fs_info,
+			   "failed to add kobject for block cache, ignoring");
+}

static void link_block_group(struct btrfs_block_group_cache *cache)
{
	struct btrfs_space_info *space_info = cache->space_info;
+	struct btrfs_fs_info *fs_info = cache->fs_info;
	int index = btrfs_bg_flags_to_raid_index(cache->flags);
	bool first = false;

@@ -9931,27 +9961,20 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
	up_write(&space_info->groups_sem);

	if (first) {
-		struct raid_kobject *rkobj;
-		int ret;
-
-		rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
-		if (!rkobj)
-			goto out_err;
-		rkobj->raid_type = index;
-		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
-		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
-				  "%s", get_raid_name(index));
-		if (ret) {
-			kobject_put(&rkobj->kobj);
-			goto out_err;
+		struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
+		if (!rkobj) {
+			btrfs_warn(cache->fs_info,
+				"couldn't alloc memory for raid level kobject");
+			return;
		}
+		rkobj->flags = cache->flags;
+		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
+
+		spin_lock(&fs_info->pending_raid_kobjs_lock);
+		list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
+		spin_unlock(&fs_info->pending_raid_kobjs_lock);
		space_info->block_group_kobjs[index] = &rkobj->kobj;
	}
-
-	return;
-out_err:
-	btrfs_warn(cache->fs_info,
-		   "failed to add kobject for block cache, ignoring");
}

static struct btrfs_block_group_cache *
@@ -10167,6 +10190,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
			inc_block_group_ro(cache, 1);
	}

+	btrfs_add_raid_kobjects(info);
	init_global_block_rsv(info);
	ret = 0;
error:
fs/btrfs/sysfs.c +1 −1
@@ -272,7 +272,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
{
	struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
	struct btrfs_block_group_cache *block_group;
-	int index = to_raid_kobj(kobj)->raid_type;
+	int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
	u64 val = 0;

	down_read(&sinfo->groups_sem);
fs/btrfs/volumes.c +12 −0
@@ -3003,6 +3003,16 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
	if (ret)
		return ret;

+	/*
+	 * We add the kobjects here (and after forcing data chunk creation)
+	 * since relocation is the only place we'll create chunks of a new
+	 * type at runtime.  The only place where we'll remove the last
+	 * chunk of a type is the call immediately below this one.  Even
+	 * so, we're protected against races with the cleaner thread since
+	 * we're covered by the delete_unused_bgs_mutex.
+	 */
+	btrfs_add_raid_kobjects(fs_info);

	trans = btrfs_start_trans_remove_block_group(root->fs_info,
						     chunk_offset);
	if (IS_ERR(trans)) {
@@ -3130,6 +3140,8 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
			if (ret < 0)
				return ret;

+			btrfs_add_raid_kobjects(fs_info);

			return 1;
		}
	}