Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f0486c68 authored by Yan, Zheng's avatar Yan, Zheng Committed by Chris Mason
Browse files

Btrfs: Introduce contexts for metadata reservation



Introducing metadata reseravtion contexts has two major advantages.
First, it makes metadata reseravtion more traceable. Second, it can
reclaim freed space and re-add them to the itself after transaction
committed.

Besides add btrfs_block_rsv structure and related helper functions,
This patch contains following changes:

Move code that decides if freed tree block should be pinned into
btrfs_free_tree_block().

Make space accounting more accurate, mainly for handling read only
block groups.

Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent 2ead6ae7
Loading
Loading
Loading
Loading
+63 −41
Original line number Diff line number Diff line
@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct extent_buffer *buf,
				       struct extent_buffer *cow)
				       struct extent_buffer *cow,
				       int *last_ref)
{
	u64 refs;
	u64 owner;
@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
			BUG_ON(ret);
		}
		clean_tree_block(trans, root, buf);
		*last_ref = 1;
	}
	return 0;
}
@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
	struct btrfs_disk_key disk_key;
	struct extent_buffer *cow;
	int level;
	int last_ref = 0;
	int unlock_orig = 0;
	u64 parent_start;

@@ -442,7 +445,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
			    (unsigned long)btrfs_header_fsid(cow),
			    BTRFS_FSID_SIZE);

	update_ref_for_cow(trans, root, buf, cow);
	update_ref_for_cow(trans, root, buf, cow, &last_ref);

	if (buf == root->node) {
		WARN_ON(parent && parent != buf);
@@ -457,8 +460,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
		extent_buffer_get(cow);
		spin_unlock(&root->node_lock);

		btrfs_free_tree_block(trans, root, buf->start, buf->len,
				parent_start, root->root_key.objectid, level);
		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
		free_extent_buffer(buf);
		add_root_to_dirty_list(root);
	} else {
@@ -473,8 +476,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
		btrfs_mark_buffer_dirty(parent);
		btrfs_free_tree_block(trans, root, buf->start, buf->len,
				parent_start, root->root_key.objectid, level);
		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
	}
	if (unlock_orig)
		btrfs_tree_unlock(buf);
@@ -949,6 +952,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
	return bin_search(eb, key, level, slot);
}

static void root_add_used(struct btrfs_root *root, u32 size)
{
	spin_lock(&root->accounting_lock);
	btrfs_set_root_used(&root->root_item,
			    btrfs_root_used(&root->root_item) + size);
	spin_unlock(&root->accounting_lock);
}

static void root_sub_used(struct btrfs_root *root, u32 size)
{
	spin_lock(&root->accounting_lock);
	btrfs_set_root_used(&root->root_item,
			    btrfs_root_used(&root->root_item) - size);
	spin_unlock(&root->accounting_lock);
}

/* given a node and slot number, this reads the blocks it points to.  The
 * extent buffer is returned with a reference taken (but unlocked).
 * NULL is returned on error.
@@ -1019,7 +1038,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
		btrfs_tree_lock(child);
		btrfs_set_lock_blocking(child);
		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
		BUG_ON(ret);
		if (ret) {
			btrfs_tree_unlock(child);
			free_extent_buffer(child);
			goto enospc;
		}

		spin_lock(&root->node_lock);
		root->node = child;
@@ -1034,11 +1057,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
		btrfs_tree_unlock(mid);
		/* once for the path */
		free_extent_buffer(mid);
		ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
					    0, root->root_key.objectid, level);

		root_sub_used(root, mid->len);
		btrfs_free_tree_block(trans, root, mid, 0, 1);
		/* once for the root ptr */
		free_extent_buffer(mid);
		return ret;
		return 0;
	}
	if (btrfs_header_nritems(mid) >
	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
@@ -1088,23 +1112,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
		if (wret < 0 && wret != -ENOSPC)
			ret = wret;
		if (btrfs_header_nritems(right) == 0) {
			u64 bytenr = right->start;
			u32 blocksize = right->len;

			clean_tree_block(trans, root, right);
			btrfs_tree_unlock(right);
			free_extent_buffer(right);
			right = NULL;
			wret = del_ptr(trans, root, path, level + 1, pslot +
				       1);
			if (wret)
				ret = wret;
			wret = btrfs_free_tree_block(trans, root,
						     bytenr, blocksize, 0,
						     root->root_key.objectid,
						     level);
			if (wret)
				ret = wret;
			root_sub_used(root, right->len);
			btrfs_free_tree_block(trans, root, right, 0, 1);
			free_extent_buffer(right);
			right = NULL;
		} else {
			struct btrfs_disk_key right_key;
			btrfs_node_key(right, &right_key, 0);
@@ -1136,21 +1153,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
		BUG_ON(wret == 1);
	}
	if (btrfs_header_nritems(mid) == 0) {
		/* we've managed to empty the middle node, drop it */
		u64 bytenr = mid->start;
		u32 blocksize = mid->len;

		clean_tree_block(trans, root, mid);
		btrfs_tree_unlock(mid);
		free_extent_buffer(mid);
		mid = NULL;
		wret = del_ptr(trans, root, path, level + 1, pslot);
		if (wret)
			ret = wret;
		wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
					 0, root->root_key.objectid, level);
		if (wret)
			ret = wret;
		root_sub_used(root, mid->len);
		btrfs_free_tree_block(trans, root, mid, 0, 1);
		free_extent_buffer(mid);
		mid = NULL;
	} else {
		/* update the parent key to reflect our changes */
		struct btrfs_disk_key mid_key;
@@ -1740,7 +1751,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
					      p->nodes[level + 1],
					      p->slots[level + 1], &b);
			if (err) {
				free_extent_buffer(b);
				ret = err;
				goto done;
			}
@@ -2076,6 +2086,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
	if (IS_ERR(c))
		return PTR_ERR(c);

	root_add_used(root, root->nodesize);

	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_nritems(c, 1);
	btrfs_set_header_level(c, level);
@@ -2134,6 +2146,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
	int nritems;

	BUG_ON(!path->nodes[level]);
	btrfs_assert_tree_locked(path->nodes[level]);
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
	BUG_ON(slot > nritems);
@@ -2202,6 +2215,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
	if (IS_ERR(split))
		return PTR_ERR(split);

	root_add_used(root, root->nodesize);

	memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(split, btrfs_header_level(c));
	btrfs_set_header_bytenr(split, split->start);
@@ -2415,6 +2430,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,

	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
	else
		clean_tree_block(trans, root, left);

	btrfs_mark_buffer_dirty(right);

	btrfs_item_key(right, &disk_key, 0);
@@ -2660,6 +2678,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
	btrfs_mark_buffer_dirty(left);
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);
	else
		clean_tree_block(trans, root, right);

	btrfs_item_key(right, &disk_key, 0);
	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
@@ -2669,8 +2689,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
		if (btrfs_header_nritems(path->nodes[0]) == 0)
			clean_tree_block(trans, root, path->nodes[0]);
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
@@ -2932,10 +2950,10 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
	right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
					root->root_key.objectid,
					&disk_key, 0, l->start, 0);
	if (IS_ERR(right)) {
		BUG_ON(1);
	if (IS_ERR(right))
		return PTR_ERR(right);
	}

	root_add_used(root, root->leafsize);

	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_bytenr(right, right->start);
@@ -3054,7 +3072,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,

	btrfs_set_path_blocking(path);
	ret = split_leaf(trans, root, &key, path, ins_len, 1);
	BUG_ON(ret);
	if (ret)
		goto err;

	path->keep_locks = 0;
	btrfs_unlock_up_safe(path, 1);
@@ -3796,9 +3815,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
	 */
	btrfs_unlock_up_safe(path, 0);

	ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
				    0, root->root_key.objectid, 0);
	return ret;
	root_sub_used(root, leaf->len);

	btrfs_free_tree_block(trans, root, leaf, 0, 1);
	return 0;
}
/*
 * delete the item at the leaf level in path.  If that empties
@@ -3865,6 +3885,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		if (leaf == root->node) {
			btrfs_set_header_level(leaf, 0);
		} else {
			btrfs_set_path_blocking(path);
			clean_tree_block(trans, root, leaf);
			ret = btrfs_del_leaf(trans, root, path, leaf);
			BUG_ON(ret);
		}
+62 −7
Original line number Diff line number Diff line
@@ -707,6 +707,20 @@ struct btrfs_space_info {
	atomic_t caching_threads;
};

struct btrfs_block_rsv {
	u64 size;
	u64 reserved;
	u64 freed[2];
	struct btrfs_space_info *space_info;
	struct list_head list;
	spinlock_t lock;
	atomic_t usage;
	unsigned int priority:8;
	unsigned int durable:1;
	unsigned int refill_used:1;
	unsigned int full:1;
};

/*
 * free clusters are used to claim free space in relatively large chunks,
 * allowing us to do less seeky writes.  They are used for all metadata
@@ -757,6 +771,7 @@ struct btrfs_block_group_cache {
	spinlock_t lock;
	u64 pinned;
	u64 reserved;
	u64 reserved_pinned;
	u64 bytes_super;
	u64 flags;
	u64 sectorsize;
@@ -822,6 +837,22 @@ struct btrfs_fs_info {
	/* logical->physical extent mapping */
	struct btrfs_mapping_tree mapping_tree;

	/* block reservation for extent, checksum and root tree */
	struct btrfs_block_rsv global_block_rsv;
	/* block reservation for delay allocation */
	struct btrfs_block_rsv delalloc_block_rsv;
	/* block reservation for metadata operations */
	struct btrfs_block_rsv trans_block_rsv;
	/* block reservation for chunk tree */
	struct btrfs_block_rsv chunk_block_rsv;

	struct btrfs_block_rsv empty_block_rsv;

	/* list of block reservations that cross multiple transactions */
	struct list_head durable_block_rsv_list;

	struct mutex durable_block_rsv_mutex;

	u64 generation;
	u64 last_trans_committed;

@@ -1008,6 +1039,9 @@ struct btrfs_root {
	struct completion kobj_unregister;
	struct mutex objectid_mutex;

	spinlock_t accounting_lock;
	struct btrfs_block_rsv *block_rsv;

	struct mutex log_mutex;
	wait_queue_head_t log_writer_wait;
	wait_queue_head_t log_commit_wait[2];
@@ -1980,10 +2014,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					u64 parent, u64 root_objectid,
					struct btrfs_disk_key *key, int level,
					u64 hint, u64 empty_size);
int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			  u64 bytenr, u32 blocksize,
			  u64 parent, u64 root_objectid, int level);
			   struct extent_buffer *buf,
			   u64 parent, int last_ref);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
					    struct btrfs_root *root,
					    u64 bytenr, u32 blocksize,
@@ -2037,9 +2071,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   u64 size);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start);
int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
				struct btrfs_block_group_cache *group);

u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
@@ -2058,6 +2089,30 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
				 u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
			      u64 bytes);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_root *root,
			  struct btrfs_block_rsv *rsv);
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
				 struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv,
			u64 num_bytes, int *retries);
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  struct btrfs_block_rsv *block_rsv,
			  u64 min_reserved, int min_factor);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
			    struct btrfs_block_rsv *dst_rsv,
			    u64 num_bytes);
void btrfs_block_rsv_release(struct btrfs_root *root,
			     struct btrfs_block_rsv *block_rsv,
			     u64 num_bytes);
int btrfs_set_block_group_ro(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		     int level, int *slot);
+9 −0
Original line number Diff line number Diff line
@@ -903,6 +903,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
	root->name = NULL;
	root->in_sysfs = 0;
	root->inode_tree = RB_ROOT;
	root->block_rsv = NULL;

	INIT_LIST_HEAD(&root->dirty_list);
	INIT_LIST_HEAD(&root->orphan_list);
@@ -910,6 +911,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
	spin_lock_init(&root->node_lock);
	spin_lock_init(&root->list_lock);
	spin_lock_init(&root->inode_lock);
	spin_lock_init(&root->accounting_lock);
	mutex_init(&root->objectid_mutex);
	mutex_init(&root->log_mutex);
	init_waitqueue_head(&root->log_writer_wait);
@@ -1620,6 +1622,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
	INIT_LIST_HEAD(&fs_info->space_info);
	btrfs_mapping_init(&fs_info->mapping_tree);
	btrfs_init_block_rsv(&fs_info->global_block_rsv);
	btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
	btrfs_init_block_rsv(&fs_info->trans_block_rsv);
	btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
	btrfs_init_block_rsv(&fs_info->empty_block_rsv);
	INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
	mutex_init(&fs_info->durable_block_rsv_mutex);
	atomic_set(&fs_info->nr_async_submits, 0);
	atomic_set(&fs_info->async_delalloc_pages, 0);
	atomic_set(&fs_info->async_submit_draining, 0);
+701 −328

File changed.

Preview size limit exceeded, changes collapsed.

+13 −3
Original line number Diff line number Diff line
@@ -3514,6 +3514,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
	struct btrfs_fs_info *fs_info = extent_root->fs_info;
	struct reloc_control *rc;
	int ret;
	int rw = 0;
	int err = 0;

	rc = kzalloc(sizeof(*rc), GFP_NOFS);
@@ -3524,15 +3525,22 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
	extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
	INIT_LIST_HEAD(&rc->reloc_roots);

	rc->extent_root = extent_root;
	rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
	BUG_ON(!rc->block_group);

	if (!rc->block_group->ro) {
		ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
		if (ret) {
			err = ret;
			goto out;
		}
		rw = 1;
	}

	btrfs_init_workers(&rc->workers, "relocate",
			   fs_info->thread_pool_size, NULL);

	rc->extent_root = extent_root;
	btrfs_prepare_block_group_relocation(extent_root, rc->block_group);

	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
	if (IS_ERR(rc->data_inode)) {
		err = PTR_ERR(rc->data_inode);
@@ -3597,6 +3605,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
	WARN_ON(rc->block_group->reserved > 0);
	WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
out:
	if (err && rw)
		btrfs_set_block_group_rw(extent_root, rc->block_group);
	iput(rc->data_inode);
	btrfs_stop_workers(&rc->workers);
	btrfs_put_block_group(rc->block_group);
Loading