Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit be744175 authored by Chris Mason's avatar Chris Mason Committed by David Woodhouse
Browse files

Btrfs: more allocator enhancements

parent be08c1b9
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -26,8 +26,10 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit)
		if (ret)
			return ret;
	}
	set_bit(bit_slot, bits + 1);
	return 0;
	ret = test_and_set_bit(bit_slot, bits + 1);
	if (ret < 0)
		ret = 1;
	return ret;
}

int test_radix_bit(struct radix_tree_root *radix, unsigned long bit)
+5 −2
Original line number Diff line number Diff line
@@ -257,6 +257,8 @@ struct btrfs_block_group_cache {
	struct btrfs_block_group_item item;
	u64 first_free;
	u64 last_alloc;
	u64 pinned;
	int data;
};

struct crypto_hash;
@@ -264,12 +266,12 @@ struct btrfs_fs_info {
	struct btrfs_root *extent_root;
	struct btrfs_root *tree_root;
	struct btrfs_root *dev_root;
	struct btrfs_block_group_cache *block_group_cache;
	struct radix_tree_root fs_roots_radix;
	struct radix_tree_root pending_del_radix;
	struct radix_tree_root pinned_radix;
	struct radix_tree_root dev_radix;
	struct radix_tree_root block_group_radix;
	struct radix_tree_root block_group_data_radix;

	u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3];
	int extent_tree_insert_nr;
@@ -1072,7 +1074,8 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh)
/* extent-tree.c */
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
						 *hint, int data);
						 *hint, u64 search_start,
						 int data);
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root);
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
+1 −1
Original line number Diff line number Diff line
@@ -554,6 +554,7 @@ struct btrfs_root *open_ctree(struct super_block *sb)
	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
	INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS);
	INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL);
	INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL);
	INIT_LIST_HEAD(&fs_info->trans_list);
	sb_set_blocksize(sb, 4096);
	fs_info->running_transaction = NULL;
@@ -582,7 +583,6 @@ struct btrfs_root *open_ctree(struct super_block *sb)
	}
	mutex_init(&fs_info->trans_mutex);
	mutex_init(&fs_info->fs_mutex);
	fs_info->block_group_cache = NULL;

	__setup_root(sb->s_blocksize, dev_root,
		     fs_info, BTRFS_DEV_TREE_OBJECTID);
+238 −79
Original line number Diff line number Diff line
@@ -12,36 +12,88 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root);

/*
 * Return the cached block group whose block range contains blocknr,
 * or NULL when neither radix tree holds such a group.
 *
 * info->block_group_radix is probed first, then
 * info->block_group_data_radix.  Each probe asks for a single entry
 * starting at index blocknr; the entry is accepted only if blocknr lies in
 * [key.objectid, key.objectid + key.offset).  A miss in both trees is
 * logged before NULL is returned.
 */
static struct btrfs_block_group_cache *lookup_block_group(struct
							  btrfs_fs_info *info,
							  u64 blocknr)
{
	struct radix_tree_root *trees[2];
	struct btrfs_block_group_cache *group;
	int nr_found;
	int i;

	trees[0] = &info->block_group_radix;
	trees[1] = &info->block_group_data_radix;

	for (i = 0; i < 2; i++) {
		nr_found = radix_tree_gang_lookup(trees[i], (void **)&group,
						  blocknr, 1);
		if (!nr_found)
			continue;
		/* the entry found may start after blocknr; verify the range */
		if (blocknr >= group->key.objectid &&
		    blocknr < group->key.objectid + group->key.offset)
			return group;
	}
printk("lookup_block_group fails for blocknr %Lu\n", blocknr);
	return NULL;
}

struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
						 *hint, int data)
						 *hint, u64 search_start,
						 int data)
{
	struct btrfs_block_group_cache *cache[8];
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	struct radix_tree_root *radix;
	u64 used;
	u64 last = 0;
	u64 hint_last;
	int i;
	int ret;
	int full_search = 0;
	if (!data && hint) {

	if (data)
		radix = &info->block_group_data_radix;
	else
		radix = &info->block_group_radix;

	if (search_start) {
		struct btrfs_block_group_cache *shint;
		shint = lookup_block_group(info, search_start);
		if (shint->data == data) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
			    (shint->key.offset * 8) / 10) {
				return shint;
			}
		}
	}
	if (hint && hint->data == data) {
		used = btrfs_block_group_used(&hint->item);
		if (used < (hint->key.offset * 2) / 3) {
		if (used + hint->pinned < (hint->key.offset * 8) / 10) {
			return hint;
		}
		radix_tree_tag_clear(&info->block_group_radix,
				     hint->key.objectid + hint->key.offset - 1,
		if (used >= (hint->key.offset * 8) / 10) {
			radix_tree_tag_clear(radix,
					     hint->key.objectid +
					     hint->key.offset - 1,
					     BTRFS_BLOCK_GROUP_AVAIL);
		}
		last = hint->key.offset * 2;
		if (hint->key.objectid >= last)
			last = max(search_start, hint->key.objectid - last);
		else
			last = hint->key.objectid + hint->key.offset;
		hint_last = last;
	} else {
		hint_last = 0;
		last = 0;
		hint_last = search_start;
		last = search_start;
	}
	while(1) {
		ret = radix_tree_gang_lookup_tag(&info->block_group_radix,
						 (void **)cache,
		ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
						 last, ARRAY_SIZE(cache),
						 BTRFS_BLOCK_GROUP_AVAIL);
		if (!ret)
@@ -49,65 +101,54 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
		for (i = 0; i < ret; i++) {
			last = cache[i]->key.objectid +
				cache[i]->key.offset;
			if (!full_search && !data &&
			   (cache[i]->key.objectid & cache[i]->key.offset))
				continue;
			if (!full_search && data &&
			   (cache[i]->key.objectid & cache[i]->key.offset) == 0)
				continue;
			used = btrfs_block_group_used(&cache[i]->item);
			if (used < (cache[i]->key.offset * 2) / 3) {
				info->block_group_cache = cache[i];
			if (used + cache[i]->pinned <
			    (cache[i]->key.offset * 8) / 10) {
				found_group = cache[i];
				goto found;
			}
			radix_tree_tag_clear(&info->block_group_radix,
			if (used >= (cache[i]->key.offset * 8) / 10) {
				radix_tree_tag_clear(radix,
						     cache[i]->key.objectid +
						     cache[i]->key.offset - 1,
						     BTRFS_BLOCK_GROUP_AVAIL);
			}
		}
	}
	last = hint_last;
again:
	while(1) {
		ret = radix_tree_gang_lookup(&info->block_group_radix,
						 (void **)cache,
		ret = radix_tree_gang_lookup(radix, (void **)cache,
					     last, ARRAY_SIZE(cache));
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			last = cache[i]->key.objectid +
				cache[i]->key.offset;
			if (!full_search && !data &&
			   (cache[i]->key.objectid & cache[i]->key.offset))
				continue;
			if (!full_search && data &&
			   (cache[i]->key.objectid & cache[i]->key.offset) == 0)
				continue;
			used = btrfs_block_group_used(&cache[i]->item);
			if (used < cache[i]->key.offset) {
				info->block_group_cache = cache[i];
			if (used + cache[i]->pinned < cache[i]->key.offset) {
				found_group = cache[i];
				goto found;
			}
			radix_tree_tag_clear(&info->block_group_radix,
			if (used >= cache[i]->key.offset) {
				radix_tree_tag_clear(radix,
						     cache[i]->key.objectid +
						     cache[i]->key.offset - 1,
						     BTRFS_BLOCK_GROUP_AVAIL);
			}
		}
	info->block_group_cache = NULL;
	}
	if (!full_search) {
		last = 0;
		last = search_start;
		full_search = 1;
		goto again;
	}
found:
	if (!found_group) {
		ret = radix_tree_gang_lookup(&info->block_group_radix,
		ret = radix_tree_gang_lookup(radix,
					     (void **)&found_group, 0, 1);
		BUG_ON(ret != 1);
	}
found:
	return found_group;
}

@@ -252,18 +293,20 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
		return ret;
	if (pending_ret)
		return pending_ret;
	if (cache->data)
		cache->last_alloc = cache->first_free;
	return 0;

}

int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct radix_tree_root *radix)
{
	struct btrfs_block_group_cache *cache[8];
	int ret;
	int err = 0;
	int werr = 0;
	struct radix_tree_root *radix = &root->fs_info->block_group_radix;
	int i;
	struct btrfs_path *path;

@@ -285,35 +328,74 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
						    path, cache[i]);
			if (err)
				werr = err;
			cache[i]->last_alloc = cache[i]->first_free;
		}
	}
	btrfs_free_path(path);
	return werr;
}

/*
 * Run write_dirty_block_radix() over both block group radix trees of
 * root->fs_info: block_group_radix first, then block_group_data_radix.
 *
 * Both trees are always processed, even if the first pass reports an
 * error.  Returns the first pass's result when it is non-zero, otherwise
 * the second pass's result (0 when both succeed).
 */
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	int first_err;
	int data_err;

	first_err = write_dirty_block_radix(trans, root,
					    &root->fs_info->block_group_radix);
	data_err = write_dirty_block_radix(trans, root,
				&root->fs_info->block_group_data_radix);

	/* an error from the first tree takes precedence */
	return first_err ? first_err : data_err;
}

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 blocknr, u64 num, int alloc)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	struct radix_tree_root *radix;
	u64 total = num;
	u64 old_val;
	u64 block_in_group;
	int ret;
	if (num != 1)
		radix = &info->block_group_data_radix;
	else
		radix = &info->block_group_radix;
	while(total) {
		ret = radix_tree_gang_lookup(&info->block_group_radix,
					     (void **)&cache, blocknr, 1);
		ret = radix_tree_gang_lookup(radix, (void **)&cache,
					     blocknr, 1);
		if (!ret) {
			printk(KERN_CRIT "blocknr %Lu lookup failed\n",
			       blocknr);
			return -1;
		}
		block_in_group = blocknr - cache->key.objectid;
		if (block_in_group > cache->key.offset || cache->key.objectid >
		    blocknr) {
			if (radix == &info->block_group_data_radix)
				radix = &info->block_group_radix;
			else
				radix = &info->block_group_data_radix;
			ret = radix_tree_gang_lookup(radix, (void **)&cache,
						     blocknr, 1);
			if (!ret) {
				printk(KERN_CRIT "blocknr %Lu lookup failed\n",
				       blocknr);
				return -1;
			}
			block_in_group = blocknr - cache->key.objectid;
			if (block_in_group > cache->key.offset ||
			    cache->key.objectid > blocknr) {
				BUG();
			}
		}
		WARN_ON(block_in_group > cache->key.offset);
		radix_tree_tag_set(&info->block_group_radix,
				   cache->key.objectid + cache->key.offset - 1,
		radix_tree_tag_set(radix, cache->key.objectid +
				   cache->key.offset - 1,
				   BTRFS_BLOCK_GROUP_DIRTY);

		old_val = btrfs_block_group_used(&cache->item);
@@ -346,6 +428,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
{
	unsigned long gang[8];
	struct inode *btree_inode = root->fs_info->btree_inode;
	struct btrfs_block_group_cache *block_group;
	u64 first = 0;
	int ret;
	int i;
@@ -360,6 +443,14 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
			first = gang[0];
		for (i = 0; i < ret; i++) {
			clear_radix_bit(pinned_radix, gang[i]);
			block_group = lookup_block_group(root->fs_info,
							 gang[i]);
			if (block_group) {
				WARN_ON(block_group->pinned == 0);
				block_group->pinned--;
				if (gang[i] < block_group->last_alloc)
					block_group->last_alloc = gang[i];
			}
			try_remove_page(btree_inode->i_mapping,
					gang[i] << (PAGE_CACHE_SHIFT -
						    btree_inode->i_blkbits));
@@ -420,10 +511,16 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
			btrfs_block_release(root, bh);
		}
		err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
		if (!err) {
			struct btrfs_block_group_cache *cache;
			cache = lookup_block_group(root->fs_info, blocknr);
			if (cache)
				cache->pinned++;
		}
	} else {
		err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
	}
	BUG_ON(err);
	BUG_ON(err < 0);
	return 0;
}

@@ -502,6 +599,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
	int i;
	struct radix_tree_root *pending_radix;
	struct radix_tree_root *pinned_radix;
	struct btrfs_block_group_cache *cache;

	pending_radix = &extent_root->fs_info->pending_del_radix;
	pinned_radix = &extent_root->fs_info->pinned_radix;
@@ -513,7 +611,17 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			break;
		for (i = 0; i < ret; i++) {
			wret = set_radix_bit(pinned_radix, gang[i]);
			BUG_ON(wret);
			if (wret == 0) {
				cache = lookup_block_group(extent_root->fs_info,
							   gang[i]);
				if (cache)
					cache->pinned++;
			}
			if (wret < 0) {
				printk(KERN_CRIT "set_radix_bit, err %d\n",
				       wret);
				BUG_ON(wret < 0);
			}
			wret = clear_radix_bit(pending_radix, gang[i]);
			BUG_ON(wret);
			wret = __free_extent(trans, extent_root,
@@ -563,6 +671,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
	int slot = 0;
	u64 last_block = 0;
	u64 test_block;
	u64 orig_search_start = search_start;
	int start_found;
	struct btrfs_leaf *l;
	struct btrfs_root * root = orig_root->fs_info->extent_root;
@@ -572,6 +681,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
	int fill_prealloc = 0;
	int level;
	struct btrfs_block_group_cache *block_group;
	int full_scan = 0;

	path = btrfs_alloc_path();
	ins->flags = 0;
@@ -583,10 +693,21 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		num_blocks = 1;
		total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
	}
	block_group = btrfs_find_block_group(root, trans->block_group, data);
	if (search_start) {
		block_group = lookup_block_group(info, search_start);
		block_group = btrfs_find_block_group(root, block_group,
						     search_start, data);
	} else {
		block_group = btrfs_find_block_group(root,
						     trans->block_group, 0,
						     data);
	}

check_failed:
	if (block_group->data != data)
		WARN_ON(1);
	if (block_group->last_alloc > search_start)
		search_start = block_group->last_alloc;
check_failed:
	btrfs_init_path(path);
	ins->objectid = search_start;
	ins->offset = 0;
@@ -639,6 +760,13 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		}
		start_found = 1;
		last_block = key.objectid + key.offset;
		if (last_block >= block_group->key.objectid +
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
				block_group->key.offset * 2;
			goto new_group;
		}
next:
		path->slots[0]++;
	}
@@ -650,16 +778,17 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
	btrfs_release_path(root, path);
	BUG_ON(ins->objectid < search_start);
	if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) {
		if (search_start == 0)
		if (full_scan)
			return -ENOSPC;
		search_start = 0;
		goto check_failed;
		search_start = orig_search_start;
		full_scan = 1;
		goto new_group;
	}
	for (test_block = ins->objectid;
	     test_block < ins->objectid + num_blocks; test_block++) {
		if (test_radix_bit(&info->pinned_radix, test_block)) {
			search_start = test_block + 1;
			goto check_failed;
			goto new_group;
		}
	}
	if (!fill_prealloc && info->extent_tree_insert_nr) {
@@ -670,7 +799,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		    ins->objectid <= last) {
			search_start = last + 1;
			WARN_ON(1);
			goto check_failed;
			goto new_group;
		}
	}
	if (!fill_prealloc && info->extent_tree_prealloc_nr) {
@@ -680,7 +809,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		    ins->objectid <= info->extent_tree_prealloc[0]) {
			search_start = info->extent_tree_prealloc[0] + 1;
			WARN_ON(1);
			goto check_failed;
			goto new_group;
		}
	}
	if (fill_prealloc) {
@@ -696,14 +825,12 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		}
		if (total_found < total_needed) {
			search_start = test_block;
			goto check_failed;
			goto new_group;
		}
		info->extent_tree_prealloc_nr = total_found;
	}
	ret = radix_tree_gang_lookup(&info->block_group_radix,
				     (void **)&block_group,
				     ins->objectid, 1);
	if (ret) {
	block_group = lookup_block_group(info, ins->objectid);
	if (block_group) {
		block_group->last_alloc = ins->objectid;
		if (!data)
			trans->block_group = block_group;
@@ -711,6 +838,18 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
	ins->offset = num_blocks;
	btrfs_free_path(path);
	return 0;

new_group:
	if (search_start >= btrfs_super_total_blocks(info->disk_super)) {
		search_start = orig_search_start;
		full_scan = 1;
	}
	block_group = lookup_block_group(info, search_start);
	if (!full_scan)
		block_group = btrfs_find_block_group(root, block_group,
						     search_start, data);
	goto check_failed;

error:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
@@ -794,7 +933,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
	struct buffer_head *buf;

	ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
				 1, 0, (unsigned long)-1, &ins, 0);
				 1, hint, (unsigned long)-1, &ins, 0);
	if (ret) {
		BUG();
		return NULL;
@@ -984,21 +1123,19 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
	return ret;
}

int btrfs_free_block_groups(struct btrfs_fs_info *info)
static int free_block_group_radix(struct radix_tree_root *radix)
{
	int ret;
	struct btrfs_block_group_cache *cache[8];
	int i;

	while(1) {
		ret = radix_tree_gang_lookup(&info->block_group_radix,
					     (void **)cache, 0,
		ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
					     ARRAY_SIZE(cache));
		if (!ret)
			break;
		for (i = 0; i < ret; i++) {
			radix_tree_delete(&info->block_group_radix,
					  cache[i]->key.objectid +
			radix_tree_delete(radix, cache[i]->key.objectid +
					  cache[i]->key.offset - 1);
			kfree(cache[i]);
		}
@@ -1006,6 +1143,20 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
	return 0;
}

/*
 * Run free_block_group_radix() on both block group radix trees of info:
 * block_group_radix first, then block_group_data_radix.
 *
 * Both trees are always processed.  Returns the first call's result when
 * it is non-zero, otherwise the second call's result (0 when both
 * succeed).
 */
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	int first_err = free_block_group_radix(&info->block_group_radix);
	int data_err = free_block_group_radix(&info->block_group_data_radix);

	/* an error from the first tree takes precedence */
	return first_err ? first_err : data_err;
}

int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
@@ -1013,13 +1164,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
	int err = 0;
	struct btrfs_block_group_item *bi;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	struct radix_tree_root *radix;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_leaf *leaf;
	u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize;
	u64 used;
	u64 nr = 0;

	root = root->fs_info->extent_root;
	root = info->extent_root;
	key.objectid = 0;
	key.offset = group_size_blocks;
	key.flags = 0;
@@ -1030,7 +1184,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
		return -ENOMEM;

	while(1) {
		ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
		ret = btrfs_search_slot(NULL, info->extent_root,
					&key, path, 0, 0);
		if (ret != 0) {
			err = ret;
@@ -1050,23 +1204,28 @@ int btrfs_read_block_groups(struct btrfs_root *root)
		memcpy(&cache->key, &found_key, sizeof(found_key));
		cache->last_alloc = cache->key.objectid;
		cache->first_free = cache->key.objectid;
		cache->pinned = 0;
		cache->data = (nr & 1);
		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
		ret = radix_tree_insert(&root->fs_info->block_group_radix,
					found_key.objectid +
		if (nr & 1)
			radix = &info->block_group_data_radix;
		else
			radix = &info->block_group_radix;
		ret = radix_tree_insert(radix, found_key.objectid +
					found_key.offset - 1,
					(void *)cache);
		BUG_ON(ret);
		used = btrfs_block_group_used(bi);
		if (used < (key.offset * 2) / 3) {
			radix_tree_tag_set(&root->fs_info->block_group_radix,
					   found_key.objectid +
		if (used < (key.offset * 8) / 10) {
			radix_tree_tag_set(radix, found_key.objectid +
					   found_key.offset - 1,
					   BTRFS_BLOCK_GROUP_AVAIL);
		}
		if (key.objectid >=
		    btrfs_super_total_blocks(root->fs_info->disk_super))
		    btrfs_super_total_blocks(info->disk_super))
			break;
		nr++;
	}

	btrfs_free_path(path);
+1 −1
Original line number Diff line number Diff line
@@ -811,7 +811,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
		return ERR_PTR(-ENOMEM);

	BTRFS_I(inode)->root = root;
	group = btrfs_find_block_group(root, group, 0);
	group = btrfs_find_block_group(root, group, 0, 0);
	BTRFS_I(inode)->block_group = group;

	inode->i_uid = current->fsuid;