Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ec44a35c authored by Chris Mason's avatar Chris Mason
Browse files

Btrfs: Add balance ioctl to restripe the chunks

parent 788f20eb
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1364,7 +1364,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       u64 root_objectid, u64 ref_generation,
		       u64 owner, u64 owner_offset,
		       u64 empty_size, u64 hint_byte,
		       u64 search_end, struct btrfs_key *ins, int data);
		       u64 search_end, struct btrfs_key *ins, u64 data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
+89 −17
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include "hash.h"
#include "crc32c.h"
#include "ctree.h"
@@ -1058,6 +1059,26 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
	}
}

static u64 reduce_alloc_profile(u64 flags)
{
	if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
	    (flags & (BTRFS_BLOCK_GROUP_RAID1 |
		      BTRFS_BLOCK_GROUP_RAID10)))
		flags &= ~BTRFS_BLOCK_GROUP_DUP;

	if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
	    (flags & BTRFS_BLOCK_GROUP_RAID10))
		flags &= ~BTRFS_BLOCK_GROUP_RAID1;

	if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
	    ((flags & BTRFS_BLOCK_GROUP_RAID1) |
	     (flags & BTRFS_BLOCK_GROUP_RAID10) |
	     (flags & BTRFS_BLOCK_GROUP_DUP)))
		flags &= ~BTRFS_BLOCK_GROUP_RAID0;
	return flags;
}


static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags)
@@ -1068,6 +1089,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
	u64 num_bytes;
	int ret;

	flags = reduce_alloc_profile(flags);

	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
		ret = update_space_info(extent_root->fs_info, flags,
@@ -1684,6 +1707,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
error:
	return ret;
}

/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
@@ -1697,7 +1721,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       u64 root_objectid, u64 ref_generation,
		       u64 owner, u64 owner_offset,
		       u64 empty_size, u64 hint_byte,
		       u64 search_end, struct btrfs_key *ins, int data)
		       u64 search_end, struct btrfs_key *ins, u64 data)
{
	int ret;
	int pending_ret;
@@ -1727,6 +1751,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
	}
again:
	data = reduce_alloc_profile(data);
	if (root->ref_cows) {
		if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
@@ -1752,6 +1777,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		num_bytes = max(num_bytes, min_alloc_size);
		goto again;
	}
	if (ret) {
		printk("allocation failed flags %Lu\n", data);
	}
	BUG_ON(ret);
	if (ret)
		return ret;
@@ -2274,8 +2302,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
{
	u64 page_start;
	u64 page_end;
	u64 delalloc_start;
	u64 existing_delalloc;
	unsigned long last_index;
	unsigned long i;
	struct page *page;
@@ -2293,7 +2319,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
	ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;

	file_ra_state_init(ra, inode->i_mapping);
	kfree(ra);

	for (; i <= last_index; i++) {
		if (total_read % ra_pages == 0) {
@@ -2313,26 +2338,30 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
				goto out_unlock;
			}
		}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
		ClearPageDirty(page);
#else
		cancel_dirty_page(page, PAGE_CACHE_SIZE);
#endif
		wait_on_page_writeback(page);
		set_page_extent_mapped(page);
		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;

		lock_extent(io_tree, page_start, page_end, GFP_NOFS);

		delalloc_start = page_start;
		existing_delalloc = count_range_bits(io_tree,
					     &delalloc_start, page_end,
					     PAGE_CACHE_SIZE, EXTENT_DELALLOC);

		set_page_dirty(page);
		set_extent_delalloc(io_tree, page_start,
				    page_end, GFP_NOFS);

		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
	}

out_unlock:
	kfree(ra);
	mutex_unlock(&inode->i_mutex);
	return 0;
}
@@ -2397,8 +2426,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root,
			goto out;
		}
		relocate_inode_pages(inode, ref_offset, extent_key->offset);
		/* FIXME, data=ordered will help get rid of this */
		filemap_fdatawrite(inode->i_mapping);
		iput(inode);
		mutex_lock(&extent_root->fs_info->fs_mutex);
	} else {
@@ -2486,6 +2513,47 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root,
	return ret;
}

static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
{
	u64 num_devices;
	u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
		BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;

	num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
	if (num_devices == 1) {
		stripped |= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* turn raid0 into single device chunks */
		if (flags & BTRFS_BLOCK_GROUP_RAID0)
			return stripped;

		/* turn mirroring into duplication */
		if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
			     BTRFS_BLOCK_GROUP_RAID10))
			return stripped | BTRFS_BLOCK_GROUP_DUP;
		return flags;
	} else {
		/* they already had raid on here, just return */
		if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
		    (flags & BTRFS_BLOCK_GROUP_RAID1)) {
		}
		if (flags & stripped)
			return flags;

		stripped |= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* switch duplicated blocks with raid1 */
		if (flags & BTRFS_BLOCK_GROUP_DUP)
			return stripped | BTRFS_BLOCK_GROUP_RAID1;

		/* turn single device chunks into raid0 */
		return stripped | BTRFS_BLOCK_GROUP_RAID0;
	}
	return flags;
}

int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
{
	struct btrfs_trans_handle *trans;
@@ -2494,6 +2562,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
	u64 cur_byte;
	u64 total_found;
	u64 shrink_last_byte;
	u64 new_alloc_flags;
	struct btrfs_block_group_cache *shrink_block_group;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_key key;
@@ -2511,17 +2580,20 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)

	shrink_block_group->space_info->total_bytes -=
		shrink_block_group->key.offset;
printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags);
	path = btrfs_alloc_path();
	root = root->fs_info->extent_root;
	path->reada = 2;

again:
	if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
		trans = btrfs_start_transaction(root, 1);
		new_alloc_flags = update_block_group_flags(root,
						   shrink_block_group->flags);
		do_chunk_alloc(trans, root->fs_info->extent_root,
			btrfs_block_group_used(&shrink_block_group->item) +
			2 * 1024 * 1024, shrink_block_group->flags);
			2 * 1024 * 1024, new_alloc_flags);
		btrfs_end_transaction(trans, root);
	}
	shrink_block_group->ro = 1;

	total_found = 0;
+11 −0
Original line number Diff line number Diff line
@@ -2864,6 +2864,15 @@ int btrfs_defrag_file(struct file *file) {
				goto out_unlock;
			}
		}

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
		ClearPageDirty(page);
#else
		cancel_dirty_page(page, PAGE_CACHE_SIZE);
#endif
		wait_on_page_writeback(page);
		set_page_extent_mapped(page);

		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;

@@ -3105,6 +3114,8 @@ long btrfs_ioctl(struct file *file, unsigned int
		return btrfs_ioctl_resize(root, (void __user *)arg);
	case BTRFS_IOC_ADD_DEV:
		return btrfs_ioctl_add_dev(root, (void __user *)arg);
	case BTRFS_IOC_BALANCE:
		return btrfs_balance(root->fs_info->dev_root);
	}

	return -ENOTTY;
+106 −9
Original line number Diff line number Diff line
@@ -869,6 +869,107 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
	return 0;
}

static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}


int btrfs_balance(struct btrfs_root *dev_root)
{
	int ret;
	struct list_head *cur;
	struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
	struct btrfs_device *device;
	u64 old_size;
	u64 size_to_free;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_chunk *chunk;
	struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_key found_key;


	dev_root = dev_root->fs_info->dev_root;

	mutex_lock(&dev_root->fs_info->fs_mutex);
	/* step one make some room on all the devices */
	list_for_each(cur, devices) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		old_size = device->total_bytes;
		size_to_free = div_factor(old_size, 1);
		size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
		if (device->total_bytes - device->bytes_used > size_to_free)
			continue;

		ret = btrfs_shrink_device(device, old_size - size_to_free);
		BUG_ON(ret);

		trans = btrfs_start_transaction(dev_root, 1);
		BUG_ON(!trans);

		ret = btrfs_grow_device(trans, device, old_size);
		BUG_ON(ret);

		btrfs_end_transaction(trans, dev_root);
	}

	/* step two, relocate all the chunks */
	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while(1) {
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0)
			goto error;

		/*
		 * this shouldn't happen, it means the last relocate
		 * failed
		 */
		if (ret == 0)
			break;

		ret = btrfs_previous_item(chunk_root, path, 0,
					  BTRFS_CHUNK_ITEM_KEY);
		if (ret) {
			break;
		}
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		if (found_key.objectid != key.objectid)
			break;
		chunk = btrfs_item_ptr(path->nodes[0],
				       path->slots[0],
				       struct btrfs_chunk);
		key.offset = found_key.offset;
		/* chunk zero is special */
		if (key.offset == 0)
			break;

		ret = btrfs_relocate_chunk(chunk_root,
					   chunk_root->root_key.objectid,
					   found_key.objectid,
					   found_key.offset);
		BUG_ON(ret);
		btrfs_release_path(chunk_root, path);
	}
	ret = 0;
error:
	btrfs_free_path(path);
	mutex_unlock(&dev_root->fs_info->fs_mutex);
	return ret;
}

/*
 * shrinking a device means finding all of the device extents past
 * the new size, and then following the back refs to the chunks.
@@ -985,15 +1086,6 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
	return 0;
}

static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}

static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
			       int sub_stripes)
{
@@ -1040,6 +1132,11 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
	int stripe_len = 64 * 1024;
	struct btrfs_key key;

	if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
	    (type & BTRFS_BLOCK_GROUP_DUP)) {
		WARN_ON(1);
		type &= ~BTRFS_BLOCK_GROUP_DUP;
	}
	dev_list = &extent_root->fs_info->fs_devices->alloc_list;
	if (list_empty(dev_list))
		return -ENOSPC;
+1 −0
Original line number Diff line number Diff line
@@ -134,4 +134,5 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
				       u8 *uuid);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_balance(struct btrfs_root *dev_root);
#endif