Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 27263e28 authored by Chris Mason's avatar Chris Mason
Browse files

Merge branch 'restriper' of git://github.com/idryomov/btrfs-unstable into integration

parents 64e05503 19a39dce
Loading
Loading
Loading
Loading
+193 −12
Original line number Diff line number Diff line
@@ -86,6 +86,9 @@ struct btrfs_ordered_sum;
/* holds checksums of all the data extents */
#define BTRFS_CSUM_TREE_OBJECTID 7ULL

/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL

/* orphan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL

@@ -692,6 +695,54 @@ struct btrfs_root_ref {
	__le16 name_len;
} __attribute__ ((__packed__));

/*
 * On-disk form of one set of balance filters plus conversion target.
 * Every field is stored little-endian (__le64) and the structure is
 * packed, so its layout is fixed: 9 named fields followed by 8 reserved
 * 64-bit words.
 */
struct btrfs_disk_balance_args {
	/*
	 * profiles to operate on, single is denoted by
	 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
	 */
	__le64 profiles;

	/* usage filter */
	__le64 usage;

	/* devid filter */
	__le64 devid;

	/* devid subset filter [pstart..pend) */
	__le64 pstart;
	__le64 pend;

	/* btrfs virtual address space subset filter [vstart..vend) */
	__le64 vstart;
	__le64 vend;

	/*
	 * profile to convert to, single is denoted by
	 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
	 */
	__le64 target;

	/* BTRFS_BALANCE_ARGS_* */
	__le64 flags;

	/* reserved space; btrfs_cpu_balance_args_to_disk() leaves it zeroed */
	__le64 unused[8];
} __attribute__ ((__packed__));

/*
 * store balance parameters to disk so that balance can be properly
 * resumed after crash or unmount
 *
 * The item carries one global flags word followed by a separate
 * btrfs_disk_balance_args for each block group type (data, metadata,
 * system) and a few reserved words.  The structure is packed and all
 * fields are little-endian.
 */
struct btrfs_balance_item {
	/* BTRFS_BALANCE_* */
	__le64 flags;

	/* per-type filters/conversion arguments, in on-disk byte order */
	struct btrfs_disk_balance_args data;
	struct btrfs_disk_balance_args meta;
	struct btrfs_disk_balance_args sys;

	/* reserved space */
	__le64 unused[4];
} __attribute__ ((__packed__));

#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
@@ -751,15 +802,33 @@ struct btrfs_csum_item {
} __attribute__ ((__packed__));

/* different types of block groups (and chunks) */
#define BTRFS_BLOCK_GROUP_DATA     (1 << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM   (1 << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
#define BTRFS_BLOCK_GROUP_RAID0    (1 << 3)
#define BTRFS_BLOCK_GROUP_RAID1    (1 << 4)
#define BTRFS_BLOCK_GROUP_DUP	   (1 << 5)
#define BTRFS_BLOCK_GROUP_RAID10   (1 << 6)
#define BTRFS_BLOCK_GROUP_DATA		(1ULL << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM	(1ULL << 1)
#define BTRFS_BLOCK_GROUP_METADATA	(1ULL << 2)
#define BTRFS_BLOCK_GROUP_RAID0		(1ULL << 3)
#define BTRFS_BLOCK_GROUP_RAID1		(1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP		(1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10	(1ULL << 6)
#define BTRFS_BLOCK_GROUP_RESERVED	BTRFS_AVAIL_ALLOC_BIT_SINGLE
#define BTRFS_NR_RAID_TYPES		5

#define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
					 BTRFS_BLOCK_GROUP_SYSTEM |  \
					 BTRFS_BLOCK_GROUP_METADATA)

#define BTRFS_BLOCK_GROUP_PROFILE_MASK	(BTRFS_BLOCK_GROUP_RAID0 |   \
					 BTRFS_BLOCK_GROUP_RAID1 |   \
					 BTRFS_BLOCK_GROUP_DUP |     \
					 BTRFS_BLOCK_GROUP_RAID10)
/*
 * We need a bit for restriper to be able to tell when chunks of type
 * SINGLE are available.  This "extended" profile format is used in
 * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
 * (on-disk).  The corresponding on-disk bit in chunk.type is reserved
 * to avoid remappings between two formats in future.
 */
#define BTRFS_AVAIL_ALLOC_BIT_SINGLE	(1ULL << 48)

struct btrfs_block_group_item {
	__le64 used;
	__le64 chunk_objectid;
@@ -916,6 +985,7 @@ struct btrfs_block_group_cache {
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
struct btrfs_balance_control;
struct btrfs_delayed_root;
struct btrfs_fs_info {
	u8 fsid[BTRFS_FSID_SIZE];
@@ -1132,12 +1202,23 @@ struct btrfs_fs_info {
	spinlock_t ref_cache_lock;
	u64 total_ref_cache_size;

	/*
	 * these three are in extended format (availability of single
	 * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
	 * types are denoted by corresponding BTRFS_BLOCK_GROUP_* bits)
	 */
	u64 avail_data_alloc_bits;
	u64 avail_metadata_alloc_bits;
	u64 avail_system_alloc_bits;
	u64 data_alloc_profile;
	u64 metadata_alloc_profile;
	u64 system_alloc_profile;

	/* restriper state */
	spinlock_t balance_lock;
	struct mutex balance_mutex;
	atomic_t balance_running;
	atomic_t balance_pause_req;
	atomic_t balance_cancel_req;
	struct btrfs_balance_control *balance_ctl;
	wait_queue_head_t balance_wait_q;

	unsigned data_chunk_allocations;
	unsigned metadata_ratio;
@@ -1383,6 +1464,8 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_DEV_ITEM_KEY	216
#define BTRFS_CHUNK_ITEM_KEY	228

#define BTRFS_BALANCE_ITEM_KEY	248

/*
 * string items are for debugging.  They just store a short string of
 * data in the FS
@@ -1413,6 +1496,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_AUTO_DEFRAG		(1 << 16)
#define BTRFS_MOUNT_INODE_MAP_CACHE	(1 << 17)
#define BTRFS_MOUNT_RECOVERY		(1 << 18)
#define BTRFS_MOUNT_SKIP_BALANCE	(1 << 19)

#define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
@@ -2077,8 +2161,86 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
		   num_devices, 64);

/* struct btrfs_super_block */
/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);

/*
 * Read the 'data' member of the balance item @bi, located in extent
 * buffer @eb, into @ba.  The work is done by the read_eb_member() macro
 * (defined elsewhere; presumably a raw copy out of the extent buffer),
 * so @ba holds on-disk (__le64) values afterwards.
 */
static inline void btrfs_balance_data(struct extent_buffer *eb,
				      struct btrfs_balance_item *bi,
				      struct btrfs_disk_balance_args *ba)
{
	read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}

/*
 * Store @ba as the 'data' member of the balance item @bi located in
 * extent buffer @eb.  The work is done by the write_eb_member() macro
 * (defined elsewhere; presumably a raw copy into the extent buffer), so
 * @ba must already be in on-disk (__le64) form.
 */
static inline void btrfs_set_balance_data(struct extent_buffer *eb,
					  struct btrfs_balance_item *bi,
					  struct btrfs_disk_balance_args *ba)
{
	write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}

/*
 * Read the 'meta' member of the balance item @bi, located in extent
 * buffer @eb, into @ba.  The work is done by the read_eb_member() macro
 * (defined elsewhere; presumably a raw copy out of the extent buffer),
 * so @ba holds on-disk (__le64) values afterwards.
 */
static inline void btrfs_balance_meta(struct extent_buffer *eb,
				      struct btrfs_balance_item *bi,
				      struct btrfs_disk_balance_args *ba)
{
	read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}

/*
 * Store @ba as the 'meta' member of the balance item @bi located in
 * extent buffer @eb.  The work is done by the write_eb_member() macro
 * (defined elsewhere; presumably a raw copy into the extent buffer), so
 * @ba must already be in on-disk (__le64) form.
 */
static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
					  struct btrfs_balance_item *bi,
					  struct btrfs_disk_balance_args *ba)
{
	write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}

/*
 * Read the 'sys' member of the balance item @bi, located in extent
 * buffer @eb, into @ba.  The work is done by the read_eb_member() macro
 * (defined elsewhere; presumably a raw copy out of the extent buffer),
 * so @ba holds on-disk (__le64) values afterwards.
 */
static inline void btrfs_balance_sys(struct extent_buffer *eb,
				     struct btrfs_balance_item *bi,
				     struct btrfs_disk_balance_args *ba)
{
	read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}

/*
 * Store @ba as the 'sys' member of the balance item @bi located in
 * extent buffer @eb.  The work is done by the write_eb_member() macro
 * (defined elsewhere; presumably a raw copy into the extent buffer), so
 * @ba must already be in on-disk (__le64) form.
 */
static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
					 struct btrfs_balance_item *bi,
					 struct btrfs_disk_balance_args *ba)
{
	write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}

static inline void
btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
			       struct btrfs_disk_balance_args *disk)
{
	memset(cpu, 0, sizeof(*cpu));

	cpu->profiles = le64_to_cpu(disk->profiles);
	cpu->usage = le64_to_cpu(disk->usage);
	cpu->devid = le64_to_cpu(disk->devid);
	cpu->pstart = le64_to_cpu(disk->pstart);
	cpu->pend = le64_to_cpu(disk->pend);
	cpu->vstart = le64_to_cpu(disk->vstart);
	cpu->vend = le64_to_cpu(disk->vend);
	cpu->target = le64_to_cpu(disk->target);
	cpu->flags = le64_to_cpu(disk->flags);
}

static inline void
btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
			       struct btrfs_balance_args *cpu)
{
	memset(disk, 0, sizeof(*disk));

	disk->profiles = cpu_to_le64(cpu->profiles);
	disk->usage = cpu_to_le64(cpu->usage);
	disk->devid = cpu_to_le64(cpu->devid);
	disk->pstart = cpu_to_le64(cpu->pstart);
	disk->pend = cpu_to_le64(cpu->pend);
	disk->vstart = cpu_to_le64(cpu->vstart);
	disk->vend = cpu_to_le64(cpu->vend);
	disk->target = cpu_to_le64(cpu->target);
	disk->flags = cpu_to_le64(cpu->flags);
}

/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
@@ -2500,6 +2662,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
}
static inline void free_fs_info(struct btrfs_fs_info *fs_info)
{
	kfree(fs_info->balance_ctl);
	kfree(fs_info->delayed_root);
	kfree(fs_info->extent_root);
	kfree(fs_info->tree_root);
@@ -2510,6 +2673,24 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
	kfree(fs_info->super_for_commit);
	kfree(fs_info);
}
/**
 * profile_is_valid - check that a profile is well-formed and reduced
 * @flags: profile to validate; block group type bits are ignored
 * @extended: if non-zero @flags is in extended format, i.e.
 *	      BTRFS_AVAIL_ALLOC_BIT_SINGLE is an accepted profile bit too
 *
 * Returns 1 if, once the type bits are dropped, @flags has no bit outside
 * the accepted profile set and at most one bit set; returns 0 otherwise.
 */
static inline int profile_is_valid(u64 flags, int extended)
{
	u64 allowed = BTRFS_BLOCK_GROUP_PROFILE_MASK;
	u64 profile = flags & ~BTRFS_BLOCK_GROUP_TYPE_MASK;

	if (extended)
		allowed |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;

	/* a bit outside the accepted set makes the profile invalid */
	if (profile & ~allowed)
		return 0;

	/* clearing the lowest set bit must leave nothing behind */
	return (profile & (profile - 1)) == 0;
}

/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
+15 −3
Original line number Diff line number Diff line
@@ -2002,6 +2002,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	init_rwsem(&fs_info->scrub_super_lock);
	fs_info->scrub_workers_refcnt = 0;

	spin_lock_init(&fs_info->balance_lock);
	mutex_init(&fs_info->balance_mutex);
	atomic_set(&fs_info->balance_running, 0);
	atomic_set(&fs_info->balance_pause_req, 0);
	atomic_set(&fs_info->balance_cancel_req, 0);
	fs_info->balance_ctl = NULL;
	init_waitqueue_head(&fs_info->balance_wait_q);

	sb->s_blocksize = 4096;
	sb->s_blocksize_bits = blksize_bits(4096);
	sb->s_bdi = &fs_info->bdi;
@@ -2321,9 +2329,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,

	fs_info->generation = generation;
	fs_info->last_trans_committed = generation;
	fs_info->data_alloc_profile = (u64)-1;
	fs_info->metadata_alloc_profile = (u64)-1;
	fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;

	ret = btrfs_init_space_info(fs_info);
	if (ret) {
@@ -2426,6 +2431,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
		if (!err)
			err = btrfs_orphan_cleanup(fs_info->tree_root);
		up_read(&fs_info->cleanup_work_sem);

		if (!err)
			err = btrfs_recover_balance(fs_info->tree_root);

		if (err) {
			close_ctree(tree_root);
			return ERR_PTR(err);
@@ -2975,6 +2984,9 @@ int close_ctree(struct btrfs_root *root)
	fs_info->closing = 1;
	smp_mb();

	/* pause restriper - we want to resume on mount */
	btrfs_pause_balance(root->fs_info);

	btrfs_scrub_cancel(root);

	/* wait for any defraggers to finish */
+103 −25
Original line number Diff line number Diff line
@@ -618,8 +618,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
		 BTRFS_BLOCK_GROUP_METADATA;
	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
@@ -2999,9 +2998,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
		INIT_LIST_HEAD(&found->block_groups[i]);
	init_rwsem(&found->groups_sem);
	spin_lock_init(&found->lock);
	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
				BTRFS_BLOCK_GROUP_SYSTEM |
				BTRFS_BLOCK_GROUP_METADATA);
	found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	found->total_bytes = total_bytes;
	found->disk_total = total_bytes * factor;
	found->bytes_used = bytes_used;
@@ -3022,11 +3019,12 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,

static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
	u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
				   BTRFS_BLOCK_GROUP_RAID1 |
				   BTRFS_BLOCK_GROUP_RAID10 |
				   BTRFS_BLOCK_GROUP_DUP);
	if (extra_flags) {
	u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;

	/* chunk -> extended profile */
	if (extra_flags == 0)
		extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;

	if (flags & BTRFS_BLOCK_GROUP_DATA)
		fs_info->avail_data_alloc_bits |= extra_flags;
	if (flags & BTRFS_BLOCK_GROUP_METADATA)
@@ -3034,8 +3032,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
		fs_info->avail_system_alloc_bits |= extra_flags;
}
}

/*
 * @flags: available profiles in extended format (see ctree.h)
 *
 * Returns reduced profile in chunk format.  If profile changing is in
 * progress (either running or paused) picks the target profile (if it's
 * already available), otherwise falls back to plain reducing.
 */
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
	/*
@@ -3046,6 +3050,34 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
	u64 num_devices = root->fs_info->fs_devices->rw_devices +
		root->fs_info->fs_devices->missing_devices;

	/* pick restriper's target profile if it's available */
	spin_lock(&root->fs_info->balance_lock);
	if (root->fs_info->balance_ctl) {
		struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
		u64 tgt = 0;

		if ((flags & BTRFS_BLOCK_GROUP_DATA) &&
		    (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
		    (flags & bctl->data.target)) {
			tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
		} else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
			   (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
			   (flags & bctl->sys.target)) {
			tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
		} else if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
			   (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
			   (flags & bctl->meta.target)) {
			tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
		}

		if (tgt) {
			spin_unlock(&root->fs_info->balance_lock);
			flags = tgt;
			goto out;
		}
	}
	spin_unlock(&root->fs_info->balance_lock);

	if (num_devices == 1)
		flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
	if (num_devices < 4)
@@ -3065,22 +3097,25 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
	if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
	    ((flags & BTRFS_BLOCK_GROUP_RAID1) |
	     (flags & BTRFS_BLOCK_GROUP_RAID10) |
	     (flags & BTRFS_BLOCK_GROUP_DUP)))
	     (flags & BTRFS_BLOCK_GROUP_DUP))) {
		flags &= ~BTRFS_BLOCK_GROUP_RAID0;
	}

out:
	/* extended -> chunk profile */
	flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
	return flags;
}

static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
{
	if (flags & BTRFS_BLOCK_GROUP_DATA)
		flags |= root->fs_info->avail_data_alloc_bits &
			 root->fs_info->data_alloc_profile;
		flags |= root->fs_info->avail_data_alloc_bits;
	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
		flags |= root->fs_info->avail_system_alloc_bits &
			 root->fs_info->system_alloc_profile;
		flags |= root->fs_info->avail_system_alloc_bits;
	else if (flags & BTRFS_BLOCK_GROUP_METADATA)
		flags |= root->fs_info->avail_metadata_alloc_bits &
			 root->fs_info->metadata_alloc_profile;
		flags |= root->fs_info->avail_metadata_alloc_bits;

	return btrfs_reduce_alloc_profile(root, flags);
}

@@ -3282,7 +3317,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
	int wait_for_alloc = 0;
	int ret = 0;

	flags = btrfs_reduce_alloc_profile(extent_root, flags);
	BUG_ON(!profile_is_valid(flags, 0));

	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
@@ -6792,6 +6827,29 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
	u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
		BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;

	if (root->fs_info->balance_ctl) {
		struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
		u64 tgt = 0;

		/* pick restriper's target profile and return */
		if (flags & BTRFS_BLOCK_GROUP_DATA &&
		    bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
			tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
		} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
			   bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
			tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
		} else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
			   bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
			tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
		}

		if (tgt) {
			/* extended -> chunk profile */
			tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
			return tgt;
		}
	}

	/*
	 * we add in the count of missing devices because we want
	 * to make sure that any RAID levels on a degraded FS
@@ -7466,6 +7524,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
	return 0;
}

/*
 * Remove the profile carried by chunk-format @flags from the
 * avail_*_alloc_bits mask of every block group type present in @flags.
 * A chunk with no profile bits is accounted as
 * BTRFS_AVAIL_ALLOC_BIT_SINGLE in those (extended format) masks.
 */
static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
	u64 profile = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
	u64 keep;

	/* chunk -> extended profile */
	if (!profile)
		profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
	keep = ~profile;

	if (flags & BTRFS_BLOCK_GROUP_DATA)
		fs_info->avail_data_alloc_bits &= keep;
	if (flags & BTRFS_BLOCK_GROUP_METADATA)
		fs_info->avail_metadata_alloc_bits &= keep;
	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
		fs_info->avail_system_alloc_bits &= keep;
}

int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start)
{
@@ -7476,6 +7550,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
	struct btrfs_key key;
	struct inode *inode;
	int ret;
	int index;
	int factor;

	root = root->fs_info->extent_root;
@@ -7491,6 +7566,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
	free_excluded_extents(root, block_group);

	memcpy(&key, &block_group->key, sizeof(key));
	index = get_block_group_index(block_group);
	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
				  BTRFS_BLOCK_GROUP_RAID1 |
				  BTRFS_BLOCK_GROUP_RAID10))
@@ -7565,6 +7641,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
	 * are still on the list after taking the semaphore
	 */
	list_del_init(&block_group->list);
	if (list_empty(&block_group->space_info->block_groups[index]))
		clear_avail_alloc_bits(root->fs_info, block_group->flags);
	up_write(&block_group->space_info->groups_sem);

	if (block_group->cached == BTRFS_CACHE_STARTED)
+210 −16
Original line number Diff line number Diff line
@@ -1203,13 +1203,21 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&root->fs_info->volume_mutex);
	if (root->fs_info->balance_ctl) {
		printk(KERN_INFO "btrfs: balance in progress\n");
		ret = -EINVAL;
		goto out;
	}

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	if (IS_ERR(vol_args)) {
		ret = PTR_ERR(vol_args);
		goto out;
	}

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';

	mutex_lock(&root->fs_info->volume_mutex);
	sizestr = vol_args->name;
	devstr = strchr(sizestr, ':');
	if (devstr) {
@@ -1226,7 +1234,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
		printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
		       (unsigned long long)devid);
		ret = -EINVAL;
		goto out_unlock;
		goto out_free;
	}
	if (!strcmp(sizestr, "max"))
		new_size = device->bdev->bd_inode->i_size;
@@ -1241,7 +1249,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
		new_size = memparse(sizestr, NULL);
		if (new_size == 0) {
			ret = -EINVAL;
			goto out_unlock;
			goto out_free;
		}
	}

@@ -1250,7 +1258,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
	if (mod < 0) {
		if (new_size > old_size) {
			ret = -EINVAL;
			goto out_unlock;
			goto out_free;
		}
		new_size = old_size - new_size;
	} else if (mod > 0) {
@@ -1259,11 +1267,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,

	if (new_size < 256 * 1024 * 1024) {
		ret = -EINVAL;
		goto out_unlock;
		goto out_free;
	}
	if (new_size > device->bdev->bd_inode->i_size) {
		ret = -EFBIG;
		goto out_unlock;
		goto out_free;
	}

	do_div(new_size, root->sectorsize);
@@ -1276,7 +1284,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out_unlock;
			goto out_free;
		}
		ret = btrfs_grow_device(trans, device, new_size);
		btrfs_commit_transaction(trans, root);
@@ -1284,9 +1292,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
		ret = btrfs_shrink_device(device, new_size);
	}

out_unlock:
	mutex_unlock(&root->fs_info->volume_mutex);
out_free:
	kfree(vol_args);
out:
	mutex_unlock(&root->fs_info->volume_mutex);
	return ret;
}

@@ -2052,14 +2061,25 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&root->fs_info->volume_mutex);
	if (root->fs_info->balance_ctl) {
		printk(KERN_INFO "btrfs: balance in progress\n");
		ret = -EINVAL;
		goto out;
	}

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	if (IS_ERR(vol_args)) {
		ret = PTR_ERR(vol_args);
		goto out;
	}

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
	ret = btrfs_init_new_device(root, vol_args->name);

	kfree(vol_args);
out:
	mutex_unlock(&root->fs_info->volume_mutex);
	return ret;
}

@@ -2074,14 +2094,25 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
	if (root->fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	mutex_lock(&root->fs_info->volume_mutex);
	if (root->fs_info->balance_ctl) {
		printk(KERN_INFO "btrfs: balance in progress\n");
		ret = -EINVAL;
		goto out;
	}

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	if (IS_ERR(vol_args)) {
		ret = PTR_ERR(vol_args);
		goto out;
	}

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
	ret = btrfs_rm_device(root, vol_args->name);

	kfree(vol_args);
out:
	mutex_unlock(&root->fs_info->volume_mutex);
	return ret;
}

@@ -3034,6 +3065,163 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
	return ret;
}

/*
 * Fill @bargs from the current balance control of @fs_info.
 *
 * @fs_info: filesystem whose ->balance_ctl is reported; it is
 *           dereferenced unconditionally, so it must be non-NULL
 * @lock:    if non-zero, take fs_info->balance_lock around the copy of
 *           the progress counters; if zero, copy them without locking
 * @bargs:   destination.  flags, data, meta, sys and stat are
 *           overwritten; state bits are OR-ed into the existing value,
 *           so the caller decides what state starts out as.
 */
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
			       struct btrfs_ioctl_balance_args *bargs)
{
	struct btrfs_balance_control *ctl = fs_info->balance_ctl;

	bargs->flags = ctl->flags;

	/* mirror the three atomic state counters as state bits */
	if (atomic_read(&fs_info->balance_running))
		bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
	if (atomic_read(&fs_info->balance_pause_req))
		bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
	if (atomic_read(&fs_info->balance_cancel_req))
		bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;

	memcpy(&bargs->data, &ctl->data, sizeof(bargs->data));
	memcpy(&bargs->meta, &ctl->meta, sizeof(bargs->meta));
	memcpy(&bargs->sys, &ctl->sys, sizeof(bargs->sys));

	/* progress counters: copied under balance_lock only on request */
	if (lock)
		spin_lock(&fs_info->balance_lock);
	memcpy(&bargs->stat, &ctl->stat, sizeof(bargs->stat));
	if (lock)
		spin_unlock(&fs_info->balance_lock);
}

/*
 * Start a new balance or resume an existing one.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 * @arg:  user pointer to a struct btrfs_ioctl_balance_args, or NULL to
 *        balance everything without filters
 *
 * volume_mutex and then balance_mutex are held for the whole call and
 * released in reverse order.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EROFS on a read-only mount, the
 * memdup_user() error if @arg cannot be copied in, -ENOTCONN if a resume
 * is requested but there is no balance control, -EINPROGRESS if a new
 * balance is requested while a balance control already exists, -ENOMEM
 * if the control cannot be allocated, -EFAULT if the result cannot be
 * copied back to @arg, and otherwise the return value of btrfs_balance().
 */
static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *bargs;
	struct btrfs_balance_control *bctl;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	mutex_lock(&fs_info->volume_mutex);
	mutex_lock(&fs_info->balance_mutex);

	if (arg) {
		bargs = memdup_user(arg, sizeof(*bargs));
		if (IS_ERR(bargs)) {
			ret = PTR_ERR(bargs);
			/* nothing to free: bargs holds an error pointer */
			goto out;
		}

		if (bargs->flags & BTRFS_BALANCE_RESUME) {
			if (!fs_info->balance_ctl) {
				ret = -ENOTCONN;
				goto out_bargs;
			}

			/* reuse the existing control, just mark it resumed */
			bctl = fs_info->balance_ctl;
			spin_lock(&fs_info->balance_lock);
			bctl->flags |= BTRFS_BALANCE_RESUME;
			spin_unlock(&fs_info->balance_lock);

			goto do_balance;
		}
	} else {
		bargs = NULL;
	}

	/* a fresh balance is refused while a control already exists */
	if (fs_info->balance_ctl) {
		ret = -EINPROGRESS;
		goto out_bargs;
	}

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl) {
		ret = -ENOMEM;
		goto out_bargs;
	}

	bctl->fs_info = fs_info;
	if (arg) {
		memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
		memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
		memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));

		bctl->flags = bargs->flags;
	} else {
		/* balance everything - no filters */
		bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
	}

do_balance:
	ret = btrfs_balance(bctl, bargs);
	/*
	 * bctl is freed in __cancel_balance or in free_fs_info if
	 * restriper was paused all the way until unmount
	 */
	if (arg) {
		/* a failed copy-out replaces whatever btrfs_balance returned */
		if (copy_to_user(arg, bargs, sizeof(*bargs)))
			ret = -EFAULT;
	}

out_bargs:
	/* bargs is NULL here when arg was NULL */
	kfree(bargs);
out:
	mutex_unlock(&fs_info->balance_mutex);
	mutex_unlock(&fs_info->volume_mutex);
	return ret;
}

/*
 * Dispatch a balance control request.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 * @cmd:  BTRFS_BALANCE_CTL_PAUSE or BTRFS_BALANCE_CTL_CANCEL
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EINVAL for any other @cmd, and
 * otherwise whatever btrfs_pause_balance() / btrfs_cancel_balance()
 * returns.
 */
static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (cmd == BTRFS_BALANCE_CTL_PAUSE)
		return btrfs_pause_balance(root->fs_info);

	if (cmd == BTRFS_BALANCE_CTL_CANCEL)
		return btrfs_cancel_balance(root->fs_info);

	return -EINVAL;
}

/*
 * Report the state of the current balance to userspace.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 * @arg:  user buffer receiving a struct btrfs_ioctl_balance_args
 *
 * A zeroed scratch structure is filled by update_ioctl_balance_args()
 * while balance_mutex is held and is then copied out.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -ENOTCONN if there
 * is no balance control, -ENOMEM if the scratch structure cannot be
 * allocated and -EFAULT if the copy to @arg fails.
 */
static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
					 void __user *arg)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *snapshot;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&fs_info->balance_mutex);

	ret = -ENOTCONN;
	if (!fs_info->balance_ctl)
		goto unlock;

	ret = -ENOMEM;
	snapshot = kzalloc(sizeof(*snapshot), GFP_NOFS);
	if (!snapshot)
		goto unlock;

	update_ioctl_balance_args(fs_info, 1, snapshot);

	ret = copy_to_user(arg, snapshot, sizeof(*snapshot)) ? -EFAULT : 0;

	kfree(snapshot);
unlock:
	mutex_unlock(&fs_info->balance_mutex);
	return ret;
}

long btrfs_ioctl(struct file *file, unsigned int
		cmd, unsigned long arg)
{
@@ -3078,7 +3266,7 @@ long btrfs_ioctl(struct file *file, unsigned int
	case BTRFS_IOC_DEV_INFO:
		return btrfs_ioctl_dev_info(root, argp);
	case BTRFS_IOC_BALANCE:
		return btrfs_balance(root->fs_info->dev_root);
		return btrfs_ioctl_balance(root, NULL);
	case BTRFS_IOC_CLONE:
		return btrfs_ioctl_clone(file, arg, 0, 0, 0);
	case BTRFS_IOC_CLONE_RANGE:
@@ -3110,6 +3298,12 @@ long btrfs_ioctl(struct file *file, unsigned int
		return btrfs_ioctl_scrub_cancel(root, argp);
	case BTRFS_IOC_SCRUB_PROGRESS:
		return btrfs_ioctl_scrub_progress(root, argp);
	case BTRFS_IOC_BALANCE_V2:
		return btrfs_ioctl_balance(root, argp);
	case BTRFS_IOC_BALANCE_CTL:
		return btrfs_ioctl_balance_ctl(root, arg);
	case BTRFS_IOC_BALANCE_PROGRESS:
		return btrfs_ioctl_balance_progress(root, argp);
	}

	return -ENOTTY;
+54 −0
Original line number Diff line number Diff line
@@ -109,6 +109,55 @@ struct btrfs_ioctl_fs_info_args {
	__u64 reserved[124];			/* pad to 1k */
};

/* balance control ioctl modes */
#define BTRFS_BALANCE_CTL_PAUSE		1
#define BTRFS_BALANCE_CTL_CANCEL	2

/*
 * this is packed, because it should be exactly the same as its disk
 * byte order counterpart (struct btrfs_disk_balance_args)
 *
 * Fields are in CPU byte order; field order and sizes mirror the
 * on-disk structure one to one.
 */
struct btrfs_balance_args {
	__u64 profiles;		/* profiles to operate on */
	__u64 usage;		/* usage filter */
	__u64 devid;		/* devid filter */
	__u64 pstart;		/* devid subset filter [pstart..pend) */
	__u64 pend;
	__u64 vstart;		/* virtual address subset filter [vstart..vend) */
	__u64 vend;

	__u64 target;		/* profile to convert to */

	__u64 flags;		/* BTRFS_BALANCE_ARGS_* */

	__u64 unused[8];	/* reserved space */
} __attribute__ ((__packed__));

/*
 * report balance progress to userspace
 *
 * All three counters are in units of chunks.
 */
struct btrfs_balance_progress {
	__u64 expected;		/* estimated # of chunks that will be
				 * relocated to fulfill the request */
	__u64 considered;	/* # of chunks we have considered so far */
	__u64 completed;	/* # of chunks relocated so far */
};

#define BTRFS_BALANCE_STATE_RUNNING	(1ULL << 0)
#define BTRFS_BALANCE_STATE_PAUSE_REQ	(1ULL << 1)
#define BTRFS_BALANCE_STATE_CANCEL_REQ	(1ULL << 2)

/*
 * Argument of the BTRFS_IOC_BALANCE_V2 and BTRFS_IOC_BALANCE_PROGRESS
 * ioctls.  The in/out markers give the direction of each field relative
 * to the kernel; the trailing padding brings the structure to 1k.
 */
struct btrfs_ioctl_balance_args {
	__u64 flags;				/* in/out */
	__u64 state;				/* out, BTRFS_BALANCE_STATE_* */

	struct btrfs_balance_args data;		/* in/out */
	struct btrfs_balance_args meta;		/* in/out */
	struct btrfs_balance_args sys;		/* in/out */

	struct btrfs_balance_progress stat;	/* out */

	__u64 unused[72];			/* pad to 1k */
};

#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct btrfs_ioctl_ino_lookup_args {
	__u64 treeid;
@@ -272,6 +321,11 @@ struct btrfs_ioctl_logical_ino_args {
				 struct btrfs_ioctl_dev_info_args)
#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
			       struct btrfs_ioctl_fs_info_args)
#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
				   struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
					struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
					struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
Loading