Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit aa2dfb37 authored by Chris Mason's avatar Chris Mason
Browse files

Merge branch 'allocator' of...

parents 945d8962 73c5de00
Loading
Loading
Loading
Loading
+26 −0
Original line number Diff line number Diff line
@@ -914,6 +914,32 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
	return 0;
}

/* Used to sort the devices by max_avail(descending sort) */
static int btrfs_cmp_device_free_bytes(const void *dev_info1,
				       const void *dev_info2)
{
	if (((struct btrfs_device_info *)dev_info1)->max_avail >
	    ((struct btrfs_device_info *)dev_info2)->max_avail)
		return -1;
	else if (((struct btrfs_device_info *)dev_info1)->max_avail <
		 ((struct btrfs_device_info *)dev_info2)->max_avail)
		return 1;
	else
	return 0;
}

/*
 * sort the devices by max_avail, in which max free extent size of each device
 * is stored.(Descending Sort)
 */
static inline void btrfs_descending_sort_devices(
					struct btrfs_device_info *devices,
					size_t nr_devices)
{
	sort(devices, nr_devices, sizeof(struct btrfs_device_info),
	     btrfs_cmp_device_free_bytes, NULL);
}

/*
 * The helper to calc the free space on the devices that can be used to store
 * file data.
+174 −319
Original line number Diff line number Diff line
@@ -805,10 +805,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
	/* we don't want to overwrite the superblock on the drive,
	 * so we make sure to start at an offset of at least 1MB
	 */
	search_start = 1024 * 1024;

	if (root->fs_info->alloc_start + num_bytes <= search_end)
		search_start = max(root->fs_info->alloc_start, search_start);
	search_start = max(root->fs_info->alloc_start, 1024ull * 1024);

	max_hole_start = search_start;
	max_hole_size = 0;
@@ -2227,275 +2224,204 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
	return 0;
}

static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size,
					int num_stripes, int sub_stripes)
/*
 * sort the devices in descending order by max_avail, total_avail
 */
static int btrfs_cmp_device_info(const void *a, const void *b)
{
	if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
		return calc_size;
	else if (type & BTRFS_BLOCK_GROUP_RAID10)
		return calc_size * (num_stripes / sub_stripes);
	else
		return calc_size * num_stripes;
}
	const struct btrfs_device_info *di_a = a;
	const struct btrfs_device_info *di_b = b;

/* Used to sort the devices by max_avail(descending sort) */
int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
{
	if (((struct btrfs_device_info *)dev_info1)->max_avail >
	    ((struct btrfs_device_info *)dev_info2)->max_avail)
	if (di_a->max_avail > di_b->max_avail)
		return -1;
	else if (((struct btrfs_device_info *)dev_info1)->max_avail <
		 ((struct btrfs_device_info *)dev_info2)->max_avail)
	if (di_a->max_avail < di_b->max_avail)
		return 1;
	if (di_a->total_avail > di_b->total_avail)
		return -1;
	if (di_a->total_avail < di_b->total_avail)
		return 1;
	else
	return 0;
}

static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
				 int *num_stripes, int *min_stripes,
				 int *sub_stripes)
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root,
			       struct map_lookup **map_ret,
			       u64 *num_bytes_out, u64 *stripe_size_out,
			       u64 start, u64 type)
{
	*num_stripes = 1;
	*min_stripes = 1;
	*sub_stripes = 0;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_fs_devices *fs_devices = info->fs_devices;
	struct list_head *cur;
	struct map_lookup *map = NULL;
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	struct btrfs_device_info *devices_info = NULL;
	u64 total_avail;
	int num_stripes;	/* total number of stripes to allocate */
	int sub_stripes;	/* sub_stripes info for map */
	int dev_stripes;	/* stripes per dev */
	int devs_max;		/* max devs to use */
	int devs_min;		/* min devs needed */
	int devs_increment;	/* ndevs has to be a multiple of this */
	int ncopies;		/* how many copies to data has */
	int ret;
	u64 max_stripe_size;
	u64 max_chunk_size;
	u64 stripe_size;
	u64 num_bytes;
	int ndevs;
	int i;
	int j;

	if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
		*num_stripes = fs_devices->rw_devices;
		*min_stripes = 2;
	}
	if (type & (BTRFS_BLOCK_GROUP_DUP)) {
		*num_stripes = 2;
		*min_stripes = 2;
	}
	if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
		if (fs_devices->rw_devices < 2)
			return -ENOSPC;
		*num_stripes = 2;
		*min_stripes = 2;
	}
	if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
		*num_stripes = fs_devices->rw_devices;
		if (*num_stripes < 4)
			return -ENOSPC;
		*num_stripes &= ~(u32)1;
		*sub_stripes = 2;
		*min_stripes = 4;
	if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
	    (type & BTRFS_BLOCK_GROUP_DUP)) {
		WARN_ON(1);
		type &= ~BTRFS_BLOCK_GROUP_DUP;
	}

	return 0;
}
	if (list_empty(&fs_devices->alloc_list))
		return -ENOSPC;

static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices,
				    u64 proposed_size, u64 type,
				    int num_stripes, int small_stripe)
{
	int min_stripe_size = 1 * 1024 * 1024;
	u64 calc_size = proposed_size;
	u64 max_chunk_size = calc_size;
	int ncopies = 1;
	sub_stripes = 1;
	dev_stripes = 1;
	devs_increment = 1;
	ncopies = 1;
	devs_max = 0;	/* 0 == as many as possible */
	devs_min = 1;

	if (type & (BTRFS_BLOCK_GROUP_RAID1 |
		    BTRFS_BLOCK_GROUP_DUP |
		    BTRFS_BLOCK_GROUP_RAID10))
	/*
	 * define the properties of each RAID type.
	 * FIXME: move this to a global table and use it in all RAID
	 * calculation code
	 */
	if (type & (BTRFS_BLOCK_GROUP_DUP)) {
		dev_stripes = 2;
		ncopies = 2;
		devs_max = 1;
	} else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
		devs_min = 2;
	} else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
		devs_increment = 2;
		ncopies = 2;
		devs_max = 2;
		devs_min = 2;
	} else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
		sub_stripes = 2;
		devs_increment = 2;
		ncopies = 2;
		devs_min = 4;
	} else {
		devs_max = 1;
	}

	if (type & BTRFS_BLOCK_GROUP_DATA) {
		max_chunk_size = 10 * calc_size;
		min_stripe_size = 64 * 1024 * 1024;
		max_stripe_size = 1024 * 1024 * 1024;
		max_chunk_size = 10 * max_stripe_size;
	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
		max_chunk_size = 256 * 1024 * 1024;
		min_stripe_size = 32 * 1024 * 1024;
		max_stripe_size = 256 * 1024 * 1024;
		max_chunk_size = max_stripe_size;
	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
		calc_size = 8 * 1024 * 1024;
		max_chunk_size = calc_size * 2;
		min_stripe_size = 1 * 1024 * 1024;
		max_stripe_size = 8 * 1024 * 1024;
		max_chunk_size = 2 * max_stripe_size;
	} else {
		printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
		       type);
		BUG_ON(1);
	}

	/* we don't want a chunk larger than 10% of writeable space */
	max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
			     max_chunk_size);

	if (calc_size * num_stripes > max_chunk_size * ncopies) {
		calc_size = max_chunk_size * ncopies;
		do_div(calc_size, num_stripes);
		do_div(calc_size, BTRFS_STRIPE_LEN);
		calc_size *= BTRFS_STRIPE_LEN;
	}
	devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
			       GFP_NOFS);
	if (!devices_info)
		return -ENOMEM;

	/* we don't want tiny stripes */
	if (!small_stripe)
		calc_size = max_t(u64, min_stripe_size, calc_size);
	cur = fs_devices->alloc_list.next;

	/*
	 * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure
	 * we end up with something bigger than a stripe
	 * in the first pass through the devices list, we gather information
	 * about the available holes on each device.
	 */
	calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN);

	do_div(calc_size, BTRFS_STRIPE_LEN);
	calc_size *= BTRFS_STRIPE_LEN;

	return calc_size;
}

static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
						      int num_stripes)
{
	struct map_lookup *new;
	size_t len = map_lookup_size(num_stripes);
	ndevs = 0;
	while (cur != &fs_devices->alloc_list) {
		struct btrfs_device *device;
		u64 max_avail;
		u64 dev_offset;

	BUG_ON(map->num_stripes < num_stripes);
		device = list_entry(cur, struct btrfs_device, dev_alloc_list);

	if (map->num_stripes == num_stripes)
		return map;
		cur = cur->next;

	new = kmalloc(len, GFP_NOFS);
	if (!new) {
		/* just change map->num_stripes */
		map->num_stripes = num_stripes;
		return map;
		if (!device->writeable) {
			printk(KERN_ERR
			       "btrfs: read-only device in alloc_list\n");
			WARN_ON(1);
			continue;
		}

	memcpy(new, map, len);
	new->num_stripes = num_stripes;
	kfree(map);
	return new;
}
		if (!device->in_fs_metadata)
			continue;

/*
 * helper to allocate device space from btrfs_device_info, in which we stored
 * max free space information of every device. It is used when we can not
 * allocate chunks by default size.
 *
 * By this helper, we can allocate a new chunk as larger as possible.
		if (device->total_bytes > device->bytes_used)
			total_avail = device->total_bytes - device->bytes_used;
		else
			total_avail = 0;
		/* avail is off by max(alloc_start, 1MB), but that is the same
		 * for all devices, so it doesn't hurt the sorting later on
		 */
static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
				    struct btrfs_fs_devices *fs_devices,
				    struct btrfs_device_info *devices,
				    int nr_device, u64 type,
				    struct map_lookup **map_lookup,
				    int min_stripes, u64 *stripe_size)
{
	int i, index, sort_again = 0;
	int min_devices = min_stripes;
	u64 max_avail, min_free;
	struct map_lookup *map = *map_lookup;
	int ret;

	if (nr_device < min_stripes)
		return -ENOSPC;
		ret = find_free_dev_extent(trans, device,
					   max_stripe_size * dev_stripes,
					   &dev_offset, &max_avail);
		if (ret && ret != -ENOSPC)
			goto error;

	btrfs_descending_sort_devices(devices, nr_device);
		if (ret == 0)
			max_avail = max_stripe_size * dev_stripes;

	max_avail = devices[0].max_avail;
	if (!max_avail)
		return -ENOSPC;
		if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
			continue;

	for (i = 0; i < nr_device; i++) {
		/*
		 * if dev_offset = 0, it means the free space of this device
		 * is less than what we need, and we didn't search max avail
		 * extent on this device, so do it now.
		 */
		if (!devices[i].dev_offset) {
			ret = find_free_dev_extent(trans, devices[i].dev,
						   max_avail,
						   &devices[i].dev_offset,
						   &devices[i].max_avail);
			if (ret != 0 && ret != -ENOSPC)
				return ret;
			sort_again = 1;
		}
		devices_info[ndevs].dev_offset = dev_offset;
		devices_info[ndevs].max_avail = max_avail;
		devices_info[ndevs].total_avail = total_avail;
		devices_info[ndevs].dev = device;
		++ndevs;
	}

	/* we update the max avail free extent of each devices, sort again */
	if (sort_again)
		btrfs_descending_sort_devices(devices, nr_device);

	if (type & BTRFS_BLOCK_GROUP_DUP)
		min_devices = 1;

	if (!devices[min_devices - 1].max_avail)
		return -ENOSPC;

	max_avail = devices[min_devices - 1].max_avail;
	if (type & BTRFS_BLOCK_GROUP_DUP)
		do_div(max_avail, 2);

	max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type,
					     min_stripes, 1);
	if (type & BTRFS_BLOCK_GROUP_DUP)
		min_free = max_avail * 2;
	else
		min_free = max_avail;

	if (min_free > devices[min_devices - 1].max_avail)
		return -ENOSPC;
	/*
	 * now sort the devices by hole size / available space
	 */
	sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
	     btrfs_cmp_device_info, NULL);

	map = __shrink_map_lookup_stripes(map, min_stripes);
	*stripe_size = max_avail;
	/* round down to number of usable stripes */
	ndevs -= ndevs % devs_increment;

	index = 0;
	for (i = 0; i < min_stripes; i++) {
		map->stripes[i].dev = devices[index].dev;
		map->stripes[i].physical = devices[index].dev_offset;
		if (type & BTRFS_BLOCK_GROUP_DUP) {
			i++;
			map->stripes[i].dev = devices[index].dev;
			map->stripes[i].physical = devices[index].dev_offset +
						   max_avail;
		}
		index++;
	}
	*map_lookup = map;

	return 0;
	if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
		ret = -ENOSPC;
		goto error;
	}

static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root,
			       struct map_lookup **map_ret,
			       u64 *num_bytes, u64 *stripe_size,
			       u64 start, u64 type)
{
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_device *device = NULL;
	struct btrfs_fs_devices *fs_devices = info->fs_devices;
	struct list_head *cur;
	struct map_lookup *map;
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	struct btrfs_device_info *devices_info;
	struct list_head private_devs;
	u64 calc_size = 1024 * 1024 * 1024;
	u64 min_free;
	u64 avail;
	u64 dev_offset;
	int num_stripes;
	int min_stripes;
	int sub_stripes;
	int min_devices;	/* the min number of devices we need */
	int i;
	int ret;
	int index;
	if (devs_max && ndevs > devs_max)
		ndevs = devs_max;
	/*
	 * the primary goal is to maximize the number of stripes, so use as many
	 * devices as possible, even if the stripes are not maximum sized.
	 */
	stripe_size = devices_info[ndevs-1].max_avail;
	num_stripes = ndevs * dev_stripes;

	if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
	    (type & BTRFS_BLOCK_GROUP_DUP)) {
		WARN_ON(1);
		type &= ~BTRFS_BLOCK_GROUP_DUP;
	if (stripe_size * num_stripes > max_chunk_size * ncopies) {
		stripe_size = max_chunk_size * ncopies;
		do_div(stripe_size, num_stripes);
	}
	if (list_empty(&fs_devices->alloc_list))
		return -ENOSPC;

	ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
				    &min_stripes, &sub_stripes);
	if (ret)
		return ret;

	devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
			       GFP_NOFS);
	if (!devices_info)
		return -ENOMEM;
	do_div(stripe_size, dev_stripes);
	do_div(stripe_size, BTRFS_STRIPE_LEN);
	stripe_size *= BTRFS_STRIPE_LEN;

	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
@@ -2504,85 +2430,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
	}
	map->num_stripes = num_stripes;

	cur = fs_devices->alloc_list.next;
	index = 0;
	i = 0;

	calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type,
					     num_stripes, 0);

	if (type & BTRFS_BLOCK_GROUP_DUP) {
		min_free = calc_size * 2;
		min_devices = 1;
	} else {
		min_free = calc_size;
		min_devices = min_stripes;
	}

	INIT_LIST_HEAD(&private_devs);
	while (index < num_stripes) {
		device = list_entry(cur, struct btrfs_device, dev_alloc_list);
		BUG_ON(!device->writeable);
		if (device->total_bytes > device->bytes_used)
			avail = device->total_bytes - device->bytes_used;
		else
			avail = 0;
		cur = cur->next;

		if (device->in_fs_metadata && avail >= min_free) {
			ret = find_free_dev_extent(trans, device, min_free,
						   &devices_info[i].dev_offset,
						   &devices_info[i].max_avail);
			if (ret == 0) {
				list_move_tail(&device->dev_alloc_list,
					       &private_devs);
				map->stripes[index].dev = device;
				map->stripes[index].physical =
						devices_info[i].dev_offset;
				index++;
				if (type & BTRFS_BLOCK_GROUP_DUP) {
					map->stripes[index].dev = device;
					map->stripes[index].physical =
						devices_info[i].dev_offset +
						calc_size;
					index++;
				}
			} else if (ret != -ENOSPC)
				goto error;

			devices_info[i].dev = device;
			i++;
		} else if (device->in_fs_metadata &&
			   avail >= BTRFS_STRIPE_LEN) {
			devices_info[i].dev = device;
			devices_info[i].max_avail = avail;
			i++;
		}

		if (cur == &fs_devices->alloc_list)
			break;
	}

	list_splice(&private_devs, &fs_devices->alloc_list);
	if (index < num_stripes) {
		if (index >= min_stripes) {
			num_stripes = index;
			if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
				num_stripes /= sub_stripes;
				num_stripes *= sub_stripes;
			}

			map = __shrink_map_lookup_stripes(map, num_stripes);
		} else if (i >= min_devices) {
			ret = __btrfs_alloc_tiny_space(trans, fs_devices,
						       devices_info, i, type,
						       &map, min_stripes,
						       &calc_size);
			if (ret)
				goto error;
		} else {
			ret = -ENOSPC;
			goto error;
	for (i = 0; i < ndevs; ++i) {
		for (j = 0; j < dev_stripes; ++j) {
			int s = i * dev_stripes + j;
			map->stripes[s].dev = devices_info[i].dev;
			map->stripes[s].physical = devices_info[i].dev_offset +
						   j * stripe_size;
		}
	}
	map->sector_size = extent_root->sectorsize;
@@ -2593,11 +2446,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
	map->sub_stripes = sub_stripes;

	*map_ret = map;
	*stripe_size = calc_size;
	*num_bytes = chunk_bytes_by_type(type, calc_size,
					 map->num_stripes, sub_stripes);
	num_bytes = stripe_size * (num_stripes / ncopies);

	*stripe_size_out = stripe_size;
	*num_bytes_out = num_bytes;

	trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
	trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);

	em = alloc_extent_map();
	if (!em) {
@@ -2606,7 +2460,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
	}
	em->bdev = (struct block_device *)map;
	em->start = start;
	em->len = *num_bytes;
	em->len = num_bytes;
	em->block_start = 0;
	em->block_len = em->len;

@@ -2619,20 +2473,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,

	ret = btrfs_make_block_group(trans, extent_root, 0, type,
				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
				     start, *num_bytes);
				     start, num_bytes);
	BUG_ON(ret);

	index = 0;
	while (index < map->num_stripes) {
		device = map->stripes[index].dev;
		dev_offset = map->stripes[index].physical;
	for (i = 0; i < map->num_stripes; ++i) {
		struct btrfs_device *device;
		u64 dev_offset;

		device = map->stripes[i].dev;
		dev_offset = map->stripes[i].physical;

		ret = btrfs_alloc_dev_extent(trans, device,
				info->chunk_root->root_key.objectid,
				BTRFS_FIRST_CHUNK_TREE_OBJECTID,
				start, dev_offset, calc_size);
				start, dev_offset, stripe_size);
		BUG_ON(ret);
		index++;
	}

	kfree(devices_info);
+1 −15
Original line number Diff line number Diff line
@@ -144,6 +144,7 @@ struct btrfs_device_info {
	struct btrfs_device *dev;
	u64 dev_offset;
	u64 max_avail;
	u64 total_avail;
};

struct map_lookup {
@@ -157,21 +158,6 @@ struct map_lookup {
	struct btrfs_bio_stripe stripes[];
};

/* Used to sort the devices by max_avail(descending sort) */
int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);

/*
 * sort the devices by max_avail, in which max free extent size of each device
 * is stored.(Descending Sort)
 */
static inline void btrfs_descending_sort_devices(
					struct btrfs_device_info *devices,
					size_t nr_devices)
{
	sort(devices, nr_devices, sizeof(struct btrfs_device_info),
	     btrfs_cmp_device_free_bytes, NULL);
}

int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
				   u64 end, u64 *length);