Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 22365979 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs updates from Chris Mason:
 "This has Jeff Mahoney's long standing trim patch that fixes corners
  where trims were missing.  Omar has some raid5/6 fixes, especially for
  using scrub and device replace when devices are missing.

  Zhao Lie continues cleaning and fixing things, this series fixes some
  really hard to hit corners in xfstests.  I had to pull it last merge
  window due to some deadlocks, but those are now resolved.

  I added support for Tejun's new blkio controllers.  It seems to work
  well for single devices, we'll expand to multi-device as well"

* 'for-linus-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (47 commits)
  btrfs: fix compile when block cgroups are not enabled
  Btrfs: fix file read corruption after extent cloning and fsync
  Btrfs: check if previous transaction aborted to avoid fs corruption
  btrfs: use __GFP_NOFAIL in alloc_btrfs_bio
  btrfs: Prevent from early transaction abort
  btrfs: Remove unused arguments in tree-log.c
  btrfs: Remove useless condition in start_log_trans()
  Btrfs: add support for blkio controllers
  Btrfs: remove unused mutex from struct 'btrfs_fs_info'
  Btrfs: fix parity scrub of RAID 5/6 with missing device
  Btrfs: fix device replace of a missing RAID 5/6 device
  Btrfs: add RAID 5/6 BTRFS_RBIO_REBUILD_MISSING operation
  Btrfs: count devices correctly in readahead during RAID 5/6 replace
  Btrfs: remove misleading handling of missing device scrub
  btrfs: fix clone / extent-same deadlocks
  Btrfs: fix defrag to merge tail file extent
  Btrfs: fix warning in backref walking
  btrfs: Add WARN_ON() for double lock in btrfs_tree_lock()
  btrfs: Remove root argument in extent_data_ref_count()
  btrfs: Fix wrong comment of btrfs_alloc_tree_block()
  ...
parents 6c0f568e 3a9508b0
Loading
Loading
Loading
Loading
+27 −8
Original line number Diff line number Diff line
@@ -206,10 +206,33 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
		return -ENOMEM;

	ref->root_id = root_id;
	if (key)
	if (key) {
		ref->key_for_search = *key;
	else
		/*
		 * We can often find data backrefs with an offset that is too
		 * large (>= LLONG_MAX, maximum allowed file offset) due to
		 * underflows when subtracting a file's offset with the data
		 * offset of its corresponding extent data item. This can
		 * happen for example in the clone ioctl.
		 * So if we detect such case we set the search key's offset to
		 * zero to make sure we will find the matching file extent item
		 * at add_all_parents(), otherwise we will miss it because the
		 * offset taken form the backref is much larger then the offset
		 * of the file extent item. This can make us scan a very large
		 * number of file extent items, but at least it will not make
		 * us miss any.
		 * This is an ugly workaround for a behaviour that should have
		 * never existed, but it does and a fix for the clone ioctl
		 * would touch a lot of places, cause backwards incompatibility
		 * and would not fix the problem for extents cloned with older
		 * kernels.
		 */
		if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY &&
		    ref->key_for_search.offset >= LLONG_MAX)
			ref->key_for_search.offset = 0;
	} else {
		memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
	}

	ref->inode_list = NULL;
	ref->level = level;
@@ -632,7 +655,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
			struct btrfs_delayed_tree_ref *ref;

			ref = btrfs_delayed_node_to_tree_ref(node);
			ret = __add_prelim_ref(prefs, ref->root, NULL,
			ret = __add_prelim_ref(prefs, 0, NULL,
					       ref->level + 1, ref->parent,
					       node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
@@ -664,11 +687,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
			struct btrfs_delayed_data_ref *ref;

			ref = btrfs_delayed_node_to_data_ref(node);

			key.objectid = ref->objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = ref->offset;
			ret = __add_prelim_ref(prefs, ref->root, &key, 0,
			ret = __add_prelim_ref(prefs, 0, NULL, 0,
					       ref->parent, node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
+3 −1
Original line number Diff line number Diff line
@@ -1159,9 +1159,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,

	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
		ret = btrfs_reloc_cow_block(trans, root, buf, cow);
		if (ret)
		if (ret) {
			btrfs_abort_transaction(trans, root, ret);
			return ret;
		}
	}

	if (buf == root->node) {
		WARN_ON(parent && parent != buf);
+7 −11
Original line number Diff line number Diff line
@@ -1300,7 +1300,7 @@ struct btrfs_block_group_cache {
	/* for raid56, this is a full stripe, without parity */
	unsigned long full_stripe_len;

	unsigned int ro:1;
	unsigned int ro;
	unsigned int iref:1;
	unsigned int has_caching_ctl:1;
	unsigned int removed:1;
@@ -1518,12 +1518,6 @@ struct btrfs_fs_info {
	 */
	struct mutex ordered_operations_mutex;

	/*
	 * Same as ordered_operations_mutex except this is for ordered extents
	 * and not the operations.
	 */
	struct mutex ordered_extent_flush_mutex;

	struct rw_semaphore commit_root_sem;

	struct rw_semaphore cleanup_work_sem;
@@ -3437,6 +3431,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start,
			     struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
@@ -3495,9 +3491,9 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
void btrfs_block_rsv_release(struct btrfs_root *root,
			     struct btrfs_block_rsv *block_rsv,
			     u64 num_bytes);
int btrfs_set_block_group_ro(struct btrfs_root *root,
int btrfs_inc_block_group_ro(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache);
void btrfs_set_block_group_rw(struct btrfs_root *root,
void btrfs_dec_block_group_ro(struct btrfs_root *root,
			      struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
@@ -4073,6 +4069,7 @@ __cold
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
		     unsigned int line, int errno, const char *fmt, ...);

const char *btrfs_decode_error(int errno);

__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
@@ -4185,8 +4182,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *buf,
			  struct extent_buffer *cow);
void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
			      struct btrfs_pending_snapshot *pending,
void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
			      u64 *bytes_to_reserve);
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
			      struct btrfs_pending_snapshot *pending);
+13 −3
Original line number Diff line number Diff line
@@ -1730,6 +1730,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
	bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
	bdi->congested_fn	= btrfs_congested_fn;
	bdi->congested_data	= info;
	bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
	return 0;
}

@@ -2613,7 +2614,6 @@ int open_ctree(struct super_block *sb,


	mutex_init(&fs_info->ordered_operations_mutex);
	mutex_init(&fs_info->ordered_extent_flush_mutex);
	mutex_init(&fs_info->tree_log_mutex);
	mutex_init(&fs_info->chunk_mutex);
	mutex_init(&fs_info->transaction_kthread_mutex);
@@ -2955,8 +2955,9 @@ int open_ctree(struct super_block *sb,
	if (fs_info->fs_devices->missing_devices >
	     fs_info->num_tolerated_disk_barrier_failures &&
	    !(sb->s_flags & MS_RDONLY)) {
		printk(KERN_WARNING "BTRFS: "
			"too many missing devices, writeable mount is not allowed\n");
		pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n",
			fs_info->fs_devices->missing_devices,
			fs_info->num_tolerated_disk_barrier_failures);
		goto fail_sysfs;
	}

@@ -3763,6 +3764,15 @@ void close_ctree(struct btrfs_root *root)
	cancel_work_sync(&fs_info->async_reclaim_work);

	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
		/*
		 * If the cleaner thread is stopped and there are
		 * block groups queued for removal, the deletion will be
		 * skipped when we quit the cleaner thread.
		 */
		mutex_lock(&root->fs_info->cleaner_mutex);
		btrfs_delete_unused_bgs(root->fs_info);
		mutex_unlock(&root->fs_info->cleaner_mutex);

		ret = btrfs_commit_super(root);
		if (ret)
			btrfs_err(fs_info, "commit super ret %d", ret);
+262 −36
Original line number Diff line number Diff line
@@ -1316,8 +1316,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_root *root,
					  struct btrfs_path *path,
static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
@@ -1883,10 +1882,77 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
	return ret;
}

static int btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
#define in_range(b, first, len)        ((b) >= (first) && (b) < (first) + (len))
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << 9);

	if (WARN_ON(start != aligned_start)) {
		len -= aligned_start - start;
		len = round_down(len, 1 << 9);
		start = aligned_start;
	}

	*discarded_bytes = 0;

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

	/* Skip any superblocks on this device. */
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;

		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

		/*
		 * Superblock spans beginning of range.  Adjust start and
		 * try again.
		 */
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		if (size) {
			ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
						   GFP_NOFS, 0);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
					   GFP_NOFS, 0);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}

int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
@@ -1907,14 +1973,16 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,


		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			u64 bytes;
			if (!stripe->dev->can_discard)
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length);
						  stripe->length,
						  &bytes);
			if (!ret)
				discarded_bytes += stripe->length;
				discarded_bytes += bytes;
			else if (ret != -EOPNOTSUPP)
				break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */

@@ -6062,20 +6130,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *block_group, *tmp;
	struct list_head *deleted_bgs;
	struct extent_io_tree *unpin;
	u64 start;
	u64 end;
	int ret;

	if (trans->aborted)
		return 0;

	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		unpin = &fs_info->freed_extents[1];
	else
		unpin = &fs_info->freed_extents[0];

	while (1) {
	while (!trans->aborted) {
		mutex_lock(&fs_info->unused_bg_unpin_mutex);
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY, NULL);
@@ -6094,6 +6161,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
		cond_resched();
	}

	/*
	 * Transaction is finished.  We don't need the lock anymore.  We
	 * do need to clean up the block groups in case of a transaction
	 * abort.
	 */
	deleted_bgs = &trans->transaction->deleted_bgs;
	list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
		u64 trimmed = 0;

		ret = -EROFS;
		if (!trans->aborted)
			ret = btrfs_discard_extent(root,
						   block_group->key.objectid,
						   block_group->key.offset,
						   &trimmed);

		list_del_init(&block_group->bg_list);
		btrfs_put_block_group_trimming(block_group);
		btrfs_put_block_group(block_group);

		if (ret) {
			const char *errstr = btrfs_decode_error(ret);
			btrfs_warn(fs_info,
				   "Discard failed while removing blockgroup: errno=%d %s\n",
				   ret, errstr);
		}
	}

	return 0;
}

@@ -6349,7 +6444,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
	} else {
		if (found_extent) {
			BUG_ON(is_data && refs_to_drop !=
			       extent_data_ref_count(root, path, iref));
			       extent_data_ref_count(path, iref));
			if (iref) {
				BUG_ON(path->slots[0] != extent_slot);
			} else {
@@ -7567,9 +7662,6 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,

/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns the tree buffer or an ERR_PTR on error.
 */
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
@@ -8723,14 +8815,13 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
	return flags;
}

static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
{
	struct btrfs_space_info *sinfo = cache->space_info;
	u64 num_bytes;
	u64 min_allocable_bytes;
	int ret = -ENOSPC;


	/*
	 * We need some metadata space and system metadata space for
	 * allocating chunks in some corner cases until we force to set
@@ -8747,6 +8838,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
	spin_lock(&cache->lock);

	if (cache->ro) {
		cache->ro++;
		ret = 0;
		goto out;
	}
@@ -8758,7 +8850,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
	    sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
	    min_allocable_bytes <= sinfo->total_bytes) {
		sinfo->bytes_readonly += num_bytes;
		cache->ro = 1;
		cache->ro++;
		list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
		ret = 0;
	}
@@ -8768,7 +8860,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
	return ret;
}

int btrfs_set_block_group_ro(struct btrfs_root *root,
int btrfs_inc_block_group_ro(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache)

{
@@ -8776,8 +8868,6 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
	u64 alloc_flags;
	int ret;

	BUG_ON(cache->ro);

again:
	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans))
@@ -8820,7 +8910,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
			goto out;
	}

	ret = set_block_group_ro(cache, 0);
	ret = inc_block_group_ro(cache, 0);
	if (!ret)
		goto out;
	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -8828,7 +8918,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
			     CHUNK_ALLOC_FORCE);
	if (ret < 0)
		goto out;
	ret = set_block_group_ro(cache, 0);
	ret = inc_block_group_ro(cache, 0);
out:
	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
		alloc_flags = update_block_group_flags(root, cache->flags);
@@ -8891,7 +8981,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
	return free_bytes;
}

void btrfs_set_block_group_rw(struct btrfs_root *root,
void btrfs_dec_block_group_ro(struct btrfs_root *root,
			      struct btrfs_block_group_cache *cache)
{
	struct btrfs_space_info *sinfo = cache->space_info;
@@ -8901,11 +8991,13 @@ void btrfs_set_block_group_rw(struct btrfs_root *root,

	spin_lock(&sinfo->lock);
	spin_lock(&cache->lock);
	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
		    cache->bytes_super - btrfs_block_group_used(&cache->item);
	if (!--cache->ro) {
		num_bytes = cache->key.offset - cache->reserved -
			    cache->pinned - cache->bytes_super -
			    btrfs_block_group_used(&cache->item);
		sinfo->bytes_readonly -= num_bytes;
	cache->ro = 0;
		list_del_init(&cache->ro_list);
	}
	spin_unlock(&cache->lock);
	spin_unlock(&sinfo->lock);
}
@@ -9421,7 +9513,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)

		set_avail_alloc_bits(root->fs_info, cache->flags);
		if (btrfs_chunk_readonly(root, cache->key.objectid)) {
			set_block_group_ro(cache, 1);
			inc_block_group_ro(cache, 1);
		} else if (btrfs_block_group_used(&cache->item) == 0) {
			spin_lock(&info->unused_bgs_lock);
			/* Should always be true but just in case. */
@@ -9449,11 +9541,11 @@ int btrfs_read_block_groups(struct btrfs_root *root)
		list_for_each_entry(cache,
				&space_info->block_groups[BTRFS_RAID_RAID0],
				list)
			set_block_group_ro(cache, 1);
			inc_block_group_ro(cache, 1);
		list_for_each_entry(cache,
				&space_info->block_groups[BTRFS_RAID_SINGLE],
				list)
			set_block_group_ro(cache, 1);
			inc_block_group_ro(cache, 1);
	}

	init_global_block_rsv(info);
@@ -9834,6 +9926,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
	 * currently running transaction might finish and a new one start,
	 * allowing for new block groups to be created that can reuse the same
	 * physical device locations unless we take this special care.
	 *
	 * There may also be an implicit trim operation if the file system
	 * is mounted with -odiscard. The same protections must remain
	 * in place until the extents have been discarded completely when
	 * the transaction commit has completed.
	 */
	remove_em = (atomic_read(&block_group->trimming) == 0);
	/*
@@ -9908,6 +10005,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
	spin_lock(&fs_info->unused_bgs_lock);
	while (!list_empty(&fs_info->unused_bgs)) {
		u64 start, end;
		int trimming;

		block_group = list_first_entry(&fs_info->unused_bgs,
					       struct btrfs_block_group_cache,
@@ -9941,7 +10039,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		spin_unlock(&block_group->lock);

		/* We don't want to force the issue, only flip if it's ok. */
		ret = set_block_group_ro(block_group, 0);
		ret = inc_block_group_ro(block_group, 0);
		up_write(&space_info->groups_sem);
		if (ret < 0) {
			ret = 0;
@@ -9955,7 +10053,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		/* 1 for btrfs_orphan_reserve_metadata() */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			btrfs_set_block_group_rw(root, block_group);
			btrfs_dec_block_group_ro(root, block_group);
			ret = PTR_ERR(trans);
			goto next;
		}
@@ -9982,14 +10080,14 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
				  EXTENT_DIRTY, GFP_NOFS);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			btrfs_set_block_group_rw(root, block_group);
			btrfs_dec_block_group_ro(root, block_group);
			goto end_trans;
		}
		ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
				  EXTENT_DIRTY, GFP_NOFS);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			btrfs_set_block_group_rw(root, block_group);
			btrfs_dec_block_group_ro(root, block_group);
			goto end_trans;
		}
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
@@ -10007,12 +10105,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		spin_unlock(&block_group->lock);
		spin_unlock(&space_info->lock);

		/* DISCARD can flip during remount */
		trimming = btrfs_test_opt(root, DISCARD);

		/* Implicit trim during transaction commit. */
		if (trimming)
			btrfs_get_block_group_trimming(block_group);

		/*
		 * Btrfs_remove_chunk will abort the transaction if things go
		 * horribly wrong.
		 */
		ret = btrfs_remove_chunk(trans, root,
					 block_group->key.objectid);

		if (ret) {
			if (trimming)
				btrfs_put_block_group_trimming(block_group);
			goto end_trans;
		}

		/*
		 * If we're not mounted with -odiscard, we can just forget
		 * about this block group. Otherwise we'll need to wait
		 * until transaction commit to do the actual discard.
		 */
		if (trimming) {
			WARN_ON(!list_empty(&block_group->bg_list));
			spin_lock(&trans->transaction->deleted_bgs_lock);
			list_move(&block_group->bg_list,
				  &trans->transaction->deleted_bgs);
			spin_unlock(&trans->transaction->deleted_bgs_lock);
			btrfs_get_block_group(block_group);
		}
end_trans:
		btrfs_end_transaction(trans, root);
next:
@@ -10066,10 +10191,99 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
	return unpin_extent_range(root, start, end, false);
}

/*
 * It used to be that old block groups would be left around forever.
 * Iterating over them would be enough to trim unused space.  Since we
 * now automatically remove them, we also need to iterate over unallocated
 * space.
 *
 * We don't want a transaction for this since the discard may take a
 * substantial amount of time.  We don't require that a transaction be
 * running, but we do need to take a running transaction into account
 * to ensure that we're not discarding chunks that were released in
 * the current transaction.
 *
 * Holding the chunks lock will prevent other threads from allocating
 * or releasing chunks, but it won't prevent a running transaction
 * from committing and releasing the memory that the pending chunks
 * list head uses.  For that, we need to take a reference to the
 * transaction.
 */
static int btrfs_trim_free_extents(struct btrfs_device *device,
				   u64 minlen, u64 *trimmed)
{
	u64 start = 0, len = 0;
	int ret;

	*trimmed = 0;

	/* Not writeable = nothing to do. */
	if (!device->writeable)
		return 0;

	/* No free space = nothing to do. */
	if (device->total_bytes <= device->bytes_used)
		return 0;

	ret = 0;

	while (1) {
		struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
		struct btrfs_transaction *trans;
		u64 bytes;

		ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
		if (ret)
			return ret;

		down_read(&fs_info->commit_root_sem);

		spin_lock(&fs_info->trans_lock);
		trans = fs_info->running_transaction;
		if (trans)
			atomic_inc(&trans->use_count);
		spin_unlock(&fs_info->trans_lock);

		ret = find_free_dev_extent_start(trans, device, minlen, start,
						 &start, &len);
		if (trans)
			btrfs_put_transaction(trans);

		if (ret) {
			up_read(&fs_info->commit_root_sem);
			mutex_unlock(&fs_info->chunk_mutex);
			if (ret == -ENOSPC)
				ret = 0;
			break;
		}

		ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
		up_read(&fs_info->commit_root_sem);
		mutex_unlock(&fs_info->chunk_mutex);

		if (ret)
			break;

		start += len;
		*trimmed += bytes;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}

int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_device *device;
	struct list_head *devices;
	u64 group_trimmed;
	u64 start;
	u64 end;
@@ -10124,6 +10338,18 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
		cache = next_block_group(fs_info->tree_root, cache);
	}

	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	devices = &root->fs_info->fs_devices->alloc_list;
	list_for_each_entry(device, devices, dev_alloc_list) {
		ret = btrfs_trim_free_extents(device, range->minlen,
					      &group_trimmed);
		if (ret)
			break;

		trimmed += group_trimmed;
	}
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	range->len = trimmed;
	return ret;
}
Loading