btrfs: scrub: Introduce full stripe lock for RAID56 (0966a7b1) · Commits · e / devices / android_kernel_oneplus_sm7250

fs/btrfs/ctree.h

+17 −0

Original line number	Diff line number	Diff line
		@@ -539,6 +539,14 @@ struct btrfs_io_ctl {
		unsigned check_crcs:1;
		};

		/*
		* Tree to record all locked full stripes of a RAID5/6 block group
		*/
		struct btrfs_full_stripe_locks_tree {
		/* Root of the rb-tree holding the currently locked full stripes */
		struct rb_root root;
		/* Must be held while searching or modifying @root */
		struct mutex lock;
		};

		struct btrfs_block_group_cache {
		struct btrfs_key key;
		struct btrfs_block_group_item item;
		@@ -649,6 +657,9 @@ struct btrfs_block_group_cache {
		* Protected by free_space_lock.
		*/
		int needs_free_space;

		/* Record locked full stripes for RAID5/6 block group */
		struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
		};

		/* delayed seq elem */
		@@ -3653,6 +3664,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
		struct btrfs_device *dev);
		int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
		struct btrfs_scrub_progress *progress);
		/*
		 * Set up an empty full stripe locks tree: initialise the mutex and reset
		 * the rb-tree root so that no locked full stripe is recorded.
		 */
		static inline void btrfs_init_full_stripe_locks_tree(
				struct btrfs_full_stripe_locks_tree *locks_root)
		{
			mutex_init(&locks_root->lock);
			locks_root->root = RB_ROOT;
		}

		/* dev-replace.c */
		void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);

fs/btrfs/extent-tree.c

+11 −0

Original line number	Diff line number	Diff line
		@@ -131,6 +131,16 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
		if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);

		/*
		* If not empty, someone is still holding mutex of
		* full_stripe_lock, which can only be released by caller.
		* And it will definitely cause use-after-free when caller
		* tries to release full stripe lock.
		*
		* No better way to resolve, but only to warn.
		*/
		WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
		kfree(cache->free_space_ctl);
		kfree(cache);
		}
		@@ -9917,6 +9927,7 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
		btrfs_init_free_space_ctl(cache);
		atomic_set(&cache->trimming, 0);
		mutex_init(&cache->free_space_lock);
		btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);

		return cache;
		}

fs/btrfs/scrub.c

+223 −0

Original line number	Diff line number	Diff line
		@@ -240,6 +240,13 @@ struct scrub_warning {
		struct btrfs_device *dev;
		};

		/* One entry in a block group's full stripe locks tree */
		struct full_stripe_lock {
		/* Links this entry into the full stripe locks rb-tree */
		struct rb_node node;
		/* Logical start of the full stripe; used as the rb-tree key */
		u64 logical;
		/* Number of lock_full_stripe() calls not yet paired with an unlock */
		u64 refs;
		/* The actual lock taken by lock_full_stripe() */
		struct mutex mutex;
		};

		static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
		static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
		static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
		@@ -348,6 +355,222 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
		scrub_pause_off(fs_info);
		}

		/*
		 * Insert new full stripe lock into full stripe locks tree
		 *
		 * Looks up @fstripe_logical in @locks_root. If an entry with that logical
		 * address already exists, its reference count is incremented and it is
		 * returned. Otherwise a new entry with one reference is allocated and
		 * linked into the tree.
		 *
		 * Return pointer to existing or newly inserted full_stripe_lock structure if
		 * everything works well.
		 * Return ERR_PTR(-ENOMEM) if we failed to allocate memory
		 *
		 * NOTE: caller must hold full_stripe_locks_root->lock before calling this
		 * function
		 */
		static struct full_stripe_lock *insert_full_stripe_lock(
				struct btrfs_full_stripe_locks_tree *locks_root,
				u64 fstripe_logical)
		{
			struct rb_node **p;
			struct rb_node *parent = NULL;
			struct full_stripe_lock *entry;
			struct full_stripe_lock *ret;

			WARN_ON(!mutex_is_locked(&locks_root->lock));

			/* Walk down to either the matching entry or the insertion slot */
			p = &locks_root->root.rb_node;
			while (*p) {
				parent = *p;
				entry = rb_entry(parent, struct full_stripe_lock, node);
				if (fstripe_logical < entry->logical) {
					p = &(*p)->rb_left;
				} else if (fstripe_logical > entry->logical) {
					p = &(*p)->rb_right;
				} else {
					/* Already tracked: just take another reference */
					entry->refs++;
					return entry;
				}
			}

			/*
			 * Insert new lock.
			 *
			 * The allocation happens on the scrub path with the tree mutex
			 * held. GFP_KERNEL would let direct reclaim re-enter the
			 * filesystem, which may end up waiting for a transaction commit
			 * that is itself waiting for scrub to pause. Use GFP_NOFS so
			 * reclaim never recurses into the filesystem from here.
			 */
			ret = kmalloc(sizeof(*ret), GFP_NOFS);
			if (!ret)
				return ERR_PTR(-ENOMEM);
			ret->logical = fstripe_logical;
			ret->refs = 1;
			mutex_init(&ret->mutex);

			rb_link_node(&ret->node, parent, p);
			rb_insert_color(&ret->node, &locks_root->root);
			return ret;
		}

		/*
		 * Look up the full stripe lock keyed by @fstripe_logical in @locks_root.
		 *
		 * Warns if @locks_root->lock is not locked on entry. The reference count
		 * of the entry is left untouched.
		 *
		 * Return pointer to the matching full stripe lock if one exists
		 * Return NULL otherwise
		 */
		static struct full_stripe_lock *search_full_stripe_lock(
				struct btrfs_full_stripe_locks_tree *locks_root,
				u64 fstripe_logical)
		{
			struct rb_node *cur;

			WARN_ON(!mutex_is_locked(&locks_root->lock));

			for (cur = locks_root->root.rb_node; cur; ) {
				struct full_stripe_lock *found =
					rb_entry(cur, struct full_stripe_lock, node);

				if (found->logical == fstripe_logical)
					return found;

				/* Smaller keys live in the left subtree, larger in the right */
				cur = (fstripe_logical < found->logical) ?
					cur->rb_left : cur->rb_right;
			}
			return NULL;
		}

		/*
		 * Map @bytenr to the logical start of the full stripe that contains it.
		 *
		 * The result is @bytenr rounded down to a multiple of the block group's
		 * full stripe length, measured from the start of the block group.
		 *
		 * Caller must ensure @cache is a RAID56 block group.
		 */
		static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
						   u64 bytenr)
		{
			const u64 bg_start = cache->key.objectid;
			u64 nr_stripes;

			/*
			 * Sanity check only: the chunk item size limit should keep the
			 * full stripe length below U32_MAX.
			 */
			WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);

			/*
			 * A RAID56 full stripe is 64KiB * n, which is not necessarily a
			 * power of 2, so round_down() cannot be used. Divide and multiply
			 * back instead.
			 */
			nr_stripes = div64_u64(bytenr - bg_start, cache->full_stripe_len);
			return nr_stripes * cache->full_stripe_len + bg_start;
		}

		/*
		 * Lock the full stripe covering @bytenr so that recovery and read do not
		 * run concurrently on it.
		 *
		 * Only block groups with a parity based profile (RAID5/6) are locked; for
		 * any other profile nothing is done.
		 *
		 * @locked_ret is set to true only when the full stripe mutex was actually
		 * acquired, and to false in every other case (non-RAID56 block group or
		 * error). Pass that value to unlock_full_stripe().
		 *
		 * Return 0 on success. If *@locked_ret is true the mutex is held, so the
		 * caller must call unlock_full_stripe() at the same context.
		 *
		 * Return <0 if encounters error: -ENOENT when no block group covers
		 * @bytenr, or the error from inserting the lock into the tree.
		 */
		static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
					    bool *locked_ret)
		{
			struct btrfs_block_group_cache *bg;
			int ret = 0;

			*locked_ret = false;

			bg = btrfs_lookup_block_group(fs_info, bytenr);
			if (!bg) {
				ASSERT(0);
				return -ENOENT;
			}

			/* Only parity based profiles need the full stripe lock */
			if (bg->flags & BTRFS_BLOCK_GROUP_RAID56_MASK) {
				struct btrfs_full_stripe_locks_tree *tree =
					&bg->full_stripe_locks_root;
				struct full_stripe_lock *fslock;
				u64 start = get_full_stripe_logical(bg, bytenr);

				/* Find or create the entry and take a reference on it */
				mutex_lock(&tree->lock);
				fslock = insert_full_stripe_lock(tree, start);
				mutex_unlock(&tree->lock);

				if (IS_ERR(fslock)) {
					ret = PTR_ERR(fslock);
				} else {
					/*
					 * The tree lock is already dropped here, so
					 * sleeping on the stripe mutex does not block
					 * users of other stripes.
					 */
					mutex_lock(&fslock->mutex);
					*locked_ret = true;
				}
			}

			btrfs_put_block_group(bg);
			return ret;
		}

		/*
		 * Release the full stripe lock covering @bytenr.
		 *
		 * @locked is the value lock_full_stripe() stored in its @locked_ret; when
		 * it is false there is nothing to release. Dropping the last reference
		 * removes the entry from the block group's tree and frees it.
		 *
		 * NOTE: Caller must ensure it's the same context calling corresponding
		 * lock_full_stripe().
		 *
		 * Return 0 if we unlock full stripe without problem.
		 * Return <0 for error: -ENOENT when no block group covers @bytenr or when
		 * no lock is recorded for the full stripe.
		 */
		static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
					      bool locked)
		{
			struct btrfs_block_group_cache *bg;
			struct btrfs_full_stripe_locks_tree *tree;
			struct full_stripe_lock *fslock;
			u64 start;
			bool last_ref;
			int ret = 0;

			/* If we didn't acquire full stripe lock, no need to continue */
			if (!locked)
				return 0;

			bg = btrfs_lookup_block_group(fs_info, bytenr);
			if (!bg) {
				ASSERT(0);
				return -ENOENT;
			}
			if (!(bg->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
				goto put_bg;

			tree = &bg->full_stripe_locks_root;
			start = get_full_stripe_logical(bg, bytenr);

			mutex_lock(&tree->lock);
			fslock = search_full_stripe_lock(tree, start);
			if (!fslock) {
				/* Unpaired unlock_full_stripe() detected */
				WARN_ON(1);
				ret = -ENOENT;
				mutex_unlock(&tree->lock);
				goto put_bg;
			}

			if (fslock->refs > 0) {
				fslock->refs--;
			} else {
				/* Count is already zero: report it and do not decrement */
				WARN_ON(1);
				btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
					   fslock->logical);
			}

			/* No reference left: unlink now, free after the mutex is released */
			last_ref = (fslock->refs == 0);
			if (last_ref)
				rb_erase(&fslock->node, &tree->root);
			mutex_unlock(&tree->lock);

			mutex_unlock(&fslock->mutex);
			if (last_ref)
				kfree(fslock);
		put_bg:
			btrfs_put_block_group(bg);
			return ret;
		}

		/*
		* used for workers that require transaction commits (i.e., for the
		* NOCOW case)