
Commit c7309e88 authored by Linus Torvalds
Pull btrfs fixes from Chris Mason:
 "The first commit is a fix from Filipe for a very old extent buffer
  reuse race that triggered a BUG_ON.  It hasn't come up often, I looked
  through old logs at FB and we hit it a handful of times over the last
  year.

  The rest are other corners he hit during testing"

* 'for-linus-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix race when reusing stale extent buffers that leads to BUG_ON
  Btrfs: fix race between block group creation and their cache writeout
  Btrfs: fix panic when starting bg cache writeout after IO error
  Btrfs: fix crash after inode cache writeback failure
parents 518af3cb 062c19e9
fs/btrfs/extent-tree.c (+27 −4)
@@ -3180,8 +3180,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 fail:
 	btrfs_release_path(path);
-	if (ret)
-		btrfs_abort_transaction(trans, root, ret);
 	return ret;
 
 }
@@ -3487,8 +3485,30 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
 				ret = 0;
 			}
 		}
-		if (!ret)
+		if (!ret) {
 			ret = write_one_cache_group(trans, root, path, cache);
+			/*
+			 * Our block group might still be attached to the list
+			 * of new block groups in the transaction handle of some
+			 * other task (struct btrfs_trans_handle->new_bgs). This
+			 * means its block group item isn't yet in the extent
+			 * tree. If this happens ignore the error, as we will
+			 * try again later in the critical section of the
+			 * transaction commit.
+			 */
+			if (ret == -ENOENT) {
+				ret = 0;
+				spin_lock(&cur_trans->dirty_bgs_lock);
+				if (list_empty(&cache->dirty_list)) {
+					list_add_tail(&cache->dirty_list,
+						      &cur_trans->dirty_bgs);
+					btrfs_get_block_group(cache);
+				}
+				spin_unlock(&cur_trans->dirty_bgs_lock);
+			} else if (ret) {
+				btrfs_abort_transaction(trans, root, ret);
+			}
+		}
 
 		/* if its not on the io list, we need to put the block group */
 		if (should_put)
@@ -3597,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				ret = 0;
 			}
 		}
-		if (!ret)
+		if (!ret) {
 			ret = write_one_cache_group(trans, root, path, cache);
+			if (ret)
+				btrfs_abort_transaction(trans, root, ret);
+		}
 
 		/* if its not on the io list, we need to put the block group */
 		if (should_put)
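Moving the btrfs_abort_transaction() call out of write_one_cache_group() lets btrfs_start_dirty_block_groups() treat -ENOENT as a transient miss: the block group item may simply not be in the extent tree yet, so the group is put back on the transaction's dirty list and retried during the commit's critical section. The following user-space sketch (plain C with hypothetical names, not the btrfs API) condenses that requeue-on-miss shape:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct group { bool in_tree; bool queued_dirty; };

/* Stub standing in for write_one_cache_group(): fails with -ENOENT
 * when the item has not been inserted into the tree yet. */
static int update_item(struct group *g)
{
	return g->in_tree ? 0 : -ENOENT;
}

/* Uncritical first pass: a miss is not fatal, just requeue. */
static int start_pass(struct group *g)
{
	int ret = update_item(g);

	if (ret == -ENOENT) {
		/* Item not in the tree yet (still on another task's
		 * new_bgs list): retry in the commit-critical pass. */
		ret = 0;
		if (!g->queued_dirty)
			g->queued_dirty = true;
	}
	return ret;	/* any other error would abort the transaction */
}

int main(void)
{
	struct group g = { .in_tree = false, .queued_dirty = false };

	if (start_pass(&g) == 0 && g.queued_dirty)
		puts("requeued for commit-critical retry");
	return 0;
}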
fs/btrfs/extent_io.c (+19 −0)
@@ -4772,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 			       start >> PAGE_CACHE_SHIFT);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
+		/*
+		 * Lock our eb's refs_lock to avoid races with
+		 * free_extent_buffer. When we get our eb it might be flagged
+		 * with EXTENT_BUFFER_STALE and another task running
+		 * free_extent_buffer might have seen that flag set,
+		 * eb->refs == 2, that the buffer isn't under IO (dirty and
+		 * writeback flags not set) and it's still in the tree (flag
+		 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
+		 * of decrementing the extent buffer's reference count twice.
+		 * So here we could race and increment the eb's reference count,
+		 * clear its stale flag, mark it as dirty and drop our reference
+		 * before the other task finishes executing free_extent_buffer,
+		 * which would later result in an attempt to free an extent
+		 * buffer that is dirty.
+		 */
+		if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
+			spin_lock(&eb->refs_lock);
+			spin_unlock(&eb->refs_lock);
+		}
 		mark_extent_buffer_accessed(eb, NULL);
 		return eb;
 	}
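The two-line spin_lock()/spin_unlock() pair is the whole fix: it makes the lookup path wait until any free_extent_buffer() call that already observed the stale flag finishes deciding the buffer's fate under eb->refs_lock. Below is a small user-space sketch (C with pthreads, illustrative names rather than kernel code) of that lock/unlock-as-barrier idiom:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct object {
	pthread_mutex_t refs_lock;
	int refs;		/* release() modifies refs under refs_lock */
	bool stale;
};

/* Release path, loosely mirroring free_extent_buffer(): decides under
 * the lock whether to drop one or two references. */
static void release(struct object *obj)
{
	pthread_mutex_lock(&obj->refs_lock);
	if (obj->stale && obj->refs == 2)
		obj->refs--;		/* drop the tree's reference too */
	obj->refs--;			/* drop the caller's reference */
	pthread_mutex_unlock(&obj->refs_lock);
}

/* Lookup path, loosely mirroring find_extent_buffer(): the empty
 * lock/unlock pair cannot complete until any in-flight release() has
 * left its critical section, so the object's fate is settled before
 * the lookup reuses it. */
static void lookup_synchronize(struct object *obj)
{
	if (obj->stale) {
		pthread_mutex_lock(&obj->refs_lock);
		pthread_mutex_unlock(&obj->refs_lock);
	}
}

int main(void)
{
	struct object obj = {
		.refs_lock = PTHREAD_MUTEX_INITIALIZER,
		.refs = 3,
		.stale = true,
	};

	release(&obj);		/* a concurrent releaser in real code */
	lookup_synchronize(&obj);
	printf("refs now %d\n", obj.refs);
	return 0;
}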
fs/btrfs/free-space-cache.c (+12 −2)
@@ -3466,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 	int ret;
 	struct btrfs_io_ctl io_ctl;
+	bool release_metadata = true;
 
 	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
 		return 0;
@@ -3473,10 +3474,19 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 	memset(&io_ctl, 0, sizeof(io_ctl));
 	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
 				      trans, path, 0);
-	if (!ret)
+	if (!ret) {
+		/*
+		 * At this point writepages() didn't error out, so our metadata
+		 * reservation is released when the writeback finishes, at
+		 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
+		 * with or without an error.
+		 */
+		release_metadata = false;
 		ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
+	}
 
 	if (ret) {
-		btrfs_delalloc_release_metadata(inode, inode->i_size);
+		if (release_metadata)
+			btrfs_delalloc_release_metadata(inode, inode->i_size);
 #ifdef DEBUG
 		btrfs_err(root->fs_info,
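The release_metadata flag encodes an ownership hand-off: once writepages() has succeeded inside __btrfs_write_out_cache(), the ordered-IO completion path releases the metadata reservation, so the error path must not release it a second time; that double release is what crashed after an inode cache writeback failure. A compact user-space sketch of the same hand-off, using stubbed hypothetical helpers instead of the btrfs functions:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int start_writeback(void) { return 0; }     /* stub: pages queued OK */
static int wait_writeback(void)  { return -EIO; }  /* stub: IO later fails */
static void release_reservation(void) { puts("reservation released"); }

static int write_out_cache(void)
{
	bool release = true;
	int ret = start_writeback();

	if (!ret) {
		/* Writeback is armed: from here on, the completion path
		 * owns the reservation, whether IO succeeds or fails. */
		release = false;
		ret = wait_writeback();
	}
	if (ret) {
		if (release)
			release_reservation();	/* only if never handed off */
		fprintf(stderr, "cache writeback failed: %d\n", ret);
	}
	return ret;
}

int main(void)
{
	return write_out_cache() ? 1 : 0;
}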
fs/btrfs/ordered-data.c (+10 −4)
@@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
 int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 {
 	int ret = 0;
+	int ret_wb = 0;
 	u64 end;
 	u64 orig_end;
 	struct btrfs_ordered_extent *ordered;
@@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 	if (ret)
 		return ret;
 
-	ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
-	if (ret)
-		return ret;
+	/*
+	 * If we have a writeback error don't return immediately. Wait first
+	 * for any ordered extents that haven't completed yet. This is to make
+	 * sure no one can dirty the same page ranges and call writepages()
+	 * before the ordered extents complete - to avoid failures (-EEXIST)
+	 * when adding the new ordered extents to the ordered tree.
+	 */
+	ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
 
 	end = orig_end;
 	while (1) {
@@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 			break;
 		end--;
 	}
-	return ret;
+	return ret_wb ? ret_wb : ret;
 }
 
 /*
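The ret_wb variable changes the error handling from return-early to record-and-drain: a writeback error is remembered, every ordered extent is still waited on so nobody can re-dirty the same range and hit -EEXIST, and the writeback error takes precedence in the return value. A minimal sketch (stubbed, hypothetical helpers, not the kernel API) of that shape:

#include <errno.h>
#include <stdbool.h>

static int wait_pages(void)       { return -EIO; } /* stub: writeback failed */
static bool more_ordered(void)    { static int n = 2; return n-- > 0; }
static int wait_one_ordered(void) { return 0; }    /* stub: completes fine */

static int wait_range(void)
{
	int ret = 0;
	/* Record the page writeback error instead of returning early. */
	int ret_wb = wait_pages();

	/* Keep draining ordered extents so nobody can re-dirty the same
	 * range and collide (-EEXIST) with a new ordered extent. */
	while (more_ordered()) {
		ret = wait_one_ordered();
		if (ret)
			break;
	}
	/* Prefer the writeback error; fall back to any wait error. */
	return ret_wb ? ret_wb : ret;
}

int main(void)
{
	return wait_range() ? 1 : 0;
}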