Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7ee9e440 authored by Josef Bacik's avatar Josef Bacik
Browse files

Btrfs: check if we can nocow if we don't have data space



We always just try and reserve data space when we write, but if we are out of
space but have prealloc'ed extents we should still successfully write.  This
patch will try and see if we can write to prealloc'ed space and if we can go
ahead and allow the write to continue.  With this patch we now pass xfstests
generic/274.  Thanks,

Signed-off-by: default avatarJosef Bacik <jbacik@fusionio.com>
parent 925a6efb
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -3552,6 +3552,10 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
					   size_t pg_offset, u64 start, u64 len,
					   int create);
noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
			      struct inode *inode, u64 offset, u64 *len,
			      u64 *orig_start, u64 *orig_block_len,
			      u64 *ram_bytes);

/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
+1 −0
Original line number Diff line number Diff line
@@ -3666,6 +3666,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)

	data_sinfo = root->fs_info->data_sinfo;
	spin_lock(&data_sinfo->lock);
	WARN_ON(data_sinfo->bytes_may_use < bytes);
	data_sinfo->bytes_may_use -= bytes;
	trace_btrfs_space_reservation(root->fs_info, "space_info",
				      data_sinfo->flags, bytes, 0);
+3 −0
Original line number Diff line number Diff line
@@ -543,6 +543,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,

	btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	if (delete)
		bits |= ~EXTENT_CTLBITS;
	bits |= EXTENT_FIRST_DELALLOC;
+1 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_NEED_WAIT (1 << 13)
#define EXTENT_DAMAGED (1 << 14)
#define EXTENT_NORESERVE (1 << 15)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)

+114 −11
Original line number Diff line number Diff line
@@ -1312,6 +1312,56 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,

}

static noinline int check_can_nocow(struct inode *inode, loff_t pos,
				    size_t *write_bytes)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ordered_extent *ordered;
	u64 lockstart, lockend;
	u64 num_bytes;
	int ret;

	lockstart = round_down(pos, root->sectorsize);
	lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1;

	while (1) {
		lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
		ordered = btrfs_lookup_ordered_range(inode, lockstart,
						     lockend - lockstart + 1);
		if (!ordered) {
			break;
		}
		unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
		return PTR_ERR(trans);
	}

	num_bytes = lockend - lockstart + 1;
	ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL,
			       NULL);
	btrfs_end_transaction(trans, root);
	if (ret <= 0) {
		ret = 0;
	} else {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
				 NULL, GFP_NOFS);
		*write_bytes = min_t(size_t, *write_bytes, num_bytes);
	}

	unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);

	return ret;
}

static noinline ssize_t __btrfs_buffered_write(struct file *file,
					       struct iov_iter *i,
					       loff_t pos)
@@ -1319,10 +1369,12 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	u64 release_bytes = 0;
	unsigned long first_index;
	size_t num_written = 0;
	int nrptrs;
	int ret = 0;
	bool only_release_metadata = false;
	bool force_page_uptodate = false;

	nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
@@ -1343,6 +1395,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
					 offset);
		size_t num_pages = (write_bytes + offset +
				    PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
		size_t reserve_bytes;
		size_t dirty_pages;
		size_t copied;

@@ -1357,11 +1410,41 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
			break;
		}

		ret = btrfs_delalloc_reserve_space(inode,
					num_pages << PAGE_CACHE_SHIFT);
		reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
		ret = btrfs_check_data_free_space(inode, reserve_bytes);
		if (ret == -ENOSPC &&
		    (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
					      BTRFS_INODE_PREALLOC))) {
			ret = check_can_nocow(inode, pos, &write_bytes);
			if (ret > 0) {
				only_release_metadata = true;
				/*
				 * our prealloc extent may be smaller than
				 * write_bytes, so scale down.
				 */
				num_pages = (write_bytes + offset +
					     PAGE_CACHE_SIZE - 1) >>
					PAGE_CACHE_SHIFT;
				reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
				ret = 0;
			} else {
				ret = -ENOSPC;
			}
		}

		if (ret)
			break;

		ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
		if (ret) {
			if (!only_release_metadata)
				btrfs_free_reserved_data_space(inode,
							       reserve_bytes);
			break;
		}

		release_bytes = reserve_bytes;

		/*
		 * This is going to setup the pages array with the number of
		 * pages we want, so we don't really need to worry about the
@@ -1370,11 +1453,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
		ret = prepare_pages(root, file, pages, num_pages,
				    pos, first_index, write_bytes,
				    force_page_uptodate);
		if (ret) {
			btrfs_delalloc_release_space(inode,
					num_pages << PAGE_CACHE_SHIFT);
		if (ret)
			break;
		}

		copied = btrfs_copy_from_user(pos, num_pages,
					   write_bytes, pages, i);
@@ -1404,30 +1484,46 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
		 * managed to copy.
		 */
		if (num_pages > dirty_pages) {
			release_bytes = (num_pages - dirty_pages) <<
				PAGE_CACHE_SHIFT;
			if (copied > 0) {
				spin_lock(&BTRFS_I(inode)->lock);
				BTRFS_I(inode)->outstanding_extents++;
				spin_unlock(&BTRFS_I(inode)->lock);
			}
			if (only_release_metadata)
				btrfs_delalloc_release_metadata(inode,
								release_bytes);
			else
				btrfs_delalloc_release_space(inode,
					(num_pages - dirty_pages) <<
					PAGE_CACHE_SHIFT);
							     release_bytes);
		}

		release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
		if (copied > 0) {
			ret = btrfs_dirty_pages(root, inode, pages,
						dirty_pages, pos, copied,
						NULL);
			if (ret) {
				btrfs_delalloc_release_space(inode,
					dirty_pages << PAGE_CACHE_SHIFT);
				btrfs_drop_pages(pages, num_pages);
				break;
			}
		}

		release_bytes = 0;
		btrfs_drop_pages(pages, num_pages);

		if (only_release_metadata && copied > 0) {
			u64 lockstart = round_down(pos, root->sectorsize);
			u64 lockend = lockstart +
				(dirty_pages << PAGE_CACHE_SHIFT) - 1;

			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
				       lockend, EXTENT_NORESERVE, NULL,
				       NULL, GFP_NOFS);
			only_release_metadata = false;
		}

		cond_resched();

		balance_dirty_pages_ratelimited(inode->i_mapping);
@@ -1440,6 +1536,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,

	kfree(pages);

	if (release_bytes) {
		if (only_release_metadata)
			btrfs_delalloc_release_metadata(inode, release_bytes);
		else
			btrfs_delalloc_release_space(inode, release_bytes);
	}

	return num_written ? num_written : ret;
}

Loading