Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5fd02043 authored by Josef Bacik's avatar Josef Bacik
Browse files

Btrfs: finish ordered extents in their own thread



We noticed that the ordered extent completion doesn't really rely on having
a page and that it could be done independantly of ending the writeback on a
page.  This patch makes us not do the threaded endio stuff for normal
buffered writes and direct writes so we can end page writeback as soon as
possible (in irq context) and only start threads to do the ordered work when
it is actually done.  Compression needs to be reworked some to take
advantage of this as well, but atm it has to do a find_get_page in its endio
handler so it must be done in its own thread.  This makes direct writes
quite a bit faster.  Thanks,

Signed-off-by: default avatarJosef Bacik <josef@redhat.com>
parent 4e899152
Loading
Loading
Loading
Loading
+0 −12
Original line number Diff line number Diff line
@@ -3671,17 +3671,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
	return 0;
}

static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page,
					  u64 start, u64 end,
					  struct extent_state *state)
{
	struct super_block *sb = page->mapping->host->i_sb;
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	btrfs_error(fs_info, -EIO,
		    "Error occured while writing out btree at %llu", start);
	return -EIO;
}

static struct extent_io_ops btree_extent_io_ops = {
	.write_cache_pages_lock_hook = btree_lock_page_hook,
	.readpage_end_io_hook = btree_readpage_end_io_hook,
@@ -3689,5 +3678,4 @@ static struct extent_io_ops btree_extent_io_ops = {
	.submit_bio_hook = btree_submit_bio_hook,
	/* note we're sharing with inode.c for the merge bio hook */
	.merge_bio_hook = btrfs_merge_bio_hook,
	.writepage_io_failed_hook = btree_writepage_io_failed_hook,
};
+2 −13
Original line number Diff line number Diff line
@@ -1172,9 +1172,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			      cached_state, mask);
}

static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
				 u64 end, struct extent_state **cached_state,
				 gfp_t mask)
int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			  struct extent_state **cached_state, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
				cached_state, mask);
@@ -2221,17 +2220,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
			uptodate = 0;
	}

	if (!uptodate && tree->ops &&
	    tree->ops->writepage_io_failed_hook) {
		ret = tree->ops->writepage_io_failed_hook(NULL, page,
						 start, end, NULL);
		/* Writeback already completed */
		if (ret == 0)
			return 1;
	}

	if (!uptodate) {
		clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
		ClearPageUptodate(page);
		SetPageError(page);
	}
+2 −3
Original line number Diff line number Diff line
@@ -75,9 +75,6 @@ struct extent_io_ops {
			      unsigned long bio_flags);
	int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
	int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
	int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
					u64 start, u64 end,
				       struct extent_state *state);
	int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
				    struct extent_state *state, int mirror);
	int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
@@ -225,6 +222,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			struct extent_state **cached_state, gfp_t mask);
int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			  struct extent_state **cached_state, gfp_t mask);
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
		   gfp_t mask);
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+1 −3
Original line number Diff line number Diff line
@@ -972,9 +972,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
		goto out;


	ret = filemap_write_and_wait(inode->i_mapping);
	if (ret)
		goto out;
	btrfs_wait_ordered_range(inode, 0, (u64)-1);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
+76 −101
Original line number Diff line number Diff line
@@ -89,7 +89,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {

static int btrfs_setsize(struct inode *inode, loff_t newsize);
static int btrfs_truncate(struct inode *inode);
static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, int *page_started,
@@ -1572,11 +1572,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
	if (btrfs_is_free_space_inode(root, inode))
		metadata = 2;

	if (!(rw & REQ_WRITE)) {
		ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
		if (ret)
			return ret;

	if (!(rw & REQ_WRITE)) {
		if (bio_flags & EXTENT_BIO_COMPRESSED) {
			return btrfs_submit_compressed_read(inode, bio,
						    mirror_num, bio_flags);
@@ -1815,25 +1815,24 @@ out:
 * an ordered extent if the range of bytes in the file it covers are
 * fully written.
 */
static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
	struct inode *inode = ordered_extent->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_ordered_extent *ordered_extent = NULL;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;
	int compress_type = 0;
	int ret;
	bool nolock;

	ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
					     end - start + 1);
	if (!ret)
		return 0;
	BUG_ON(!ordered_extent); /* Logic error */

	nolock = btrfs_is_free_space_inode(root, inode);

	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
		ret = -EIO;
		goto out;
	}

	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
		BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -1889,12 +1888,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
				   ordered_extent->file_offset,
				   ordered_extent->len);
	}
	unlock_extent_cached(io_tree, ordered_extent->file_offset,
			     ordered_extent->file_offset +
			     ordered_extent->len - 1, &cached_state, GFP_NOFS);

	if (ret < 0) {
		btrfs_abort_transaction(trans, root, ret);
		goto out;
		goto out_unlock;
	}

	add_pending_csums(trans, inode, ordered_extent->file_offset,
@@ -1905,10 +1902,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
		ret = btrfs_update_inode_fallback(trans, root, inode);
		if (ret) { /* -ENOMEM or corruption */
			btrfs_abort_transaction(trans, root, ret);
			goto out;
			goto out_unlock;
		}
	}
	ret = 0;
out_unlock:
	unlock_extent_cached(io_tree, ordered_extent->file_offset,
			     ordered_extent->file_offset +
			     ordered_extent->len - 1, &cached_state, GFP_NOFS);
out:
	if (root != root->fs_info->tree_root)
		btrfs_delalloc_release_metadata(inode, ordered_extent->len);
@@ -1919,26 +1920,57 @@ out:
			btrfs_end_transaction(trans, root);
	}

	if (ret)
		clear_extent_uptodate(io_tree, ordered_extent->file_offset,
				      ordered_extent->file_offset +
				      ordered_extent->len - 1, NULL, GFP_NOFS);

	/*
	 * This needs to be dont to make sure anybody waiting knows we are done
	 * upating everything for this ordered extent.
	 */
	btrfs_remove_ordered_extent(inode, ordered_extent);

	/* once for us */
	btrfs_put_ordered_extent(ordered_extent);
	/* once for the tree */
	btrfs_put_ordered_extent(ordered_extent);

	return 0;
out_unlock:
	unlock_extent_cached(io_tree, ordered_extent->file_offset,
			     ordered_extent->file_offset +
			     ordered_extent->len - 1, &cached_state, GFP_NOFS);
	goto out;
	return ret;
}

static void finish_ordered_fn(struct btrfs_work *work)
{
	struct btrfs_ordered_extent *ordered_extent;
	ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
	btrfs_finish_ordered_io(ordered_extent);
}

static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
				struct extent_state *state, int uptodate)
{
	struct inode *inode = page->mapping->host;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ordered_extent *ordered_extent = NULL;
	struct btrfs_workers *workers;

	trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);

	ClearPagePrivate2(page);
	return btrfs_finish_ordered_io(page->mapping->host, start, end);
	if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
					    end - start + 1, uptodate))
		return 0;

	ordered_extent->work.func = finish_ordered_fn;
	ordered_extent->work.flags = 0;

	if (btrfs_is_free_space_inode(root, inode))
		workers = &root->fs_info->endio_freespace_worker;
	else
		workers = &root->fs_info->endio_write_workers;
	btrfs_queue_worker(workers, &ordered_extent->work);

	return 0;
}

/*
@@ -5909,9 +5941,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
	struct btrfs_dio_private *dip = bio->bi_private;
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	struct btrfs_ordered_extent *ordered = NULL;
	struct extent_state *cached_state = NULL;
	u64 ordered_offset = dip->logical_offset;
	u64 ordered_bytes = dip->bytes;
	int ret;
@@ -5921,73 +5951,14 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
again:
	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
						   &ordered_offset,
						   ordered_bytes);
						   ordered_bytes, !err);
	if (!ret)
		goto out_test;

	BUG_ON(!ordered);

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		err = -ENOMEM;
		goto out;
	}
	trans->block_rsv = &root->fs_info->delalloc_block_rsv;

	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
		ret = btrfs_ordered_update_i_size(inode, 0, ordered);
		if (!ret)
			err = btrfs_update_inode_fallback(trans, root, inode);
		goto out;
	}

	lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
			 ordered->file_offset + ordered->len - 1, 0,
			 &cached_state);

	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
		ret = btrfs_mark_extent_written(trans, inode,
						ordered->file_offset,
						ordered->file_offset +
						ordered->len);
		if (ret) {
			err = ret;
			goto out_unlock;
		}
	} else {
		ret = insert_reserved_file_extent(trans, inode,
						  ordered->file_offset,
						  ordered->start,
						  ordered->disk_len,
						  ordered->len,
						  ordered->len,
						  0, 0, 0,
						  BTRFS_FILE_EXTENT_REG);
		unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
				   ordered->file_offset, ordered->len);
		if (ret) {
			err = ret;
			WARN_ON(1);
			goto out_unlock;
		}
	}

	add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
	ret = btrfs_ordered_update_i_size(inode, 0, ordered);
	if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
		btrfs_update_inode_fallback(trans, root, inode);
	ret = 0;
out_unlock:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
			     ordered->file_offset + ordered->len - 1,
			     &cached_state, GFP_NOFS);
out:
	btrfs_delalloc_release_metadata(inode, ordered->len);
	btrfs_end_transaction(trans, root);
	ordered_offset = ordered->file_offset + ordered->len;
	btrfs_put_ordered_extent(ordered);
	btrfs_put_ordered_extent(ordered);

	ordered->work.func = finish_ordered_fn;
	ordered->work.flags = 0;
	btrfs_queue_worker(&root->fs_info->endio_write_workers,
			   &ordered->work);
out_test:
	/*
	 * our bio might span multiple ordered extents.  If we haven't
@@ -5996,12 +5967,12 @@ out_test:
	if (ordered_offset < dip->logical_offset + dip->bytes) {
		ordered_bytes = dip->logical_offset + dip->bytes -
			ordered_offset;
		ordered = NULL;
		goto again;
	}
out_done:
	bio->bi_private = dip->private;

	kfree(dip->csums);
	kfree(dip);

	/* If we had an error make sure to clear the uptodate flag */
@@ -6069,9 +6040,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
	int ret;

	bio_get(bio);

	if (!write) {
		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
		if (ret)
			goto err;
	}

	if (skip_sum)
		goto map;
@@ -6491,13 +6465,13 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)

static void btrfs_invalidatepage(struct page *page, unsigned long offset)
{
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	u64 page_start = page_offset(page);
	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;


	/*
	 * we have the page locked, so new writeback can't start,
	 * and the dirty bit won't be cleared while we are here.
@@ -6507,13 +6481,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
	 */
	wait_on_page_writeback(page);

	tree = &BTRFS_I(page->mapping->host)->io_tree;
	tree = &BTRFS_I(inode)->io_tree;
	if (offset) {
		btrfs_releasepage(page, GFP_NOFS);
		return;
	}
	lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
	ordered = btrfs_lookup_ordered_extent(page->mapping->host,
	ordered = btrfs_lookup_ordered_extent(inode,
					   page_offset(page));
	if (ordered) {
		/*
@@ -6528,9 +6502,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
		 * whoever cleared the private bit is responsible
		 * for the finish_ordered_io
		 */
		if (TestClearPagePrivate2(page)) {
			btrfs_finish_ordered_io(page->mapping->host,
						page_start, page_end);
		if (TestClearPagePrivate2(page) &&
		    btrfs_dec_test_ordered_pending(inode, &ordered, page_start,
						   PAGE_CACHE_SIZE, 1)) {
			btrfs_finish_ordered_io(ordered);
		}
		btrfs_put_ordered_extent(ordered);
		cached_state = NULL;
Loading