Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b9473439 authored by Chris Mason's avatar Chris Mason
Browse files

Btrfs: leave btree locks spinning more often



btrfs_mark_buffer dirty would set dirty bits in the extent_io tree
for the buffers it was dirtying.  This may require a kmalloc and it
was not atomic.  So, anyone who called btrfs_mark_buffer_dirty had to
set any btree locks they were holding to blocking first.

This commit changes dirty tracking for extent buffers to just use a flag
in the extent buffer.  Now that we have one and only one extent buffer
per page, this can be safely done without losing dirty bits along the way.

This also introduces a path->leave_spinning flag that callers of
btrfs_search_slot can use to indicate they will properly deal with a
path returned where all the locks are spinning instead of blocking.

Many of the btree search callers now expect spinning paths,
resulting in better btree concurrency overall.

Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent 89573b9c
Loading
Loading
Loading
Loading
+11 −8
Original line number Diff line number Diff line
@@ -1684,6 +1684,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
	 * we don't really know what they plan on doing with the path
	 * from here on, so for now just mark it as blocking
	 */
	if (!p->leave_spinning)
		btrfs_set_path_blocking(p);
	return ret;
}
@@ -3032,26 +3033,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
		return -EAGAIN;
	}

	btrfs_set_path_blocking(path);
	ret = split_leaf(trans, root, &orig_key, path,
			 sizeof(struct btrfs_item), 1);
	path->keep_locks = 0;
	BUG_ON(ret);

	btrfs_unlock_up_safe(path, 1);
	leaf = path->nodes[0];
	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));

split:
	/*
	 * make sure any changes to the path from split_leaf leave it
	 * in a blocking state
	 */
	btrfs_set_path_blocking(path);

	leaf = path->nodes[0];
	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));

split:
	item = btrfs_item_nr(leaf, path->slots[0]);
	orig_offset = btrfs_item_offset(leaf, item);
	item_size = btrfs_item_size(leaf, item);


	buf = kmalloc(item_size, GFP_NOFS);
	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
			    path->slots[0]), item_size);
@@ -3545,7 +3547,6 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
	}

	btrfs_set_header_nritems(leaf, nritems + nr);
	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
	if (slot == 0) {
@@ -3553,6 +3554,8 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
	}
	btrfs_unlock_up_safe(path, 1);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
@@ -3596,7 +3599,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
			       total_data, total_size, nr);

out:
	btrfs_unlock_up_safe(path, 1);
	return ret;
}

@@ -3792,6 +3794,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
			slot = path->slots[1];
			extent_buffer_get(leaf);

			btrfs_set_path_blocking(path);
			wret = push_leaf_left(trans, root, path, 1, 1);
			if (wret < 0 && wret != -ENOSPC)
				ret = wret;
+9 −3
Original line number Diff line number Diff line
@@ -401,15 +401,16 @@ struct btrfs_path {
	int locks[BTRFS_MAX_LEVEL];
	int reada;
	/* keep some upper locks as we walk down */
	int keep_locks;
	int skip_locking;
	int lowest_level;

	/*
	 * set by btrfs_split_item, tells search_slot to keep all locks
	 * and to force calls to keep space in the nodes
	 */
	int search_for_split;
	unsigned int search_for_split:1;
	unsigned int keep_locks:1;
	unsigned int skip_locking:1;
	unsigned int leave_spinning:1;
};

/*
@@ -779,6 +780,11 @@ struct btrfs_fs_info {
	atomic_t throttle_gen;

	u64 total_pinned;

	/* protected by the delalloc lock, used to keep from writing
	 * metadata until there is a nice batch
	 */
	u64 dirty_metadata_bytes;
	struct list_head dirty_cowonly_roots;

	struct btrfs_fs_devices *fs_devices;
+3 −0
Original line number Diff line number Diff line
@@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
	key.objectid = dir;
	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
	key.offset = btrfs_name_hash(name, name_len);

	path = btrfs_alloc_path();
	path->leave_spinning = 1;

	data_size = sizeof(*dir_item) + name_len;
	dir_item = insert_with_overflow(trans, root, path, &key, data_size,
					name, name_len);
+54 −13
Original line number Diff line number Diff line
@@ -668,15 +668,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct extent_io_tree *tree;
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	struct extent_buffer *eb;
	int was_dirty;

	tree = &BTRFS_I(page->mapping->host)->io_tree;
	if (!(current->flags & PF_MEMALLOC)) {
		return extent_write_full_page(tree, page,
					      btree_get_extent, wbc);
	}

	if (current->flags & PF_MEMALLOC) {
	redirty_page_for_writepage(wbc, page);
	eb = btrfs_find_tree_block(root, page_offset(page),
				      PAGE_CACHE_SIZE);
	WARN_ON(!eb);

	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
	if (!was_dirty) {
		spin_lock(&root->fs_info->delalloc_lock);
		root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
		spin_unlock(&root->fs_info->delalloc_lock);
	}
	free_extent_buffer(eb);

	unlock_page(page);
	return 0;
}
	return extent_write_full_page(tree, page, btree_get_extent, wbc);
}

static int btree_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
@@ -684,15 +701,15 @@ static int btree_writepages(struct address_space *mapping,
	struct extent_io_tree *tree;
	tree = &BTRFS_I(mapping->host)->io_tree;
	if (wbc->sync_mode == WB_SYNC_NONE) {
		struct btrfs_root *root = BTRFS_I(mapping->host)->root;
		u64 num_dirty;
		u64 start = 0;
		unsigned long thresh = 32 * 1024 * 1024;

		if (wbc->for_kupdate)
			return 0;

		num_dirty = count_range_bits(tree, &start, (u64)-1,
					     thresh, EXTENT_DIRTY);
		/* this is a bit racy, but that's ok */
		num_dirty = root->fs_info->dirty_metadata_bytes;
		if (num_dirty < thresh)
			return 0;
	}
@@ -859,9 +876,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
	    root->fs_info->running_transaction->transid) {
		btrfs_assert_tree_locked(buf);

		/* ugh, clear_extent_buffer_dirty can be expensive */
		btrfs_set_lock_blocking(buf);
		if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
			spin_lock(&root->fs_info->delalloc_lock);
			if (root->fs_info->dirty_metadata_bytes >= buf->len)
				root->fs_info->dirty_metadata_bytes -= buf->len;
			else
				WARN_ON(1);
			spin_unlock(&root->fs_info->delalloc_lock);
		}

		/* ugh, clear_extent_buffer_dirty needs to lock the page */
		btrfs_set_lock_blocking(buf);
		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
					  buf);
	}
@@ -2348,8 +2373,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
	u64 transid = btrfs_header_generation(buf);
	struct inode *btree_inode = root->fs_info->btree_inode;

	btrfs_set_lock_blocking(buf);
	int was_dirty;

	btrfs_assert_tree_locked(buf);
	if (transid != root->fs_info->generation) {
@@ -2360,7 +2384,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
			(unsigned long long)root->fs_info->generation);
		WARN_ON(1);
	}
	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
	was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
					    buf);
	if (!was_dirty) {
		spin_lock(&root->fs_info->delalloc_lock);
		root->fs_info->dirty_metadata_bytes += buf->len;
		spin_unlock(&root->fs_info->delalloc_lock);
	}
}

void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
@@ -2400,6 +2430,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
int btree_lock_page_hook(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_buffer *eb;
	unsigned long len;
@@ -2415,6 +2446,16 @@ int btree_lock_page_hook(struct page *page)

	btrfs_tree_lock(eb);
	btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);

	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
		spin_lock(&root->fs_info->delalloc_lock);
		if (root->fs_info->dirty_metadata_bytes >= eb->len)
			root->fs_info->dirty_metadata_bytes -= eb->len;
		else
			WARN_ON(1);
		spin_unlock(&root->fs_info->delalloc_lock);
	}

	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);
out:
+1 −0
Original line number Diff line number Diff line
@@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root,
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
int wait_on_tree_block_writeback(struct btrfs_root *root,
Loading