Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a2135011 authored by Chris Mason
Browse files

Btrfs: Replace the big fs_mutex with a collection of other locks



Extent allocations are still protected by a large alloc_mutex.
Objectid allocations are covered by an objectid mutex.
Other btree operations are protected by a lock on individual btree nodes

Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 925baedd
Loading
Loading
Loading
Loading
+33 −7
Original line number Diff line number Diff line
@@ -63,6 +63,9 @@ void btrfs_free_path(struct btrfs_path *p)
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
{
	int i;
	int skip = p->skip_locking;
	int keep = p->keep_locks;

	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		if (!p->nodes[i])
			continue;
@@ -73,6 +76,8 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
		free_extent_buffer(p->nodes[i]);
	}
	memset(p, 0, sizeof(*p));
	p->skip_locking = skip;
	p->keep_locks = keep;
}

struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
@@ -1202,13 +1207,19 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock)
			u32 nritems;
			t = path->nodes[i];
			nritems = btrfs_header_nritems(t);
			if (path->slots[i] >= nritems - 1) {
			if (nritems < 2 || path->slots[i] >= nritems - 2) {
if (path->keep_locks) {
//printk("path %p skip level now %d\n", path, skip_level);
}
				skip_level = i + 1;
				continue;
			}
		}
		t = path->nodes[i];
		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
if (path->keep_locks) {
//printk("path %p unlocking level %d slot %d nritems %d skip_level %d\n", path, i, path->slots[i], btrfs_header_nritems(t), skip_level);
}
			btrfs_tree_unlock(t);
			path->locks[i] = 0;
		}
@@ -1243,7 +1254,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len);
	WARN_ON(p->nodes[0] != NULL);
	// WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
	WARN_ON(root == root->fs_info->extent_root &&
		!mutex_is_locked(&root->fs_info->alloc_mutex));
	WARN_ON(root == root->fs_info->chunk_root &&
@@ -1321,7 +1331,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
			b = read_node_slot(root, b, slot);
			if (!p->skip_locking)
				btrfs_tree_lock(b);
			unlock_up(p, level, lowest_unlock);
			unlock_up(p, level + 1, lowest_unlock);
		} else {
			p->slots[level] = slot;
			if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
@@ -1804,6 +1814,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
	if (slot >= btrfs_header_nritems(upper) - 1)
		return 1;

	WARN_ON(!btrfs_tree_locked(path->nodes[1]));

	right = read_node_slot(root, upper, slot + 1);
	btrfs_tree_lock(right);
	free_space = btrfs_leaf_free_space(root, right);
@@ -1981,6 +1993,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
		return 1;
	}

	WARN_ON(!btrfs_tree_locked(path->nodes[1]));

	left = read_node_slot(root, path->nodes[1], slot - 1);
	btrfs_tree_lock(left);
	free_space = btrfs_leaf_free_space(root, left);
@@ -2957,15 +2971,16 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)

	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);

	path->keep_locks = 1;
	btrfs_release_path(root, path);
	path->keep_locks = 1;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	path->keep_locks = 0;

	if (ret < 0)
		return ret;

	if (path->slots[0] < nritems - 1) {
	nritems = btrfs_header_nritems(path->nodes[0]);
	if (nritems > 0 && path->slots[0] < nritems - 1) {
		goto done;
	}

@@ -2992,8 +3007,17 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
			reada_for_search(root, path, level, slot, 0);

		next = read_node_slot(root, c, slot);
		if (!path->skip_locking)
		if (!path->skip_locking) {
			if (!btrfs_tree_locked(c)) {
				int i;
				WARN_ON(1);
printk("path %p no lock on level %d\n", path, level);
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_header_nritems(path->nodes[i]));
}
			}
			btrfs_tree_lock(next);
		}
		break;
	}
	path->slots[level] = slot;
@@ -3011,9 +3035,11 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
		if (level == 1 && path->locks[1] && path->reada)
			reada_for_search(root, path, level, slot, 0);
		next = read_node_slot(root, next, 0);
		if (!path->skip_locking)
		if (!path->skip_locking) {
			WARN_ON(!btrfs_tree_locked(path->nodes[level]));
			btrfs_tree_lock(next);
		}
	}
done:
	unlock_up(path, 0, 1);
	return 0;
+3 −2
Original line number Diff line number Diff line
@@ -519,9 +519,9 @@ struct btrfs_fs_info {
	struct backing_dev_info bdi;
	spinlock_t hash_lock;
	struct mutex trans_mutex;
	struct mutex fs_mutex;
	struct mutex alloc_mutex;
	struct mutex chunk_mutex;
	struct mutex drop_mutex;
	struct list_head trans_list;
	struct list_head hashers;
	struct list_head dead_roots;
@@ -554,7 +554,7 @@ struct btrfs_fs_info {
	struct completion kobj_unregister;
	int do_barriers;
	int closing;
	unsigned long throttles;
	atomic_t throttles;

	u64 total_pinned;
	struct list_head dirty_cowonly_roots;
@@ -594,6 +594,7 @@ struct btrfs_root {
	struct inode *inode;
	struct kobject root_kobj;
	struct completion kobj_unregister;
	struct mutex objectid_mutex;
	u64 objectid;
	u64 last_trans;

+7 −8
Original line number Diff line number Diff line
@@ -724,6 +724,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,

	INIT_LIST_HEAD(&root->dirty_list);
	spin_lock_init(&root->node_lock);
	mutex_init(&root->objectid_mutex);
	memset(&root->root_key, 0, sizeof(root->root_key));
	memset(&root->root_item, 0, sizeof(root->root_item));
	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -1146,6 +1147,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	INIT_LIST_HEAD(&fs_info->space_info);
	btrfs_mapping_init(&fs_info->mapping_tree);
	atomic_set(&fs_info->nr_async_submits, 0);
	atomic_set(&fs_info->throttles, 0);
	fs_info->sb = sb;
	fs_info->max_extent = (u64)-1;
	fs_info->max_inline = 8192 * 1024;
@@ -1199,7 +1201,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);

	mutex_init(&fs_info->trans_mutex);
	mutex_init(&fs_info->fs_mutex);
	mutex_init(&fs_info->drop_mutex);
	mutex_init(&fs_info->alloc_mutex);
	mutex_init(&fs_info->chunk_mutex);

@@ -1278,8 +1280,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
		goto fail_sb_buffer;
	}

	mutex_lock(&fs_info->fs_mutex);

	mutex_lock(&fs_info->chunk_mutex);
	ret = btrfs_read_sys_array(tree_root);
	mutex_unlock(&fs_info->chunk_mutex);
@@ -1342,7 +1342,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	fs_info->metadata_alloc_profile = (u64)-1;
	fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;

	mutex_unlock(&fs_info->fs_mutex);
	return tree_root;

fail_extent_root:
@@ -1350,7 +1349,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fail_tree_root:
	free_extent_buffer(tree_root->node);
fail_sys_array:
	mutex_unlock(&fs_info->fs_mutex);
fail_sb_buffer:
	extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
	btrfs_stop_workers(&fs_info->workers);
@@ -1562,8 +1560,9 @@ int close_ctree(struct btrfs_root *root)
	struct btrfs_fs_info *fs_info = root->fs_info;

	fs_info->closing = 1;
	smp_mb();

	btrfs_transaction_flush_work(root);
	mutex_lock(&fs_info->fs_mutex);
	btrfs_defrag_dirty_roots(root->fs_info);
	trans = btrfs_start_transaction(root, 1);
	ret = btrfs_commit_transaction(trans, root);
@@ -1574,7 +1573,6 @@ int close_ctree(struct btrfs_root *root)
	BUG_ON(ret);

	write_ctree_super(NULL, root);
	mutex_unlock(&fs_info->fs_mutex);

	btrfs_transaction_flush_work(root);

@@ -1679,7 +1677,8 @@ void btrfs_throttle(struct btrfs_root *root)
	struct backing_dev_info *bdi;

	bdi = &root->fs_info->bdi;
	if (root->fs_info->throttles && bdi_write_congested(bdi)) {
	if (atomic_read(&root->fs_info->throttles) &&
	    bdi_write_congested(bdi)) {
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
		congestion_wait(WRITE, HZ/20);
#else
+7 −11
Original line number Diff line number Diff line
@@ -1577,9 +1577,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		}

		/* block accounting for super block */
		spin_lock_irq(&info->delalloc_lock);
		super_used = btrfs_super_bytes_used(&info->super_copy);
		btrfs_set_super_bytes_used(&info->super_copy,
					   super_used - num_bytes);
		spin_unlock_irq(&info->delalloc_lock);

		/* block accounting for root item */
		root_used = btrfs_root_used(&root->root_item);
@@ -1968,8 +1970,10 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
	}

	/* block accounting for super block */
	spin_lock_irq(&info->delalloc_lock);
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
	spin_unlock_irq(&info->delalloc_lock);

	/* block accounting for root item */
	root_used = btrfs_root_used(&root->root_item);
@@ -2172,12 +2176,12 @@ static void noinline reada_walk_down(struct btrfs_root *root,
				continue;
			}
		}
		mutex_unlock(&root->fs_info->fs_mutex);
		mutex_unlock(&root->fs_info->alloc_mutex);
		ret = readahead_tree_block(root, bytenr, blocksize,
					   btrfs_node_ptr_generation(node, i));
		last = bytenr + blocksize;
		cond_resched();
		mutex_lock(&root->fs_info->fs_mutex);
		mutex_lock(&root->fs_info->alloc_mutex);
		if (ret)
			break;
	}
@@ -2254,11 +2258,9 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);

			mutex_unlock(&root->fs_info->fs_mutex);
			mutex_unlock(&root->fs_info->alloc_mutex);
			next = read_tree_block(root, bytenr, blocksize,
					       ptr_gen);
			mutex_lock(&root->fs_info->fs_mutex);
			mutex_lock(&root->fs_info->alloc_mutex);

			/* we've dropped the lock, double check */
@@ -2381,6 +2383,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
	int orig_level;
	struct btrfs_root_item *root_item = &root->root_item;

	WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
	path = btrfs_alloc_path();
	BUG_ON(!path);

@@ -2710,7 +2713,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root,
		    *last_file_root == ref_root)
			goto out;

		mutex_unlock(&extent_root->fs_info->fs_mutex);
		inode = btrfs_iget_locked(extent_root->fs_info->sb,
					  ref_objectid, found_root);
		if (inode->i_state & I_NEW) {
@@ -2727,7 +2729,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root,
		 * the latest version of the tree root
		 */
		if (is_bad_inode(inode)) {
			mutex_lock(&extent_root->fs_info->fs_mutex);
			goto out;
		}
		*last_file_objectid = inode->i_ino;
@@ -2736,7 +2737,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root,

		relocate_inode_pages(inode, ref_offset, extent_key->offset);
		iput(inode);
		mutex_lock(&extent_root->fs_info->fs_mutex);
	} else {
		struct btrfs_trans_handle *trans;
		struct extent_buffer *eb;
@@ -3033,9 +3033,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)

		if (progress && need_resched()) {
			memcpy(&key, &found_key, sizeof(key));
			mutex_unlock(&root->fs_info->fs_mutex);
			cond_resched();
			mutex_lock(&root->fs_info->fs_mutex);
			btrfs_release_path(root, path);
			btrfs_search_slot(NULL, root, &key, path, 0, 0);
			progress = 0;
@@ -3068,9 +3066,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
		trans = btrfs_start_transaction(tree_root, 1);
		btrfs_commit_transaction(trans, tree_root);

		mutex_unlock(&root->fs_info->fs_mutex);
		btrfs_clean_old_snapshots(tree_root);
		mutex_lock(&root->fs_info->fs_mutex);

		trans = btrfs_start_transaction(tree_root, 1);
		btrfs_commit_transaction(trans, tree_root);
+1 −6
Original line number Diff line number Diff line
@@ -252,7 +252,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
	end_of_last_block = start_pos + num_bytes - 1;

	lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	mutex_lock(&root->fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		err = -ENOMEM;
@@ -341,7 +340,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
failed:
	err = btrfs_end_transaction(trans, root);
out_unlock:
	mutex_unlock(&root->fs_info->fs_mutex);
	unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
	return err;
}
@@ -905,9 +903,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
		WARN_ON(num_pages > nrptrs);
		memset(pages, 0, sizeof(pages));

		mutex_lock(&root->fs_info->fs_mutex);
		ret = btrfs_check_free_space(root, write_bytes, 0);
		mutex_unlock(&root->fs_info->fs_mutex);
		if (ret)
			goto out;

@@ -998,9 +994,9 @@ static int btrfs_sync_file(struct file *file,
	 * check the transaction that last modified this inode
	 * and see if its already been committed
	 */
	mutex_lock(&root->fs_info->fs_mutex);
	if (!BTRFS_I(inode)->last_trans)
		goto out;

	mutex_lock(&root->fs_info->trans_mutex);
	if (BTRFS_I(inode)->last_trans <=
	    root->fs_info->last_trans_committed) {
@@ -1023,7 +1019,6 @@ static int btrfs_sync_file(struct file *file,
	}
	ret = btrfs_commit_transaction(trans, root);
out:
	mutex_unlock(&root->fs_info->fs_mutex);
	return ret > 0 ? EIO : ret;
}

Loading