Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 890871be authored by Chris Mason
Browse files

Btrfs: switch extent_map to a rw lock



There are two main users of the extent_map tree.  The
first is regular file inodes, where it is evenly spread
between readers and writers.

The second is the chunk allocation tree, which maps blocks from
logical addresses to physical ones, and it is 99.99% reads.

The mapping tree is a point of lock contention during heavy IO
workloads, so this commit switches things to a rw lock.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 57fd5a5f
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -507,10 +507,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
		 */
		set_page_extent_mapped(page);
		lock_extent(tree, last_offset, end, GFP_NOFS);
		spin_lock(&em_tree->lock);
		read_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, last_offset,
					   PAGE_CACHE_SIZE);
		spin_unlock(&em_tree->lock);
		read_unlock(&em_tree->lock);

		if (!em || last_offset < em->start ||
		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
@@ -594,11 +594,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
	em_tree = &BTRFS_I(inode)->extent_tree;

	/* we need the actual starting offset of this extent in the file */
	spin_lock(&em_tree->lock);
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree,
				   page_offset(bio->bi_io_vec->bv_page),
				   PAGE_CACHE_SIZE);
	spin_unlock(&em_tree->lock);
	read_unlock(&em_tree->lock);

	compressed_len = em->block_len;
	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+7 −7
Original line number Diff line number Diff line
@@ -121,15 +121,15 @@ static struct extent_map *btree_get_extent(struct inode *inode,
	struct extent_map *em;
	int ret;

	spin_lock(&em_tree->lock);
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	if (em) {
		em->bdev =
			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
		spin_unlock(&em_tree->lock);
		read_unlock(&em_tree->lock);
		goto out;
	}
	spin_unlock(&em_tree->lock);
	read_unlock(&em_tree->lock);

	em = alloc_extent_map(GFP_NOFS);
	if (!em) {
@@ -142,7 +142,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
	em->block_start = 0;
	em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;

	spin_lock(&em_tree->lock);
	write_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em);
	if (ret == -EEXIST) {
		u64 failed_start = em->start;
@@ -161,7 +161,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
		free_extent_map(em);
		em = NULL;
	}
	spin_unlock(&em_tree->lock);
	write_unlock(&em_tree->lock);

	if (ret)
		em = ERR_PTR(ret);
@@ -1323,9 +1323,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
	offset = page_offset(page);

	em_tree = &BTRFS_I(inode)->extent_tree;
	spin_lock(&em_tree->lock);
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
	spin_unlock(&em_tree->lock);
	read_unlock(&em_tree->lock);
	if (!em) {
		__unplug_io_fn(bdi, page);
		return;
+2 −2
Original line number Diff line number Diff line
@@ -5396,9 +5396,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
	lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
	while (1) {
		int ret;
		spin_lock(&em_tree->lock);
		write_lock(&em_tree->lock);
		ret = add_extent_mapping(em_tree, em);
		spin_unlock(&em_tree->lock);
		write_unlock(&em_tree->lock);
		if (ret != -EEXIST) {
			free_extent_map(em);
			break;
+4 −4
Original line number Diff line number Diff line
@@ -2786,15 +2786,15 @@ int try_release_extent_mapping(struct extent_map_tree *map,
		u64 len;
		while (start <= end) {
			len = end - start + 1;
			spin_lock(&map->lock);
			write_lock(&map->lock);
			em = lookup_extent_mapping(map, start, len);
			if (!em || IS_ERR(em)) {
				spin_unlock(&map->lock);
				write_unlock(&map->lock);
				break;
			}
			if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
			    em->start != start) {
				spin_unlock(&map->lock);
				write_unlock(&map->lock);
				free_extent_map(em);
				break;
			}
@@ -2808,7 +2808,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
				free_extent_map(em);
			}
			start = extent_map_end(em);
			spin_unlock(&map->lock);
			write_unlock(&map->lock);

			/* once for us */
			free_extent_map(em);
+1 −4
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@ void extent_map_exit(void)
void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
{
	tree->map.rb_node = NULL;
	spin_lock_init(&tree->lock);
	rwlock_init(&tree->lock);
}

/**
@@ -222,7 +222,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
		ret = -EEXIST;
		goto out;
	}
	assert_spin_locked(&tree->lock);
	rb = tree_insert(&tree->map, em->start, &em->rb_node);
	if (rb) {
		ret = -EEXIST;
@@ -285,7 +284,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
	struct rb_node *next = NULL;
	u64 end = range_end(start, len);

	assert_spin_locked(&tree->lock);
	rb_node = __tree_search(&tree->map, start, &prev, &next);
	if (!rb_node && prev) {
		em = rb_entry(prev, struct extent_map, rb_node);
@@ -331,7 +329,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
	int ret = 0;

	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
	assert_spin_locked(&tree->lock);
	rb_erase(&em->rb_node, &tree->map);
	em->in_tree = 0;
	return ret;
Loading