Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 70dec807 authored by Chris Mason's avatar Chris Mason
Browse files

Btrfs: extent_io and extent_state optimizations



The end_bio routines are changed to take a pointer to the extent state
struct, and the state tree is walked in order to set/clear appropriate
bits as IO completes.  This greatly reduces the number of rbtree searches
done by the end_bio handlers, and reduces lock contention.

The extent_io releasepage function is changed to avoid expensive searches
for locked state.

Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent aadfeb6e
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -274,7 +274,7 @@ int btree_readpage(struct file *file, struct page *page)
	return extent_read_full_page(tree, page, btree_get_extent);
}

static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)
static int btree_releasepage(struct page *page, gfp_t gfp_flags)
{
	struct extent_io_tree *tree;
	struct extent_map_tree *map;
@@ -282,7 +282,7 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)

	tree = &BTRFS_I(page->mapping->host)->io_tree;
	map = &BTRFS_I(page->mapping->host)->extent_tree;
	ret = try_release_extent_mapping(map, tree, page);
	ret = try_release_extent_mapping(map, tree, page, gfp_flags);
	if (ret == 1) {
		ClearPagePrivate(page);
		set_page_private(page, 0);
+263 −95
Original line number Diff line number Diff line
@@ -27,13 +27,11 @@ static struct kmem_cache *extent_buffer_cache;
static LIST_HEAD(buffers);
static LIST_HEAD(states);

static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
#define BUFFER_LRU_MAX 64

struct tree_entry {
	u64 start;
	u64 end;
	int in_tree;
	struct rb_node rb_node;
};

@@ -69,7 +67,7 @@ void extent_io_exit(void)

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, list);
		printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
		printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs));
		list_del(&state->list);
		kmem_cache_free(extent_state_cache, state);

@@ -87,7 +85,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
	tree->state.rb_node = NULL;
	tree->ops = NULL;
	tree->dirty_bytes = 0;
	rwlock_init(&tree->lock);
	spin_lock_init(&tree->lock);
	spin_lock_init(&tree->lru_lock);
	tree->mapping = mapping;
	INIT_LIST_HEAD(&tree->buffer_lru);
@@ -110,18 +108,13 @@ EXPORT_SYMBOL(extent_io_tree_empty_lru);
struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
	unsigned long flags;

	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state || IS_ERR(state))
		return state;
	state->state = 0;
	state->in_tree = 0;
	state->private = 0;

	spin_lock_irqsave(&state_lock, flags);
	list_add(&state->list, &states);
	spin_unlock_irqrestore(&state_lock, flags);
	state->tree = NULL;

	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
@@ -131,14 +124,10 @@ EXPORT_SYMBOL(alloc_extent_state);

void free_extent_state(struct extent_state *state)
{
	unsigned long flags;
	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
		WARN_ON(state->in_tree);
		spin_lock_irqsave(&state_lock, flags);
		list_del(&state->list);
		spin_unlock_irqrestore(&state_lock, flags);
		WARN_ON(state->tree);
		kmem_cache_free(extent_state_cache, state);
	}
}
@@ -164,7 +153,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 1;
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
@@ -216,8 +204,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,

static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
{
	struct rb_node *prev;
	struct rb_node *prev = NULL;
	struct rb_node *ret;

	ret = __tree_search(root, offset, &prev, NULL);
	if (!ret)
		return prev;
@@ -248,7 +237,7 @@ static int merge_state(struct extent_io_tree *tree,
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->in_tree = 0;
			other->tree = NULL;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
@@ -259,7 +248,7 @@ static int merge_state(struct extent_io_tree *tree,
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->in_tree = 0;
			state->tree = NULL;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
@@ -300,6 +289,7 @@ static int insert_state(struct extent_io_tree *tree,
		free_extent_state(state);
		return -EEXIST;
	}
	state->tree = tree;
	merge_state(tree, state);
	return 0;
}
@@ -335,6 +325,7 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		free_extent_state(prealloc);
		return -EEXIST;
	}
	prealloc->tree = tree;
	return 0;
}

@@ -361,9 +352,9 @@ static int clear_state_bit(struct extent_io_tree *tree,
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->in_tree) {
		if (state->tree) {
			rb_erase(&state->rb_node, &tree->state);
			state->in_tree = 0;
			state->tree = NULL;
			free_extent_state(state);
		} else {
			WARN_ON(1);
@@ -404,7 +395,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			return -ENOMEM;
	}

	write_lock_irqsave(&tree->lock, flags);
	spin_lock_irqsave(&tree->lock, flags);
	/*
	 * this search will find the extents that end after
	 * our range starts
@@ -434,6 +425,8 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
	 */

	if (state->start < start) {
		if (!prealloc)
			prealloc = alloc_extent_state(GFP_ATOMIC);
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
@@ -455,6 +448,8 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (!prealloc)
			prealloc = alloc_extent_state(GFP_ATOMIC);
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

@@ -471,7 +466,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
	goto search_again;

out:
	write_unlock_irqrestore(&tree->lock, flags);
	spin_unlock_irqrestore(&tree->lock, flags);
	if (prealloc)
		free_extent_state(prealloc);

@@ -480,7 +475,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
search_again:
	if (start > end)
		goto out;
	write_unlock_irqrestore(&tree->lock, flags);
	spin_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
@@ -492,9 +487,9 @@ static int wait_on_state(struct extent_io_tree *tree,
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	read_unlock_irq(&tree->lock);
	spin_unlock_irq(&tree->lock);
	schedule();
	read_lock_irq(&tree->lock);
	spin_lock_irq(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}
@@ -509,7 +504,7 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
	struct extent_state *state;
	struct rb_node *node;

	read_lock_irq(&tree->lock);
	spin_lock_irq(&tree->lock);
again:
	while (1) {
		/*
@@ -538,13 +533,13 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
			break;

		if (need_resched()) {
			read_unlock_irq(&tree->lock);
			spin_unlock_irq(&tree->lock);
			cond_resched();
			read_lock_irq(&tree->lock);
			spin_lock_irq(&tree->lock);
		}
	}
out:
	read_unlock_irq(&tree->lock);
	spin_unlock_irq(&tree->lock);
	return 0;
}
EXPORT_SYMBOL(wait_extent_bit);
@@ -589,7 +584,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
			return -ENOMEM;
	}

	write_lock_irqsave(&tree->lock, flags);
	spin_lock_irqsave(&tree->lock, flags);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
@@ -709,7 +704,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
	goto search_again;

out:
	write_unlock_irqrestore(&tree->lock, flags);
	spin_unlock_irqrestore(&tree->lock, flags);
	if (prealloc)
		free_extent_state(prealloc);

@@ -718,7 +713,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
search_again:
	if (start > end)
		goto out;
	write_unlock_irqrestore(&tree->lock, flags);
	spin_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
@@ -817,10 +812,6 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
}
EXPORT_SYMBOL(wait_on_extent_writeback);

/*
 * locks a range in ascending order, waiting for any locked regions
 * it hits on the way.  [start,end] are inclusive, and this will sleep.
 */
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
@@ -896,7 +887,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
	struct extent_state *state;
	int ret = 1;

	read_lock_irq(&tree->lock);
	spin_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
@@ -919,7 +910,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			break;
	}
out:
	read_unlock_irq(&tree->lock);
	spin_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(find_first_extent_bit);
@@ -933,7 +924,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
	u64 found = 0;
	u64 total_bytes = 0;

	write_lock_irq(&tree->lock);
	spin_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
@@ -976,9 +967,9 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
			atomic_inc(&state->refs);
			prepare_to_wait(&state->wq, &wait,
					TASK_UNINTERRUPTIBLE);
			write_unlock_irq(&tree->lock);
			spin_unlock_irq(&tree->lock);
			schedule();
			write_lock_irq(&tree->lock);
			spin_lock_irq(&tree->lock);
			finish_wait(&state->wq, &wait);
			free_extent_state(state);
			goto search_again;
@@ -997,7 +988,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
			break;
	}
out:
	write_unlock_irq(&tree->lock);
	spin_unlock_irq(&tree->lock);
	return found;
}

@@ -1017,7 +1008,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
		return 0;
	}

	write_lock_irq(&tree->lock);
	spin_lock_irq(&tree->lock);
	if (cur_start == 0 && bits == EXTENT_DIRTY) {
		total_bytes = tree->dirty_bytes;
		goto out;
@@ -1050,7 +1041,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
			break;
	}
out:
	write_unlock_irq(&tree->lock);
	spin_unlock_irq(&tree->lock);
	return total_bytes;
}
/*
@@ -1122,7 +1113,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
	struct extent_state *state;
	int ret = 0;

	write_lock_irq(&tree->lock);
	spin_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
@@ -1139,7 +1130,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
	}
	state->private = private;
out:
	write_unlock_irq(&tree->lock);
	spin_unlock_irq(&tree->lock);
	return ret;
}

@@ -1149,7 +1140,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
	struct extent_state *state;
	int ret = 0;

	read_lock_irq(&tree->lock);
	spin_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
@@ -1166,13 +1157,13 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
	}
	*private = state->private;
out:
	read_unlock_irq(&tree->lock);
	spin_unlock_irq(&tree->lock);
	return ret;
}

/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if ever extent in the tree
 * If 'filled' == 1, this returns 1 only if every extent in the tree
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
@@ -1184,7 +1175,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
	int bitset = 0;
	unsigned long flags;

	read_lock_irqsave(&tree->lock, flags);
	spin_lock_irqsave(&tree->lock, flags);
	node = tree_search(&tree->state, start);
	while (node && start <= end) {
		state = rb_entry(node, struct extent_state, rb_node);
@@ -1215,7 +1206,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
			break;
		}
	}
	read_unlock_irqrestore(&tree->lock, flags);
	spin_unlock_irqrestore(&tree->lock, flags);
	return bitset;
}
EXPORT_SYMBOL(test_range_bit);
@@ -1282,16 +1273,19 @@ static int end_bio_extent_writepage(struct bio *bio,
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_io_tree *tree = bio->bi_private;
	struct extent_state *state = bio->bi_private;
	struct extent_io_tree *tree = state->tree;
	struct rb_node *node;
	u64 start;
	u64 end;
	u64 cur;
	int whole_page;
	unsigned long flags;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	if (bio->bi_size)
		return 1;
#endif

	do {
		struct page *page = bvec->bv_page;
		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1311,16 +1305,80 @@ static int end_bio_extent_writepage(struct bio *bio,
			ClearPageUptodate(page);
			SetPageError(page);
		}
		clear_extent_writeback(tree, start, end, GFP_ATOMIC);

		if (tree->ops && tree->ops->writepage_end_io_hook) {
			tree->ops->writepage_end_io_hook(page, start, end,
							 state);
		}

		/*
		 * bios can get merged in funny ways, and so we need to
		 * be careful with the state variable.  We know the
		 * state won't be merged with others because it has
		 * WRITEBACK set, but we can't be sure each biovec is
		 * sequential in the file.  So, if our cached state
		 * doesn't match the expected end, search the tree
		 * for the correct one.
		 */

		spin_lock_irqsave(&tree->lock, flags);
		if (!state || state->end != end) {
			state = NULL;
			node = __tree_search(&tree->state, start, NULL, NULL);
			if (node) {
				state = rb_entry(node, struct extent_state,
						 rb_node);
				if (state->end != end ||
				    !(state->state & EXTENT_WRITEBACK))
					state = NULL;
			}
			if (!state) {
				spin_unlock_irqrestore(&tree->lock, flags);
				clear_extent_writeback(tree, start,
						       end, GFP_ATOMIC);
				goto next_io;
			}
		}
		cur = end;
		while(1) {
			struct extent_state *clear = state;
			cur = state->start;
			node = rb_prev(&state->rb_node);
			if (node) {
				state = rb_entry(node,
						 struct extent_state,
						 rb_node);
			} else {
				state = NULL;
			}

			clear_state_bit(tree, clear, EXTENT_WRITEBACK,
					1, 0);
			if (cur == start)
				break;
			if (cur < start) {
				WARN_ON(1);
				break;
			}
			if (!node)
				break;
		}
		/* before releasing the lock, make sure the next state
		 * variable has the expected bits set and corresponds
		 * to the correct offsets in the file
		 */
		if (state && (state->end + 1 != start ||
		    !state->state & EXTENT_WRITEBACK)) {
			state = NULL;
		}
		spin_unlock_irqrestore(&tree->lock, flags);
next_io:

		if (whole_page)
			end_page_writeback(page);
		else
			check_page_writeback(tree, page);
		if (tree->ops && tree->ops->writepage_end_io_hook)
			tree->ops->writepage_end_io_hook(page, start, end);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	return 0;
@@ -1347,9 +1405,13 @@ static int end_bio_extent_readpage(struct bio *bio,
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_io_tree *tree = bio->bi_private;
	struct extent_state *state = bio->bi_private;
	struct extent_io_tree *tree = state->tree;
	struct rb_node *node;
	u64 start;
	u64 end;
	u64 cur;
	unsigned long flags;
	int whole_page;
	int ret;

@@ -1373,27 +1435,83 @@ static int end_bio_extent_readpage(struct bio *bio,
			prefetchw(&bvec->bv_page->flags);

		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end);
			ret = tree->ops->readpage_end_io_hook(page, start, end,
							      state);
			if (ret)
				uptodate = 0;
		}

		spin_lock_irqsave(&tree->lock, flags);
		if (!state || state->end != end) {
			state = NULL;
			node = __tree_search(&tree->state, start, NULL, NULL);
			if (node) {
				state = rb_entry(node, struct extent_state,
						 rb_node);
				if (state->end != end ||
				    !(state->state & EXTENT_LOCKED))
					state = NULL;
			}
			if (!state) {
				spin_unlock_irqrestore(&tree->lock, flags);
				set_extent_uptodate(tree, start, end,
						    GFP_ATOMIC);
				unlock_extent(tree, start, end, GFP_ATOMIC);
				goto next_io;
			}
		}

		cur = end;
		while(1) {
			struct extent_state *clear = state;
			cur = state->start;
			node = rb_prev(&state->rb_node);
			if (node) {
				state = rb_entry(node,
					 struct extent_state,
					 rb_node);
			} else {
				state = NULL;
			}
			clear->state |= EXTENT_UPTODATE;
			clear_state_bit(tree, clear, EXTENT_LOCKED,
					1, 0);
			if (cur == start)
				break;
			if (cur < start) {
				WARN_ON(1);
				break;
			}
			if (!node)
				break;
		}
		/* before releasing the lock, make sure the next state
		 * variable has the expected bits set and corresponds
		 * to the correct offsets in the file
		 */
		if (state && (state->end + 1 != start ||
		    !state->state & EXTENT_WRITEBACK)) {
			state = NULL;
		}
		spin_unlock_irqrestore(&tree->lock, flags);
next_io:
		if (whole_page) {
			if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
			if (whole_page)
				SetPageUptodate(page);
			else
				check_page_uptodate(tree, page);
			} else {
				ClearPageUptodate(page);
				SetPageError(page);
			}

		unlock_extent(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			unlock_page(page);
		else
		} else {
			if (uptodate) {
				check_page_uptodate(tree, page);
			} else {
				ClearPageUptodate(page);
				SetPageError(page);
			}
			check_page_locked(tree, page);
		}
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
@@ -1416,7 +1534,8 @@ static int end_bio_extent_preparewrite(struct bio *bio,
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_io_tree *tree = bio->bi_private;
	struct extent_state *state = bio->bi_private;
	struct extent_io_tree *tree = state->tree;
	u64 start;
	u64 end;

@@ -1475,6 +1594,29 @@ static int submit_one_bio(int rw, struct bio *bio)
{
	u64 maxsector;
	int ret = 0;
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct page *page = bvec->bv_page;
	struct extent_io_tree *tree = bio->bi_private;
	struct rb_node *node;
	struct extent_state *state;
	u64 start;
	u64 end;

	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
	end = start + bvec->bv_len - 1;

	spin_lock_irq(&tree->lock);
	node = __tree_search(&tree->state, start, NULL, NULL);
	BUG_ON(!node);
	state = rb_entry(node, struct extent_state, rb_node);
	while(state->end < end) {
		node = rb_next(node);
		state = rb_entry(node, struct extent_state, rb_node);
	}
	BUG_ON(state->end != end);
	spin_unlock_irq(&tree->lock);

	bio->bi_private = state;

	bio_get(bio);

@@ -1519,9 +1661,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
	if (!bio) {
		printk("failed to allocate bio nr %d\n", nr);
	}


	bio_add_page(bio, page, size, offset);
	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;

	if (bio_ret) {
		*bio_ret = bio;
	} else {
@@ -1635,6 +1780,16 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
			page_offset += iosize;
			continue;
		}
		/* we have an inline extent but it didn't get marked up
		 * to date.  Error out
		 */
		if (block_start == EXTENT_MAP_INLINE) {
			SetPageError(page);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		ret = 0;
		if (tree->ops && tree->ops->readpage_io_hook) {
@@ -2205,7 +2360,8 @@ EXPORT_SYMBOL(extent_prepare_write);
 * map records are removed
 */
int try_release_extent_mapping(struct extent_map_tree *map,
			       struct extent_io_tree *tree, struct page *page)
			       struct extent_io_tree *tree, struct page *page,
			       gfp_t mask)
{
	struct extent_map *em;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
@@ -2213,6 +2369,8 @@ int try_release_extent_mapping(struct extent_map_tree *map,
	u64 orig_start = start;
	int ret = 1;

	if ((mask & __GFP_WAIT) &&
	    page->mapping->host->i_size > 16 * 1024 * 1024) {
		while (start <= end) {
			spin_lock(&map->lock);
			em = lookup_extent_mapping(map, start, end);
@@ -2220,7 +2378,13 @@ int try_release_extent_mapping(struct extent_map_tree *map,
				spin_unlock(&map->lock);
				break;
			}
		if (!test_range_bit(tree, em->start, extent_map_end(em) - 1,
			if (em->start != start) {
				spin_unlock(&map->lock);
				free_extent_map(em);
				break;
			}
			if (!test_range_bit(tree, em->start,
					    extent_map_end(em) - 1,
					    EXTENT_LOCKED, 0)) {
				remove_extent_mapping(map, em);
				/* once for the rb tree */
@@ -2232,11 +2396,15 @@ int try_release_extent_mapping(struct extent_map_tree *map,
			/* once for us */
			free_extent_map(em);
		}
	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
	}
	if (test_range_bit(tree, orig_start, end, EXTENT_IOBITS, 0))
		ret = 0;
	else
	else {
		if ((mask & GFP_NOFS) == GFP_NOFS)
			mask = GFP_NOFS;
		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
				 1, 1, GFP_NOFS);
				 1, 1, mask);
	}
	return ret;
}
EXPORT_SYMBOL(try_release_extent_mapping);
@@ -2553,13 +2721,13 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
			}
		}
		clear_page_dirty_for_io(page);
		write_lock_irq(&page->mapping->tree_lock);
		read_lock_irq(&page->mapping->tree_lock);
		if (!PageDirty(page)) {
			radix_tree_tag_clear(&page->mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_DIRTY);
		}
		write_unlock_irq(&page->mapping->tree_lock);
		read_unlock_irq(&page->mapping->tree_lock);
		unlock_page(page);
	}
	return 0;
+10 −5
Original line number Diff line number Diff line
@@ -23,19 +23,23 @@
#define EXTENT_PAGE_PRIVATE 1
#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3

struct extent_state;

struct extent_io_ops {
	int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
	int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
	int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
	int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end);
	void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end);
	int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
				    struct extent_state *state);
	void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
				      struct extent_state *state);
};

struct extent_io_tree {
	struct rb_root state;
	struct address_space *mapping;
	u64 dirty_bytes;
	rwlock_t lock;
	spinlock_t lock;
	struct extent_io_ops *ops;
	spinlock_t lru_lock;
	struct list_head buffer_lru;
@@ -45,8 +49,8 @@ struct extent_io_tree {
struct extent_state {
	u64 start;
	u64 end; /* inclusive */
	int in_tree;
	struct rb_node rb_node;
	struct extent_io_tree *tree;
	wait_queue_head_t wq;
	atomic_t refs;
	unsigned long state;
@@ -82,7 +86,8 @@ void extent_io_tree_init(struct extent_io_tree *tree,
			  struct address_space *mapping, gfp_t mask);
void extent_io_tree_empty_lru(struct extent_io_tree *tree);
int try_release_extent_mapping(struct extent_map_tree *map,
			       struct extent_io_tree *tree, struct page *page);
			       struct extent_io_tree *tree, struct page *page,
			       gfp_t mask);
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
+23 −12
Original line number Diff line number Diff line
@@ -331,7 +331,8 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
	return ret;
}

int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end)
int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
			       struct extent_state *state)
{
	size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
	struct inode *inode = page->mapping->host;
@@ -347,7 +348,12 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end)
	    btrfs_test_flag(inode, NODATASUM))
		return 0;

	if (state->start == start) {
		private = state->private;
		ret = 0;
	} else {
		ret = get_state_private(io_tree, start, &private);
	}
	local_irq_save(flags);
	kaddr = kmap_atomic(page, KM_IRQ0);
	if (ret) {
@@ -1830,7 +1836,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}

struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
				    size_t page_offset, u64 start, u64 len,
				    size_t pg_offset, u64 start, u64 len,
				    int create)
{
	int ret;
@@ -1865,6 +1871,9 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
			       start, len, em->start, em->len);
			WARN_ON(1);
		}
		if (em->block_start == EXTENT_MAP_INLINE && page)
			free_extent_map(em);
		else
			goto out;
	}
	em = alloc_extent_map(GFP_NOFS);
@@ -1930,6 +1939,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
		em->len = extent_end - extent_start;
		goto insert;
	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
		u64 page_start;
		unsigned long ptr;
		char *map;
		size_t size;
@@ -1959,16 +1969,17 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
			goto out;
		}

		extent_offset = ((u64)page->index << PAGE_CACHE_SHIFT) -
			extent_start + page_offset;
		copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset,
		page_start = page_offset(page) + pg_offset;
		extent_offset = page_start - extent_start;
		copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
				size - extent_offset);
		em->start = extent_start + extent_offset;
		em->len = copy_size;
		em->len = (copy_size + root->sectorsize - 1) &
			~((u64)root->sectorsize - 1);
		map = kmap(page);
		ptr = btrfs_file_extent_inline_start(item) + extent_offset;
		if (create == 0 && !PageUptodate(page)) {
			read_extent_buffer(leaf, map + page_offset, ptr,
			read_extent_buffer(leaf, map + pg_offset, ptr,
					   copy_size);
			flush_dcache_page(page);
		} else if (create && PageUptodate(page)) {
@@ -1980,7 +1991,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
				trans = btrfs_start_transaction(root, 1);
				goto again;
			}
			write_extent_buffer(leaf, map + page_offset, ptr,
			write_extent_buffer(leaf, map + pg_offset, ptr,
					    copy_size);
			btrfs_mark_buffer_dirty(leaf);
		}
@@ -2077,7 +2088,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping,
				btrfs_get_extent);
}

static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
	struct extent_io_tree *tree;
	struct extent_map_tree *map;
@@ -2085,7 +2096,7 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)

	tree = &BTRFS_I(page->mapping->host)->io_tree;
	map = &BTRFS_I(page->mapping->host)->extent_tree;
	ret = try_release_extent_mapping(map, tree, page);
	ret = try_release_extent_mapping(map, tree, page, gfp_flags);
	if (ret == 1) {
		ClearPagePrivate(page);
		set_page_private(page, 0);