Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a60637c8 authored by Nick Piggin's avatar Nick Piggin Committed by Linus Torvalds
Browse files

mm: lockless pagecache



Combine page_cache_get_speculative with lockless radix tree lookups to
introduce lockless page cache lookups (ie.  no mapping->tree_lock on the
read-side).

The only atomicity changes this introduces is that the gang pagecache
lookup functions now behave as if they are implemented with multiple
find_get_page calls, rather than operating on a snapshot of the pages.  In
practice, this atomicity guarantee is not used anyway, and it is to
replace individual lookups, so these semantics are natural.

Signed-off-by: default avatarNick Piggin <npiggin@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Reviewed-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent e286781d
Loading
Loading
Loading
Loading
+134 −45
Original line number Diff line number Diff line
@@ -639,13 +639,33 @@ void __lock_page_nosync(struct page *page)
 */
struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
{
	void **pagep;
	struct page *page;

	read_lock_irq(&mapping->tree_lock);
	page = radix_tree_lookup(&mapping->page_tree, offset);
	if (page)
		page_cache_get(page);
	read_unlock_irq(&mapping->tree_lock);
	rcu_read_lock();
repeat:
	page = NULL;
	pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
	if (pagep) {
		page = radix_tree_deref_slot(pagep);
		if (unlikely(!page || page == RADIX_TREE_RETRY))
			goto repeat;

		if (!page_cache_get_speculative(page))
			goto repeat;

		/*
		 * Has the page moved?
		 * This is part of the lockless pagecache protocol. See
		 * include/linux/pagemap.h for details.
		 */
		if (unlikely(page != *pagep)) {
			page_cache_release(page);
			goto repeat;
		}
	}
	rcu_read_unlock();

	return page;
}
EXPORT_SYMBOL(find_get_page);
@@ -660,32 +680,22 @@ EXPORT_SYMBOL(find_get_page);
 *
 * Returns zero if the page was not present. find_lock_page() may sleep.
 */
struct page *find_lock_page(struct address_space *mapping,
				pgoff_t offset)
struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
{
	struct page *page;

repeat:
	read_lock_irq(&mapping->tree_lock);
	page = radix_tree_lookup(&mapping->page_tree, offset);
	page = find_get_page(mapping, offset);
	if (page) {
		page_cache_get(page);
		if (TestSetPageLocked(page)) {
			read_unlock_irq(&mapping->tree_lock);
			__lock_page(page);

			/* Has the page been truncated while we slept? */
		lock_page(page);
		/* Has the page been truncated? */
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			page_cache_release(page);
			goto repeat;
		}
		VM_BUG_ON(page->index != offset);
			goto out;
		}
	}
	read_unlock_irq(&mapping->tree_lock);
out:
	return page;
}
EXPORT_SYMBOL(find_lock_page);
@@ -751,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
{
	unsigned int i;
	unsigned int ret;
	unsigned int nr_found;

	rcu_read_lock();
restart:
	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
				(void ***)pages, start, nr_pages);
	ret = 0;
	for (i = 0; i < nr_found; i++) {
		struct page *page;
repeat:
		page = radix_tree_deref_slot((void **)pages[i]);
		if (unlikely(!page))
			continue;
		/*
		 * this can only trigger if nr_found == 1, making livelock
		 * a non issue.
		 */
		if (unlikely(page == RADIX_TREE_RETRY))
			goto restart;

		if (!page_cache_get_speculative(page))
			goto repeat;

		/* Has the page moved? */
		if (unlikely(page != *((void **)pages[i]))) {
			page_cache_release(page);
			goto repeat;
		}

	read_lock_irq(&mapping->tree_lock);
	ret = radix_tree_gang_lookup(&mapping->page_tree,
				(void **)pages, start, nr_pages);
	for (i = 0; i < ret; i++)
		page_cache_get(pages[i]);
	read_unlock_irq(&mapping->tree_lock);
		pages[ret] = page;
		ret++;
	}
	rcu_read_unlock();
	return ret;
}

@@ -778,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
{
	unsigned int i;
	unsigned int ret;
	unsigned int nr_found;

	rcu_read_lock();
restart:
	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
				(void ***)pages, index, nr_pages);
	ret = 0;
	for (i = 0; i < nr_found; i++) {
		struct page *page;
repeat:
		page = radix_tree_deref_slot((void **)pages[i]);
		if (unlikely(!page))
			continue;
		/*
		 * this can only trigger if nr_found == 1, making livelock
		 * a non issue.
		 */
		if (unlikely(page == RADIX_TREE_RETRY))
			goto restart;

	read_lock_irq(&mapping->tree_lock);
	ret = radix_tree_gang_lookup(&mapping->page_tree,
				(void **)pages, index, nr_pages);
	for (i = 0; i < ret; i++) {
		if (pages[i]->mapping == NULL || pages[i]->index != index)
		if (page->mapping == NULL || page->index != index)
			break;

		page_cache_get(pages[i]);
		if (!page_cache_get_speculative(page))
			goto repeat;

		/* Has the page moved? */
		if (unlikely(page != *((void **)pages[i]))) {
			page_cache_release(page);
			goto repeat;
		}

		pages[ret] = page;
		ret++;
		index++;
	}
	read_unlock_irq(&mapping->tree_lock);
	return i;
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(find_get_pages_contig);

@@ -810,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
{
	unsigned int i;
	unsigned int ret;
	unsigned int nr_found;

	rcu_read_lock();
restart:
	nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
				(void ***)pages, *index, nr_pages, tag);
	ret = 0;
	for (i = 0; i < nr_found; i++) {
		struct page *page;
repeat:
		page = radix_tree_deref_slot((void **)pages[i]);
		if (unlikely(!page))
			continue;
		/*
		 * this can only trigger if nr_found == 1, making livelock
		 * a non issue.
		 */
		if (unlikely(page == RADIX_TREE_RETRY))
			goto restart;

		if (!page_cache_get_speculative(page))
			goto repeat;

		/* Has the page moved? */
		if (unlikely(page != *((void **)pages[i]))) {
			page_cache_release(page);
			goto repeat;
		}

		pages[ret] = page;
		ret++;
	}
	rcu_read_unlock();

	read_lock_irq(&mapping->tree_lock);
	ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
				(void **)pages, *index, nr_pages, tag);
	for (i = 0; i < ret; i++)
		page_cache_get(pages[i]);
	if (ret)
		*index = pages[ret - 1]->index + 1;
	read_unlock_irq(&mapping->tree_lock);

	return ret;
}
EXPORT_SYMBOL(find_get_pages_tag);