mm: speculative page references (e286781d) · Commits · e / devices / android_kernel_xiaomi_markw

drivers/net/cassini.c

+12 −0

Original line number	Diff line number	Diff line
		@@ -576,6 +576,18 @@ static void cas_spare_recover(struct cas *cp, const gfp_t flags)
		list_for_each_safe(elem, tmp, &list) {
		cas_page_t *page = list_entry(elem, cas_page_t, list);

		/*
		* With the lockless pagecache, cassini buffering scheme gets
		* slightly less accurate: we might find that a page has an
		* elevated reference count here, due to a speculative ref,
		* and skip it as in-use. Ideally we would be able to reclaim
		* it. However this would be such a rare case, it doesn't
		* matter too much as we should pick it up the next time round.
		*
		* Importantly, if we find that the page has a refcount of 1
		* here (our refcount), then we know it is definitely not in use
		* so we can reuse it.
		*/
		if (page_count(page->buffer) > 1)
		continue;

include/linux/pagemap.h

+110 −1

Original line number	Diff line number	Diff line
		@@ -12,6 +12,7 @@
		#include <asm/uaccess.h>
		#include <linux/gfp.h>
		#include <linux/bitops.h>
		#include <linux/hardirq.h> /* for in_interrupt() */

		/*
		* Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page
		@@ -62,6 +63,98 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
		#define page_cache_release(page) put_page(page)
		void release_pages(struct page **pages, int nr, int cold);

		/*
		* speculatively take a reference to a page.
		* If the page is free (_count == 0), then _count is untouched, and 0
		* is returned. Otherwise, _count is incremented by 1 and 1 is returned.
		*
		* This function must be called inside the same rcu_read_lock() section as has
		* been used to lookup the page in the pagecache radix-tree (or page table):
		* this allows allocators to use a synchronize_rcu() to stabilize _count.
		*
		* Unless an RCU grace period has passed, the count of all pages coming out
		* of the allocator must be considered unstable. page_count may return higher
		* than expected, and put_page must be able to do the right thing when the
		* page has been finished with, no matter what it is subsequently allocated
		* for (because put_page is what is used here to drop an invalid speculative
		* reference).
		*
		* This is the interesting part of the lockless pagecache (and lockless
		* get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
		* has the following pattern:
		* 1. find page in radix tree
		* 2. conditionally increment refcount
		* 3. check the page is still in pagecache (if no, goto 1)
		*
		* Remove-side that cares about stability of _count (eg. reclaim) has the
		* following (with tree_lock held for write):
		* A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
		* B. remove page from pagecache
		* C. free the page
		*
		* There are 2 critical interleavings that matter:
		* - 2 runs before A: in this case, A sees elevated refcount and bails out
		* - A runs before 2: in this case, 2 sees zero refcount and retries;
		* subsequently, B will complete and 1 will find no page, causing the
		* lookup to return NULL.
		*
		* It is possible that between 1 and 2, the page is removed then the exact same
		* page is inserted into the same position in pagecache. That's OK: the
		* old find_get_page using tree_lock could equally have run before or after
		* such a re-insertion, depending on order that locks are granted.
		*
		* A lookup racing against pagecache insertion isn't a big problem: either 1
		* will find the page or it will not. Likewise, the old find_get_page could run
		* either before the insertion or afterwards, depending on timing.
		*/
		/* Returns 1 once a reference is held on @page, 0 if the caller must retry. */
		static inline int page_cache_get_speculative(struct page *page)
		{
		/* Speculative gets must never be attempted from interrupt context. */
		VM_BUG_ON(in_interrupt());

		#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU)
		# ifdef CONFIG_PREEMPT
		/* With preemption configured, insist that the caller is in atomic context. */
		VM_BUG_ON(!in_atomic());
		# endif
		/*
		* Preempt must be disabled here - we rely on rcu_read_lock doing
		* this for us.
		*
		* Pagecache won't be truncated from interrupt context, so if we have
		* found a page in the radix tree here, we have pinned its refcount by
		* disabling preempt, and hence no need for the "speculative get" that
		* SMP requires.
		*/
		/* UP path: the count is asserted non-zero, then bumped unconditionally. */
		VM_BUG_ON(page_count(page) == 0);
		atomic_inc(&page->_count);

		#else
		/* All other configurations: take the reference only if _count is not zero. */
		if (unlikely(!get_page_unless_zero(page))) {
		/*
		* Either the page has been freed, or will be freed.
		* In either case, retry here and the caller should
		* do the right thing (see comments above).
		*/
		return 0;
		}
		#endif
		/* Whichever path took the reference, the page must not be a tail page. */
		VM_BUG_ON(PageTail(page));

		/* Reference successfully taken. */
		return 1;
		}

		/*
		 * Atomically swap page->_count from @count to 0.
		 *
		 * Returns non-zero if _count was exactly @count and is now 0; returns 0
		 * if _count held any other value, in which case it is left unchanged.
		 */
		static inline int page_freeze_refs(struct page *page, int count)
		{
			int seen = atomic_cmpxchg(&page->_count, count, 0);

			return likely(seen == count);
		}

		/*
		 * Store @count into page->_count.
		 *
		 * Two conditions are asserted first: the page's count is currently zero,
		 * and @count itself is non-zero.
		 */
		static inline void page_unfreeze_refs(struct page *page, int count)
		{
			VM_BUG_ON(page_count(page));
			VM_BUG_ON(!count);

			atomic_set(&page->_count, count);
		}

		#ifdef CONFIG_NUMA
		extern struct page *__page_cache_alloc(gfp_t gfp);
		#else
		@@ -133,13 +226,29 @@ static inline struct page read_mapping_page(struct address_space mapping,
		return read_cache_page(mapping, index, filler, data);
		}

		int add_to_page_cache(struct page page, struct address_space mapping,
		/*
		 * Add an already-locked page to the pagecache of @mapping at @index.
		 * Both @page and @mapping are passed by pointer.
		 */
		int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
		/* Same parameter list as above; defined in mm/filemap.c. */
		int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
		extern void remove_from_page_cache(struct page *page);
		extern void __remove_from_page_cache(struct page *page);

		/*
		 * Add a newly allocated page to the pagecache.
		 *
		 * The page is new, so it is locked with a plain SetPageLocked() and then
		 * handed to add_to_page_cache_locked(). If that insertion fails, the lock
		 * bit is cleared again. The result of add_to_page_cache_locked() is
		 * returned as-is.
		 */
		static inline int add_to_page_cache(struct page *page,
				struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
		{
			int rc;

			SetPageLocked(page);
			rc = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
			if (likely(!rc))
				return 0;

			/* Insertion failed: undo the lock we took above. */
			ClearPageLocked(page);
			return rc;
		}

		/*
		* Return byte-offset into filesystem object for page.
		*/

mm/filemap.c

+18 −14

Original line number	Diff line number	Diff line
		@@ -442,39 +442,43 @@ int filemap_write_and_wait_range(struct address_space *mapping,
		}

		/**
		* add_to_page_cache - add newly allocated pagecache pages
		* add_to_page_cache_locked - add a locked page to the pagecache
		* @page: page to add
		* @mapping: the page's address_space
		* @offset: page index
		* @gfp_mask: page allocation mode
		*
		* This function is used to add newly allocated pagecache pages;
		* the page is new, so we can just run SetPageLocked() against it.
		* The other page state flags were set by rmqueue().
		*
		* This function is used to add a page to the pagecache. It must be locked.
		* This function does not add the page to the LRU. The caller must do that.
		*/
		int add_to_page_cache(struct page page, struct address_space mapping,
		int add_to_page_cache_locked(struct page page, struct address_space mapping,
		pgoff_t offset, gfp_t gfp_mask)
		{
		int error = mem_cgroup_cache_charge(page, current->mm,
		int error;

		VM_BUG_ON(!PageLocked(page));

		error = mem_cgroup_cache_charge(page, current->mm,
		gfp_mask & ~__GFP_HIGHMEM);
		if (error)
		goto out;

		error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
		if (error == 0) {
		write_lock_irq(&mapping->tree_lock);
		error = radix_tree_insert(&mapping->page_tree, offset, page);
		if (!error) {
		page_cache_get(page);
		SetPageLocked(page);
		page->mapping = mapping;
		page->index = offset;

		write_lock_irq(&mapping->tree_lock);
		error = radix_tree_insert(&mapping->page_tree, offset, page);
		if (likely(!error)) {
		mapping->nrpages++;
		__inc_zone_page_state(page, NR_FILE_PAGES);
		} else
		} else {
		page->mapping = NULL;
		mem_cgroup_uncharge_cache_page(page);
		page_cache_release(page);
		}

		write_unlock_irq(&mapping->tree_lock);
		radix_tree_preload_end();
		@@ -483,7 +487,7 @@ int add_to_page_cache(struct page page, struct address_space mapping,
		out:
		return error;
		}
		EXPORT_SYMBOL(add_to_page_cache);
		EXPORT_SYMBOL(add_to_page_cache_locked);

		int add_to_page_cache_lru(struct page page, struct address_space mapping,
		pgoff_t offset, gfp_t gfp_mask)

mm/migrate.c

+18 −2

Original line number	Diff line number	Diff line
		@@ -285,7 +285,15 @@ void migration_entry_wait(struct mm_struct mm, pmd_t pmd,

		page = migration_entry_to_page(entry);

		get_page(page);
		/*
		* Once page migration has started replacing the radix-tree entry,
		* page_count will be zero. We also don't want to call
		* wait_on_page_locked() on a page we hold no reference to.
		* So we use get_page_unless_zero() here. Even if it fails, the page
		* fault will simply occur again.
		*/
		if (!get_page_unless_zero(page))
		goto out;
		pte_unmap_unlock(ptep, ptl);
		wait_on_page_locked(page);
		put_page(page);
		@@ -305,6 +313,7 @@ out:
		static int migrate_page_move_mapping(struct address_space *mapping,
		struct page newpage, struct page page)
		{
		int expected_count;
		void **pslot;

		if (!mapping) {
		@@ -319,12 +328,18 @@ static int migrate_page_move_mapping(struct address_space *mapping,
		pslot = radix_tree_lookup_slot(&mapping->page_tree,
		page_index(page));

		if (page_count(page) != 2 + !!PagePrivate(page) \|\|
		expected_count = 2 + !!PagePrivate(page);
		if (page_count(page) != expected_count \|\|
		(struct page *)radix_tree_deref_slot(pslot) != page) {
		write_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
		}

		if (!page_freeze_refs(page, expected_count)) {
		write_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
		}

		/*
		* Now we know that no one else is looking at the page.
		*/
		@@ -338,6 +353,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,

		radix_tree_replace_slot(pslot, newpage);

		page_unfreeze_refs(page, expected_count);
		/*
		* Drop cache reference from old page.
		* We know this isn't the last reference.

mm/shmem.c

+3 −3

Original line number	Diff line number	Diff line
		@@ -936,7 +936,7 @@ found:
		spin_lock(&info->lock);
		ptr = shmem_swp_entry(info, idx, NULL);
		if (ptr && ptr->val == entry.val) {
		error = add_to_page_cache(page, inode->i_mapping,
		error = add_to_page_cache_locked(page, inode->i_mapping,
		idx, GFP_NOWAIT);
		/* does mem_cgroup_uncharge_cache_page on error */
		} else /* we must compensate for our precharge above */
		@@ -1301,8 +1301,8 @@ repeat:
		SetPageUptodate(filepage);
		set_page_dirty(filepage);
		swap_free(swap);
		} else if (!(error = add_to_page_cache(
		swappage, mapping, idx, GFP_NOWAIT))) {
		} else if (!(error = add_to_page_cache_locked(swappage, mapping,
		idx, GFP_NOWAIT))) {
		info->flags \|= SHMEM_PAGEIN;
		shmem_swp_set(info, entry, 0);
		shmem_swp_unmap(entry);