
Commit 46f65ec1 authored by Hugh Dickins, committed by Linus Torvalds

tmpfs: convert shmem_unuse_inode to radix-swap



Convert shmem_unuse_inode() to use a lockless gang lookup of the radix
tree, searching for matching swap.

This is somewhat slower than the old method: partly because of repeated
radix tree descents and of copying entries up, but probably mostly because
the old method noted when a vector page had been cleared of swap and
skipped it thereafter.
Perhaps we can devise a use of radix tree tagging to achieve that later.
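
For reference, the "radswap" value that the new lookup searches for is a
swp_entry_t packed into a radix tree slot.  Since struct page pointers are
at least word-aligned, a set low bit in the slot value can never be mistaken
for a page pointer, which is what lets swap entries and pages share the same
tree.  The sketch below illustrates such an encoding; the tag bit and shift
are assumptions made for illustration only, and the real swp_to_radix_entry()
and radix_to_swp_entry() helpers used by this series may be defined
differently:

#include <linux/swap.h>		/* swp_entry_t */

/*
 * Illustrative encoding of a swp_entry_t as a radix tree slot value.
 * The shift and tag bit are assumed here for the sake of the example;
 * the helpers of the same name are introduced earlier in this series.
 */
#define RADSWAP_TAG	0x2UL	/* assumed "this slot is not a page" marker */
#define RADSWAP_SHIFT	2	/* assumed shift making room for tag bits */

static inline void *swp_to_radix_entry(swp_entry_t entry)
{
	return (void *)((entry.val << RADSWAP_SHIFT) | RADSWAP_TAG);
}

static inline swp_entry_t radix_to_swp_entry(void *arg)
{
	swp_entry_t entry;

	entry.val = (unsigned long)arg >> RADSWAP_SHIFT;
	return entry;
}

With such an encoding, shmem_find_swap() below can compare raw slot values
against radswap without taking any page reference, which is also why the
later insertion has to re-verify the slot under tree_lock.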

shmem_add_to_page_cache() uses shmem_radix_tree_replace() to compensate
for the lockless lookup by checking that the expected entry is in place,
under lock.  It is not very satisfactory to be copying this much from
add_to_page_cache_locked(), but I think it is easier to sell than insisting
that every caller of add_to_page_cache*() go through the extras.
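
That re-check is done by shmem_radix_tree_replace(), whose closing lines
appear as context at the top of the diff below.  A rough sketch of how such
a compare-and-replace helper can look, with mapping->tree_lock held by the
caller, is given here; the body is reconstructed for illustration and is not
part of this commit's diff:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/radix-tree.h>

/*
 * Sketch: replace the radix tree slot at @index with @replacement, but
 * only if it still holds @expected (the entry found by the earlier
 * lockless lookup).  Caller holds mapping->tree_lock.
 */
static int shmem_radix_tree_replace(struct address_space *mapping,
			pgoff_t index, void *expected, void *replacement)
{
	void **pslot;
	void *item = NULL;

	VM_BUG_ON(!expected);
	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
	if (pslot)
		item = radix_tree_deref_slot_protected(pslot,
							&mapping->tree_lock);
	if (item != expected)
		return -ENOENT;
	if (replacement)
		radix_tree_replace_slot(pslot, replacement);
	else
		radix_tree_delete(&mapping->page_tree, index);
	return 0;
}

If the slot no longer holds the expected radswap entry (say, because a racing
truncation already removed it), shmem_add_to_page_cache() backs out instead
of inserting the page.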

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 7a5d0fbb
+107 −26
@@ -261,6 +261,55 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
	return 0;
}

/*
 * Like add_to_page_cache_locked, but error if expected item has gone.
 */
static int shmem_add_to_page_cache(struct page *page,
				   struct address_space *mapping,
				   pgoff_t index, gfp_t gfp, void *expected)
{
	int error;

	VM_BUG_ON(!PageLocked(page));
	VM_BUG_ON(!PageSwapBacked(page));

	error = mem_cgroup_cache_charge(page, current->mm,
						gfp & GFP_RECLAIM_MASK);
	if (error)
		goto out;
	if (!expected)
		error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
	if (!error) {
		page_cache_get(page);
		page->mapping = mapping;
		page->index = index;

		spin_lock_irq(&mapping->tree_lock);
		if (!expected)
			error = radix_tree_insert(&mapping->page_tree,
							index, page);
		else
			error = shmem_radix_tree_replace(mapping, index,
							expected, page);
		if (!error) {
			mapping->nrpages++;
			__inc_zone_page_state(page, NR_FILE_PAGES);
			__inc_zone_page_state(page, NR_SHMEM);
			spin_unlock_irq(&mapping->tree_lock);
		} else {
			page->mapping = NULL;
			spin_unlock_irq(&mapping->tree_lock);
			page_cache_release(page);
		}
		if (!expected)
			radix_tree_preload_end();
	}
	if (error)
		mem_cgroup_uncharge_cache_page(page);
out:
	return error;
}

/*
 * Like find_get_pages, but collecting swap entries as well as pages.
 */
@@ -308,6 +357,42 @@ export:
	return ret;
}

/*
 * Lockless lookup of swap entry in radix tree, avoiding refcount on pages.
 */
static pgoff_t shmem_find_swap(struct address_space *mapping, void *radswap)
{
	void  **slots[PAGEVEC_SIZE];
	pgoff_t indices[PAGEVEC_SIZE];
	unsigned int nr_found;

restart:
	nr_found = 1;
	indices[0] = -1;
	while (nr_found) {
		pgoff_t index = indices[nr_found - 1] + 1;
		unsigned int i;

		rcu_read_lock();
		nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
					slots, indices, index, PAGEVEC_SIZE);
		for (i = 0; i < nr_found; i++) {
			void *item = radix_tree_deref_slot(slots[i]);
			if (radix_tree_deref_retry(item)) {
				rcu_read_unlock();
				goto restart;
			}
			if (item == radswap) {
				rcu_read_unlock();
				return indices[i];
			}
		}
		rcu_read_unlock();
		cond_resched();
	}
	return -1;
}

/*
 * Remove swap entry from radix tree, free the swap and its page cache.
 */
@@ -515,23 +600,21 @@ static void shmem_evict_inode(struct inode *inode)
	end_writeback(inode);
}

/*
 * If swap found in inode, free it and move page from swapcache to filecache.
 */
static int shmem_unuse_inode(struct shmem_inode_info *info,
			     swp_entry_t swap, struct page *page)
{
	struct address_space *mapping = info->vfs_inode.i_mapping;
	void *radswap;
	pgoff_t index;
	int error;

	for (index = 0; index < SHMEM_NR_DIRECT; index++)
		if (shmem_get_swap(info, index).val == swap.val)
			goto found;
	radswap = swp_to_radix_entry(swap);
	index = shmem_find_swap(mapping, radswap);
	if (index == -1)
		return 0;
found:
	spin_lock(&info->lock);
	if (shmem_get_swap(info, index).val != swap.val) {
		spin_unlock(&info->lock);
		return 0;
	}

	/*
	 * Move _head_ to start search for next from here.
@@ -547,23 +630,30 @@ found:
	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
	 * beneath us (pagelock doesn't help until the page is in pagecache).
	 */
	error = add_to_page_cache_locked(page, mapping, index, GFP_NOWAIT);
	error = shmem_add_to_page_cache(page, mapping, index,
						GFP_NOWAIT, radswap);
	/* which does mem_cgroup_uncharge_cache_page on error */

	if (error != -ENOMEM) {
		/*
		 * Truncation and eviction use free_swap_and_cache(), which
		 * only does trylock page: if we raced, best clean up here.
		 */
		delete_from_swap_cache(page);
		set_page_dirty(page);
		shmem_put_swap(info, index, (swp_entry_t){0});
		if (!error) {
			spin_lock(&info->lock);
			info->swapped--;
			spin_unlock(&info->lock);
			swap_free(swap);
		}
		error = 1;	/* not an error, but entry was found */
	}
	spin_unlock(&info->lock);
	return error;
}

/*
 * shmem_unuse() search for an eventually swapped out shmem page.
 * Search through swapped inodes to find and replace swap by page.
 */
int shmem_unuse(swp_entry_t swap, struct page *page)
{
@@ -576,20 +666,12 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
	 * Charge page using GFP_KERNEL while we can wait, before taking
	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
	 * Charged back to the user (not to caller) when swap account is used.
	 * add_to_page_cache() will be called with GFP_NOWAIT.
	 * shmem_add_to_page_cache() will be called with GFP_NOWAIT.
	 */
	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
	if (error)
		goto out;
	/*
	 * Try to preload while we can wait, to not make a habit of
	 * draining atomic reserves; but don't latch on to this cpu,
	 * it's okay if sometimes we get rescheduled after this.
	 */
	error = radix_tree_preload(GFP_KERNEL);
	if (error)
		goto uncharge;
	radix_tree_preload_end();
	/* No radix_tree_preload: swap entry keeps a place for page in tree */

	mutex_lock(&shmem_swaplist_mutex);
	list_for_each_safe(this, next, &shmem_swaplist) {
@@ -608,7 +690,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
	}
	mutex_unlock(&shmem_swaplist_mutex);

uncharge:
	if (!found)
		mem_cgroup_uncharge_cache_page(page);
	if (found < 0)