
Commit c475a8ab authored by Hugh Dickins, committed by Linus Torvalds

[PATCH] can_share_swap_page: use page_mapcount



Remember that ironic get_user_pages race?  When the raised page_count on a
swapped-out page led do_wp_page to decide that it had to copy on write, it
substituted a different page into userspace.  2.6.7 onwards have Andrea's
solution, where try_to_unmap_one backs out if it finds page_count raised.
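
For reference, Andrea's check is the one this patch deletes from
try_to_unmap_one (the second hunk below): a swap-cache page is left mapped
whenever page_count differs from page_mapcount plus two, the extra two
being the swap cache's own reference and (on my reading of the +2) the one
held by the reclaim caller:

	if (PageSwapCache(page) &&
	    page_count(page) != page_mapcount(page) + 2) {
		ret = SWAP_FAIL;	/* raised count: GUP may hold the page */
		goto out_unmap;
	}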

That works, but is unsatisfying (rmap.c has no other page_count heuristics),
and a few months ago it was found to hang an intensive page migration test.
A year ago I was hesitant to engage page_mapcount; now it seems the right fix.

So remove the page_count hack from try_to_unmap_one; and use activate_page
in unuse_mm when dropping the lock, to replace the hack's secondary effect
of helping swapoff make progress in that case.
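
The unuse_mm hunk below shows the replacement: before dropping the page lock
to take mmap_sem, mark the page active, so shrink_cache is unlikely to unmap
its ptes in the unlocked window:

	if (!down_read_trylock(&mm->mmap_sem)) {
		activate_page(page);	/* deter reclaim while lock is dropped */
		unlock_page(page);
		down_read(&mm->mmap_sem);
		lock_page(page);
	}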

Simplify can_share_swap_page (now called only on anonymous pages) to check
page_mapcount + page_swapcount == 1: still needs the page lock to stabilize
their (pessimistic) sum, but does not need swapper_space.tree_lock for that.
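
As the swapfile.c hunk below has it, the whole function reduces to:

	int can_share_swap_page(struct page *page)
	{
		int count;

		BUG_ON(!PageLocked(page));
		count = page_mapcount(page);
		if (count <= 1 && PageSwapCache(page))
			count += page_swapcount(page);
		return count == 1;
	}

The sum is pessimistic because a reference in transit between a pte and the
swap map can momentarily be counted on both sides: the test may miss a chance
to share, but never claims exclusivity falsely.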

In do_swap_page, move swap_free and unlock_page below page_add_anon_rmap, to
keep the sum on the high side, and correct at the moment can_share_swap_page
is called.
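
That gives the ordering in the first hunk below: can_share_swap_page is
called while this task's swap reference is still counted, and page_mapcount
is raised before swap_free drops page_swapcount, so the sum never dips below
the truth:

	flush_icache_page(vma, page);
	set_pte_at(mm, address, page_table, pte);
	page_add_anon_rmap(page, vma, address);	/* page_mapcount raised first */

	swap_free(entry);	/* only now is page_swapcount dropped */
	if (vm_swap_full())
		remove_exclusive_swap_page(page);
	unlock_page(page);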

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent d296e9cd
mm/memory.c +5 −5
@@ -1687,22 +1687,22 @@ static int do_swap_page(struct mm_struct * mm,
 
 	/* The page isn't present yet, go ahead with the fault. */
 
-	swap_free(entry);
-	if (vm_swap_full())
-		remove_exclusive_swap_page(page);
-
 	inc_mm_counter(mm, rss);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if (write_access && can_share_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		write_access = 0;
 	}
-	unlock_page(page);
 
 	flush_icache_page(vma, page);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
 
+	swap_free(entry);
+	if (vm_swap_full())
+		remove_exclusive_swap_page(page);
+	unlock_page(page);
+
 	if (write_access) {
 		if (do_wp_page(mm, vma, address,
 				page_table, pmd, pte) == VM_FAULT_OOM)
mm/rmap.c +0 −21
@@ -539,27 +539,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 		goto out_unmap;
 	}
 
-	/*
-	 * Don't pull an anonymous page out from under get_user_pages.
-	 * GUP carefully breaks COW and raises page count (while holding
-	 * page_table_lock, as we have here) to make sure that the page
-	 * cannot be freed.  If we unmap that page here, a user write
-	 * access to the virtual address will bring back the page, but
-	 * its raised count will (ironically) be taken to mean it's not
-	 * an exclusive swap page, do_wp_page will replace it by a copy
-	 * page, and the user never get to see the data GUP was holding
-	 * the original page for.
-	 *
-	 * This test is also useful for when swapoff (unuse_process) has
-	 * to drop page lock: its reference to the page stops existing
-	 * ptes from being unmapped, so swapoff can make progress.
-	 */
-	if (PageSwapCache(page) &&
-	    page_count(page) != page_mapcount(page) + 2) {
-		ret = SWAP_FAIL;
-		goto out_unmap;
-	}
-
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
 	pteval = ptep_clear_flush(vma, address, pte);
mm/swapfile.c +16 −39
@@ -276,61 +276,37 @@ void swap_free(swp_entry_t entry)
 }
 
 /*
- * Check if we're the only user of a swap page,
- * when the page is locked.
+ * How many references to page are currently swapped out?
  */
-static int exclusive_swap_page(struct page *page)
+static inline int page_swapcount(struct page *page)
 {
-	int retval = 0;
-	struct swap_info_struct * p;
+	int count = 0;
+	struct swap_info_struct *p;
 	swp_entry_t entry;
 
 	entry.val = page->private;
 	p = swap_info_get(entry);
 	if (p) {
-		/* Is the only swap cache user the cache itself? */
-		if (p->swap_map[swp_offset(entry)] == 1) {
-			/* Recheck the page count with the swapcache lock held.. */
-			write_lock_irq(&swapper_space.tree_lock);
-			if (page_count(page) == 2)
-				retval = 1;
-			write_unlock_irq(&swapper_space.tree_lock);
-		}
+		/* Subtract the 1 for the swap cache itself */
+		count = p->swap_map[swp_offset(entry)] - 1;
 		swap_info_put(p);
 	}
-	return retval;
+	return count;
 }
 
 /*
  * We can use this swap cache entry directly
  * if there are no other references to it.
- *
- * Here "exclusive_swap_page()" does the real
- * work, but we opportunistically check whether
- * we need to get all the locks first..
  */
 int can_share_swap_page(struct page *page)
 {
-	int retval = 0;
+	int count;
 
-	if (!PageLocked(page))
-		BUG();
-	switch (page_count(page)) {
-	case 3:
-		if (!PagePrivate(page))
-			break;
-		/* Fallthrough */
-	case 2:
-		if (!PageSwapCache(page))
-			break;
-		retval = exclusive_swap_page(page);
-		break;
-	case 1:
-		if (PageReserved(page))
-			break;
-		retval = 1;
-	}
-	return retval;
+	BUG_ON(!PageLocked(page));
+	count = page_mapcount(page);
+	if (count <= 1 && PageSwapCache(page))
+		count += page_swapcount(page);
+	return count == 1;
 }
 
 /*
@@ -529,9 +505,10 @@ static int unuse_mm(struct mm_struct *mm,
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		/*
-		 * Our reference to the page stops try_to_unmap_one from
-		 * unmapping its ptes, so swapoff can make progress.
+		 * Activate page so shrink_cache is unlikely to unmap its
+		 * ptes while lock is dropped, so swapoff can make progress.
 		 */
+		activate_page(page);
 		unlock_page(page);
 		down_read(&mm->mmap_sem);
 		lock_page(page);