Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 179ef71c authored by Cyrill Gorcunov's avatar Cyrill Gorcunov Committed by Linus Torvalds
Browse files

mm: save soft-dirty bits on swapped pages



Andy Lutomirski reported that if a page with _PAGE_SOFT_DIRTY bit set
get swapped out, the bit is getting lost and no longer available when
pte read back.

To resolve this we introduce _PTE_SWP_SOFT_DIRTY bit which is saved in
pte entry for the page being swapped out.  When such page is to be read
back from a swap cache we check for bit presence and if it's there we
clear it and restore the former _PAGE_SOFT_DIRTY bit back.

One of the problem was to find a place in pte entry where we can save
the _PTE_SWP_SOFT_DIRTY bit while page is in swap.  The _PAGE_PSE was
chosen for that, it doesn't intersect with swap entry format stored in
pte.

Reported-by: default avatarAndy Lutomirski <luto@amacapital.net>
Signed-off-by: default avatarCyrill Gorcunov <gorcunov@openvz.org>
Acked-by: default avatarPavel Emelyanov <xemul@parallels.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: default avatarMinchan Kim <minchan@kernel.org>
Reviewed-by: default avatarWanpeng Li <liwanp@linux.vnet.ibm.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 3e6b11df
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -314,6 +314,21 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
	return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
}

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
	return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}

/*
 * Mask out unsupported bits in a present pgprot.  Non-present pgprots
 * can use those bits for other purposes, so leave them be.
+13 −0
Original line number Diff line number Diff line
@@ -67,6 +67,19 @@
#define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 0))
#endif

/*
 * Tracking soft dirty bit when a page goes to a swap is tricky.
 * We need a bit which can be stored in pte _and_ not conflict
 * with swap entry format. On x86 bits 6 and 7 are *not* involved
 * into swap entry computation, but bit 6 is used for nonlinear
 * file mapping, so we borrow bit 7 for soft dirty tracking.
 */
#ifdef CONFIG_MEM_SOFT_DIRTY
#define _PAGE_SWP_SOFT_DIRTY	_PAGE_PSE
#else
#define _PAGE_SWP_SOFT_DIRTY	(_AT(pteval_t, 0))
#endif

#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
#else
+15 −6
Original line number Diff line number Diff line
@@ -730,8 +730,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
	 * of how soft-dirty works.
	 */
	pte_t ptent = *pte;

	if (pte_present(ptent)) {
		ptent = pte_wrprotect(ptent);
		ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
	} else if (is_swap_pte(ptent)) {
		ptent = pte_swp_clear_soft_dirty(ptent);
	}

	set_pte_at(vma->vm_mm, addr, pte, ptent);
#endif
}
@@ -752,14 +758,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;
		if (!pte_present(ptent))
			continue;

		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty(vma, addr, pte);
			continue;
		}

		if (!pte_present(ptent))
			continue;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;
@@ -930,8 +937,10 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
		flags = PM_PRESENT;
		page = vm_normal_page(vma, addr, pte);
	} else if (is_swap_pte(pte)) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		swp_entry_t entry;
		if (pte_swp_soft_dirty(pte))
			flags2 |= __PM_SOFT_DIRTY;
		entry = pte_to_swp_entry(pte);
		frame = swp_type(entry) |
			(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
		flags = PM_SWAP;
+15 −0
Original line number Diff line number Diff line
@@ -417,6 +417,21 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
	return 0;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	return pte;
}
#endif

#ifndef __HAVE_PFNMAP_TRACKING
+2 −0
Original line number Diff line number Diff line
@@ -67,6 +67,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
	swp_entry_t arch_entry;

	BUG_ON(pte_file(pte));
	if (pte_swp_soft_dirty(pte))
		pte = pte_swp_clear_soft_dirty(pte);
	arch_entry = __pte_to_swp_entry(pte);
	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
}
Loading