Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7207f436 authored by Laurent Dufour, committed by Michael Ellerman
Browse files

powerpc/mm: Add page soft dirty tracking



The user space checkpoint and restart tool (CRIU) needs changes to pages
to be soft-dirty tracked. This allows it to do a pre-checkpoint and then
dump only the pages that were touched.

This is done by using a newly assigned PTE bit (_PAGE_SOFT_DIRTY) when
the page is backed in memory, and a new _PAGE_SWP_SOFT_DIRTY bit when
the page is swapped out.

To introduce a new PTE _PAGE_SOFT_DIRTY bit value common to hash 4k
and hash 64k pte, the bits already defined in hash-*4k.h should be
shifted left by one.

The _PAGE_SWP_SOFT_DIRTY bit is dynamically put after the swap type in
the swap pte. A check is added to ensure that the bit is not
overwritten by _PAGE_HPTEFLAGS.

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
CC: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 2613265c
Loading
Loading
Loading
Loading
+2 −0
Original line number Original line Diff line number Diff line
@@ -559,6 +559,7 @@ choice


config PPC_4K_PAGES
config PPC_4K_PAGES
	bool "4k page size"
	bool "4k page size"
	select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S


config PPC_16K_PAGES
config PPC_16K_PAGES
	bool "16k page size"
	bool "16k page size"
@@ -567,6 +568,7 @@ config PPC_16K_PAGES
config PPC_64K_PAGES
config PPC_64K_PAGES
	bool "64k page size"
	bool "64k page size"
	depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
	depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
	select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S


config PPC_256K_PAGES
config PPC_256K_PAGES
	bool "256k page size"
	bool "256k page size"
+1 −1
Original line number Original line Diff line number Diff line
@@ -52,7 +52,7 @@
			 _PAGE_F_SECOND | _PAGE_F_GIX)
			 _PAGE_F_SECOND | _PAGE_F_GIX)


/* shift to put page number into pte */
/* shift to put page number into pte */
#define PTE_RPN_SHIFT	(17)
#define PTE_RPN_SHIFT	(18)


#define _PAGE_4K_PFN		0
#define _PAGE_4K_PFN		0
#ifndef __ASSEMBLY__
#ifndef __ASSEMBLY__
+2 −2
Original line number Original line Diff line number Diff line
@@ -25,8 +25,8 @@
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))
#define PGDIR_MASK	(~(PGDIR_SIZE-1))


#define _PAGE_COMBO	0x00020000 /* this is a combo 4k page */
#define _PAGE_COMBO	0x00040000 /* this is a combo 4k page */
#define _PAGE_4K_PFN	0x00040000 /* PFN is for a single 4k page */
#define _PAGE_4K_PFN	0x00080000 /* PFN is for a single 4k page */
/*
/*
 * Used to track subpage group valid if _PAGE_COMBO is set
 * Used to track subpage group valid if _PAGE_COMBO is set
 * This overloads _PAGE_F_GIX and _PAGE_F_SECOND
 * This overloads _PAGE_F_GIX and _PAGE_F_SECOND
+25 −5
Original line number Original line Diff line number Diff line
@@ -33,6 +33,7 @@
#define _PAGE_F_GIX_SHIFT	12
#define _PAGE_F_GIX_SHIFT	12
#define _PAGE_F_SECOND		0x08000 /* Whether to use secondary hash or not */
#define _PAGE_F_SECOND		0x08000 /* Whether to use secondary hash or not */
#define _PAGE_SPECIAL		0x10000 /* software: special page */
#define _PAGE_SPECIAL		0x10000 /* software: special page */
#define _PAGE_SOFT_DIRTY	0x20000 /* software: software dirty tracking */


/*
/*
 * THP pages can't be special. So use the _PAGE_SPECIAL
 * THP pages can't be special. So use the _PAGE_SPECIAL
@@ -50,7 +51,7 @@
 */
 */
#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
			 _PAGE_THP_HUGE | _PAGE_PTE)
			 _PAGE_THP_HUGE | _PAGE_PTE | _PAGE_SOFT_DIRTY)


#ifdef CONFIG_PPC_64K_PAGES
#ifdef CONFIG_PPC_64K_PAGES
#include <asm/book3s/64/hash-64k.h>
#include <asm/book3s/64/hash-64k.h>
@@ -136,14 +137,16 @@
 * pgprot changes
 * pgprot changes
 */
 */
#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE)
			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
			 _PAGE_SOFT_DIRTY)
/*
/*
 * Mask of bits returned by pte_pgprot()
 * Mask of bits returned by pte_pgprot()
 */
 */
#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
			 _PAGE_USER | _PAGE_ACCESSED |  \
			 _PAGE_USER | _PAGE_ACCESSED |  \
			 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC)
			 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC | \
			 _PAGE_SOFT_DIRTY)
/*
/*
 * We define 2 sets of base prot bits, one for basic pages (ie,
 * We define 2 sets of base prot bits, one for basic pages (ie,
 * cacheable kernel and user pages) and one for non cacheable
 * cacheable kernel and user pages) and one for non cacheable
@@ -339,7 +342,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
{
{
	unsigned long bits = pte_val(entry) &
	unsigned long bits = pte_val(entry) &
		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC |
		 _PAGE_SOFT_DIRTY);


	unsigned long old, tmp;
	unsigned long old, tmp;


@@ -366,6 +370,22 @@ static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIA
static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }


#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
/*
 * Test the software dirty-tracking bit of a PTE.
 * Returns true when _PAGE_SOFT_DIRTY is set in the PTE value.
 */
static inline bool pte_soft_dirty(pte_t pte)
{
	unsigned long val = pte_val(pte);

	return (val & _PAGE_SOFT_DIRTY) != 0;
}
/*
 * Return a copy of the PTE with the software dirty-tracking bit
 * (_PAGE_SOFT_DIRTY) set; all other bits are left as they were.
 */
static inline pte_t pte_mksoft_dirty(pte_t pte)
{
	unsigned long val = pte_val(pte);

	val |= _PAGE_SOFT_DIRTY;
	return __pte(val);
}

/*
 * Return a copy of the PTE with the software dirty-tracking bit
 * (_PAGE_SOFT_DIRTY) cleared; all other bits are left as they were.
 */
static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
	unsigned long val = pte_val(pte);

	val &= ~_PAGE_SOFT_DIRTY;
	return __pte(val);
}
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */

#ifdef CONFIG_NUMA_BALANCING
#ifdef CONFIG_NUMA_BALANCING
/*
/*
 * These work without NUMA balancing but the kernel does not care. See the
 * These work without NUMA balancing but the kernel does not care. See the
@@ -424,7 +444,7 @@ static inline pte_t pte_mkwrite(pte_t pte)


static inline pte_t pte_mkdirty(pte_t pte)
static inline pte_t pte_mkdirty(pte_t pte)
{
{
	return __pte(pte_val(pte) | _PAGE_DIRTY);
	return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
}
}


static inline pte_t pte_mkyoung(pte_t pte)
static inline pte_t pte_mkyoung(pte_t pte)
+26 −0
Original line number Original line Diff line number Diff line
@@ -146,6 +146,7 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
	 * We filter HPTEFLAGS on set_pte.			\
	 * We filter HPTEFLAGS on set_pte.			\
	 */							\
	 */							\
	BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
	BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY);	\
	} while (0)
	} while (0)
/*
/*
 * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
 * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
@@ -161,6 +162,24 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
#define __swp_entry_to_pte(x)		__pte((x).val)
#define __swp_entry_to_pte(x)		__pte((x).val)


#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
/*
 * Soft-dirty marker used in swap PTEs (the page is swapped out, so the
 * regular _PAGE_SOFT_DIRTY bit is not used). It is a single bit whose
 * position is computed as SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE, which puts
 * it right after the swap type field of the swap PTE.
 */
#define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
/*
 * Return a copy of a swap PTE with the swap soft-dirty bit
 * (_PAGE_SWP_SOFT_DIRTY) set.
 */
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	unsigned long val = pte_val(pte);

	val |= _PAGE_SWP_SOFT_DIRTY;
	return __pte(val);
}
/*
 * Test the swap soft-dirty bit of a swap PTE.
 * Returns true when _PAGE_SWP_SOFT_DIRTY is set in the PTE value.
 */
static inline bool pte_swp_soft_dirty(pte_t pte)
{
	unsigned long val = pte_val(pte);

	return (val & _PAGE_SWP_SOFT_DIRTY) != 0;
}
/*
 * Return a copy of a swap PTE with the swap soft-dirty bit
 * (_PAGE_SWP_SOFT_DIRTY) cleared.
 */
static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	unsigned long val = pte_val(pte);

	val &= ~_PAGE_SWP_SOFT_DIRTY;
	return __pte(val);
}
#else
#define _PAGE_SWP_SOFT_DIRTY	0
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */

void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);
void pgtable_cache_init(void);


@@ -201,6 +220,13 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
/*
 * Soft-dirty helpers for PMD entries. Each one converts the pmd to a pte
 * with pmd_pte() and applies the corresponding pte soft-dirty helper; the
 * two modifying variants convert the result back with pte_pmd().
 */
#define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
#define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */

#ifdef CONFIG_NUMA_BALANCING
#ifdef CONFIG_NUMA_BALANCING
static inline int pmd_protnone(pmd_t pmd)
static inline int pmd_protnone(pmd_t pmd)
{
{