Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b2fa47e6 authored by Martin Schwidefsky
Browse files

[S390] refactor page table functions for better pgste support



Rework the architecture page table functions to access the bits in the
page table extension array (pgste). There are a number of changes:
1) Fix missing pgste update if the attach_count for the mm is <= 1.
2) For every operation that affects the invalid bit in the pte or the
   rcp byte in the pgste the pcl lock needs to be acquired. The function
   pgste_get_lock gets the pcl lock and returns the current pgste value
   for a pte pointer. The function pgste_set_unlock stores the pgste
   and releases the lock. Between these two calls the bits in the pgste
   can be shuffled.
3) Define two software bits in the pte _PAGE_SWR and _PAGE_SWC to avoid
   calling SetPageDirty and SetPageReferenced from pgtable.h. If the
   host reference backup bit or the host change backup bit has been
   set the dirty/referenced state is transferred to the pte. The common
   code will pick up the state from the pte.
4) Add ptep_modify_prot_start and ptep_modify_prot_commit for mprotect.
5) Remove pgd_populate_kernel, pud_populate_kernel, pmd_populate_kernel
   pgd_clear_kernel, pud_clear_kernel, pmd_clear_kernel and ptep_invalidate.
6) Rename kvm_s390_test_and_clear_page_dirty to
   ptep_test_and_clear_user_dirty and add ptep_test_and_clear_user_young.
7) Define mm_exclusive() and mm_has_pgste() helper to improve readability.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent 2d42552d
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -9,8 +9,10 @@ typedef struct {
	unsigned long asce_bits;
	unsigned long asce_limit;
	unsigned long vdso_base;
	int has_pgste;	 /* The mmu context has extended page tables */
	int alloc_pgste; /* cloned contexts will have extended page tables */
	/* Cloned contexts will be created with extended page tables. */
	unsigned int alloc_pgste:1;
	/* The mmu context has extended page tables. */
	unsigned int has_pgste:1;
} mm_context_t;

#define INIT_MM_CONTEXT(name)						      \
+4 −0
Original line number Diff line number Diff line
@@ -90,6 +90,7 @@ static inline void copy_page(void *to, void *from)
 */

typedef struct { unsigned long pgprot; } pgprot_t;
typedef struct { unsigned long pgste; } pgste_t;
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pud; } pud_t;
@@ -97,13 +98,16 @@ typedef struct { unsigned long pgd; } pgd_t;
typedef pte_t *pgtable_t;

#define pgprot_val(x)	((x).pgprot)
#define pgste_val(x)	((x).pgste)
#define pte_val(x)	((x).pte)
#define pmd_val(x)	((x).pmd)
#define pud_val(x)	((x).pud)
#define pgd_val(x)      ((x).pgd)

#define __pgste(x)	((pgste_t) { (x) } )
#define __pte(x)        ((pte_t) { (x) } )
#define __pmd(x)        ((pmd_t) { (x) } )
#define __pud(x)	((pud_t) { (x) } )
#define __pgd(x)        ((pgd_t) { (x) } )
#define __pgprot(x)     ((pgprot_t) { (x) } )

+5 −24
Original line number Diff line number Diff line
@@ -65,10 +65,7 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
#define pmd_free(mm, x)				do { } while (0)

#define pgd_populate(mm, pgd, pud)		BUG()
#define pgd_populate_kernel(mm, pgd, pud)	BUG()

#define pud_populate(mm, pud, pmd)		BUG()
#define pud_populate_kernel(mm, pud, pmd)	BUG()

#else /* __s390x__ */

@@ -102,26 +99,14 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
}
#define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd)

static inline void pgd_populate_kernel(struct mm_struct *mm,
				       pgd_t *pgd, pud_t *pud)
{
	pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
}

static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
{
	pgd_populate_kernel(mm, pgd, pud);
}

static inline void pud_populate_kernel(struct mm_struct *mm,
				       pud_t *pud, pmd_t *pmd)
{
	pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
	pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
}

static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
	pud_populate_kernel(mm, pud, pmd);
	pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
}

#endif /* __s390x__ */
@@ -134,18 +119,14 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
}
#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)

static inline void pmd_populate_kernel(struct mm_struct *mm,
				       pmd_t *pmd, pte_t *pte)
{
	pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
}

static inline void pmd_populate(struct mm_struct *mm,
				pmd_t *pmd, pgtable_t pte)
{
	pmd_populate_kernel(mm, pmd, pte);
	pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
}

#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)

#define pmd_pgtable(pmd) \
	(pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)

+308 −198
Original line number Diff line number Diff line
@@ -31,9 +31,8 @@
#ifndef __ASSEMBLY__
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <asm/bitops.h>
#include <asm/bug.h>
#include <asm/processor.h>
#include <asm/page.h>

extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
extern void paging_init(void);
@@ -243,11 +242,13 @@ extern unsigned long VMALLOC_START;
/* Software bits in the page table entry */
#define _PAGE_SWT	0x001		/* SW pte type bit t */
#define _PAGE_SWX	0x002		/* SW pte type bit x */
#define _PAGE_SPECIAL	0x004		/* SW associated with special page */
#define _PAGE_SWC	0x004		/* SW pte changed bit (for KVM) */
#define _PAGE_SWR	0x008		/* SW pte referenced bit (for KVM) */
#define _PAGE_SPECIAL	0x010		/* SW associated with special page */
#define __HAVE_ARCH_PTE_SPECIAL

/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_SPECIAL)
#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR)

/* Six different types of pages. */
#define _PAGE_TYPE_EMPTY	0x400
@@ -293,14 +294,17 @@ extern unsigned long VMALLOC_START;
 */

/* Page status table bits for virtualization */
#define RCP_PCL_BIT	55
#define RCP_HR_BIT	54
#define RCP_HC_BIT	53
#define RCP_GR_BIT	50
#define RCP_GC_BIT	49

/* User dirty bit for KVM's migration feature */
#define KVM_UD_BIT	47
#define RCP_ACC_BITS	0xf000000000000000UL
#define RCP_FP_BIT	0x0800000000000000UL
#define RCP_PCL_BIT	0x0080000000000000UL
#define RCP_HR_BIT	0x0040000000000000UL
#define RCP_HC_BIT	0x0020000000000000UL
#define RCP_GR_BIT	0x0004000000000000UL
#define RCP_GC_BIT	0x0002000000000000UL

/* User dirty / referenced bit for KVM's migration feature */
#define KVM_UR_BIT	0x0000800000000000UL
#define KVM_UC_BIT	0x0000400000000000UL

#ifndef __s390x__

@@ -407,17 +411,20 @@ extern unsigned long VMALLOC_START;
#define __S110	PAGE_RW
#define __S111	PAGE_RW

/*
 * Certain architectures need to do special things when PTEs
 * within a page table are directly modified.  Thus, the following
 * hook is made available.
 */
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t entry)
static inline int mm_exclusive(struct mm_struct *mm)
{
	*ptep = entry;
	return likely(mm == current->active_mm &&
		      atomic_read(&mm->context.attach_count) <= 1);
}

/*
 * Report whether the mmu context of @mm has extended page tables (pgstes).
 * Always returns 0 if the kernel is built without CONFIG_PGSTE.
 */
static inline int mm_has_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	return unlikely(mm->context.has_pgste) ? 1 : 0;
#else
	return 0;
#endif
}
/*
 * pgd/pmd/pte query functions
 */
@@ -530,52 +537,129 @@ static inline int pte_special(pte_t pte)
}

#define __HAVE_ARCH_PTE_SAME
#define pte_same(a,b)  (pte_val(a) == pte_val(b))
/* Two ptes are the same if their raw values do not differ in any bit. */
static inline int pte_same(pte_t a, pte_t b)
{
	return !(pte_val(a) ^ pte_val(b));
}

static inline void rcp_lock(pte_t *ptep)
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
	unsigned long old;

	preempt_disable();
	while (test_and_set_bit(RCP_PCL_BIT, pgste))
		;
	asm(
		"	lg	%0,%2\n"
		"0:	lgr	%1,%0\n"
		"	nihh	%0,0xff7f\n"	/* clear RCP_PCL_BIT in old */
		"	oihh	%1,0x0080\n"	/* set RCP_PCL_BIT in new */
		"	csg	%0,%1,%2\n"
		"	jl	0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc");
#endif
	return __pgste(new);
}

static inline void rcp_unlock(pte_t *ptep)
static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
	clear_bit(RCP_PCL_BIT, pgste);
	asm(
		"	nihh	%1,0xff7f\n"	/* clear RCP_PCL_BIT */
		"	stg	%1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc");
	preempt_enable();
#endif
}

#include <linux/page-flags.h>
/*
 * Fold the storage key state of the page mapped by *ptep into the pgste
 * and into the software bits of the pte.
 *
 * @ptep:  pte whose page address selects the storage key; its
 *         _PAGE_SWR / _PAGE_SWC software bits are updated in place.
 * @pgste: current pgste value; callers in this file obtain it with
 *         pgste_get_lock and store the result with pgste_set_unlock.
 *
 * Returns the updated pgste value. Without CONFIG_PGSTE the function
 * returns @pgste unchanged.
 */
static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits;
	unsigned char skey;

	/*
	 * The storage key helpers operate on the address of the page, not
	 * on its page frame number (page_reset_referenced in
	 * pgste_update_young is called the same way).
	 */
	address = pte_val(*ptep) & PAGE_MASK;
	skey = page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Clear page changed & referenced bit in the storage key */
	if (bits) {
		skey ^= bits;
		page_set_storage_key(address, skey, 1);
	}
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* RCP_GR_BIT & RCP_GC_BIT */
	/* Get host changed & referenced bits from pgste */
	bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52;
	/* Clear host bits in pgste. */
	pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT);
	pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT);
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) |=
		(unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	/* Transfer changed and referenced to kvm user bits */
	pgste_val(pgste) |= bits << 45;		/* KVM_UR_BIT & KVM_UC_BIT */
	/* Transfer changed & referenced to pte software bits */
	pte_val(*ptep) |= bits << 1;		/* _PAGE_SWR & _PAGE_SWC */
#endif
	return pgste;
}

/*
 * Reset the referenced state of the page mapped by *ptep and record it.
 *
 * The pte gets _PAGE_SWR if either the page was referenced or the host
 * referenced bit was set in @pgste; the host referenced bit is then
 * cleared and the page's referenced state is copied to the guest
 * referenced bit. Returns the updated pgste value (unchanged without
 * CONFIG_PGSTE).
 */
static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	int referenced = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
	int host_referenced = (pgste_val(pgste) & RCP_HR_BIT) != 0;

	/* Host view: remember the reference in the pte software bit */
	if (referenced || host_referenced)
		pte_val(*ptep) |= _PAGE_SWR;
	/* The host referenced bit has been consumed */
	pgste_val(pgste) &= ~RCP_HR_BIT;
	/* Guest view: shift the flag to the position of RCP_GR_BIT */
	pgste_val(pgste) |= (unsigned long) referenced << 50;
#endif
	return pgste;
}

static inline void ptep_rcp_copy(pte_t *ptep)
static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	struct page *page = virt_to_page(pte_val(*ptep));
	unsigned int skey;
	unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);

	skey = page_get_storage_key(pte_val(*ptep) >> PAGE_SHIFT);
	if (skey & _PAGE_CHANGED) {
		set_bit_simple(RCP_GC_BIT, pgste);
		set_bit_simple(KVM_UD_BIT, pgste);
	}
	if (skey & _PAGE_REFERENCED)
		set_bit_simple(RCP_GR_BIT, pgste);
	if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
		SetPageDirty(page);
		set_bit_simple(KVM_UD_BIT, pgste);
	}
	if (test_and_clear_bit_simple(RCP_HR_BIT, pgste))
		SetPageReferenced(page);
	unsigned long pfn;
	unsigned long okey, nkey;

	pfn = pte_val(*ptep) >> PAGE_SHIFT;
	okey = nkey = page_get_storage_key(pfn);
	nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT);
	/* Set page access key and fetch protection bit from pgste */
	nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
	if (okey != nkey)
		page_set_storage_key(pfn, nkey, 1);
#endif
}

/*
 * Certain architectures need to do special things when PTEs
 * within a page table are directly modified.  Thus, the following
 * hook is made available.
 *
 * For an mm with pgstes the store is done between pgste_get_lock and
 * pgste_set_unlock, and pgste_set_pte is called for the pte that is
 * being installed.
 */
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	if (!mm_has_pgste(mm)) {
		*ptep = entry;
		return;
	}
	pgste = pgste_get_lock(ptep);
	/*
	 * Hand over the new entry rather than ptep: *ptep still holds the
	 * previous pte here, and pgste_set_pte derives the page from the
	 * pte it is given.
	 */
	pgste_set_pte(&entry, pgste);
	*ptep = entry;
	pgste_set_unlock(ptep, pgste);
}

/*
 * query functions pte_write/pte_dirty/pte_young only work if
 * pte_present() is true. Undefined behaviour if not..
@@ -587,19 +671,19 @@ static inline int pte_write(pte_t pte)

static inline int pte_dirty(pte_t pte)
{
	/* A pte is neither clean nor dirty on s/390. The dirty bit
	 * is in the storage key. See page_test_and_clear_dirty for
	 * details.
	 */
#ifdef CONFIG_PGSTE
	if (pte_val(pte) & _PAGE_SWC)
		return 1;
#endif
	return 0;
}

static inline int pte_young(pte_t pte)
{
	/* A pte is neither young nor old on s/390. The young bit
	 * is in the storage key. See page_test_and_clear_young for
	 * details.
	 */
#ifdef CONFIG_PGSTE
	if (pte_val(pte) & _PAGE_SWR)
		return 1;
#endif
	return 0;
}

@@ -607,46 +691,27 @@ static inline int pte_young(pte_t pte)
 * pgd/pmd/pte modification functions
 */

#ifndef __s390x__

#define pgd_clear(pgd)		do { } while (0)
#define pud_clear(pud)		do { } while (0)

#else /* __s390x__ */

static inline void pgd_clear_kernel(pgd_t * pgd)
static inline void pgd_clear(pgd_t *pgd)
{
#ifdef __s390x__
	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
		pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
#endif
}

static inline void pgd_clear(pgd_t * pgd)
{
	pgd_clear_kernel(pgd);
}

static inline void pud_clear_kernel(pud_t *pud)
static inline void pud_clear(pud_t *pud)
{
#ifdef __s390x__
	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pud_val(*pud) = _REGION3_ENTRY_EMPTY;
#endif
}

static inline void pud_clear(pud_t *pud)
{
	pud_clear_kernel(pud);
}
#endif /* __s390x__ */

static inline void pmd_clear_kernel(pmd_t * pmdp)
static inline void pmd_clear(pmd_t *pmdp)
{
	pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
}

static inline void pmd_clear(pmd_t *pmd)
{
	pmd_clear_kernel(pmd);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
@@ -679,35 +744,27 @@ static inline pte_t pte_mkwrite(pte_t pte)

static inline pte_t pte_mkclean(pte_t pte)
{
	/* The only user of pte_mkclean is the fork() code.
	   We must *not* clear the *physical* page dirty bit
	   just because fork() wants to clear the dirty bit in
	   *one* of the page's mappings.  So we just do nothing. */
#ifdef CONFIG_PGSTE
	pte_val(pte) &= ~_PAGE_SWC;
#endif
	return pte;
}

static inline pte_t pte_mkdirty(pte_t pte)
{
	/* We do not explicitly set the dirty bit because the
	 * sske instruction is slow. It is faster to let the
	 * next instruction set the dirty bit.
	 */
	return pte;
}

static inline pte_t pte_mkold(pte_t pte)
{
	/* S/390 doesn't keep its dirty/referenced bit in the pte.
	 * There is no point in clearing the real referenced bit.
	 */
#ifdef CONFIG_PGSTE
	pte_val(pte) &= ~_PAGE_SWR;
#endif
	return pte;
}

static inline pte_t pte_mkyoung(pte_t pte)
{
	/* S/390 doesn't keep its dirty/referenced bit in the pte.
	 * There is no point in setting the real referenced bit.
	 */
	return pte;
}

@@ -745,64 +802,60 @@ static inline pte_t pte_mkhuge(pte_t pte)
}
#endif

#ifdef CONFIG_PGSTE
/*
 * Get (and clear) the user dirty bit for a PTE.
 * Get (and clear) the user dirty bit for a pte.
 */
static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm,
static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
						 pte_t *ptep)
{
	int dirty;
	unsigned long *pgste;
	unsigned long pfn;
	struct page *page;
	unsigned int skey;

	if (!mm->context.has_pgste)
		return -EINVAL;
	rcp_lock(ptep);
	pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
	pfn = pte_val(*ptep) >> PAGE_SHIFT;
	page = pfn_to_page(pfn);
	skey = page_get_storage_key(pfn);
	if (skey & _PAGE_CHANGED) {
		set_bit_simple(RCP_GC_BIT, pgste);
		set_bit_simple(KVM_UD_BIT, pgste);
	}
	if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
		SetPageDirty(page);
		set_bit_simple(KVM_UD_BIT, pgste);
	}
	dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste);
	if (skey & _PAGE_CHANGED)
		page_set_storage_key(pfn, skey & ~_PAGE_CHANGED, 1);
	rcp_unlock(ptep);
	pgste_t pgste;
	int dirty = 0;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_update_all(ptep, pgste);
		dirty = !!(pgste_val(pgste) & KVM_UC_BIT);
		pgste_val(pgste) &= ~KVM_UC_BIT;
		pgste_set_unlock(ptep, pgste);
		return dirty;
	}
#endif
	return dirty;
}

/*
 * Test and clear the user referenced bit (KVM_UR_BIT) for a pte.
 * Returns 1 if the bit was set, 0 if it was clear or if the mm has
 * no pgstes.
 */
static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
						 pte_t *ptep)
{
	pgste_t pgste;
	int young;

	if (!mm_has_pgste(mm))
		return 0;

	pgste = pgste_get_lock(ptep);
	pgste = pgste_update_young(ptep, pgste);
	young = (pgste_val(pgste) & KVM_UR_BIT) ? 1 : 0;
	pgste_val(pgste) &= ~KVM_UR_BIT;
	pgste_set_unlock(ptep, pgste);
	return young;
}

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long addr, pte_t *ptep)
{
#ifdef CONFIG_PGSTE
	unsigned long pfn;
	int young;
	unsigned long *pgste;
	pgste_t pgste;
	pte_t pte;

	if (!vma->vm_mm->context.has_pgste)
		return 0;
	pfn = pte_val(*ptep) >> PAGE_SHIFT;
	pgste = (unsigned long *) (ptep + PTRS_PER_PTE);

	young = ((page_get_storage_key(pfn) & _PAGE_REFERENCED) != 0);
	rcp_lock(ptep);
	if (young)
		set_bit_simple(RCP_GR_BIT, pgste);
	young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste);
	rcp_unlock(ptep);
	return young;
#endif
	if (mm_has_pgste(vma->vm_mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_update_young(ptep, pgste);
		pte = *ptep;
		*ptep = pte_mkold(pte);
		pgste_set_unlock(ptep, pgste);
		return pte_young(pte);
	}
	return 0;
}

@@ -814,10 +867,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
	 * On s390 reference bits are in storage key and never in TLB
	 * With virtualization we handle the reference bit, without it
	 * we can simply return */
#ifdef CONFIG_PGSTE
	return ptep_test_and_clear_young(vma, address, ptep);
#endif
	return 0;
}

static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
@@ -837,21 +887,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
	}
}

static inline void ptep_invalidate(struct mm_struct *mm,
				   unsigned long address, pte_t *ptep)
{
	if (mm->context.has_pgste) {
		rcp_lock(ptep);
		__ptep_ipte(address, ptep);
		ptep_rcp_copy(ptep);
		pte_val(*ptep) = _PAGE_TYPE_EMPTY;
		rcp_unlock(ptep);
		return;
	}
	__ptep_ipte(address, ptep);
	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
}

/*
 * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
 * both clear the TLB for the unmapped pte. The reason is that
@@ -866,24 +901,72 @@ static inline void ptep_invalidate(struct mm_struct *mm,
 * is a nop.
 */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define ptep_get_and_clear(__mm, __address, __ptep)			\
({									\
	pte_t __pte = *(__ptep);					\
	(__mm)->context.flush_mm = 1;					\
	if (atomic_read(&(__mm)->context.attach_count) > 1 ||		\
	    (__mm) != current->active_mm)				\
		ptep_invalidate(__mm, __address, __ptep);		\
	else								\
		pte_clear((__mm), (__address), (__ptep));		\
	__pte;								\
})
/*
 * Read the pte at @ptep, replace it with an empty pte and return the old
 * value. For an mm with pgstes the exchange is done between
 * pgste_get_lock and pgste_set_unlock, and pgste_update_all is applied to
 * the local copy of the old pte, so the software bits it sets show up in
 * the returned value.
 */
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address, pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;

	mm->context.flush_mm = 1;
	if (mm_has_pgste(mm))
		pgste = pgste_get_lock(ptep);

	pte = *ptep;
	/* __ptep_ipte is skipped when mm_exclusive(mm) is true */
	if (!mm_exclusive(mm))
		__ptep_ipte(address, ptep);
	pte_val(*ptep) = _PAGE_TYPE_EMPTY;

	if (mm_has_pgste(mm)) {
		/* Works on the saved copy; *ptep is already empty here */
		pgste = pgste_update_all(&pte, pgste);
		pgste_set_unlock(ptep, pgste);
	}
	return pte;
}

#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * First half of a protection change: return the current pte at @ptep.
 * With pgstes, pgste_get_lock is called here and the matching
 * pgste_set_unlock is issued by ptep_modify_prot_commit.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
					   unsigned long address,
					   pte_t *ptep)
{
	pte_t pte;

	mm->context.flush_mm = 1;
	/* The pgste value returned by pgste_get_lock is not kept here */
	if (mm_has_pgste(mm))
		pgste_get_lock(ptep);

	pte = *ptep;
	/* __ptep_ipte is skipped when mm_exclusive(mm) is true */
	if (!mm_exclusive(mm))
		__ptep_ipte(address, ptep);
	return pte;
}

/*
 * Second half of a protection change: store the new pte and, with
 * pgstes, call pgste_set_unlock with the pgste value read back from the
 * slot PTRS_PER_PTE entries after @ptep.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
					   unsigned long address,
					   pte_t *ptep, pte_t pte)
{
	*ptep = pte;
	if (mm_has_pgste(mm))
		pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE));
}

#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
				     unsigned long address, pte_t *ptep)
{
	pte_t pte = *ptep;
	ptep_invalidate(vma->vm_mm, address, ptep);
	pgste_t pgste;
	pte_t pte;

	if (mm_has_pgste(vma->vm_mm))
		pgste = pgste_get_lock(ptep);

	pte = *ptep;
	__ptep_ipte(address, ptep);
	pte_val(*ptep) = _PAGE_TYPE_EMPTY;

	if (mm_has_pgste(vma->vm_mm)) {
		pgste = pgste_update_all(&pte, pgste);
		pgste_set_unlock(ptep, pgste);
	}
	return pte;
}

@@ -896,41 +979,68 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long addr,
					    unsigned long address,
					    pte_t *ptep, int full)
{
	pte_t pte = *ptep;
	pgste_t pgste;
	pte_t pte;

	if (full)
		pte_clear(mm, addr, ptep);
	else
		ptep_invalidate(mm, addr, ptep);
	if (mm_has_pgste(mm))
		pgste = pgste_get_lock(ptep);

	pte = *ptep;
	if (!full)
		__ptep_ipte(address, ptep);
	pte_val(*ptep) = _PAGE_TYPE_EMPTY;

	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(&pte, pgste);
		pgste_set_unlock(ptep, pgste);
	}
	return pte;
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define ptep_set_wrprotect(__mm, __addr, __ptep)			\
({									\
	pte_t __pte = *(__ptep);					\
	if (pte_write(__pte)) {						\
		(__mm)->context.flush_mm = 1;				\
		if (atomic_read(&(__mm)->context.attach_count) > 1 ||	\
		    (__mm) != current->active_mm)			\
			ptep_invalidate(__mm, __addr, __ptep);		\
		set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte));	\
	}								\
})
/*
 * Write protect the pte at @ptep if it is currently writable.
 * Returns the pte value that was found before the change. A pte that is
 * not writable is left untouched.
 */
static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
				       unsigned long address, pte_t *ptep)
{
	pte_t old = *ptep;
	pgste_t pgste;

	/* Nothing to do for a pte that is already read-only */
	if (!pte_write(old))
		return old;

	mm->context.flush_mm = 1;
	if (mm_has_pgste(mm))
		pgste = pgste_get_lock(ptep);

	/* __ptep_ipte is skipped when mm_exclusive(mm) is true */
	if (!mm_exclusive(mm))
		__ptep_ipte(address, ptep);
	*ptep = pte_wrprotect(old);

	if (mm_has_pgste(mm))
		pgste_set_unlock(ptep, pgste);
	return old;
}

#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty)	\
({									\
	int __changed = !pte_same(*(__ptep), __entry);			\
	if (__changed) {						\
		ptep_invalidate((__vma)->vm_mm, __addr, __ptep);	\
		set_pte_at((__vma)->vm_mm, __addr, __ptep, __entry);	\
	}								\
	__changed;							\
})
/*
 * Install @entry at @ptep if it differs from the current pte.
 * Returns 1 if the pte was replaced, 0 if it already matched @entry.
 * The @dirty argument is not used.
 */
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pte_t *ptep,
					pte_t entry, int dirty)
{
	struct mm_struct *mm = vma->vm_mm;
	int changed = !pte_same(*ptep, entry);
	pgste_t pgste;

	if (changed) {
		if (mm_has_pgste(mm))
			pgste = pgste_get_lock(ptep);

		__ptep_ipte(address, ptep);
		*ptep = entry;

		if (mm_has_pgste(mm))
			pgste_set_unlock(ptep, pgste);
	}
	return changed;
}

/*
 * Conversion functions: convert a page and protection to a page entry,
+2 −1
Original line number Diff line number Diff line
@@ -175,7 +175,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
		pmd = pmd_offset(pud, address);
		pte = pte_offset_kernel(pmd, address);
		if (!enable) {
			ptep_invalidate(&init_mm, address, pte);
			__ptep_ipte(address, pte);
			pte_val(*pte) = _PAGE_TYPE_EMPTY;
			continue;
		}
		*pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
Loading