Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1aea9b3f authored by Martin Schwidefsky's avatar Martin Schwidefsky
Browse files

s390/mm: implement 5 level pages tables



Add the logic to upgrade the page table for a 64-bit process to
five levels. This increases the TASK_SIZE from 8PB to 16EB-4K.

Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
parent 16ddcc34
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -184,7 +184,7 @@ config SCHED_OMIT_FRAME_POINTER

config PGTABLE_LEVELS
	int
	default 4
	default 5

source "init/Kconfig"

+3 −0
Original line number Diff line number Diff line
@@ -74,6 +74,7 @@ typedef struct { unsigned long pgste; } pgste_t;
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pud; } pud_t;
typedef struct { unsigned long p4d; } p4d_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef pte_t *pgtable_t;

@@ -82,12 +83,14 @@ typedef pte_t *pgtable_t;
#define pte_val(x)	((x).pte)
#define pmd_val(x)	((x).pmd)
#define pud_val(x)	((x).pud)
#define p4d_val(x)	((x).p4d)
#define pgd_val(x)      ((x).pgd)

#define __pgste(x)	((pgste_t) { (x) } )
#define __pte(x)        ((pte_t) { (x) } )
#define __pmd(x)        ((pmd_t) { (x) } )
#define __pud(x)	((pud_t) { (x) } )
#define __p4d(x)	((p4d_t) { (x) } )
#define __pgd(x)        ((pgd_t) { (x) } )
#define __pgprot(x)     ((pgprot_t) { (x) } )

+21 −4
Original line number Diff line number Diff line
@@ -51,12 +51,24 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
		return _SEGMENT_ENTRY_EMPTY;
	if (mm->context.asce_limit <= (1UL << 42))
		return _REGION3_ENTRY_EMPTY;
	if (mm->context.asce_limit <= (1UL << 53))
		return _REGION2_ENTRY_EMPTY;
	return _REGION1_ENTRY_EMPTY;
}

int crst_table_upgrade(struct mm_struct *);
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
void crst_table_downgrade(struct mm_struct *);

static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
{
	unsigned long *table = crst_table_alloc(mm);

	if (table)
		crst_table_init(table, _REGION2_ENTRY_EMPTY);
	return (p4d_t *) table;
}
#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d)

static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
	unsigned long *table = crst_table_alloc(mm);
@@ -86,9 +98,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
	crst_table_free(mm, (unsigned long *) pmd);
}

static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
	pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d);
}

static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
	pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
	p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud);
}

static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+67 −13
Original line number Diff line number Diff line
@@ -24,7 +24,6 @@
 * the S390 page table tree.
 */
#ifndef __ASSEMBLY__
#include <asm-generic/5level-fixup.h>
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
@@ -87,12 +86,15 @@ extern unsigned long zero_page_mask;
 */
#define PMD_SHIFT	20
#define PUD_SHIFT	31
#define PGDIR_SHIFT	42
#define P4D_SHIFT	42
#define PGDIR_SHIFT	53

#define PMD_SIZE        (1UL << PMD_SHIFT)
#define PMD_MASK        (~(PMD_SIZE-1))
#define PUD_SIZE	(1UL << PUD_SHIFT)
#define PUD_MASK	(~(PUD_SIZE-1))
#define P4D_SIZE	(1UL << P4D_SHIFT)
#define P4D_MASK	(~(P4D_SIZE-1))
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))

@@ -105,6 +107,7 @@ extern unsigned long zero_page_mask;
#define PTRS_PER_PTE	256
#define PTRS_PER_PMD	2048
#define PTRS_PER_PUD	2048
#define PTRS_PER_P4D	2048
#define PTRS_PER_PGD	2048

#define FIRST_USER_ADDRESS  0UL
@@ -115,6 +118,8 @@ extern unsigned long zero_page_mask;
	printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
#define pud_ERROR(e) \
	printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
#define p4d_ERROR(e) \
	printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e))
#define pgd_ERROR(e) \
	printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))

@@ -310,8 +315,8 @@ static inline int is_module_addr(void *addr)
#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
#endif

#define _REGION_ENTRY_BITS	 0xfffffffffffff227UL
#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL
#define _REGION_ENTRY_BITS	 0xfffffffffffff22fUL
#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL

/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL
@@ -564,14 +569,14 @@ static inline void crdte(unsigned long old, unsigned long new,
 */
static inline int pgd_present(pgd_t pgd)
{
	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1)
		return 1;
	return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
}

static inline int pgd_none(pgd_t pgd)
{
	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1)
		return 0;
	return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
}
@@ -589,6 +594,28 @@ static inline int pgd_bad(pgd_t pgd)
	return (pgd_val(pgd) & mask) != 0;
}

static inline int p4d_present(p4d_t p4d)
{
	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
		return 1;
	return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL;
}

static inline int p4d_none(p4d_t p4d)
{
	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
		return 0;
	return p4d_val(p4d) == _REGION2_ENTRY_EMPTY;
}

static inline unsigned long p4d_pfn(p4d_t p4d)
{
	unsigned long origin_mask;

	origin_mask = _REGION_ENTRY_ORIGIN;
	return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT;
}

static inline int pud_present(pud_t pud)
{
	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
@@ -641,6 +668,13 @@ static inline int pud_bad(pud_t pud)
	return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
}

static inline int p4d_bad(p4d_t p4d)
{
	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
		return pud_bad(__pud(p4d_val(p4d)));
	return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
}

static inline int pmd_present(pmd_t pmd)
{
	return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
@@ -794,8 +828,14 @@ static inline int pte_unused(pte_t pte)

static inline void pgd_clear(pgd_t *pgd)
{
	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
		pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
		pgd_val(*pgd) = _REGION1_ENTRY_EMPTY;
}

static inline void p4d_clear(p4d_t *p4d)
{
	if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
		p4d_val(*p4d) = _REGION2_ENTRY_EMPTY;
}

static inline void pud_clear(pud_t *pud)
@@ -1089,6 +1129,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
}

#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
@@ -1098,19 +1139,31 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)

#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)

static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
	pud_t *pud = (pud_t *) pgd;
	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
		pud = (pud_t *) pgd_deref(*pgd);
	p4d_t *p4d = (p4d_t *) pgd;

	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
		p4d = (p4d_t *) pgd_deref(*pgd);
	return p4d + p4d_index(address);
}

static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
	pud_t *pud = (pud_t *) p4d;

	if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
		pud = (pud_t *) p4d_deref(*p4d);
	return pud + pud_index(address);
}

static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
	pmd_t *pmd = (pmd_t *) pud;

	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pmd = (pmd_t *) pud_deref(*pud);
	return pmd + pmd_index(address);
@@ -1122,6 +1175,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)

#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
#define pud_page(pud) pfn_to_page(pud_pfn(pud))
#define p4d_page(pud) pfn_to_page(p4d_pfn(p4d))

/* Find an entry in the lowest level page table.. */
#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
+2 −2
Original line number Diff line number Diff line
@@ -92,11 +92,11 @@ extern void execve_tail(void);
 */

#define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_31BIT) ? \
					(1UL << 31) : (1UL << 53))
					(1UL << 31) : -PAGE_SIZE)
#define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \
					(1UL << 30) : (1UL << 41))
#define TASK_SIZE		TASK_SIZE_OF(current)
#define TASK_SIZE_MAX		(1UL << 53)
#define TASK_SIZE_MAX		(-PAGE_SIZE)

#define STACK_TOP		(test_thread_flag(TIF_31BIT) ? \
					(1UL << 31) : (1UL << 42))
Loading