Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 907cd439 authored by Xiong Zhang's avatar Xiong Zhang Committed by Ingo Molnar
Browse files

x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d



Split these helpers into a couple of per-level functions and add support for
an additional page table level.

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
[ Split off into separate patch ]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170317185515.8636-6-kirill.shutemov@linux.intel.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent d691a3cf
Loading
Loading
Loading
Loading
+149 −96
Original line number Diff line number Diff line
@@ -593,6 +593,64 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
}
#endif	/* CONFIG_PGTABLE_LEVELS == 4 */

/*
 * Walk one PMD table, invoking @func(mm, page, PT_PTE) on the PTE page
 * backing each present entry.  When @last is set this table covers the
 * end of the walked range, so only entries up to pmd_index(limit) are
 * visited; otherwise all PTRS_PER_PMD entries are scanned.
 * Returns the OR of all @func return values (non-zero = flush needed).
 */
static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
		bool last, unsigned long limit)
{
	int i, nr, flush = 0;

	nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
	for (i = 0; i < nr; i++) {
		if (!pmd_none(pmd[i]))
			flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
	}
	return flush;
}

/*
 * Walk one PUD table, calling @func on each present entry's PMD page
 * and then recursing into that PMD via xen_pmd_walk().  When @last is
 * set only entries up to pud_index(limit) are visited, and @last is
 * propagated downward only for the final visited entry, so the limit
 * clamps exactly one path through the tree.
 * Returns the OR of all @func return values (non-zero = flush needed).
 */
static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
		bool last, unsigned long limit)
{
	int i, nr, flush = 0;

	nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
	for (i = 0; i < nr; i++) {
		pmd_t *pmd;

		if (pud_none(pud[i]))
			continue;

		pmd = pmd_offset(&pud[i], 0);
		/* Skip the callback when the PMD level is folded: there is
		 * then no separate PMD page to hand to @func. */
		if (PTRS_PER_PMD > 1)
			flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
		flush |= xen_pmd_walk(mm, pmd, func,
				last && i == nr - 1, limit);
	}
	return flush;
}

/*
 * Walk one P4D table, calling @func on each present entry's PUD page
 * and then recursing into that PUD via xen_pud_walk().  Mirrors
 * xen_pud_walk(): @last limits the scan to p4d_index(limit) and is
 * forwarded only for the final visited entry.
 * Returns the OR of all @func return values (non-zero = flush needed).
 */
static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
		bool last, unsigned long limit)
{
	int i, nr, flush = 0;

	nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
	for (i = 0; i < nr; i++) {
		pud_t *pud;

		if (p4d_none(p4d[i]))
			continue;

		pud = pud_offset(&p4d[i], 0);
		/* Skip the callback when the PUD level is folded: there is
		 * then no separate PUD page to hand to @func. */
		if (PTRS_PER_PUD > 1)
			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
		flush |= xen_pud_walk(mm, pud, func,
				last && i == nr - 1, limit);
	}
	return flush;
}

/*
 * (Yet another) pagetable walker.  This one is intended for pinning a
 * pagetable.  This means that it walks a pagetable and calls the
@@ -613,10 +671,8 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
				      enum pt_level),
			  unsigned long limit)
{
	int flush = 0;
	int i, nr, flush = 0;
	unsigned hole_low, hole_high;
	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
	unsigned pgdidx, pudidx, pmdidx;

	/* The limit is the last byte to be touched */
	limit--;
@@ -633,65 +689,22 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
	hole_low = pgd_index(USER_LIMIT);
	hole_high = pgd_index(PAGE_OFFSET);

	pgdidx_limit = pgd_index(limit);
#if PTRS_PER_PUD > 1
	pudidx_limit = pud_index(limit);
#else
	pudidx_limit = 0;
#endif
#if PTRS_PER_PMD > 1
	pmdidx_limit = pmd_index(limit);
#else
	pmdidx_limit = 0;
#endif

	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
		pud_t *pud;
	nr = pgd_index(limit) + 1;
	for (i = 0; i < nr; i++) {
		p4d_t *p4d;

		if (pgdidx >= hole_low && pgdidx < hole_high)
		if (i >= hole_low && i < hole_high)
			continue;

		if (!pgd_val(pgd[pgdidx]))
		if (pgd_none(pgd[i]))
			continue;

		pud = pud_offset(&pgd[pgdidx], 0);

		if (PTRS_PER_PUD > 1) /* not folded */
			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);

		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
			pmd_t *pmd;

			if (pgdidx == pgdidx_limit &&
			    pudidx > pudidx_limit)
				goto out;

			if (pud_none(pud[pudidx]))
				continue;

			pmd = pmd_offset(&pud[pudidx], 0);

			if (PTRS_PER_PMD > 1) /* not folded */
				flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);

			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
				struct page *pte;

				if (pgdidx == pgdidx_limit &&
				    pudidx == pudidx_limit &&
				    pmdidx > pmdidx_limit)
					goto out;

				if (pmd_none(pmd[pmdidx]))
					continue;

				pte = pmd_page(pmd[pmdidx]);
				flush |= (*func)(mm, pte, PT_PTE);
			}
		}
		p4d = p4d_offset(&pgd[i], 0);
		if (PTRS_PER_P4D > 1)
			flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
		flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
	}

out:
	/* Do the top level last, so that the callbacks can use it as
	   a cue to do final things like tlb flushes. */
	flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
@@ -1150,57 +1163,97 @@ static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
	xen_free_ro_pages(pa, PAGE_SIZE);
}

/*
 * Since it is well isolated we can (and since it is perhaps large we should)
 * also free the page tables mapping the initial P->M table.
 */
static void __init xen_cleanmfnmap(unsigned long vaddr)
static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
{
	unsigned long va = vaddr & PMD_MASK;
	unsigned long pa;
	pgd_t *pgd = pgd_offset_k(va);
	pud_t *pud_page = pud_offset(pgd, 0);
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned int i;
	bool unpin;
	pte_t *pte_tbl;
	int i;

	unpin = (vaddr == 2 * PGDIR_SIZE);
	set_pgd(pgd, __pgd(0));
	do {
		pud = pud_page + pud_index(va);
		if (pud_none(*pud)) {
			va += PUD_SIZE;
		} else if (pud_large(*pud)) {
			pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
			xen_free_ro_pages(pa, PUD_SIZE);
			va += PUD_SIZE;
		} else {
			pmd = pmd_offset(pud, va);
	if (pmd_large(*pmd)) {
		pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
		xen_free_ro_pages(pa, PMD_SIZE);
			} else if (!pmd_none(*pmd)) {
				pte = pte_offset_kernel(pmd, va);
				set_pmd(pmd, __pmd(0));
				for (i = 0; i < PTRS_PER_PTE; ++i) {
					if (pte_none(pte[i]))
						break;
					pa = pte_pfn(pte[i]) << PAGE_SHIFT;
		return;
	}

	pte_tbl = pte_offset_kernel(pmd, 0);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		if (pte_none(pte_tbl[i]))
			continue;
		pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT;
		xen_free_ro_pages(pa, PAGE_SIZE);
	}
				xen_cleanmfnmap_free_pgtbl(pte, unpin);
	set_pmd(pmd, __pmd(0));
	xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin);
}

static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
{
	unsigned long pa;
	pmd_t *pmd_tbl;
	int i;

	if (pud_large(*pud)) {
		pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
		xen_free_ro_pages(pa, PUD_SIZE);
		return;
	}
			va += PMD_SIZE;
			if (pmd_index(va))

	pmd_tbl = pmd_offset(pud, 0);
	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (pmd_none(pmd_tbl[i]))
			continue;
		xen_cleanmfnmap_pmd(pmd_tbl + i, unpin);
	}
	set_pud(pud, __pud(0));
			xen_cleanmfnmap_free_pgtbl(pmd, unpin);
	xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin);
}

static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
{
	unsigned long pa;
	pud_t *pud_tbl;
	int i;

	if (p4d_large(*p4d)) {
		pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
		xen_free_ro_pages(pa, P4D_SIZE);
		return;
	}

	} while (pud_index(va) || pmd_index(va));
	xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
	pud_tbl = pud_offset(p4d, 0);
	for (i = 0; i < PTRS_PER_PUD; i++) {
		if (pud_none(pud_tbl[i]))
			continue;
		xen_cleanmfnmap_pud(pud_tbl + i, unpin);
	}
	set_p4d(p4d, __p4d(0));
	xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin);
}

/*
 * Since it is well isolated we can (and since it is perhaps large we should)
 * also free the page tables mapping the initial P->M table.
 */
static void __init xen_cleanmfnmap(unsigned long vaddr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	unsigned int i;
	bool unpin;

	/* NOTE(review): assumes the pinned initial P->M table is the one
	 * mapped at 2 * PGDIR_SIZE — confirm against the callers. */
	unpin = (vaddr == 2 * PGDIR_SIZE);
	vaddr &= PMD_MASK;
	pgd = pgd_offset_k(vaddr);
	p4d = p4d_offset(pgd, 0);
	/* With a folded p4d level PTRS_PER_P4D is 1, so this loop visits
	 * just the single pgd-equivalent entry. */
	for (i = 0; i < PTRS_PER_P4D; i++) {
		if (p4d_none(p4d[i]))
			continue;
		xen_cleanmfnmap_p4d(p4d + i, unpin);
	}
	/* Only with 5-level paging is the p4d a separate table page that
	 * must itself be cleared out of the pgd and freed. */
	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		set_pgd(pgd, __pgd(0));
		xen_cleanmfnmap_free_pgtbl(p4d, unpin);
	}
}

static void __init xen_pagetable_p2m_free(void)
+1 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@

enum pt_level {
	PT_PGD,
	PT_P4D,
	PT_PUD,
	PT_PMD,
	PT_PTE