Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 98104c34 authored by Konrad Rzeszutek Wilk's avatar Konrad Rzeszutek Wilk
Browse files

Merge branch 'stable/128gb.v5.1' into stable/for-linus-3.7



* stable/128gb.v5.1:
  xen/mmu: If the revector fails, don't attempt to revector anything else.
  xen/p2m: When revectoring deal with holes in the P2M array.
  xen/mmu: Release just the MFN list, not MFN list and part of pagetables.
  xen/mmu: Remove from __ka space PMD entries for pagetables.
  xen/mmu: Copy and revector the P2M tree.
  xen/p2m: Add logic to revector a P2M tree to use __va leafs.
  xen/mmu: Recycle the Xen provided L4, L3, and L2 pages
  xen/mmu: For 64-bit do not call xen_map_identity_early
  xen/mmu: use copy_page instead of memcpy.
  xen/mmu: Provide comments describing the _ka and _va aliasing issue
  xen/mmu: The xen_setup_kernel_pagetable doesn't need to return anything.
  Revert "xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain." and "xen/x86: Use memblock_reserve for sensitive areas."
  xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain.
  xen/x86: Use memblock_reserve for sensitive areas.
  xen/p2m: Fix the comment describing the P2M tree.

Conflicts:
	arch/x86/xen/mmu.c

The pagetable_init is the old xen_pagetable_setup_done and xen_pagetable_setup_start
rolled in one.

Signed-off-by: default avatarKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>
parents 25a765b7 32873187
Loading
Loading
Loading
Loading
+1 −4
Original line number Diff line number Diff line
@@ -1290,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void)
{
	struct physdev_set_iopl set_iopl;
	int rc;
	pgd_t *pgd;

	if (!xen_start_info)
		return;
@@ -1382,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void)
	acpi_numa = -1;
#endif

	pgd = (pgd_t *)xen_start_info->pt_base;

	/* Don't do the full vcpu_info placement stuff until we have a
	   possible map and a non-dummy shared_info. */
	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@ -1392,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void)
	early_boot_irqs_disabled = true;

	xen_raw_console_write("mapping kernel into physical memory\n");
	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);

	/* Allocate and initialize top and mid mfn levels for p2m structure */
	xen_build_mfn_list_list();
+148 −35
Original line number Diff line number Diff line
@@ -84,6 +84,7 @@
 */
DEFINE_SPINLOCK(xen_reservation_lock);

#ifdef CONFIG_X86_32
/*
 * Identity map, in addition to plain kernel map.  This needs to be
 * large enough to allocate page table pages to allocate the rest.
@@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock);
 */
#define LEVEL1_IDENT_ENTRIES	(PTRS_PER_PTE * 4)
static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);

#endif
#ifdef CONFIG_X86_64
/* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
@@ -1176,13 +1177,6 @@ static void xen_exit_mmap(struct mm_struct *mm)

static void xen_post_allocator_init(void);

static void __init xen_pagetable_init(void)
{
	paging_init();
	xen_setup_shared_info();
	xen_post_allocator_init();
}

static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
{
	/* reserve the range used */
@@ -1197,6 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
	}
}

#ifdef CONFIG_X86_64
static void __init xen_cleanhighmap(unsigned long vaddr,
				    unsigned long vaddr_end)
{
	unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
	pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);

	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
	 * We include the PMD passed in on _both_ boundaries. */
	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
			pmd++, vaddr += PMD_SIZE) {
		if (pmd_none(*pmd))
			continue;
		if (vaddr < (unsigned long) _text || vaddr > kernel_end)
			set_pmd(pmd, __pmd(0));
	}
	/* In case we did something silly, we should crash in this function
	 * instead of somewhere later and be confusing. */
	xen_mc_flush();
}
#endif
static void __init xen_pagetable_init(void)
{
#ifdef CONFIG_X86_64
	unsigned long size;
	unsigned long addr;
#endif
	paging_init();
	xen_setup_shared_info();
#ifdef CONFIG_X86_64
	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		unsigned long new_mfn_list;

		size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));

		/* On 32-bit, we get zero so this never gets executed. */
		new_mfn_list = xen_revector_p2m_tree();
		if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
			/* using __ka address and sticking INVALID_P2M_ENTRY! */
			memset((void *)xen_start_info->mfn_list, 0xff, size);

			/* We should be in __ka space. */
			BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
			addr = xen_start_info->mfn_list;
			/* We roundup to the PMD, which means that if anybody at this stage is
			 * using the __ka address of xen_start_info or xen_start_info->shared_info
			 * they are in going to crash. Fortunatly we have already revectored
			 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
			size = roundup(size, PMD_SIZE);
			xen_cleanhighmap(addr, addr + size);

			size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
			memblock_free(__pa(xen_start_info->mfn_list), size);
			/* And revector! Bye bye old array */
			xen_start_info->mfn_list = new_mfn_list;
		} else
			goto skip;
	}
	/* At this stage, cleanup_highmap has already cleaned __ka space
	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
	 * the ramdisk). We continue on, erasing PMD entries that point to page
	 * tables - do note that they are accessible at this stage via __va.
	 * For good measure we also round up to the PMD - which means that if
	 * anybody is using __ka address to the initial boot-stack - and try
	 * to use it - they are going to crash. The xen_start_info has been
	 * taken care of already in xen_setup_kernel_pagetable. */
	addr = xen_start_info->pt_base;
	size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);

	xen_cleanhighmap(addr, addr + size);
	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
#ifdef DEBUG
	/* This is superflous and is not neccessary, but you know what
	 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
	 * anything at this stage. */
	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
#endif
skip:
#endif
	xen_post_allocator_init();
}
static void xen_write_cr2(unsigned long cr2)
{
	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -1652,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
		BUG();
}

#ifdef CONFIG_X86_32
static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
{
	unsigned pmdidx, pteidx;
@@ -1703,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)

	set_page_prot(pmd, PAGE_KERNEL_RO);
}

#endif
void __init xen_setup_machphys_mapping(void)
{
	struct xen_machphys_mapping mapping;
@@ -1731,7 +1806,20 @@ static void convert_pfn_mfn(void *v)
	for (i = 0; i < PTRS_PER_PTE; i++)
		pte[i] = xen_make_pte(pte[i].pte);
}

static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
				 unsigned long addr)
{
	if (*pt_base == PFN_DOWN(__pa(addr))) {
		set_page_prot((void *)addr, PAGE_KERNEL);
		clear_page((void *)addr);
		(*pt_base)++;
	}
	if (*pt_end == PFN_DOWN(__pa(addr))) {
		set_page_prot((void *)addr, PAGE_KERNEL);
		clear_page((void *)addr);
		(*pt_end)--;
	}
}
/*
 * Set up the initial kernel pagetable.
 *
@@ -1743,11 +1831,13 @@ static void convert_pfn_mfn(void *v)
 * of the physical mapping once some sort of allocator has been set
 * up.
 */
pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
					 unsigned long max_pfn)
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{
	pud_t *l3;
	pmd_t *l2;
	unsigned long addr[3];
	unsigned long pt_base, pt_end;
	unsigned i;

	/* max_pfn_mapped is the last pfn mapped in the initial memory
	 * mappings. Considering that on Xen after the kernel mappings we
@@ -1755,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
	 * set max_pfn_mapped to the last real pfn mapped. */
	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));

	pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
	pt_end = pt_base + xen_start_info->nr_pt_frames;

	/* Zap identity mapping */
	init_level4_pgt[0] = __pgd(0);

	/* Pre-constructed entries are in pfn, so convert to mfn */
	/* L4[272] -> level3_ident_pgt
	 * L4[511] -> level3_kernel_pgt */
	convert_pfn_mfn(init_level4_pgt);

	/* L3_i[0] -> level2_ident_pgt */
	convert_pfn_mfn(level3_ident_pgt);
	/* L3_k[510] -> level2_kernel_pgt
	 * L3_i[511] -> level2_fixmap_pgt */
	convert_pfn_mfn(level3_kernel_pgt);

	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);

	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);

	addr[0] = (unsigned long)pgd;
	addr[1] = (unsigned long)l3;
	addr[2] = (unsigned long)l2;
	/* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
	 * Both L4[272][0] and L4[511][511] have entries that point to the same
	 * L2 (PMD) tables. Meaning that if you modify it in __va space
	 * it will be also modified in the __ka space! (But if you just
	 * modify the PMD table to point to other PTE's or none, then you
	 * are OK - which is what cleanup_highmap does) */
	copy_page(level2_ident_pgt, l2);
	/* Graft it onto L4[511][511] */
	copy_page(level2_kernel_pgt, l2);

	/* Get [511][510] and graft that in level2_fixmap_pgt */
	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);

	/* Set up identity map */
	xen_map_identity_early(level2_ident_pgt, max_pfn);
	copy_page(level2_fixmap_pgt, l2);
	/* Note that we don't do anything with level1_fixmap_pgt which
	 * we don't need. */

	/* Make pagetable pieces RO */
	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);

@@ -1791,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
	/* Unpin Xen-provided one */
	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));

	/* Switch over */
	pgd = init_level4_pgt;

	/*
	 * At this stage there can be no user pgd, and no page
	 * structure to attach it to, so make sure we just set kernel
	 * pgd.
	 */
	xen_mc_batch();
	__xen_write_cr3(true, __pa(pgd));
	__xen_write_cr3(true, __pa(init_level4_pgt));
	xen_mc_issue(PARAVIRT_LAZY_CPU);

	memblock_reserve(__pa(xen_start_info->pt_base),
			 xen_start_info->nr_pt_frames * PAGE_SIZE);
	/* We can't that easily rip out L3 and L2, as the Xen pagetables are
	 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ...  for
	 * the initial domain. For guests using the toolstack, they are in:
	 * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only
	 * rip out the [L4] (pgd), but for guests we shave off three pages.
	 */
	for (i = 0; i < ARRAY_SIZE(addr); i++)
		check_pt_base(&pt_base, &pt_end, addr[i]);

	return pgd;
	/* Our (by three pages) smaller Xen pagetable that we are using */
	memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
	/* Revector the xen_start_info */
	xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
}
#else	/* !CONFIG_X86_64 */
static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
@@ -1831,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
	 */
	swapper_kernel_pmd =
		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
	memcpy(swapper_kernel_pmd, initial_kernel_pmd,
	       sizeof(pmd_t) * PTRS_PER_PMD);
	copy_page(swapper_kernel_pmd, initial_kernel_pmd);
	swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
		__pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
	set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
@@ -1849,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
	pv_mmu_ops.write_cr3 = &xen_write_cr3;
}

pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
					 unsigned long max_pfn)
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{
	pmd_t *kernel_pmd;

@@ -1862,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
				  512*1024);

	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
	memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
	copy_page(initial_kernel_pmd, kernel_pmd);

	xen_map_identity_early(initial_kernel_pmd, max_pfn);

	memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
	copy_page(initial_page_table, pgd);
	initial_page_table[KERNEL_PGD_BOUNDARY] =
		__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);

@@ -1882,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,

	memblock_reserve(__pa(xen_start_info->pt_base),
			 xen_start_info->nr_pt_frames * PAGE_SIZE);

	return initial_page_table;
}
#endif	/* CONFIG_X86_64 */

+85 −7
Original line number Diff line number Diff line
@@ -139,11 +139,11 @@
 *      /    | ~0, ~0, ....  |
 *     |     \---------------/
 *     |
 *     p2m_missing             p2m_missing
 * /------------------\     /------------\
 * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
 * | [p2m_mid_missing]+---->| ..., ~0    |
 * \------------------/     \------------/
 *   p2m_mid_missing           p2m_missing
 * /-----------------\     /------------\
 * | [p2m_missing]   +---->| ~0, ~0, ~0 |
 * | [p2m_missing]   +---->| ..., ~0    |
 * \-----------------/     \------------/
 *
 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
 */
@@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void)

	m2p_override_init();
}
#ifdef CONFIG_X86_64
#include <linux/bootmem.h>
unsigned long __init xen_revector_p2m_tree(void)
{
	unsigned long va_start;
	unsigned long va_end;
	unsigned long pfn;
	unsigned long pfn_free = 0;
	unsigned long *mfn_list = NULL;
	unsigned long size;

	va_start = xen_start_info->mfn_list;
	/*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
	 * so make sure it is rounded up to that */
	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
	va_end = va_start + size;

	/* If we were revectored already, don't do it again. */
	if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
		return 0;

	mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
	if (!mfn_list) {
		pr_warn("Could not allocate space for a new P2M tree!\n");
		return xen_start_info->mfn_list;
	}
	/* Fill it out with INVALID_P2M_ENTRY value */
	memset(mfn_list, 0xFF, size);

	for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx;
		unsigned long *mid_p;

		if (!p2m_top[topidx])
			continue;

		if (p2m_top[topidx] == p2m_mid_missing)
			continue;

		mididx = p2m_mid_index(pfn);
		mid_p = p2m_top[topidx][mididx];
		if (!mid_p)
			continue;
		if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
			continue;

		if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
			continue;

		/* The old va. Rebase it on mfn_list */
		if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
			unsigned long *new;

			if (pfn_free  > (size / sizeof(unsigned long))) {
				WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
				     size / sizeof(unsigned long), pfn_free);
				return 0;
			}
			new = &mfn_list[pfn_free];

			copy_page(new, mid_p);
			p2m_top[topidx][mididx] = &mfn_list[pfn_free];
			p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);

			pfn_free += P2M_PER_PAGE;

		}
		/* This should be the leafs allocated for identity from _brk. */
	}
	return (unsigned long)mfn_list;

}
#else
unsigned long __init xen_revector_p2m_tree(void)
{
	return 0;
}
#endif
unsigned long get_phys_to_machine(unsigned long pfn)
{
	unsigned topidx, mididx, idx;
+18 −0
Original line number Diff line number Diff line
@@ -431,6 +431,24 @@ char * __init xen_memory_setup(void)
	 *  - mfn_list
	 *  - xen_start_info
	 * See comment above "struct start_info" in <xen/interface/xen.h>
	 * We tried to make the the memblock_reserve more selective so
	 * that it would be clear what region is reserved. Sadly we ran
	 * in the problem wherein on a 64-bit hypervisor with a 32-bit
	 * initial domain, the pt_base has the cr3 value which is not
	 * neccessarily where the pagetable starts! As Jan put it: "
	 * Actually, the adjustment turns out to be correct: The page
	 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
	 * "first L2", "first L3", so the offset to the page table base is
	 * indeed 2. When reading xen/include/public/xen.h's comment
	 * very strictly, this is not a violation (since there nothing is said
	 * that the first thing in the page table space is pointed to by
	 * pt_base; I admit that this seems to be implied though, namely
	 * do I think that it is implied that the page table space is the
	 * range [pt_base, pt_base + nt_pt_frames), whereas that
	 * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
	 * which - without a priori knowledge - the kernel would have
	 * difficulty to figure out)." - so lets just fall back to the
	 * easy way and reserve the whole region.
	 */
	memblock_reserve(__pa(xen_start_info->mfn_list),
			 xen_start_info->pt_base - xen_start_info->mfn_list);
+2 −1
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void);
void xen_setup_shared_info(void);
void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void);
pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_reserve_top(void);
extern unsigned long xen_max_p2m_pfn;

@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void);
void xen_unplug_emulated_devices(void);

void __init xen_build_dynamic_phys_to_machine(void);
unsigned long __init xen_revector_p2m_tree(void);

void xen_init_irq_ops(void);
void xen_setup_timer(int cpu);