
Commit 0c5e1577 authored by Linus Torvalds

Merge branch 'stable/bug-fixes-for-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

* 'stable/bug-fixes-for-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  x86/mm: Fix section mismatch derived from native_pagetable_reserve()
  x86,xen: introduce x86_init.mapping.pagetable_reserve
  Revert "xen/mmu: Add workaround "x86-64, mm: Put early page table high""
parents 982b2035 53f8023f
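In short: instead of Xen guessing after the fact which early pagetable pages to flip back to read-write (the mark_rw_past_pgt() heuristic deleted below), init_memory_mapping() now calls a platform hook, x86_init.mapping.pagetable_reserve, which defaults to native_pagetable_reserve() and which Xen overrides. What follows is a standalone, compilable C sketch of that hook pattern only; the kernel calls are stubbed with printf and the addresses are made up.

/*
 * Sketch of the x86_init.mapping.pagetable_reserve hook pattern
 * introduced by this merge. memblock_x86_reserve_range() and the
 * Xen RW fixup are stubbed; only the override mechanism mirrors
 * the real code.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

struct x86_init_mapping {
	void (*pagetable_reserve)(u64 start, u64 end);
};

struct x86_init_ops {
	struct x86_init_mapping mapping;
};

/* Default: just reserve the range, as native_pagetable_reserve does. */
static void native_pagetable_reserve(u64 start, u64 end)
{
	printf("memblock reserve %llx-%llx \"PGTABLE\"\n",
	       (unsigned long long)start, (unsigned long long)end);
}

struct x86_init_ops x86_init = {
	.mapping = { .pagetable_reserve = native_pagetable_reserve },
};

/* Xen override: reserve, then mark the unused tail RW again. */
static void xen_mapping_pagetable_reserve(u64 start, u64 end)
{
	native_pagetable_reserve(start, end);
	printf("xen: setting RW the range past %llx\n",
	       (unsigned long long)end);
}

int main(void)
{
	/* init_memory_mapping calls through the hook... */
	x86_init.mapping.pagetable_reserve(0x1000, 0x3000);

	/* ...and xen_init_mmu_ops swaps in the Xen version at boot. */
	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
	x86_init.mapping.pagetable_reserve(0x1000, 0x3000);
	return 0;
}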
arch/x86/include/asm/pgtable_types.h +1 −0
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);

+extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_setup_start(pgd_t *base);
extern void native_pagetable_setup_done(pgd_t *base);
arch/x86/include/asm/x86_init.h +12 −0
@@ -67,6 +67,17 @@ struct x86_init_oem {
	void (*banner)(void);
};

+/**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve:	reserve a range of addresses for kernel pagetable usage
+ *
+ * For more details on the purpose of this hook, look in
+ * init_memory_mapping and the commit that added it.
+ */
+struct x86_init_mapping {
+	void (*pagetable_reserve)(u64 start, u64 end);
+};
+
/**
 * struct x86_init_paging - platform specific paging functions
 * @pagetable_setup_start:	platform specific pre paging_init() call
@@ -123,6 +134,7 @@ struct x86_init_ops {
	struct x86_init_mpparse		mpparse;
	struct x86_init_irqs		irqs;
	struct x86_init_oem		oem;
+	struct x86_init_mapping		mapping;
	struct x86_init_paging		paging;
	struct x86_init_timers		timers;
	struct x86_init_iommu		iommu;
arch/x86/kernel/x86_init.c +4 −0
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
		.banner			= default_banner,
	},

+	.mapping = {
+		.pagetable_reserve		= native_pagetable_reserve,
+	},
+
	.paging = {
		.pagetable_setup_start	= native_pagetable_setup_start,
		.pagetable_setup_done	= native_pagetable_setup_done,
arch/x86/mm/init.c +22 −2
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
		end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
}

+void __init native_pagetable_reserve(u64 start, u64 end)
+{
+	memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
struct map_range {
	unsigned long start;
	unsigned long end;
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,

	__flush_tlb_all();

+	/*
+	 * Reserve the kernel pagetable pages we used (pgt_buf_start -
+	 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+	 * so that they can be reused for other purposes.
+	 *
+	 * On native it just means calling memblock_x86_reserve_range, on Xen it
+	 * also means marking RW the pagetable pages that we allocated before
+	 * but that haven't been used.
+	 *
+	 * In fact on xen we mark RO the whole range pgt_buf_start -
+	 * pgt_buf_top, because we have to make sure that when
+	 * init_memory_mapping reaches the pagetable pages area, it maps
+	 * RO all the pagetable pages, including the ones that are beyond
+	 * pgt_buf_end at that time.
+	 */
 	if (!after_bootmem && pgt_buf_end > pgt_buf_start)
-		memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
-				 pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+		x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+				PFN_PHYS(pgt_buf_end));

	if (!after_bootmem)
		early_memtest(start, end);
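A note on the switch from pgt_buf_start << PAGE_SHIFT to PFN_PHYS(pgt_buf_start): PFN_PHYS (include/linux/pfn.h) casts to phys_addr_t before shifting, which matters once the hook takes u64 arguments, since a plain shift of a narrow PFN truncates physical addresses above 4 GiB. A minimal, self-contained demonstration (PAGE_SHIFT and the PFN hard-coded here for illustration):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

/* Mirrors include/linux/pfn.h: widen to the physical-address type
 * before shifting. */
typedef uint64_t phys_addr_t;
#define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT)

int main(void)
{
	uint32_t pfn = 0x100001;	/* a frame just above 4 GiB */

	/* Shifting in the narrow type wraps the physical address... */
	printf("raw shift: 0x%x\n", pfn << PAGE_SHIFT);
	/* ...while PFN_PHYS widens to 64 bits first. */
	printf("PFN_PHYS : 0x%llx\n", (unsigned long long)PFN_PHYS(pfn));
	return 0;
}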
arch/x86/xen/mmu.c +15 −123
@@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
{
}

+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+	/* reserve the range used */
+	native_pagetable_reserve(start, end);
+
+	/* set as RW the rest */
+	printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+			PFN_PHYS(pgt_buf_top));
+	while (end < PFN_PHYS(pgt_buf_top)) {
+		make_lowmem_page_readwrite(__va(end));
+		end += PAGE_SIZE;
+	}
+}
+
static void xen_post_allocator_init(void);

static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -1463,119 +1477,6 @@ static int xen_pgd_alloc(struct mm_struct *mm)
	return ret;
}

-#ifdef CONFIG_X86_64
-static __initdata u64 __last_pgt_set_rw = 0;
-static __initdata u64 __pgt_buf_start = 0;
-static __initdata u64 __pgt_buf_end = 0;
-static __initdata u64 __pgt_buf_top = 0;
-/*
- * As a consequence of the commit:
- *
- * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
- * Author: Yinghai Lu <yinghai@kernel.org>
- * Date:   Fri Dec 17 16:58:28 2010 -0800
- *
- *     x86-64, mm: Put early page table high
- *
- * at some point init_memory_mapping is going to reach the pagetable pages
- * area and map those pages too (mapping them as normal memory that falls
- * in the range of addresses passed to init_memory_mapping as argument).
- * Some of those pages are already pagetable pages (they are in the range
- * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
- * everything is fine.
- * Some of these pages are not pagetable pages yet (they fall in the range
- * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
- * are going to be mapped RW.  When these pages become pagetable pages and
- * are hooked into the pagetable, xen will find that the guest has already
- * a RW mapping of them somewhere and fail the operation.
- * The reason Xen requires pagetables to be RO is that the hypervisor needs
- * to verify that the pagetables are valid before using them. The validation
- * operations are called "pinning".
- *
- * In order to fix the issue we mark all the pages in the entire range
- * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
- * is completed only the range pgt_buf_start-pgt_buf_end is reserved by
- * init_memory_mapping. Hence the kernel is going to crash as soon as one
- * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
- * ranges are RO).
- *
- * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_
- * the init_memory_mapping has completed (in a perfect world we would
- * call this function from init_memory_mapping, but lets ignore that).
- *
- * Because we are called _after_ init_memory_mapping the pgt_buf_[start,
- * end,top] have all changed to new values (b/c init_memory_mapping
- * is called and setting up another new page-table). Hence, the first time
- * we enter this function, we save away the pgt_buf_start value and update
- * the pgt_buf_[end,top].
- *
- * When we detect that the "old" pgt_buf_start through pgt_buf_end
- * PFNs have been reserved (so memblock_x86_reserve_range has been called),
- * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.
- *
- * And then we update those "old" pgt_buf_[end|top] with the new ones
- * so that we can redo this on the next pagetable.
- */
-static __init void mark_rw_past_pgt(void) {
-
-	if (pgt_buf_end > pgt_buf_start) {
-		u64 addr, size;
-
-		/* Save it away. */
-		if (!__pgt_buf_start) {
-			__pgt_buf_start = pgt_buf_start;
-			__pgt_buf_end = pgt_buf_end;
-			__pgt_buf_top = pgt_buf_top;
-			return;
-		}
-		/* If we get the range that starts at __pgt_buf_end that means
-		 * the range is reserved, and that in 'init_memory_mapping'
-		 * the 'memblock_x86_reserve_range' has been called with the
-		 * outdated __pgt_buf_start, __pgt_buf_end (the "new"
-		 * pgt_buf_[start|end|top] refer now to a new pagetable.
-		 * Note: we are called _after_ the pgt_buf_[..] have been
-		 * updated.*/
-
-		addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start),
-						       &size, PAGE_SIZE);
-
-		/* Still not reserved, meaning 'memblock_x86_reserve_range'
-		 * hasn't been called yet. Update the _end and _top.*/
-		if (addr == PFN_PHYS(__pgt_buf_start)) {
-			__pgt_buf_end = pgt_buf_end;
-			__pgt_buf_top = pgt_buf_top;
-			return;
-		}
-
-		/* OK, the area is reserved, meaning it is time for us to
-		 * set RW for the old end->top PFNs. */
-
-		/* ..unless we had already done this. */
-		if (__pgt_buf_end == __last_pgt_set_rw)
-			return;
-
-		addr = PFN_PHYS(__pgt_buf_end);
-
-		/* set as RW the rest */
-		printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n",
-			PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top));
-
-		while (addr < PFN_PHYS(__pgt_buf_top)) {
-			make_lowmem_page_readwrite(__va(addr));
-			addr += PAGE_SIZE;
-		}
-		/* And update everything so that we are ready for the next
-		 * pagetable (the one created for regions past 4GB) */
-		__last_pgt_set_rw = __pgt_buf_end;
-		__pgt_buf_start = pgt_buf_start;
-		__pgt_buf_end = pgt_buf_end;
-		__pgt_buf_top = pgt_buf_top;
-	}
-	return;
-}
-#else
-static __init void mark_rw_past_pgt(void) { }
-#endif
static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
#ifdef CONFIG_X86_64
@@ -1601,14 +1502,6 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);

-	/*
-	 * A bit of optimization. We do not need to call the workaround
-	 * when xen_set_pte_init is called with a PTE with 0 as PFN.
-	 * That is b/c the pagetable at that point are just being populated
-	 * with empty values and we can save some cycles by not calling
-	 * the 'memblock' code.*/
-	if (pfn)
-		mark_rw_past_pgt();
	/*
	 * If the new pfn is within the range of the newly allocated
	 * kernel pagetable, and it isn't being mapped into an
@@ -2118,8 +2011,6 @@ __init void xen_ident_map_ISA(void)

static __init void xen_post_allocator_init(void)
{
-	mark_rw_past_pgt();
-
#ifdef CONFIG_XEN_DEBUG
	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
#endif
@@ -2228,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {

void __init xen_init_mmu_ops(void)
{
+	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
	pv_mmu_ops = xen_mmu_ops;
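For context on why the removed workaround existed at all: as the deleted comment above explains, Xen refuses to pin a page as a pagetable while any writable mapping of it remains. A toy model of that validation rule; all names here are invented for illustration and are not a Xen API:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the pinning rule described in the deleted comment:
 * a page may become a pagetable only if no RW mapping of it exists. */
enum mapping { MAPPED_RO, MAPPED_RW, UNMAPPED };

static bool xen_can_pin(enum mapping m)
{
	return m != MAPPED_RW;	/* a live RW alias fails the pin */
}

int main(void)
{
	/* Pages in pgt_buf_start..pgt_buf_end were mapped RO: pinnable. */
	printf("RO page pin: %s\n", xen_can_pin(MAPPED_RO) ? "ok" : "FAIL");
	/* Pages in pgt_buf_end..pgt_buf_top used to be mapped RW by
	 * init_memory_mapping, so hooking them in later failed; the new
	 * pagetable_reserve hook sidesteps this by marking the whole
	 * buffer RO up front and giving the unused tail back as RW. */
	printf("RW page pin: %s\n", xen_can_pin(MAPPED_RW) ? "ok" : "FAIL");
	return 0;
}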