Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 520045db authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branches 'upstream/xenfs' and 'upstream/core' of...

Merge branches 'upstream/xenfs' and 'upstream/core' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen

* 'upstream/xenfs' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen:
  xen/privcmd: make privcmd visible in domU
  xen/privcmd: move remap_domain_mfn_range() to core xen code and export.
  privcmd: MMAPBATCH: Fix error handling/reporting
  xenbus: export xen_store_interface for xenfs
  xen/privcmd: make sure vma is ours before doing anything to it
  xen/privcmd: print SIGBUS faults
  xen/xenfs: set_page_dirty is supposed to return true if it dirties
  xen/privcmd: create address space to allow writable mmaps
  xen: add privcmd driver
  xen: add variable hypercall caller
  xen: add xen_set_domain_pte()
  xen: add /proc/xen/xsd_{kva,port} to xenfs

* 'upstream/core' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen: (29 commits)
  xen: include xen/xen.h for definition of xen_initial_domain()
  xen: use host E820 map for dom0
  xen: correctly rebuild mfn list list after migration.
  xen: improvements to VIRQ_DEBUG output
  xen: set up IRQ before binding virq to evtchn
  xen: ensure that all event channels start off bound to VCPU 0
  xen/hvc: only notify if we actually sent something
  xen: don't add extra_pages for RAM after mem_end
  xen: add support for PAT
  xen: make sure xen_max_p2m_pfn is up to date
  xen: limit extra memory to a certain ratio of base
  xen: add extra pages for E820 RAM regions, even if beyond mem_end
  xen: make sure xen_extra_mem_start is beyond all non-RAM e820
  xen: implement "extra" memory to reserve space for pages not present at boot
  xen: Use host-provided E820 map
  xen: don't map missing memory
  xen: defer building p2m mfn structures until kernel is mapped
  xen: add return value to set_phys_to_machine()
  xen: convert p2m to a 3 level tree
  xen: make install_p2mtop_page() static
  ...

Fix up trivial conflict in arch/x86/xen/mmu.c, and fix the use of
'reserve_early()' - in the new memblock world order it is now
'memblock_x86_reserve_range()' instead. Pointed out by Jeremy.
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -200,6 +200,23 @@ extern struct { char _entry[32]; } hypercall_page[];
	(type)__res;							\
})

static inline long
privcmd_call(unsigned call,
	     unsigned long a1, unsigned long a2,
	     unsigned long a3, unsigned long a4,
	     unsigned long a5)
{
	__HYPERCALL_DECLS;
	__HYPERCALL_5ARG(a1, a2, a3, a4, a5);

	asm volatile("call *%[call]"
		     : __HYPERCALL_5PARAM
		     : [call] "a" (&hypercall_page[call])
		     : __HYPERCALL_CLOBBER5);

	return (long)__res;
}

static inline int
HYPERVISOR_set_trap_table(struct trap_info *table)
{
+10 −2
Original line number Diff line number Diff line
@@ -37,14 +37,21 @@ typedef struct xpaddr {


extern unsigned long get_phys_to_machine(unsigned long pfn);
extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);

static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
	unsigned long mfn;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return pfn;

	return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
	mfn = get_phys_to_machine(pfn);

	if (mfn != INVALID_P2M_ENTRY)
		mfn &= ~FOREIGN_FRAME_BIT;

	return mfn;
}

static inline int phys_to_machine_mapping_valid(unsigned long pfn)
@@ -159,6 +166,7 @@ static inline pte_t __pte_ma(pteval_t x)

#define pgd_val_ma(x)	((x).pgd)

void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);

xmaddr_t arbitrary_virt_to_machine(void *address);
unsigned long arbitrary_virt_to_mfn(void *vaddr);
+4 −7
Original line number Diff line number Diff line
@@ -19,15 +19,12 @@ config XEN_PVHVM
	depends on X86_LOCAL_APIC

config XEN_MAX_DOMAIN_MEMORY
       int "Maximum allowed size of a domain in gigabytes"
       default 8 if X86_32
       default 32 if X86_64
       int
       default 128
       depends on XEN
       help
         The pseudo-physical to machine address array is sized
         according to the maximum possible memory size of a Xen
         domain.  This array uses 1 page per gigabyte, so there's no
         need to be too stingy here.
         This only affects the sizing of some bss arrays, the unused
         portions of which are freed.

config XEN_SAVE_RESTORE
       bool
+8 −8
Original line number Diff line number Diff line
@@ -136,9 +136,6 @@ static void xen_vcpu_setup(int cpu)
	info.mfn = arbitrary_virt_to_mfn(vcpup);
	info.offset = offset_in_page(vcpup);

	printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
	       cpu, vcpup, info.mfn, info.offset);

	/* Check to see if the hypervisor will put the vcpu_info
	   structure where we want it, which allows direct access via
	   a percpu-variable. */
@@ -152,9 +149,6 @@ static void xen_vcpu_setup(int cpu)
		/* This cpu is using the registered vcpu info, even if
		   later ones fail to. */
		per_cpu(xen_vcpu, cpu) = vcpup;

		printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
		       cpu, vcpup);
	}
}

@@ -836,6 +830,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
		   Xen console noise. */
		break;

	case MSR_IA32_CR_PAT:
		if (smp_processor_id() == 0)
			xen_set_pat(((u64)high << 32) | low);
		break;

	default:
		ret = native_write_msr_safe(msr, low, high);
	}
@@ -874,8 +873,6 @@ void xen_setup_vcpu_info_placement(void)
	/* xen_vcpu_setup managed to place the vcpu_info within the
	   percpu area for all cpus, so make use of it */
	if (have_vcpu_info_placement) {
		printk(KERN_INFO "Xen: using vcpu_info placement\n");

		pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
		pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
		pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
@@ -1189,6 +1186,9 @@ asmlinkage void __init xen_start_kernel(void)
	xen_raw_console_write("mapping kernel into physical memory\n");
	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);

	/* Allocate and initialize top and mid mfn levels for p2m structure */
	xen_build_mfn_list_list();

	init_mm.pgd = pgd;

	/* keep using Xen gdt for now; no urgent need to change it */
+425 −76
Original line number Diff line number Diff line
@@ -57,6 +57,7 @@
#include <asm/linkage.h>
#include <asm/page.h>
#include <asm/init.h>
#include <asm/pat.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -140,7 +141,8 @@ static inline void check_zero(void)
 * large enough to allocate page table pages to allocate the rest.
 * Each page can map 2MB.
 */
static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
#define LEVEL1_IDENT_ENTRIES	(PTRS_PER_PTE * 4)
static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);

#ifdef CONFIG_X86_64
/* l3 pud for userspace vsyscall mapping */
@@ -171,49 +173,182 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
 */
#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)

/*
 * Xen leaves the responsibility for maintaining p2m mappings to the
 * guests themselves, but it must also access and update the p2m array
 * during suspend/resume when all the pages are reallocated.
 *
 * The p2m table is logically a flat array, but we implement it as a
 * three-level tree to allow the address space to be sparse.
 *
 *                               Xen
 *                                |
 *     p2m_top              p2m_top_mfn
 *       /  \                   /   \
 * p2m_mid p2m_mid	p2m_mid_mfn p2m_mid_mfn
 *    / \      / \         /           /
 *  p2m p2m p2m p2m p2m p2m p2m ...
 *
 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
 *
 * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
 * maximum representable pseudo-physical address space is:
 *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
 *
 * P2M_PER_PAGE depends on the architecture, as a mfn is always
 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
 * 512 and 1024 entries respectively. 
 */

unsigned long xen_max_p2m_pfn __read_mostly;

#define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
#define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))

/* Placeholder for holes in the address space */
static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
		{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)

 /* Array of pointers to pages containing p2m entries */
static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
		{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
/* Placeholders for holes in the address space */
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);

/* Arrays of p2m arrays expressed in mfns used for save/restore */
static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);

static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
	__page_aligned_bss;
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));

static inline unsigned p2m_top_index(unsigned long pfn)
{
	BUG_ON(pfn >= MAX_DOMAIN_PAGES);
	return pfn / P2M_ENTRIES_PER_PAGE;
	BUG_ON(pfn >= MAX_P2M_PFN);
	return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
}

static inline unsigned p2m_mid_index(unsigned long pfn)
{
	return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
	return pfn % P2M_ENTRIES_PER_PAGE;
	return pfn % P2M_PER_PAGE;
}

static void p2m_top_init(unsigned long ***top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = p2m_mid_missing;
}

static void p2m_top_mfn_init(unsigned long *top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = virt_to_mfn(p2m_mid_missing_mfn);
}

static void p2m_top_mfn_p_init(unsigned long **top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = p2m_mid_missing_mfn;
}

/* Build the parallel p2m_top_mfn structures */
static void p2m_mid_init(unsigned long **mid)
{
	unsigned i;

	for (i = 0; i < P2M_MID_PER_PAGE; i++)
		mid[i] = p2m_missing;
}

static void p2m_mid_mfn_init(unsigned long *mid)
{
	unsigned i;

	for (i = 0; i < P2M_MID_PER_PAGE; i++)
		mid[i] = virt_to_mfn(p2m_missing);
}

static void p2m_init(unsigned long *p2m)
{
	unsigned i;

	for (i = 0; i < P2M_MID_PER_PAGE; i++)
		p2m[i] = INVALID_P2M_ENTRY;
}

/*
 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
 *
 * This is called both at boot time, and after resuming from suspend:
 * - At boot time we're called very early, and must use extend_brk()
 *   to allocate memory.
 *
 * - After resume we're called from within stop_machine, but the mfn
 *   tree should alreay be completely allocated.
 */
void xen_build_mfn_list_list(void)
{
	unsigned pfn, idx;
	unsigned long pfn;

	/* Pre-initialize p2m_top_mfn to be completely missing */
	if (p2m_top_mfn == NULL) {
		p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_mid_mfn_init(p2m_mid_missing_mfn);

		p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_mfn_p_init(p2m_top_mfn_p);

	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
		p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_mfn_init(p2m_top_mfn);
	} else {
		/* Reinitialise, mfn's all change after migration */
		p2m_mid_mfn_init(p2m_mid_missing_mfn);
	}

	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx = p2m_mid_index(pfn);
		unsigned long **mid;
		unsigned long *mid_mfn_p;

		p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
		mid = p2m_top[topidx];
		mid_mfn_p = p2m_top_mfn_p[topidx];

		/* Don't bother allocating any mfn mid levels if
		 * they're just missing, just update the stored mfn,
		 * since all could have changed over a migrate.
		 */
		if (mid == p2m_mid_missing) {
			BUG_ON(mididx);
			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
			pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
			continue;
		}

		if (mid_mfn_p == p2m_mid_missing_mfn) {
			/*
			 * XXX boot-time only!  We should never find
			 * missing parts of the mfn tree after
			 * runtime.  extend_brk() will BUG if we call
			 * it too late.
			 */
			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
			p2m_mid_mfn_init(mid_mfn_p);

			p2m_top_mfn_p[topidx] = mid_mfn_p;
		}

	for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
		unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
		p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
		mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
	}
}

@@ -222,8 +357,8 @@ void xen_setup_mfn_list_list(void)
	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);

	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		virt_to_mfn(p2m_top_mfn_list);
	HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
		virt_to_mfn(p2m_top_mfn);
	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
}

/* Set up p2m_top to point to the domain-builder provided p2m pages */
@@ -231,98 +366,176 @@ void __init xen_build_dynamic_phys_to_machine(void)
{
	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
	unsigned pfn;
	unsigned long pfn;

	xen_max_p2m_pfn = max_pfn;

	p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_init(p2m_missing);

	p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_mid_init(p2m_mid_missing);

	for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_top_init(p2m_top);

	/*
	 * The domain builder gives us a pre-constructed p2m array in
	 * mfn_list for all the pages initially given to us, so we just
	 * need to graft that into our tree structure.
	 */
	for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx = p2m_mid_index(pfn);

		p2m_top[topidx] = &mfn_list[pfn];
		if (p2m_top[topidx] == p2m_mid_missing) {
			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
			p2m_mid_init(mid);

			p2m_top[topidx] = mid;
		}

	xen_build_mfn_list_list();
		p2m_top[topidx][mididx] = &mfn_list[pfn];
	}
}

unsigned long get_phys_to_machine(unsigned long pfn)
{
	unsigned topidx, idx;
	unsigned topidx, mididx, idx;

	if (unlikely(pfn >= MAX_DOMAIN_PAGES))
	if (unlikely(pfn >= MAX_P2M_PFN))
		return INVALID_P2M_ENTRY;

	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);
	idx = p2m_index(pfn);
	return p2m_top[topidx][idx];

	return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);

/* install a  new p2m_top page */
bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
static void *alloc_p2m_page(void)
{
	unsigned topidx = p2m_top_index(pfn);
	unsigned long **pfnp, *mfnp;
	unsigned i;
	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
}

	pfnp = &p2m_top[topidx];
	mfnp = &p2m_top_mfn[topidx];
static void free_p2m_page(void *p)
{
	free_page((unsigned long)p);
}

	for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
		p[i] = INVALID_P2M_ENTRY;
/* 
 * Fully allocate the p2m structure for a given pfn.  We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync.  We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
 */
static bool alloc_p2m(unsigned long pfn)
{
	unsigned topidx, mididx;
	unsigned long ***top_p, **mid;
	unsigned long *top_mfn_p, *mid_mfn;

	if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
		*mfnp = virt_to_mfn(p);
		return true;
	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);

	top_p = &p2m_top[topidx];
	mid = *top_p;

	if (mid == p2m_mid_missing) {
		/* Mid level is missing, allocate a new one */
		mid = alloc_p2m_page();
		if (!mid)
			return false;

		p2m_mid_init(mid);

		if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
			free_p2m_page(mid);
	}

	top_mfn_p = &p2m_top_mfn[topidx];
	mid_mfn = p2m_top_mfn_p[topidx];

	BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

	if (mid_mfn == p2m_mid_missing_mfn) {
		/* Separately check the mid mfn level */
		unsigned long missing_mfn;
		unsigned long mid_mfn_mfn;

		mid_mfn = alloc_p2m_page();
		if (!mid_mfn)
			return false;

		p2m_mid_mfn_init(mid_mfn);

		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
		mid_mfn_mfn = virt_to_mfn(mid_mfn);
		if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
			free_p2m_page(mid_mfn);
		else
			p2m_top_mfn_p[topidx] = mid_mfn;
	}

static void alloc_p2m(unsigned long pfn)
{
	unsigned long *p;
	if (p2m_top[topidx][mididx] == p2m_missing) {
		/* p2m leaf page is missing */
		unsigned long *p2m;

	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
	BUG_ON(p == NULL);
		p2m = alloc_p2m_page();
		if (!p2m)
			return false;

	if (!install_p2mtop_page(pfn, p))
		free_page((unsigned long)p);
		p2m_init(p2m);

		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
			free_p2m_page(p2m);
		else
			mid_mfn[mididx] = virt_to_mfn(p2m);
	}

	return true;
}

/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	unsigned topidx, idx;
	unsigned topidx, mididx, idx;

	if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
	if (unlikely(pfn >= MAX_P2M_PFN)) {
		BUG_ON(mfn != INVALID_P2M_ENTRY);
		return true;
	}

	topidx = p2m_top_index(pfn);
	if (p2m_top[topidx] == p2m_missing) {
		if (mfn == INVALID_P2M_ENTRY)
			return true;
		return false;
	}

	mididx = p2m_mid_index(pfn);
	idx = p2m_index(pfn);
	p2m_top[topidx][idx] = mfn;

	if (p2m_top[topidx][mididx] == p2m_missing)
		return mfn == INVALID_P2M_ENTRY;

	p2m_top[topidx][mididx][idx] = mfn;

	return true;
}

void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
		return;
		return true;
	}

	if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {
		alloc_p2m(pfn);
		if (!alloc_p2m(pfn))
			return false;

		if (!__set_phys_to_machine(pfn, mfn))
			BUG();
			return false;
	}

	return true;
}

unsigned long arbitrary_virt_to_mfn(void *vaddr)
@@ -399,7 +612,7 @@ static bool xen_iomap_pte(pte_t pte)
	return pte_flags(pte) & _PAGE_IOMAP;
}

static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
{
	struct multicall_space mcs;
	struct mmu_update *u;
@@ -411,10 +624,16 @@ static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
	u->ptr = arbitrary_virt_to_machine(ptep).maddr;
	u->val = pte_val_ma(pteval);

	MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO);
	MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);

	xen_mc_issue(PARAVIRT_LAZY_MMU);
}
EXPORT_SYMBOL_GPL(xen_set_domain_pte);

static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
{
	xen_set_domain_pte(ptep, pteval, DOMID_IO);
}

static void xen_extend_mmu_update(const struct mmu_update *update)
{
@@ -561,7 +780,20 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
	if (val & _PAGE_PRESENT) {
		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
		pteval_t flags = val & PTE_FLAGS_MASK;
		val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
		unsigned long mfn = pfn_to_mfn(pfn);

		/*
		 * If there's no mfn for the pfn, then just create an
		 * empty non-present pte.  Unfortunately this loses
		 * information about the original pfn, so
		 * pte_mfn_to_pfn is asymmetric.
		 */
		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
			mfn = 0;
			flags = 0;
		}

		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
	}

	return val;
@@ -583,10 +815,18 @@ static pteval_t iomap_pte(pteval_t val)

pteval_t xen_pte_val(pte_t pte)
{
	if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP))
		return pte.pte;
	pteval_t pteval = pte.pte;

	/* If this is a WC pte, convert back from Xen WC to Linux WC */
	if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
		WARN_ON(!pat_enabled);
		pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
	}

	return pte_mfn_to_pfn(pte.pte);
	if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
		return pteval;

	return pte_mfn_to_pfn(pteval);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);

@@ -596,10 +836,48 @@ pgdval_t xen_pgd_val(pgd_t pgd)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);

/*
 * Xen's PAT setup is part of its ABI, though I assume entries 6 & 7
 * are reserved for now, to correspond to the Intel-reserved PAT
 * types.
 *
 * We expect Linux's PAT set as follows:
 *
 * Idx  PTE flags        Linux    Xen    Default
 * 0                     WB       WB     WB
 * 1            PWT      WC       WT     WT
 * 2        PCD          UC-      UC-    UC-
 * 3        PCD PWT      UC       UC     UC
 * 4    PAT              WB       WC     WB
 * 5    PAT     PWT      WC       WP     WT
 * 6    PAT PCD          UC-      UC     UC-
 * 7    PAT PCD PWT      UC       UC     UC
 */

void xen_set_pat(u64 pat)
{
	/* We expect Linux to use a PAT setting of
	 * UC UC- WC WB (ignoring the PAT flag) */
	WARN_ON(pat != 0x0007010600070106ull);
}

pte_t xen_make_pte(pteval_t pte)
{
	phys_addr_t addr = (pte & PTE_PFN_MASK);

	/* If Linux is trying to set a WC pte, then map to the Xen WC.
	 * If _PAGE_PAT is set, then it probably means it is really
	 * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
	 * things work out OK...
	 *
	 * (We should never see kernel mappings with _PAGE_PSE set,
	 * but we could see hugetlbfs mappings, I think.).
	 */
	if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) {
		if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
			pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
	}

	/*
	 * Unprivileged domains are allowed to do IOMAPpings for
	 * PCI passthrough, but not map ISA space.  The ISA
@@ -1712,6 +1990,9 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
	unsigned ident_pte;
	unsigned long pfn;

	level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
				      PAGE_SIZE);

	ident_pte = 0;
	pfn = 0;
	for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
@@ -1722,7 +2003,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
			pte_page = m2v(pmd[pmdidx].pmd);
		else {
			/* Check for free pte pages */
			if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
			if (ident_pte == LEVEL1_IDENT_ENTRIES)
				break;

			pte_page = &level1_ident_pgt[ident_pte];
@@ -1837,13 +2118,15 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
	return pgd;
}
#else	/* !CONFIG_X86_64 */
static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD);

__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
					 unsigned long max_pfn)
{
	pmd_t *kernel_pmd;

	level2_kernel_pgt = extend_brk(sizeof(pmd_t *) * PTRS_PER_PMD, PAGE_SIZE);

	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
				  xen_start_info->nr_pt_frames * PAGE_SIZE +
				  512*1024);
@@ -2269,6 +2552,72 @@ void __init xen_hvm_init_mmu_ops(void)
}
#endif

#define REMAP_BATCH_SIZE 16

struct remap_data {
	unsigned long mfn;
	pgprot_t prot;
	struct mmu_update *mmu_update;
};

static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
				 unsigned long addr, void *data)
{
	struct remap_data *rmd = data;
	pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));

	rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
	rmd->mmu_update->val = pte_val_ma(pte);
	rmd->mmu_update++;

	return 0;
}

int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
			       unsigned long addr,
			       unsigned long mfn, int nr,
			       pgprot_t prot, unsigned domid)
{
	struct remap_data rmd;
	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
	int batch;
	unsigned long range;
	int err = 0;

	prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);

	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;

	rmd.mfn = mfn;
	rmd.prot = prot;

	while (nr) {
		batch = min(REMAP_BATCH_SIZE, nr);
		range = (unsigned long)batch << PAGE_SHIFT;

		rmd.mmu_update = mmu_update;
		err = apply_to_page_range(vma->vm_mm, addr, range,
					  remap_area_mfn_pte_fn, &rmd);
		if (err)
			goto out;

		err = -EFAULT;
		if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
			goto out;

		nr -= batch;
		addr += range;
	}

	err = 0;
out:

	flush_tlb_all();

	return err;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);

#ifdef CONFIG_XEN_DEBUG_FS

static struct dentry *d_mmu_debug;
Loading