Merge branches 'upstream/xenfs' and 'upstream/core' of... (520045db) · Commits · e / devices / android_kernel_samsung_universal8895

arch/x86/include/asm/xen/hypercall.h

+17 −0

Original line number	Diff line number	Diff line
		@@ -200,6 +200,23 @@ extern struct { char _entry[32]; } hypercall_page[];
		(type)__res; \
		})

		/*
		 * Issue hypercall number 'call' with five arguments a1..a5.
		 *
		 * The hypercall is made by an indirect call to the address of
		 * hypercall_page[call]; the result is returned as __res cast to long.
		 * __res and the argument registers are presumably declared by
		 * __HYPERCALL_DECLS / __HYPERCALL_5ARG (defined outside this view).
		 *
		 * NOTE(review): 'call' is used to index hypercall_page without any
		 * range check in this function - confirm that callers validate it.
		 */
		static inline long
		privcmd_call(unsigned call,
		unsigned long a1, unsigned long a2,
		unsigned long a3, unsigned long a4,
		unsigned long a5)
		{
		__HYPERCALL_DECLS;
		__HYPERCALL_5ARG(a1, a2, a3, a4, a5);

		/* Indirect call through the address of the selected hypercall_page entry. */
		asm volatile("call *%[call]"
		: __HYPERCALL_5PARAM
		: [call] "a" (&hypercall_page[call])
		: __HYPERCALL_CLOBBER5);

		return (long)__res;
		}

		static inline int
		HYPERVISOR_set_trap_table(struct trap_info *table)
		{

arch/x86/include/asm/xen/page.h

+10 −2

Original line number	Diff line number	Diff line
		@@ -37,14 +37,21 @@ typedef struct xpaddr {


		extern unsigned long get_phys_to_machine(unsigned long pfn);
		extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
		extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);

		static inline unsigned long pfn_to_mfn(unsigned long pfn)
		{
		unsigned long mfn;

		if (xen_feature(XENFEAT_auto_translated_physmap))
		return pfn;

		return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
		mfn = get_phys_to_machine(pfn);

		if (mfn != INVALID_P2M_ENTRY)
		mfn &= ~FOREIGN_FRAME_BIT;

		return mfn;
		}

		static inline int phys_to_machine_mapping_valid(unsigned long pfn)
		@@ -159,6 +166,7 @@ static inline pte_t __pte_ma(pteval_t x)

		#define pgd_val_ma(x) ((x).pgd)

		void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);

		xmaddr_t arbitrary_virt_to_machine(void *address);
		unsigned long arbitrary_virt_to_mfn(void *vaddr);

arch/x86/xen/Kconfig

+4 −7

Original line number	Diff line number	Diff line
		@@ -19,15 +19,12 @@ config XEN_PVHVM
		depends on X86_LOCAL_APIC

		config XEN_MAX_DOMAIN_MEMORY
		int "Maximum allowed size of a domain in gigabytes"
		default 8 if X86_32
		default 32 if X86_64
		int
		default 128
		depends on XEN
		help
		The pseudo-physical to machine address array is sized
		according to the maximum possible memory size of a Xen
		domain. This array uses 1 page per gigabyte, so there's no
		need to be too stingy here.
		This only affects the sizing of some bss arrays, the unused
		portions of which are freed.

		config XEN_SAVE_RESTORE
		bool

arch/x86/xen/enlighten.c

+8 −8

Original line number	Diff line number	Diff line
		@@ -136,9 +136,6 @@ static void xen_vcpu_setup(int cpu)
		info.mfn = arbitrary_virt_to_mfn(vcpup);
		info.offset = offset_in_page(vcpup);

		printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
		cpu, vcpup, info.mfn, info.offset);

		/* Check to see if the hypervisor will put the vcpu_info
		structure where we want it, which allows direct access via
		a percpu-variable. */
		@@ -152,9 +149,6 @@ static void xen_vcpu_setup(int cpu)
		/* This cpu is using the registered vcpu info, even if
		later ones fail to. */
		per_cpu(xen_vcpu, cpu) = vcpup;

		printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n",
		cpu, vcpup);
		}
		}

		@@ -836,6 +830,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
		Xen console noise. */
		break;

		case MSR_IA32_CR_PAT:
		if (smp_processor_id() == 0)
		xen_set_pat(((u64)high << 32) | low);
		break;

		default:
		ret = native_write_msr_safe(msr, low, high);
		}
		@@ -874,8 +873,6 @@ void xen_setup_vcpu_info_placement(void)
		/* xen_vcpu_setup managed to place the vcpu_info within the
		percpu area for all cpus, so make use of it */
		if (have_vcpu_info_placement) {
		printk(KERN_INFO "Xen: using vcpu_info placement\n");

		pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
		pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
		pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
		@@ -1189,6 +1186,9 @@ asmlinkage void __init xen_start_kernel(void)
		xen_raw_console_write("mapping kernel into physical memory\n");
		pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);

		/* Allocate and initialize top and mid mfn levels for p2m structure */
		xen_build_mfn_list_list();

		init_mm.pgd = pgd;

		/* keep using Xen gdt for now; no urgent need to change it */

arch/x86/xen/mmu.c

+425 −76

Original line number	Diff line number	Diff line
		@@ -57,6 +57,7 @@
		#include <asm/linkage.h>
		#include <asm/page.h>
		#include <asm/init.h>
		#include <asm/pat.h>

		#include <asm/xen/hypercall.h>
		#include <asm/xen/hypervisor.h>
		@@ -140,7 +141,8 @@ static inline void check_zero(void)
		* large enough to allocate page table pages to allocate the rest.
		* Each page can map 2MB.
		*/
		static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss;
		#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
		static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);

		#ifdef CONFIG_X86_64
		/* l3 pud for userspace vsyscall mapping */
		@@ -171,49 +173,182 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
		*/
		#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)

		/*
		* Xen leaves the responsibility for maintaining p2m mappings to the
		* guests themselves, but it must also access and update the p2m array
		* during suspend/resume when all the pages are reallocated.
		*
		* The p2m table is logically a flat array, but we implement it as a
		* three-level tree to allow the address space to be sparse.
		*
		*                Xen
		*                 |
		*     p2m_top              p2m_top_mfn
		*       /  \                   /   \
		* p2m_mid p2m_mid      p2m_mid_mfn p2m_mid_mfn
		*    / \      / \         /           /
		*  p2m p2m p2m p2m p2m p2m p2m ...
		*
		* The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
		*
		* The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
		* maximum representable pseudo-physical address space is:
		* P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
		*
		* P2M_PER_PAGE depends on the architecture, as a mfn is always
		* unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
		* 512 and 1024 entries respectively.
		*/

		unsigned long xen_max_p2m_pfn __read_mostly;

		#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
		#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
		#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
		#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
		#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))

		/* Placeholder for holes in the address space */
		static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
		{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
		#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)

		/* Array of pointers to pages containing p2m entries */
		static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
		{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
		/* Placeholders for holes in the address space */
		static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
		static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
		static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);

		/* Arrays of p2m arrays expressed in mfns used for save/restore */
		static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
		static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
		static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
		static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);

		static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
		__page_aligned_bss;
		RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
		RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));

		static inline unsigned p2m_top_index(unsigned long pfn)
		{
		BUG_ON(pfn >= MAX_DOMAIN_PAGES);
		return pfn / P2M_ENTRIES_PER_PAGE;
		BUG_ON(pfn >= MAX_P2M_PFN);
		return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
		}

		/*
		 * Mid-level slot for a pfn: the number of the leaf page that holds
		 * the pfn, taken modulo the number of entries in one mid page.
		 */
		static inline unsigned p2m_mid_index(unsigned long pfn)
		{
			unsigned long leaf_page = pfn / P2M_PER_PAGE;

			return leaf_page % P2M_MID_PER_PAGE;
		}

		static inline unsigned p2m_index(unsigned long pfn)
		{
		return pfn % P2M_ENTRIES_PER_PAGE;
		return pfn % P2M_PER_PAGE;
		}

		/* Point every slot of a top-level page at the shared p2m_mid_missing page. */
		static void p2m_top_init(unsigned long ***top)
		{
			unsigned long ***slot = top;
			unsigned long ***const end = top + P2M_TOP_PER_PAGE;

			while (slot < end)
				*slot++ = p2m_mid_missing;
		}

		/*
		 * Fill every slot of a top-level mfn page with the mfn of the shared
		 * p2m_mid_missing_mfn page.
		 */
		static void p2m_top_mfn_init(unsigned long *top)
		{
			unsigned slot = 0;

			while (slot < P2M_TOP_PER_PAGE) {
				top[slot] = virt_to_mfn(p2m_mid_missing_mfn);
				slot++;
			}
		}

		/*
		 * Point every slot of the top-level pointer page at the shared
		 * p2m_mid_missing_mfn page.
		 */
		static void p2m_top_mfn_p_init(unsigned long **top)
		{
			unsigned long **slot;

			for (slot = top; slot < top + P2M_TOP_PER_PAGE; slot++)
				*slot = p2m_mid_missing_mfn;
		}

		/* Build the parallel p2m_top_mfn structures */
		/* Point every slot of a mid-level page at the shared p2m_missing leaf. */
		static void p2m_mid_init(unsigned long **mid)
		{
			unsigned long **entry;

			for (entry = mid; entry != mid + P2M_MID_PER_PAGE; entry++)
				*entry = p2m_missing;
		}

		/*
		 * Fill every slot of a mid-level mfn page with the mfn of the shared
		 * p2m_missing leaf page.
		 */
		static void p2m_mid_mfn_init(unsigned long *mid)
		{
			unsigned n = 0;

			while (n < P2M_MID_PER_PAGE) {
				mid[n] = virt_to_mfn(p2m_missing);
				n++;
			}
		}

		static void p2m_init(unsigned long *p2m)
		{
		unsigned i;

		for (i = 0; i < P2M_MID_PER_PAGE; i++)
		p2m[i] = INVALID_P2M_ENTRY;
		}

		/*
		* Build the parallel p2m_top_mfn and p2m_mid_mfn structures
		*
		* This is called both at boot time, and after resuming from suspend:
		* - At boot time we're called very early, and must use extend_brk()
		* to allocate memory.
		*
		* - After resume we're called from within stop_machine, but the mfn
		* tree should already be completely allocated.
		*/
		void xen_build_mfn_list_list(void)
		{
		unsigned pfn, idx;
		unsigned long pfn;

		/* Pre-initialize p2m_top_mfn to be completely missing */
		if (p2m_top_mfn == NULL) {
		p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_mid_mfn_init(p2m_mid_missing_mfn);

		p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_mfn_p_init(p2m_top_mfn_p);

		for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
		p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_mfn_init(p2m_top_mfn);
		} else {
		/* Reinitialise, mfn's all change after migration */
		p2m_mid_mfn_init(p2m_mid_missing_mfn);
		}

		for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx = p2m_mid_index(pfn);
		unsigned long **mid;
		unsigned long *mid_mfn_p;

		p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
		mid = p2m_top[topidx];
		mid_mfn_p = p2m_top_mfn_p[topidx];

		/* Don't bother allocating any mfn mid levels if
		* they're just missing, just update the stored mfn,
		* since all could have changed over a migrate.
		*/
		if (mid == p2m_mid_missing) {
		BUG_ON(mididx);
		BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
		p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
		pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
		continue;
		}

		if (mid_mfn_p == p2m_mid_missing_mfn) {
		/*
		* XXX boot-time only! We should never find
		* missing parts of the mfn tree after
		* runtime. extend_brk() will BUG if we call
		* it too late.
		*/
		mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_mid_mfn_init(mid_mfn_p);

		p2m_top_mfn_p[topidx] = mid_mfn_p;
		}

		for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
		unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
		p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
		mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
		}
		}

		@@ -222,8 +357,8 @@ void xen_setup_mfn_list_list(void)
		BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);

		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		virt_to_mfn(p2m_top_mfn_list);
		HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
		virt_to_mfn(p2m_top_mfn);
		HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
		}

		/* Set up p2m_top to point to the domain-builder provided p2m pages */
		@@ -231,98 +366,176 @@ void __init xen_build_dynamic_phys_to_machine(void)
		{
		unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
		unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
		unsigned pfn;
		unsigned long pfn;

		xen_max_p2m_pfn = max_pfn;

		p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_init(p2m_missing);

		p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_mid_init(p2m_mid_missing);

		for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
		p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_init(p2m_top);

		/*
		* The domain builder gives us a pre-constructed p2m array in
		* mfn_list for all the pages initially given to us, so we just
		* need to graft that into our tree structure.
		*/
		for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx = p2m_mid_index(pfn);

		p2m_top[topidx] = &mfn_list[pfn];
		if (p2m_top[topidx] == p2m_mid_missing) {
		unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_mid_init(mid);

		p2m_top[topidx] = mid;
		}

		xen_build_mfn_list_list();
		p2m_top[topidx][mididx] = &mfn_list[pfn];
		}
		}

		unsigned long get_phys_to_machine(unsigned long pfn)
		{
		unsigned topidx, idx;
		unsigned topidx, mididx, idx;

		if (unlikely(pfn >= MAX_DOMAIN_PAGES))
		if (unlikely(pfn >= MAX_P2M_PFN))
		return INVALID_P2M_ENTRY;

		topidx = p2m_top_index(pfn);
		mididx = p2m_mid_index(pfn);
		idx = p2m_index(pfn);
		return p2m_top[topidx][idx];

		return p2m_top[topidx][mididx][idx];
		}
		EXPORT_SYMBOL_GPL(get_phys_to_machine);

		/* install a new p2m_top page */
		bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
		static void *alloc_p2m_page(void)
		{
		unsigned topidx = p2m_top_index(pfn);
		unsigned long **pfnp, *mfnp;
		unsigned i;
		return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
		}

		pfnp = &p2m_top[topidx];
		mfnp = &p2m_top_mfn[topidx];
		/* Hand a page back to the page allocator via free_page(). */
		static void free_p2m_page(void *p)
		{
			unsigned long page_addr = (unsigned long)p;

			free_page(page_addr);
		}

		for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
		p[i] = INVALID_P2M_ENTRY;
		/*
		* Fully allocate the p2m structure for a given pfn. We need to check
		* that both the top and mid levels are allocated, and make sure the
		* parallel mfn tree is kept in sync. We may race with other cpus, so
		* the new pages are installed with cmpxchg; if we lose the race then
		* simply free the page we allocated and use the one that's there.
		*/
		static bool alloc_p2m(unsigned long pfn)
		{
		unsigned topidx, mididx;
		unsigned long ***top_p, **mid;
		unsigned long *top_mfn_p, *mid_mfn;

		if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
		*mfnp = virt_to_mfn(p);
		return true;
		topidx = p2m_top_index(pfn);
		mididx = p2m_mid_index(pfn);

		top_p = &p2m_top[topidx];
		mid = *top_p;

		if (mid == p2m_mid_missing) {
		/* Mid level is missing, allocate a new one */
		mid = alloc_p2m_page();
		if (!mid)
		return false;

		p2m_mid_init(mid);

		if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
		free_p2m_page(mid);
		}

		top_mfn_p = &p2m_top_mfn[topidx];
		mid_mfn = p2m_top_mfn_p[topidx];

		BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

		if (mid_mfn == p2m_mid_missing_mfn) {
		/* Separately check the mid mfn level */
		unsigned long missing_mfn;
		unsigned long mid_mfn_mfn;

		mid_mfn = alloc_p2m_page();
		if (!mid_mfn)
		return false;

		p2m_mid_mfn_init(mid_mfn);

		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
		mid_mfn_mfn = virt_to_mfn(mid_mfn);
		if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
		free_p2m_page(mid_mfn);
		else
		p2m_top_mfn_p[topidx] = mid_mfn;
		}

		static void alloc_p2m(unsigned long pfn)
		{
		unsigned long *p;
		if (p2m_top[topidx][mididx] == p2m_missing) {
		/* p2m leaf page is missing */
		unsigned long *p2m;

		p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
		BUG_ON(p == NULL);
		p2m = alloc_p2m_page();
		if (!p2m)
		return false;

		if (!install_p2mtop_page(pfn, p))
		free_page((unsigned long)p);
		p2m_init(p2m);

		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
		free_p2m_page(p2m);
		else
		mid_mfn[mididx] = virt_to_mfn(p2m);
		}

		return true;
		}

		/* Try to install p2m mapping; fail if intermediate bits missing */
		bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
		{
		unsigned topidx, idx;
		unsigned topidx, mididx, idx;

		if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
		if (unlikely(pfn >= MAX_P2M_PFN)) {
		BUG_ON(mfn != INVALID_P2M_ENTRY);
		return true;
		}

		topidx = p2m_top_index(pfn);
		if (p2m_top[topidx] == p2m_missing) {
		if (mfn == INVALID_P2M_ENTRY)
		return true;
		return false;
		}

		mididx = p2m_mid_index(pfn);
		idx = p2m_index(pfn);
		p2m_top[topidx][idx] = mfn;

		if (p2m_top[topidx][mididx] == p2m_missing)
		return mfn == INVALID_P2M_ENTRY;

		p2m_top[topidx][mididx][idx] = mfn;

		return true;
		}

		void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
		bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
		{
		if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
		return;
		return true;
		}

		if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
		alloc_p2m(pfn);
		if (!alloc_p2m(pfn))
		return false;

		if (!__set_phys_to_machine(pfn, mfn))
		BUG();
		return false;
		}

		return true;
		}

		unsigned long arbitrary_virt_to_mfn(void *vaddr)
		@@ -399,7 +612,7 @@ static bool xen_iomap_pte(pte_t pte)
		return pte_flags(pte) & _PAGE_IOMAP;
		}

		static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
		void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
		{
		struct multicall_space mcs;
		struct mmu_update *u;
		@@ -411,10 +624,16 @@ static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
		u->ptr = arbitrary_virt_to_machine(ptep).maddr;
		u->val = pte_val_ma(pteval);

		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO);
		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);

		xen_mc_issue(PARAVIRT_LAZY_MMU);
		}
		EXPORT_SYMBOL_GPL(xen_set_domain_pte);

		/* Install an iomap pte: xen_set_domain_pte() with the domain fixed to DOMID_IO. */
		static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
		{
			const unsigned io_domid = DOMID_IO;

			xen_set_domain_pte(ptep, pteval, io_domid);
		}

		static void xen_extend_mmu_update(const struct mmu_update *update)
		{
		@@ -561,7 +780,20 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
		if (val & _PAGE_PRESENT) {
		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
		pteval_t flags = val & PTE_FLAGS_MASK;
		val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
		unsigned long mfn = pfn_to_mfn(pfn);

		/*
		* If there's no mfn for the pfn, then just create an
		* empty non-present pte. Unfortunately this loses
		* information about the original pfn, so
		* pte_mfn_to_pfn is asymmetric.
		*/
		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
		mfn = 0;
		flags = 0;
		}

		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
		}

		return val;
		@@ -583,10 +815,18 @@ static pteval_t iomap_pte(pteval_t val)

		pteval_t xen_pte_val(pte_t pte)
		{
		if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP))
		return pte.pte;
		pteval_t pteval = pte.pte;

		/* If this is a WC pte, convert back from Xen WC to Linux WC */
		if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
		WARN_ON(!pat_enabled);
		pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
		}

		return pte_mfn_to_pfn(pte.pte);
		if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
		return pteval;

		return pte_mfn_to_pfn(pteval);
		}
		PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);

		@@ -596,10 +836,48 @@ pgdval_t xen_pgd_val(pgd_t pgd)
		}
		PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);

		/*
		* Xen's PAT setup is part of its ABI, though I assume entries 6 & 7
		* are reserved for now, to correspond to the Intel-reserved PAT
		* types.
		*
		* We expect Linux's PAT set as follows:
		*
		* Idx  PTE flags        Linux    Xen    Default
		* 0                     WB       WB     WB
		* 1            PWT      WC       WT     WT
		* 2        PCD          UC-      UC-    UC-
		* 3        PCD PWT      UC       UC     UC
		* 4    PAT              WB       WC     WB
		* 5    PAT     PWT      WC       WP     WT
		* 6    PAT PCD          UC-      UC     UC-
		* 7    PAT PCD PWT      UC       UC     UC
		*/

		/*
		 * Sanity-check the PAT value Linux wants to program.  Nothing is
		 * written here; the function only warns when the value differs
		 * from the one layout we expect.
		 */
		void xen_set_pat(u64 pat)
		{
			/* Expected Linux PAT layout: UC UC- WC WB (ignoring the PAT flag). */
			const u64 expected_pat = 0x0007010600070106ull;

			WARN_ON(pat != expected_pat);
		}

		pte_t xen_make_pte(pteval_t pte)
		{
		phys_addr_t addr = (pte & PTE_PFN_MASK);

		/* If Linux is trying to set a WC pte, then map to the Xen WC.
		* If _PAGE_PAT is set, then it probably means it is really
		* _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
		* things work out OK...
		*
		* (We should never see kernel mappings with _PAGE_PSE set,
		* but we could see hugetlbfs mappings, I think.).
		*/
		if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) {
		if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
		pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
		}

		/*
		* Unprivileged domains are allowed to do IOMAPpings for
		* PCI passthrough, but not map ISA space. The ISA
		@@ -1712,6 +1990,9 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
		unsigned ident_pte;
		unsigned long pfn;

		level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
		PAGE_SIZE);

		ident_pte = 0;
		pfn = 0;
		for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
		@@ -1722,7 +2003,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
		pte_page = m2v(pmd[pmdidx].pmd);
		else {
		/* Check for free pte pages */
		if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
		if (ident_pte == LEVEL1_IDENT_ENTRIES)
		break;

		pte_page = &level1_ident_pgt[ident_pte];
		@@ -1837,13 +2118,15 @@ __init pgd_t xen_setup_kernel_pagetable(pgd_t pgd,
		return pgd;
		}
		#else /* !CONFIG_X86_64 */
		static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
		static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD);

		__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
		unsigned long max_pfn)
		{
		pmd_t *kernel_pmd;

		level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);

		max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
		xen_start_info->nr_pt_frames * PAGE_SIZE +
		512*1024);
		@@ -2269,6 +2552,72 @@ void __init xen_hvm_init_mmu_ops(void)
		}
		#endif

		#define REMAP_BATCH_SIZE 16

		/*
		 * Cursor state handed to remap_area_mfn_pte_fn() through the
		 * apply_to_page_range() data pointer.
		 */
		struct remap_data {
		/* Frame number used for the next pte; incremented after each pte. */
		unsigned long mfn;
		/* Protection bits applied to every pte that is built. */
		pgprot_t prot;
		/* Next mmu_update slot to fill; advanced after each pte. */
		struct mmu_update *mmu_update;
		};

		/*
		 * Per-pte callback: queue one mmu_update request that will write a
		 * special pte for the current frame into *ptep, then advance both the
		 * frame counter and the request cursor held in 'data'.
		 *
		 * 'token' and 'addr' are not used.  Always returns 0.
		 */
		static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
						 unsigned long addr, void *data)
		{
			struct remap_data *state = data;
			struct mmu_update *req = state->mmu_update;
			pte_t new_pte;

			new_pte = pte_mkspecial(pfn_pte(state->mfn, state->prot));
			state->mfn++;

			/* The request addresses the pte slot by its machine address. */
			req->ptr = arbitrary_virt_to_machine(ptep).maddr;
			req->val = pte_val_ma(new_pte);

			state->mmu_update = req + 1;

			return 0;
		}

		/*
		 * Map 'nr' consecutive frames, starting at 'mfn', into 'vma' at
		 * virtual address 'addr', issuing the pte writes on behalf of domain
		 * 'domid'.
		 *
		 * The ptes are built with 'prot' plus _PAGE_IOMAP, and the vma is
		 * marked VM_IO | VM_RESERVED | VM_PFNMAP.  Work is done in batches of
		 * at most REMAP_BATCH_SIZE ptes: apply_to_page_range() fills the
		 * on-stack mmu_update array, then one HYPERVISOR_mmu_update() call
		 * submits the batch.
		 *
		 * Returns 0 on success, the error from apply_to_page_range(), or
		 * -EFAULT if the hypercall fails.  flush_tlb_all() runs on every exit
		 * path, including errors.
		 */
		int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
					       unsigned long addr,
					       unsigned long mfn, int nr,
					       pgprot_t prot, unsigned domid)
		{
			struct remap_data rmd;
			struct mmu_update mmu_update[REMAP_BATCH_SIZE];
			int batch;
			unsigned long range;
			int err = 0;

			prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);

			vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;

			rmd.mfn = mfn;
			rmd.prot = prot;

			while (nr) {
				batch = min(REMAP_BATCH_SIZE, nr);
				range = (unsigned long)batch << PAGE_SHIFT;

				/* Rewind the cursor: each batch refills the array from slot 0. */
				rmd.mmu_update = mmu_update;
				err = apply_to_page_range(vma->vm_mm, addr, range,
							  remap_area_mfn_pte_fn, &rmd);
				if (err)
					goto out;

				err = -EFAULT;
				if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
					goto out;

				nr -= batch;
				addr += range;
			}

			err = 0;
		out:

			flush_tlb_all();

			return err;
		}
		EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);

		#ifdef CONFIG_XEN_DEBUG_FS

		static struct dentry *d_mmu_debug;