
Commit 26756087 authored by Linus Torvalds
Merge tag 'stable/for-linus-3.6-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

Pull three xen bug-fixes from Konrad Rzeszutek Wilk:
 - Revert the kexec fix, which caused a race on non-kexec shutdowns.
 - Reuse existing P2M leaves instead of requiring a large area of boot-time
   virtual address space to be allocated.
 - Fix an off-by-one error when adding PFNs for balloon pages (a short
   sketch follows the commit list below).

* tag 'stable/for-linus-3.6-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/setup: Fix one-off error when adding for-balloon PFNs to the P2M.
  xen/p2m: Reuse existing P2M leafs if they are filled with 1:1 PFNs or INVALID.
  Revert "xen PVonHVM: move shared_info to MMIO before kexec"
parents 4ae46147 c96aae1f
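The "one-off" (off-by-one) error in the last bullet is a loop bound: xen_add_extra_mem() sets xen_max_p2m_pfn = PFN_DOWN(start + size), which for a page-aligned region is the first PFN past it, so iterating with "<=" touches one PFN outside the region. A minimal stand-alone sketch of the difference (illustrative values and plain counters only, not the kernel code; the real change is in the xen_add_extra_mem() hunk further down):

#include <stdio.h>

int main(void)
{
	unsigned long start_pfn = 100;	/* first PFN of the added region */
	unsigned long max_pfn = 110;	/* one past the last PFN, like PFN_DOWN(start + size) */
	unsigned long pfn, buggy = 0, fixed = 0;

	for (pfn = start_pfn; pfn <= max_pfn; pfn++)	/* old bound: walks one PFN too far */
		buggy++;
	for (pfn = start_pfn; pfn < max_pfn; pfn++)	/* new bound: stays inside the region */
		fixed++;

	printf("buggy=%lu fixed=%lu\n", buggy, fixed);	/* prints buggy=11 fixed=10 */
	return 0;
}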
arch/x86/xen/enlighten.c  +11 −107
@@ -31,7 +31,6 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
-#include <linux/syscore_ops.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
@@ -1470,130 +1469,38 @@ asmlinkage void __init xen_start_kernel(void)
 #endif
 }
 
-#ifdef CONFIG_XEN_PVHVM
-/*
- * The pfn containing the shared_info is located somewhere in RAM. This
- * will cause trouble if the current kernel is doing a kexec boot into a
- * new kernel. The new kernel (and its startup code) can not know where
- * the pfn is, so it can not reserve the page. The hypervisor will
- * continue to update the pfn, and as a result memory corruption occours
- * in the new kernel.
- *
- * One way to work around this issue is to allocate a page in the
- * xen-platform pci device's BAR memory range. But pci init is done very
- * late and the shared_info page is already in use very early to read
- * the pvclock. So moving the pfn from RAM to MMIO is racy because some
- * code paths on other vcpus could access the pfn during the small
- * window when the old pfn is moved to the new pfn. There is even a
- * small window were the old pfn is not backed by a mfn, and during that
- * time all reads return -1.
- *
- * Because it is not known upfront where the MMIO region is located it
- * can not be used right from the start in xen_hvm_init_shared_info.
- *
- * To minimise trouble the move of the pfn is done shortly before kexec.
- * This does not eliminate the race because all vcpus are still online
- * when the syscore_ops will be called. But hopefully there is no work
- * pending at this point in time. Also the syscore_op is run last which
- * reduces the risk further.
- */
-
-static struct shared_info *xen_hvm_shared_info;
-
-static void xen_hvm_connect_shared_info(unsigned long pfn)
+void __ref xen_hvm_init_shared_info(void)
 {
+	int cpu;
 	struct xen_add_to_physmap xatp;
+	static struct shared_info *shared_info_page = 0;
 
+	if (!shared_info_page)
+		shared_info_page = (struct shared_info *)
+			extend_brk(PAGE_SIZE, PAGE_SIZE);
 	xatp.domid = DOMID_SELF;
 	xatp.idx = 0;
 	xatp.space = XENMAPSPACE_shared_info;
-	xatp.gpfn = pfn;
+	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
 	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
 		BUG();
 
-}
-static void xen_hvm_set_shared_info(struct shared_info *sip)
-{
-	int cpu;
-
-	HYPERVISOR_shared_info = sip;
+	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
 
 	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
 	 * page, we use it in the event channel upcall and in some pvclock
 	 * related functions. We don't need the vcpu_info placement
 	 * optimizations because we don't use any pv_mmu or pv_irq op on
 	 * HVM.
-	 * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is
-	 * online but xen_hvm_set_shared_info is run at resume time too and
+	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
+	 * online but xen_hvm_init_shared_info is run at resume time too and
 	 * in that case multiple vcpus might be online. */
 	for_each_online_cpu(cpu) {
 		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 	}
 }
 
-/* Reconnect the shared_info pfn to a mfn */
-void xen_hvm_resume_shared_info(void)
-{
-	xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
-}
-
-#ifdef CONFIG_KEXEC
-static struct shared_info *xen_hvm_shared_info_kexec;
-static unsigned long xen_hvm_shared_info_pfn_kexec;
-
-/* Remember a pfn in MMIO space for kexec reboot */
-void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn)
-{
-	xen_hvm_shared_info_kexec = sip;
-	xen_hvm_shared_info_pfn_kexec = pfn;
-}
-
-static void xen_hvm_syscore_shutdown(void)
-{
-	struct xen_memory_reservation reservation = {
-		.domid = DOMID_SELF,
-		.nr_extents = 1,
-	};
-	unsigned long prev_pfn;
-	int rc;
-
-	if (!xen_hvm_shared_info_kexec)
-		return;
-
-	prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT;
-	set_xen_guest_handle(reservation.extent_start, &prev_pfn);
-
-	/* Move pfn to MMIO, disconnects previous pfn from mfn */
-	xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec);
-
-	/* Update pointers, following hypercall is also a memory barrier */
-	xen_hvm_set_shared_info(xen_hvm_shared_info_kexec);
-
-	/* Allocate new mfn for previous pfn */
-	do {
-		rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
-		if (rc == 0)
-			msleep(123);
-	} while (rc == 0);
-
-	/* Make sure the previous pfn is really connected to a (new) mfn */
-	BUG_ON(rc != 1);
-}
-
-static struct syscore_ops xen_hvm_syscore_ops = {
-	.shutdown = xen_hvm_syscore_shutdown,
-};
-#endif
-
-/* Use a pfn in RAM, may move to MMIO before kexec. */
-static void __init xen_hvm_init_shared_info(void)
-{
-	/* Remember pointer for resume */
-	xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
-	xen_hvm_set_shared_info(xen_hvm_shared_info);
-}
-
+#ifdef CONFIG_XEN_PVHVM
 static void __init init_hvm_pv_info(void)
 {
 	int major, minor;
@@ -1644,9 +1551,6 @@ static void __init xen_hvm_guest_init(void)
 	init_hvm_pv_info();
 
 	xen_hvm_init_shared_info();
-#ifdef CONFIG_KEXEC
-	register_syscore_ops(&xen_hvm_syscore_ops);
-#endif
 
 	if (xen_feature(XENFEAT_hvm_callback_vector))
 		xen_have_vector_callback = 1;
arch/x86/xen/p2m.c  +92 −3
@@ -196,9 +196,11 @@ RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
 
 /* When we populate back during bootup, the amount of pages can vary. The
  * max we have is seen is 395979, but that does not mean it can't be more.
- * But some machines can have 3GB I/O holes even. So lets reserve enough
- * for 4GB of I/O and E820 holes. */
-RESERVE_BRK(p2m_populated, PMD_SIZE * 4);
+ * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
+ * it can re-use Xen provided mfn_list array, so we only need to allocate at
+ * most three P2M top nodes. */
+RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
+
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
 	BUG_ON(pfn >= MAX_P2M_PFN);
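For scale (a rough calculation, assuming the usual x86-64 values of PAGE_SIZE = 4 KiB and PMD_SIZE = 2 MiB, which the hunk above does not spell out): the old RESERVE_BRK(p2m_populated, PMD_SIZE * 4) set aside 4 * 2 MiB = 8 MiB of boot-time brk virtual address space, whereas RESERVE_BRK(p2m_populated, PAGE_SIZE * 3) reserves only 3 * 4 KiB = 12 KiB, because early_can_reuse_p2m_middle() (added in the next hunk) lets existing P2M leaf pages be recycled instead of freshly allocated.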
@@ -575,12 +577,99 @@ static bool __init early_alloc_p2m(unsigned long pfn)
 	}
 	return true;
 }
+
+/*
+ * Skim over the P2M tree looking at pages that are either filled with
+ * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and
+ * replace the P2M leaf with a p2m_missing or p2m_identity.
+ * Stick the old page in the new P2M tree location.
+ */
+bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn)
+{
+	unsigned topidx;
+	unsigned mididx;
+	unsigned ident_pfns;
+	unsigned inv_pfns;
+	unsigned long *p2m;
+	unsigned long *mid_mfn_p;
+	unsigned idx;
+	unsigned long pfn;
+
+	/* We only look when this entails a P2M middle layer */
+	if (p2m_index(set_pfn))
+		return false;
+
+	for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
+		topidx = p2m_top_index(pfn);
+
+		if (!p2m_top[topidx])
+			continue;
+
+		if (p2m_top[topidx] == p2m_mid_missing)
+			continue;
+
+		mididx = p2m_mid_index(pfn);
+		p2m = p2m_top[topidx][mididx];
+		if (!p2m)
+			continue;
+
+		if ((p2m == p2m_missing) || (p2m == p2m_identity))
+			continue;
+
+		if ((unsigned long)p2m == INVALID_P2M_ENTRY)
+			continue;
+
+		ident_pfns = 0;
+		inv_pfns = 0;
+		for (idx = 0; idx < P2M_PER_PAGE; idx++) {
+			/* IDENTITY_PFNs are 1:1 */
+			if (p2m[idx] == IDENTITY_FRAME(pfn + idx))
+				ident_pfns++;
+			else if (p2m[idx] == INVALID_P2M_ENTRY)
+				inv_pfns++;
+			else
+				break;
+		}
+		if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE))
+			goto found;
+	}
+	return false;
+found:
+	/* Found one, replace old with p2m_identity or p2m_missing */
+	p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
+	/* And the other for save/restore.. */
+	mid_mfn_p = p2m_top_mfn_p[topidx];
+	/* NOTE: Even if it is a p2m_identity it should still be point to
+	 * a page filled with INVALID_P2M_ENTRY entries. */
+	mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);
+
+	/* Reset where we want to stick the old page in. */
+	topidx = p2m_top_index(set_pfn);
+	mididx = p2m_mid_index(set_pfn);
+
+	/* This shouldn't happen */
+	if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
+		early_alloc_p2m(set_pfn);
+
+	if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
+		return false;
+
+	p2m_init(p2m);
+	p2m_top[topidx][mididx] = p2m;
+	mid_mfn_p = p2m_top_mfn_p[topidx];
+	mid_mfn_p[mididx] = virt_to_mfn(p2m);
+
+	return true;
+}
 bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
 	if (unlikely(!__set_phys_to_machine(pfn, mfn)))  {
 		if (!early_alloc_p2m(pfn))
 			return false;
 
+		if (early_can_reuse_p2m_middle(pfn, mfn))
+			return __set_phys_to_machine(pfn, mfn);
+
 		if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/))
 			return false;
 
arch/x86/xen/setup.c  +8 −1
@@ -78,10 +78,17 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
 	memblock_reserve(start, size);
 
 	xen_max_p2m_pfn = PFN_DOWN(start + size);
+	for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
+		unsigned long mfn = pfn_to_mfn(pfn);
+
+		if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
+			continue;
+		WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
+			pfn, mfn);
 
-	for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
 		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+	}
 }
 
 static unsigned long __init xen_do_chunk(unsigned long start,
					 unsigned long end, bool release)
arch/x86/xen/suspend.c  +1 −1
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
 {
 #ifdef CONFIG_XEN_PVHVM
 	int cpu;
-	xen_hvm_resume_shared_info();
+	xen_hvm_init_shared_info();
 	xen_callback_vector();
 	xen_unplug_emulated_devices();
 	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
arch/x86/xen/xen-ops.h  +1 −1
@@ -41,7 +41,7 @@ void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
 
 void xen_callback_vector(void);
-void xen_hvm_resume_shared_info(void);
+void xen_hvm_init_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);