
Commit 4e8c0c8c authored by David Vrabel

xen/privcmd: improve performance of MMAPBATCH_V2



Make the IOCTL_PRIVCMD_MMAPBATCH_V2 (and the older V1 version) map
multiple frames at a time rather than one at a time, even though the
frames are non-consecutive GFNs.
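
For orientation, this is roughly how a toolstack-style caller drives the
batched path from userspace.  A minimal sketch, assuming the privcmd uapi
header provides struct privcmd_mmapbatch_v2 and the xen_pfn_t/domid_t
types, and assuming 4 KiB pages; error handling trimmed:

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/privcmd.h>	/* struct privcmd_mmapbatch_v2, ioctl number */

/* Map @num foreign frames (possibly non-consecutive GFNs) owned by
 * @dom, collecting one error code per frame in @errs.  @fd is an open
 * /dev/xen/privcmd file descriptor. */
static void *map_foreign_batch(int fd, domid_t dom, xen_pfn_t *gfns,
			       int *errs, unsigned int num)
{
	void *addr = mmap(NULL, (size_t)num << 12, PROT_READ | PROT_WRITE,
			  MAP_SHARED, fd, 0);
	struct privcmd_mmapbatch_v2 batch = {
		.num  = num,
		.dom  = dom,
		.addr = (__u64)(unsigned long)addr,
		.arr  = gfns,
		.err  = errs,
	};

	if (addr == MAP_FAILED)
		return NULL;
	/* With this change the kernel maps up to a whole batch of frames
	 * per hypercall instead of issuing one hypercall per frame. */
	if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch) < 0)
		return NULL;
	return addr;
}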

xen_remap_domain_mfn_array() is added which maps an array of GFNs
(instead of a consecutive range of GFNs).
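
Hypothetical in-kernel usage, shown only to illustrate the contract (the
real caller is privcmd's mmap_batch_fn in the diff below); the names
gfns/nr/domid/pages here are placeholders.  Note the return value is the
count of successfully mapped frames, and the error array may alias the
GFN array:

	xen_pfn_t gfns[16];	/* filled with arbitrary, scattered GFNs */
	int mapped;

	mapped = xen_remap_domain_mfn_array(vma, vma->vm_start, gfns, nr,
					    (int *)gfns, /* err_ptr may alias gfn */
					    vma->vm_page_prot, domid, pages);
	if (mapped < 0)
		pr_err("mapping failed outright: %d\n", mapped);
	else if (mapped < nr)
		pr_warn("%d frame(s) failed; see the per-frame codes\n",
			nr - mapped);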

Since per-frame errors are returned in an array, privcmd must set the
MMAPBATCH_V1 error bits as part of the "report errors" phase, after
all the frames are mapped.
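
Concretely, the V1 bits are now ORed into the top nibble of each frame
number only in the second, report-errors pass, because the per-frame
error codes overwrite the GFNs in the kernel's copy of the array.  A
sketch of the logic (constants are the existing ones from xen/privcmd.h;
this mirrors the new mmap_return_error() in the diff below):

	/* Second pass only: encoding during mapping would clobber GFNs
	 * that have not been consumed yet. */
	if (err) {
		mfn |= (err == -ENOENT)
			? PRIVCMD_MMAPBATCH_PAGED_ERROR	/* 0x80000000U */
			: PRIVCMD_MMAPBATCH_MFN_ERROR;	/* 0xf0000000U */
		ret = __put_user(mfn, st->user_mfn++);
	}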

Migration times are significantly improved (when using a PV toolstack
domain).  For example, for an idle 12 GiB PV guest:

        Before     After
  real  0m38.179s  0m26.868s
  user  0m15.096s  0m13.652s
  sys   0m28.988s  0m18.732s

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
parent 628c28ee
arch/arm/xen/enlighten.c  +16 −4
@@ -53,14 +53,26 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);

static __read_mostly int xen_events_irq = -1;

int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
			       unsigned long addr,
			       xen_pfn_t *mfn, int nr,
			       int *err_ptr, pgprot_t prot,
			       unsigned domid,
			       struct page **pages)
{
	return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
					 prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);

/* Not used by XENFEAT_auto_translated guests. */
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
                              unsigned long addr,
                              xen_pfn_t mfn, int nr,
                              pgprot_t prot, unsigned domid,
                              struct page **pages)
{
	return xen_xlate_remap_gfn_range(vma, addr, mfn, nr,
					 prot, domid, pages);
	return -ENOSYS;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);

arch/x86/xen/mmu.c  +82 −19
@@ -2439,7 +2439,8 @@ void __init xen_hvm_init_mmu_ops(void)
#define REMAP_BATCH_SIZE 16

struct remap_data {
	unsigned long mfn;
	xen_pfn_t *mfn;
	bool contiguous;
	pgprot_t prot;
	struct mmu_update *mmu_update;
};
@@ -2448,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
				 unsigned long addr, void *data)
{
	struct remap_data *rmd = data;
	pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
	pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));

	/* If we have a contigious range, just update the mfn itself,
	   else update pointer to be "next mfn". */
	if (rmd->contiguous)
		(*rmd->mfn)++;
	else
		rmd->mfn++;

	rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
	rmd->mmu_update->val = pte_val_ma(pte);
@@ -2457,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
	return 0;
}

int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
static int do_remap_mfn(struct vm_area_struct *vma,
			unsigned long addr,
			       xen_pfn_t mfn, int nr,
			       pgprot_t prot, unsigned domid,
			xen_pfn_t *mfn, int nr,
			int *err_ptr, pgprot_t prot,
			unsigned domid,
			struct page **pages)

{
	int err = 0;
	struct remap_data rmd;
	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
	int batch;
	unsigned long range;
	int err = 0;
	int mapped = 0;

	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));

	if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_XEN_PVH
		/* We need to update the local page tables and the xen HAP */
		return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
						 domid, pages);
		return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
						 prot, domid, pages);
#else
		return -EINVAL;
#endif
@@ -2484,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,

	rmd.mfn = mfn;
	rmd.prot = prot;
	/* We use the err_ptr to indicate if there we are doing a contigious
	 * mapping or a discontigious mapping. */
	rmd.contiguous = !err_ptr;

	while (nr) {
		batch = min(REMAP_BATCH_SIZE, nr);
		int index = 0;
		int done = 0;
		int batch = min(REMAP_BATCH_SIZE, nr);
		int batch_left = batch;
		range = (unsigned long)batch << PAGE_SHIFT;

		rmd.mmu_update = mmu_update;
@@ -2495,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
		if (err)
			goto out;

		err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
		if (err < 0)
		/* We record the error for each page that gives an error, but
		 * continue mapping until the whole set is done */
		do {
			int i;

			err = HYPERVISOR_mmu_update(&mmu_update[index],
						    batch_left, &done, domid);

			/*
			 * @err_ptr may be the same buffer as @mfn, so
			 * only clear it after each chunk of @mfn is
			 * used.
			 */
			if (err_ptr) {
				for (i = index; i < index + done; i++)
					err_ptr[i] = 0;
			}
			if (err < 0) {
				if (!err_ptr)
					goto out;
				err_ptr[i] = err;
				done++; /* Skip failed frame. */
			} else
				mapped += done;
			batch_left -= done;
			index += done;
		} while (batch_left);

		nr -= batch;
		addr += range;
		if (err_ptr)
			err_ptr += batch;
	}

	err = 0;
out:

	xen_flush_tlb_all();

	return err;
	return err < 0 ? err : mapped;
}

int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
			       unsigned long addr,
			       xen_pfn_t mfn, int nr,
			       pgprot_t prot, unsigned domid,
			       struct page **pages)
{
	return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);

int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
			       unsigned long addr,
			       xen_pfn_t *mfn, int nr,
			       int *err_ptr, pgprot_t prot,
			       unsigned domid, struct page **pages)
{
	/* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
	 * and the consequences later is quite hard to detect what the actual
	 * cause of "wrong memory was mapped in".
	 */
	BUG_ON(err_ptr == NULL);
	return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);


/* Returns: 0 success */
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
			       int numpgs, struct page **pages)
drivers/xen/privcmd.c  +82 −35
@@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size,
	return ret;
}

/*
 * Similar to traverse_pages, but use each page as a "block" of
 * data to be processed as one unit.
 */
static int traverse_pages_block(unsigned nelem, size_t size,
				struct list_head *pos,
				int (*fn)(void *data, int nr, void *state),
				void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;

	while (nelem) {
		int nr = (PAGE_SIZE/size);
		struct page *page;
		if (nr > nelem)
			nr = nelem;
		pos = pos->next;
		page = list_entry(pos, struct page, lru);
		pagedata = page_address(page);
		ret = (*fn)(pagedata, nr, state);
		if (ret)
			break;
		nelem -= nr;
	}

	return ret;
}

struct mmap_mfn_state {
	unsigned long va;
	struct vm_area_struct *vma;
@@ -274,39 +308,25 @@ struct mmap_batch_state {
/* auto translated dom0 note: if domU being created is PV, then mfn is
 * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, void *state)
static int mmap_batch_fn(void *data, int nr, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	struct vm_area_struct *vma = st->vma;
	struct page **pages = vma->vm_private_data;
	struct page *cur_page = NULL;
	struct page **cur_pages = NULL;
	int ret;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		cur_page = pages[st->index++];
		cur_pages = &pages[st->index];

	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
					 st->vma->vm_page_prot, st->domain,
					 &cur_page);

	/* Store error code for second pass. */
	if (st->version == 1) {
		if (ret < 0) {
			/*
			 * V1 encodes the error codes in the 32bit top nibble of the
			 * mfn (with its known limitations vis-a-vis 64 bit callers).
			 */
			*mfnp |= (ret == -ENOENT) ?
						PRIVCMD_MMAPBATCH_PAGED_ERROR :
						PRIVCMD_MMAPBATCH_MFN_ERROR;
		}
	} else { /* st->version == 2 */
		*((int *) mfnp) = ret;
	}
	BUG_ON(nr < 0);
	ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr,
					 (int *)mfnp, st->vma->vm_page_prot,
					 st->domain, cur_pages);

	/* And see if it affects the global_error. */
	if (ret < 0) {
	/* Adjust the global_error? */
	if (ret != nr) {
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
@@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state)
				st->global_error = 1;
		}
	}
	st->va += PAGE_SIZE;
	st->va += PAGE_SIZE * nr;
	st->index += nr;

	return 0;
}

static int mmap_return_errors(void *data, void *state)
static int mmap_return_error(int err, struct mmap_batch_state *st)
{
	struct mmap_batch_state *st = state;
	int ret;

	if (st->version == 1) {
		xen_pfn_t mfnp = *((xen_pfn_t *) data);
		if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
			return __put_user(mfnp, st->user_mfn++);
		else
		if (err) {
			xen_pfn_t mfn;

			ret = get_user(mfn, st->user_mfn);
			if (ret < 0)
				return ret;
			/*
			 * V1 encodes the error codes in the 32bit top
			 * nibble of the mfn (with its known
			 * limitations vis-a-vis 64 bit callers).
			 */
			mfn |= (err == -ENOENT) ?
				PRIVCMD_MMAPBATCH_PAGED_ERROR :
				PRIVCMD_MMAPBATCH_MFN_ERROR;
			return __put_user(mfn, st->user_mfn++);
		} else
			st->user_mfn++;
	} else { /* st->version == 2 */
		int err = *((int *) data);
		if (err)
			return __put_user(err, st->user_err++);
		else
@@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state)
	return 0;
}

static int mmap_return_errors(void *data, int nr, void *state)
{
	struct mmap_batch_state *st = state;
	int *errs = data;
	int i;
	int ret;

	for (i = 0; i < nr; i++) {
		ret = mmap_return_error(errs[i], st);
		if (ret < 0)
			return ret;
	}
	return 0;
}

/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
@@ -472,7 +519,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
	state.version       = version;

	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
	BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
				    &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);
@@ -481,7 +528,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
		/* Write back errors in second pass. */
		state.user_mfn = (xen_pfn_t *)m.arr;
		state.user_err = m.err;
		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
		ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
					   &pagelist, mmap_return_errors, &state);
	} else
		ret = 0;
drivers/xen/xlate_mmu.c  +28 −18
@@ -62,13 +62,15 @@ static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
}

struct remap_data {
	xen_pfn_t fgmfn; /* foreign domain's gmfn */
	xen_pfn_t *fgmfn; /* foreign domain's gmfn */
	pgprot_t prot;
	domid_t  domid;
	struct vm_area_struct *vma;
	int index;
	struct page **pages;
	struct xen_remap_mfn_info *info;
	int *err_ptr;
	int mapped;
};

static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
@@ -80,38 +82,46 @@ static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
	pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
	int rc;

	rc = map_foreign_page(pfn, info->fgmfn, info->domid);
	if (rc < 0)
		return rc;
	rc = map_foreign_page(pfn, *info->fgmfn, info->domid);
	*info->err_ptr++ = rc;
	if (!rc) {
		set_pte_at(info->vma->vm_mm, addr, ptep, pte);
		info->mapped++;
	}
	info->fgmfn++;

	return 0;
}

int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
			      unsigned long addr,
			      xen_pfn_t gfn, int nr,
			      pgprot_t prot, unsigned domid,
			      xen_pfn_t *mfn, int nr,
			      int *err_ptr, pgprot_t prot,
			      unsigned domid,
			      struct page **pages)
{
	int err;
	struct remap_data data;
	unsigned long range = nr << PAGE_SHIFT;

	/* TBD: Batching, current sole caller only does page at a time */
	if (nr > 1)
		return -EINVAL;
	/* Kept here for the purpose of making sure code doesn't break
	   x86 PVOPS */
	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));

	data.fgmfn = gfn;
	data.fgmfn = mfn;
	data.prot  = prot;
	data.domid = domid;
	data.vma   = vma;
	data.index = 0;
	data.pages = pages;
	err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
	data.index = 0;
	data.err_ptr = err_ptr;
	data.mapped = 0;

	err = apply_to_page_range(vma->vm_mm, addr, range,
				  remap_pte_fn, &data);
	return err;
	return err < 0 ? err : data.mapped;
}
EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_range);
EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array);

int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
			      int nr, struct page **pages)
include/xen/xen-ops.h  +41 −4
@@ -27,17 +27,54 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);

struct vm_area_struct;

/*
 * xen_remap_domain_mfn_array() - map an array of foreign frames
 * @vma:     VMA to map the pages into
 * @addr:    Address at which to map the pages
 * @gfn:     Array of GFNs to map
 * @nr:      Number entries in the GFN array
 * @err_ptr: Returns per-GFN error status.
 * @prot:    page protection mask
 * @domid:   Domain owning the pages
 * @pages:   Array of pages if this domain has an auto-translated physmap
 *
 * @gfn and @err_ptr may point to the same buffer, the GFNs will be
 * overwritten by the error codes after they are mapped.
 *
 * Returns the number of successfully mapped frames, or a -ve error
 * code.
 */
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
			       unsigned long addr,
			       xen_pfn_t *gfn, int nr,
			       int *err_ptr, pgprot_t prot,
			       unsigned domid,
			       struct page **pages);

/* xen_remap_domain_mfn_range() - map a range of foreign frames
 * @vma:     VMA to map the pages into
 * @addr:    Address at which to map the pages
 * @gfn:     First GFN to map.
 * @nr:      Number frames to map
 * @prot:    page protection mask
 * @domid:   Domain owning the pages
 * @pages:   Array of pages if this domain has an auto-translated physmap
 *
 * Returns the number of successfully mapped frames, or a -ve error
 * code.
 */
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
			       unsigned long addr,
			       xen_pfn_t mfn, int nr,
			       xen_pfn_t gfn, int nr,
			       pgprot_t prot, unsigned domid,
			       struct page **pages);
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
			       int numpgs, struct page **pages);
int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
			      unsigned long addr,
			      xen_pfn_t gfn, int nr,
			      pgprot_t prot,
			      xen_pfn_t *gfn, int nr,
			      int *err_ptr, pgprot_t prot,
			      unsigned domid,
			      struct page **pages);
int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,