Commit 79e542f5 authored by Changbin Du, committed by Zhenyu Wang

drm/i915/kvmgt: Support setting dma map for huge pages



To support huge gtt, we need to support huge pages in kvmgt first.
This patch adds a 'size' param to the intel_gvt_mpt::dma_map_guest_page
API and implements it in kvmgt.

v2: rebase.
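For illustration, the call shape before and after this change (a sketch only; full context is in the diffs below):

	/* Before: every mapping was implicitly one small page. */
	ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr);

	/* After: the caller states the mapping size. Existing call sites
	 * pass PAGE_SIZE; a huge-page shadow path can pass a larger size
	 * to get a single pin and a single DMA mapping for the range.
	 */
	ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
						      &dma_addr);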

Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
parent eb3a3530

drivers/gpu/drm/i915/gvt/gtt.c  +3 −3

@@ -1106,7 +1106,7 @@ static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
 
 	for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
 		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
-						start_gfn + i, &dma_addr);
+					start_gfn + i, PAGE_SIZE, &dma_addr);
 		if (ret)
 			return ret;
 
@@ -1152,7 +1152,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
 	};
 
 	/* direct shadow */
-	ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr);
+	ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
 	if (ret)
 		return -ENXIO;
 
@@ -2080,7 +2080,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
 		}
 
 		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
-							      &dma_addr);
+							PAGE_SIZE, &dma_addr);
 		if (ret) {
 			gvt_vgpu_err("fail to populate guest ggtt entry\n");
 			/* guest driver may read/write the entry when partial
drivers/gpu/drm/i915/gvt/hypercall.h  +1 −1

@@ -53,7 +53,7 @@ struct intel_gvt_mpt {
 	unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn);
 
 	int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn,
-				  dma_addr_t *dma_addr);
+				  unsigned long size, dma_addr_t *dma_addr);
 	void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr);
 
 	int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,
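For orientation: kvmgt fills these hooks with its own implementations (the kvmgt.c diff follows). A rough sketch of that wiring, with the surrounding members elided and the exact table shape assumed rather than taken from this patch:

	static struct intel_gvt_mpt kvmgt_mpt = {
		/* ... other hooks ... */
		.dma_map_guest_page = kvmgt_dma_map_guest_page,
		.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
		/* ... */
	};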
drivers/gpu/drm/i915/gvt/kvmgt.c  +94 −32

@@ -94,6 +94,7 @@ struct gvt_dma {
 	struct rb_node dma_addr_node;
 	gfn_t gfn;
 	dma_addr_t dma_addr;
+	unsigned long size;
 	struct kref ref;
 };
 
@@ -106,45 +107,103 @@ static int kvmgt_guest_init(struct mdev_device *mdev);
 static void intel_vgpu_release_work(struct work_struct *work);
 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
 
-static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
-		dma_addr_t *dma_addr)
+static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
+		unsigned long size)
 {
-	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
-	struct page *page;
-	unsigned long pfn;
+	int total_pages;
+	int npage;
+	int ret;
+
+	total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
+
+	for (npage = 0; npage < total_pages; npage++) {
+		unsigned long cur_gfn = gfn + npage;
+
+		ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1);
+		WARN_ON(ret != 1);
+	}
+}
+
+/* Pin a normal or compound guest page for dma. */
+static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
+		unsigned long size, struct page **page)
+{
+	unsigned long base_pfn = 0;
+	int total_pages;
+	int npage;
 	int ret;
 
-	/* Pin the page first. */
-	ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1,
-			     IOMMU_READ | IOMMU_WRITE, &pfn);
-	if (ret != 1) {
-		gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
-			     gfn, ret);
-		return -EINVAL;
+	total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
+	/*
+	 * We pin the pages one-by-one to avoid allocating a big arrary
+	 * on stack to hold pfns.
+	 */
+	for (npage = 0; npage < total_pages; npage++) {
+		unsigned long cur_gfn = gfn + npage;
+		unsigned long pfn;
+
+		ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1,
+				     IOMMU_READ | IOMMU_WRITE, &pfn);
+		if (ret != 1) {
+			gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
+				     cur_gfn, ret);
+			goto err;
+		}
+
+		if (!pfn_valid(pfn)) {
+			gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
+			npage++;
+			ret = -EFAULT;
+			goto err;
+		}
+
+		if (npage == 0)
+			base_pfn = pfn;
+		else if (base_pfn + npage != pfn) {
+			gvt_vgpu_err("The pages are not continuous\n");
+			ret = -EINVAL;
+			npage++;
+			goto err;
+		}
 	}
 
-	/* Setup DMA mapping. */
-	page = pfn_to_page(pfn);
-	*dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE,
-				 PCI_DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(dev, *dma_addr)) {
-		gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn);
-		vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
-		return -ENOMEM;
+	*page = pfn_to_page(base_pfn);
+	return 0;
+err:
+	gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
+	return ret;
+}
+
+static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
+		dma_addr_t *dma_addr, unsigned long size)
+{
+	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
+	struct page *page = NULL;
+	int ret;
+
+	ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
+	if (ret)
+		return ret;
+
+	/* Setup DMA mapping. */
+	*dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL);
+	ret = dma_mapping_error(dev, *dma_addr);
+	if (ret) {
+		gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n",
+			     page_to_pfn(page), ret);
+		gvt_unpin_guest_page(vgpu, gfn, size);
 	}
 
-	return 0;
+	return ret;
 }
 
 static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
-		dma_addr_t dma_addr)
+		dma_addr_t dma_addr, unsigned long size)
 {
 	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
-	int ret;
 
-	dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1);
-	WARN_ON(ret != 1);
+	dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
+	gvt_unpin_guest_page(vgpu, gfn, size);
 }
 
 static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
@@ -185,7 +244,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
 }
 
 static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
-		dma_addr_t dma_addr)
+		dma_addr_t dma_addr, unsigned long size)
 {
 	struct gvt_dma *new, *itr;
 	struct rb_node **link, *parent = NULL;
@@ -197,6 +256,7 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 	new->vgpu = vgpu;
 	new->gfn = gfn;
 	new->dma_addr = dma_addr;
+	new->size = size;
 	kref_init(&new->ref);
 
 	/* gfn_cache maps gfn to struct gvt_dma. */
@@ -254,7 +314,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu)
 			break;
 		}
 		dma = rb_entry(node, struct gvt_dma, gfn_node);
-		gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr);
+		gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
 		__gvt_cache_remove_entry(vgpu, dma);
 		mutex_unlock(&vgpu->vdev.cache_lock);
 	}
@@ -509,7 +569,8 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
 			if (!entry)
 				continue;
 
-			gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr);
+			gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
+					   entry->size);
 			__gvt_cache_remove_entry(vgpu, entry);
 		}
 		mutex_unlock(&vgpu->vdev.cache_lock);
@@ -1616,7 +1677,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
 }
 
 int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
-		dma_addr_t *dma_addr)
+		unsigned long size, dma_addr_t *dma_addr)
 {
 	struct kvmgt_guest_info *info;
 	struct intel_vgpu *vgpu;
@@ -1633,11 +1694,11 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
 
 	entry = __gvt_cache_find_gfn(info->vgpu, gfn);
 	if (!entry) {
-		ret = gvt_dma_map_page(vgpu, gfn, dma_addr);
+		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
 		if (ret)
 			goto err_unlock;
 
-		ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr);
+		ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size);
 		if (ret)
 			goto err_unmap;
 	} else {
@@ -1649,7 +1710,7 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn,
 	return 0;
 
 err_unmap:
-	gvt_dma_unmap_page(vgpu, gfn, *dma_addr);
+	gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
 err_unlock:
 	mutex_unlock(&info->vgpu->vdev.cache_lock);
 	return ret;
@@ -1659,7 +1720,8 @@ static void __gvt_dma_release(struct kref *ref)
 {
 	struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
 
-	gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr);
+	gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
+			   entry->size);
 	__gvt_cache_remove_entry(entry->vgpu, entry);
 }
 
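The pinning loop above hands the range to dma_map_page() only when the pinned pfns form one physically contiguous run. A minimal userspace sketch of that check, with a mock page size and made-up pfn values (none of these names appear in the patch):

	#include <stdio.h>

	#define MOCK_PAGE_SIZE 4096UL
	/* Userspace stand-in for the kernel's roundup(). */
	#define ROUNDUP(x, y)  ((((x) + (y) - 1) / (y)) * (y))

	/* Mirrors the rule in gvt_pin_guest_page(): the pfn pinned for
	 * page npage must equal base_pfn + npage, otherwise the range
	 * cannot back a single compound DMA mapping. */
	static int pfns_contiguous(const unsigned long *pfns, int total_pages)
	{
		int npage;

		for (npage = 1; npage < total_pages; npage++)
			if (pfns[0] + npage != pfns[npage])
				return 0;
		return 1;
	}

	int main(void)
	{
		unsigned long size = 16 * 1024;	/* four small pages */
		int total_pages = ROUNDUP(size, MOCK_PAGE_SIZE) / MOCK_PAGE_SIZE;
		unsigned long good[] = { 4096, 4097, 4098, 4099 };
		unsigned long bad[]  = { 4096, 4097, 5000, 5001 };

		printf("total_pages = %d\n", total_pages);	/* 4 */
		printf("good: %d\n", pfns_contiguous(good, total_pages)); /* 1 */
		printf("bad:  %d\n", pfns_contiguous(bad, total_pages));  /* 0 */
		return 0;
	}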
drivers/gpu/drm/i915/gvt/mpt.h  +4 −3

@@ -230,17 +230,18 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn(
 /**
  * intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page
  * @vgpu: a vGPU
- * @gpfn: guest pfn
+ * @gfn: guest pfn
+ * @size: page size
  * @dma_addr: retrieve allocated dma addr
  *
  * Returns:
  * 0 on success, negative error code if failed.
  */
 static inline int intel_gvt_hypervisor_dma_map_guest_page(
-		struct intel_vgpu *vgpu, unsigned long gfn,
+		struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size,
 		dma_addr_t *dma_addr)
 {
-	return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn,
+	return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, size,
 						      dma_addr);
 }
 
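With the size parameter threaded through, a huge-page shadow path can map a whole 64K range in one call, roughly as below. I915_GTT_PAGE_SIZE_64K is assumed from i915's GTT size definitions; it is not introduced by this patch:

	dma_addr_t dma_addr;
	int ret;

	/* One pinning pass and one dma_map_page() covering all sixteen
	 * small pages of the 64K range (size constant assumed, see above). */
	ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
					I915_GTT_PAGE_SIZE_64K, &dma_addr);
	if (ret)
		return ret;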