Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 62d02fd1 authored by Chuanxiao Dong's avatar Chuanxiao Dong Committed by Zhenyu Wang
Browse files

drm/i915/gvt: Fix possible recursive locking issue



vfio_unpin_pages will hold a read semaphore however it is already hold
in the same thread by vfio ioctl. It will cause below warning:

[ 5102.127454] ============================================
[ 5102.133379] WARNING: possible recursive locking detected
[ 5102.139304] 4.12.0-rc4+ #3 Not tainted
[ 5102.143483] --------------------------------------------
[ 5102.149407] qemu-system-x86/1620 is trying to acquire lock:
[ 5102.155624]  (&container->group_lock){++++++}, at: [<ffffffff817768c6>] vfio_unpin_pages+0x96/0xf0
[ 5102.165626]
but task is already holding lock:
[ 5102.172134]  (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280
[ 5102.182522]
other info that might help us debug this:
[ 5102.189806]  Possible unsafe locking scenario:

[ 5102.196411]        CPU0
[ 5102.199136]        ----
[ 5102.201861]   lock(&container->group_lock);
[ 5102.206527]   lock(&container->group_lock);
[ 5102.211191]
*** DEADLOCK ***

[ 5102.217796]  May be due to missing lock nesting notation

[ 5102.225370] 3 locks held by qemu-system-x86/1620:
[ 5102.230618]  #0:  (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280
[ 5102.241482]  #1:  (&(&iommu->notifier)->rwsem){++++..}, at: [<ffffffff810de775>] __blocking_notifier_call_chain+0x35/0x70
[ 5102.253713]  #2:  (&vgpu->vdev.cache_lock){+.+...}, at: [<ffffffff8157b007>] intel_vgpu_iommu_notifier+0x77/0x120
[ 5102.265163]
stack backtrace:
[ 5102.270022] CPU: 5 PID: 1620 Comm: qemu-system-x86 Not tainted 4.12.0-rc4+ #3
[ 5102.277991] Hardware name: Intel Corporation S1200RP/S1200RP, BIOS S1200RP.86B.03.01.APER.061220151418 06/12/2015
[ 5102.289445] Call Trace:
[ 5102.292175]  dump_stack+0x85/0xc7
[ 5102.295871]  validate_chain.isra.21+0x9da/0xaf0
[ 5102.300925]  __lock_acquire+0x405/0x820
[ 5102.305202]  lock_acquire+0xc7/0x220
[ 5102.309191]  ? vfio_unpin_pages+0x96/0xf0
[ 5102.313666]  down_read+0x2b/0x50
[ 5102.317259]  ? vfio_unpin_pages+0x96/0xf0
[ 5102.321732]  vfio_unpin_pages+0x96/0xf0
[ 5102.326024]  intel_vgpu_iommu_notifier+0xe5/0x120
[ 5102.331283]  notifier_call_chain+0x4a/0x70
[ 5102.335851]  __blocking_notifier_call_chain+0x4d/0x70
[ 5102.341490]  blocking_notifier_call_chain+0x16/0x20
[ 5102.346935]  vfio_iommu_type1_ioctl+0x87b/0x920
[ 5102.351994]  vfio_fops_unl_ioctl+0x81/0x280
[ 5102.356660]  ? __fget+0xf0/0x210
[ 5102.360261]  do_vfs_ioctl+0x93/0x6a0
[ 5102.364247]  ? __fget+0x111/0x210
[ 5102.367942]  SyS_ioctl+0x41/0x70
[ 5102.371542]  entry_SYSCALL_64_fastpath+0x1f/0xbe

put the vfio_unpin_pages in a workqueue can fix this.

v2:
- use for style instead of do{}while(1). (Zhenyu)
v3:
- rename gvt_cache_mark to gvt_cache_mark_remove. (Zhenyu)

Fixes: 659643f7 ("drm/i915/gvt/kvmgt: add vfio/mdev support to KVMGT")
Signed-off-by: default avatarChuanxiao Dong <chuanxiao.dong@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: stable@vger.kernel.org # v4.10+
Signed-off-by: default avatarZhenyu Wang <zhenyuw@linux.intel.com>
parent e274086e
Loading
Loading
Loading
Loading
+3 −0
Original line number Original line Diff line number Diff line
@@ -183,6 +183,9 @@ struct intel_vgpu {
		struct kvm *kvm;
		struct kvm *kvm;
		struct work_struct release_work;
		struct work_struct release_work;
		atomic_t released;
		atomic_t released;
		struct work_struct unpin_work;
		spinlock_t unpin_lock; /* To protect unpin_list */
		struct list_head unpin_list;
	} vdev;
	} vdev;
#endif
#endif
};
};
+45 −10
Original line number Original line Diff line number Diff line
@@ -78,6 +78,7 @@ struct gvt_dma {
	struct rb_node node;
	struct rb_node node;
	gfn_t gfn;
	gfn_t gfn;
	unsigned long iova;
	unsigned long iova;
	struct list_head list;
};
};


static inline bool handle_valid(unsigned long handle)
static inline bool handle_valid(unsigned long handle)
@@ -166,6 +167,7 @@ static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,


	new->gfn = gfn;
	new->gfn = gfn;
	new->iova = iova;
	new->iova = iova;
	INIT_LIST_HEAD(&new->list);


	mutex_lock(&vgpu->vdev.cache_lock);
	mutex_lock(&vgpu->vdev.cache_lock);
	while (*link) {
	while (*link) {
@@ -197,26 +199,52 @@ static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
	kfree(entry);
	kfree(entry);
}
}


static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
static void intel_vgpu_unpin_work(struct work_struct *work)
{
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					       vdev.unpin_work);
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	struct gvt_dma *this;
	struct gvt_dma *this;
	unsigned long g1;
	unsigned long gfn;
	int rc;

	for (;;) {
		spin_lock(&vgpu->vdev.unpin_lock);
		if (list_empty(&vgpu->vdev.unpin_list)) {
			spin_unlock(&vgpu->vdev.unpin_lock);
			break;
		}
		this = list_first_entry(&vgpu->vdev.unpin_list,
					struct gvt_dma, list);
		list_del(&this->list);
		spin_unlock(&vgpu->vdev.unpin_lock);

		gfn = this->gfn;
		vfio_unpin_pages(dev, &gfn, 1);
		kfree(this);
	}
}

static bool gvt_cache_mark_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *this;


	mutex_lock(&vgpu->vdev.cache_lock);
	mutex_lock(&vgpu->vdev.cache_lock);
	this  = __gvt_cache_find(vgpu, gfn);
	this  = __gvt_cache_find(vgpu, gfn);
	if (!this) {
	if (!this) {
		mutex_unlock(&vgpu->vdev.cache_lock);
		mutex_unlock(&vgpu->vdev.cache_lock);
		return;
		return false;
	}
	}

	g1 = gfn;
	gvt_dma_unmap_iova(vgpu, this->iova);
	gvt_dma_unmap_iova(vgpu, this->iova);
	rc = vfio_unpin_pages(dev, &g1, 1);
	/* remove this from rb tree */
	WARN_ON(rc != 1);
	rb_erase(&this->node, &vgpu->vdev.cache);
	__gvt_cache_remove_entry(vgpu, this);
	mutex_unlock(&vgpu->vdev.cache_lock);
	mutex_unlock(&vgpu->vdev.cache_lock);

	/* put this to the unpin_list */
	spin_lock(&vgpu->vdev.unpin_lock);
	list_move_tail(&this->list, &vgpu->vdev.unpin_list);
	spin_unlock(&vgpu->vdev.unpin_lock);

	return true;
}
}


static void gvt_cache_init(struct intel_vgpu *vgpu)
static void gvt_cache_init(struct intel_vgpu *vgpu)
@@ -453,6 +481,9 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
	}
	}


	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
	INIT_WORK(&vgpu->vdev.unpin_work, intel_vgpu_unpin_work);
	spin_lock_init(&vgpu->vdev.unpin_lock);
	INIT_LIST_HEAD(&vgpu->vdev.unpin_list);


	vgpu->vdev.mdev = mdev;
	vgpu->vdev.mdev = mdev;
	mdev_set_drvdata(mdev, vgpu);
	mdev_set_drvdata(mdev, vgpu);
@@ -482,6 +513,7 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
	struct intel_vgpu *vgpu = container_of(nb,
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					struct intel_vgpu,
					vdev.iommu_notifier);
					vdev.iommu_notifier);
	bool sched_unmap = false;


	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		struct vfio_iommu_type1_dma_unmap *unmap = data;
@@ -491,7 +523,10 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
		end_gfn = gfn + unmap->size / PAGE_SIZE;
		end_gfn = gfn + unmap->size / PAGE_SIZE;


		while (gfn < end_gfn)
		while (gfn < end_gfn)
			gvt_cache_remove(vgpu, gfn++);
			sched_unmap |= gvt_cache_mark_remove(vgpu, gfn++);

		if (sched_unmap)
			schedule_work(&vgpu->vdev.unpin_work);
	}
	}


	return NOTIFY_OK;
	return NOTIFY_OK;