Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit acd7c8fe authored by Tadeusz Struk, committed by Doug Ledford
Browse files

IB/hfi1: Fix an Oops on pci device force remove



This patch fixes an Oops on device unbind, when the device is used
by a PSM user process. PSM processes access device resources which
are freed on device removal. Similar protection exists in uverbs
in ib_core for Verbs clients, but PSM doesn't use ib_uverbs hence
a separate protection is required for PSM clients.

Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dean Luick <dean.luick@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent d9ac4555
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -14691,6 +14691,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
	if (ret)
		goto bail_free_cntrs;

	init_completion(&dd->user_comp);

	/* The user refcount starts with one to indicate an active device */
	atomic_set(&dd->user_refcount, 1);

	goto bail;

bail_free_rcverr:
+16 −3
Original line number Diff line number Diff line
@@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
					       struct hfi1_devdata,
					       user_cdev);

	if (!atomic_inc_not_zero(&dd->user_refcount))
		return -ENXIO;

	/* Just take a ref now. Not all opens result in a context assign */
	kobject_get(&dd->kobj);

@@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
		fd->rec_cpu_num = -1; /* no cpu affinity by default */
		fd->mm = current->mm;
		atomic_inc(&fd->mm->mm_count);
	}

		fp->private_data = fd;
	} else {
		fp->private_data = NULL;

		if (atomic_dec_and_test(&dd->user_refcount))
			complete(&dd->user_comp);

	return fd ? 0 : -ENOMEM;
		return -ENOMEM;
	}

	return 0;
}

static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
done:
	mmdrop(fdata->mm);
	kobject_put(&dd->kobj);

	if (atomic_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	kfree(fdata);
	return 0;
}
+4 −0
Original line number Diff line number Diff line
@@ -1174,6 +1174,10 @@ struct hfi1_devdata {
	spinlock_t aspm_lock;
	/* Number of verbs contexts which have disabled ASPM */
	atomic_t aspm_disabled_cnt;
	/* Keeps track of user space clients */
	atomic_t user_refcount;
	/* Used to wait for outstanding user space clients before dev removal */
	struct completion user_comp;

	struct hfi1_affinity *affinity;
	struct rhashtable sdma_rht;
+19 −2
Original line number Diff line number Diff line
@@ -1538,12 +1538,31 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
	return ret;
}

/*
 * wait_for_clients - block until the user client count has drained
 * @dd: device data holding the user_refcount / user_comp pair
 *
 * Drops one reference from user_refcount. If that decrement brings the
 * count to zero there are no clients left, so user_comp is signalled
 * right here; otherwise the signal has to come from whichever path drops
 * the final reference. In both cases this function does not return until
 * user_comp has been completed.
 */
static void wait_for_clients(struct hfi1_devdata *dd)
{
	struct completion *drained = &dd->user_comp;

	/* Last reference gone: nobody else will signal, so do it ourselves. */
	if (atomic_dec_and_test(&dd->user_refcount))
		complete(drained);

	wait_for_completion(drained);
}

static void remove_one(struct pci_dev *pdev)
{
	struct hfi1_devdata *dd = pci_get_drvdata(pdev);

	/* close debugfs files before ib unregister */
	hfi1_dbg_ibdev_exit(&dd->verbs_dev);

	/* remove the /dev hfi1 interface */
	hfi1_device_remove(dd);

	/* wait for existing user space clients to finish */
	wait_for_clients(dd);

	/* unregister from IB core */
	hfi1_unregister_ib_device(dd);

@@ -1558,8 +1577,6 @@ static void remove_one(struct pci_dev *pdev)
	/* wait until all of our (qsfp) queue_work() calls complete */
	flush_workqueue(ib_wq);

	hfi1_device_remove(dd);

	postinit_cleanup(dd);
}