Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8c194f3b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'vfio-v4.1-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - VFIO platform bus driver support (Baptiste Reynal, Antonios Motakis,
   testing and review by Eric Auger)

 - Split VFIO irqfd support to separate module (Alex Williamson)

 - vfio-pci VGA arbiter client (Alex Williamson)

 - New vfio-pci.ids= module option (Alex Williamson)

 - vfio-pci D3 power state support for idle devices (Alex Williamson)

* tag 'vfio-v4.1-rc1' of git://github.com/awilliam/linux-vfio: (30 commits)
  vfio-pci: Fix use after free
  vfio-pci: Move idle devices to D3hot power state
  vfio-pci: Remove warning if try-reset fails
  vfio-pci: Allow PCI IDs to be specified as module options
  vfio-pci: Add VGA arbiter client
  vfio-pci: Add module option to disable VGA region access
  vgaarb: Stub vga_set_legacy_decoding()
  vfio: Split virqfd into a separate module for vfio bus drivers
  vfio: virqfd_lock can be static
  vfio: put off the allocation of "minor" in vfio_create_group
  vfio/platform: implement IRQ masking/unmasking via an eventfd
  vfio: initialize the virqfd workqueue in VFIO generic code
  vfio: move eventfd support code for VFIO_PCI to a separate file
  vfio: pass an opaque pointer on virqfd initialization
  vfio: add local lock for virqfd instead of depending on VFIO PCI
  vfio: virqfd: rename vfio_pci_virqfd_init and vfio_pci_virqfd_exit
  vfio: add a vfio_ prefix to virqfd_enable and virqfd_disable and export
  vfio/platform: support for level sensitive interrupts
  vfio/platform: trigger an interrupt via eventfd
  vfio/platform: initial interrupts support code
  ...
parents 07e492eb 5a0ff177
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -13,6 +13,11 @@ config VFIO_SPAPR_EEH
	depends on EEH && VFIO_IOMMU_SPAPR_TCE
	default n

config VFIO_VIRQFD
	tristate
	depends on VFIO && EVENTFD
	default n

menuconfig VFIO
	tristate "VFIO Non-Privileged userspace driver framework"
	depends on IOMMU_API
@@ -27,3 +32,4 @@ menuconfig VFIO
	  If you don't know what to do here, say N.

source "drivers/vfio/pci/Kconfig"
source "drivers/vfio/platform/Kconfig"
+4 −0
Original line number Diff line number Diff line
vfio_virqfd-y := virqfd.o

obj-$(CONFIG_VFIO) += vfio.o
obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
obj-$(CONFIG_VFIO_PCI) += pci/
obj-$(CONFIG_VFIO_PLATFORM) += platform/
+1 −0
Original line number Diff line number Diff line
config VFIO_PCI
	tristate "VFIO support for PCI devices"
	depends on VFIO && PCI && EVENTFD
	select VFIO_VIRQFD
	help
	  Support for the PCI VFIO bus driver.  This is required to make
	  use of PCI drivers using the VFIO framework.
+163 −25
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@
 * Author: Tom Lyon, pugs@cisco.com
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/file.h>
@@ -25,6 +27,7 @@
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>

#include "vfio_pci_private.h"

@@ -32,13 +35,81 @@
#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC     "VFIO PCI - User Level meta-driver"

static char ids[1024] __initdata;
module_param_string(ids, ids, sizeof(ids), 0);
MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified");

static bool nointxmask;
module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(nointxmask,
		  "Disable support for PCI 2.3 style INTx masking.  If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");

#ifdef CONFIG_VFIO_PCI_VGA
static bool disable_vga;
module_param(disable_vga, bool, S_IRUGO);
MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci");
#endif

static bool disable_idle_d3;
module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable_idle_d3,
		 "Disable using the PCI D3 low power state for idle, unused devices");

static DEFINE_MUTEX(driver_lock);

static inline bool vfio_vga_disabled(void)
{
#ifdef CONFIG_VFIO_PCI_VGA
	return disable_vga;
#else
	return true;
#endif
}

/*
 * Our VGA arbiter participation is limited since we don't know anything
 * about the device itself.  However, if the device is the only VGA device
 * downstream of a bridge and VFIO VGA support is disabled, then we can
 * safely return legacy VGA IO and memory as not decoded since the user
 * has no way to get to it and routing can be disabled externally at the
 * bridge.
 */
static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga)
{
	struct vfio_pci_device *vdev = opaque;
	struct pci_dev *tmp = NULL, *pdev = vdev->pdev;
	unsigned char max_busnr;
	unsigned int decodes;

	if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
		       VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;

	max_busnr = pci_bus_max_busnr(pdev->bus);
	decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;

	while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {
		if (tmp == pdev ||
		    pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||
		    pci_is_root_bus(tmp->bus))
			continue;

		if (tmp->bus->number >= pdev->bus->number &&
		    tmp->bus->number <= max_busnr) {
			pci_dev_put(tmp);
			decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
			break;
		}
	}

	return decodes;
}

static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
{
	return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
}

static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);

static int vfio_pci_enable(struct vfio_pci_device *vdev)
@@ -48,6 +119,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
	u16 cmd;
	u8 msix_pos;

	pci_set_power_state(pdev, PCI_D0);

	/* Don't allow our initial saved state to include busmaster */
	pci_clear_master(pdev);

@@ -93,10 +166,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
	} else
		vdev->msix_bar = 0xFF;

#ifdef CONFIG_VFIO_PCI_VGA
	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
		vdev->has_vga = true;
#endif

	return 0;
}
@@ -153,20 +224,17 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
	 * Try to reset the device.  The success of this is dependent on
	 * being able to lock the device, which is not always possible.
	 */
	if (vdev->reset_works) {
		int ret = pci_try_reset_function(pdev);
		if (ret)
			pr_warn("%s: Failed to reset device %s (%d)\n",
				__func__, dev_name(&pdev->dev), ret);
		else
	if (vdev->reset_works && !pci_try_reset_function(pdev))
		vdev->needs_reset = false;
	}

	pci_restore_state(pdev);
out:
	pci_disable_device(pdev);

	vfio_pci_try_bus_reset(vdev);

	if (!disable_idle_d3)
		pci_set_power_state(pdev, PCI_D3hot);
}

static void vfio_pci_release(void *device_data)
@@ -885,6 +953,27 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	if (ret) {
		iommu_group_put(group);
		kfree(vdev);
		return ret;
	}

	if (vfio_pci_is_vga(pdev)) {
		vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
		vga_set_legacy_decoding(pdev,
					vfio_pci_set_vga_decode(vdev, false));
	}

	if (!disable_idle_d3) {
		/*
		 * pci-core sets the device power state to an unknown value at
		 * bootup and after being removed from a driver.  The only
		 * transition it allows from this unknown state is to D0, which
		 * typically happens when a driver calls pci_enable_device().
		 * We're not ready to enable the device yet, but we do want to
		 * be able to get to D3.  Therefore first do a D0 transition
		 * before going to D3.
		 */
		pci_set_power_state(pdev, PCI_D0);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return ret;
@@ -895,10 +984,21 @@ static void vfio_pci_remove(struct pci_dev *pdev)
	struct vfio_pci_device *vdev;

	vdev = vfio_del_group_dev(&pdev->dev);
	if (vdev) {
	if (!vdev)
		return;

	iommu_group_put(pdev->dev.iommu_group);
	kfree(vdev);

	if (vfio_pci_is_vga(pdev)) {
		vga_client_register(pdev, NULL, NULL, NULL);
		vga_set_legacy_decoding(pdev,
				VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
				VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM);
	}

	if (!disable_idle_d3)
		pci_set_power_state(pdev, PCI_D0);
}

static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -1017,10 +1117,13 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)

put_devs:
	for (i = 0; i < devs.cur_index; i++) {
		if (!ret) {
		tmp = vfio_device_data(devs.devices[i]);
		if (!ret)
			tmp->needs_reset = false;
		}

		if (!tmp->refcnt && !disable_idle_d3)
			pci_set_power_state(tmp->pdev, PCI_D3hot);

		vfio_device_put(devs.devices[i]);
	}

@@ -1030,10 +1133,50 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
static void __exit vfio_pci_cleanup(void)
{
	pci_unregister_driver(&vfio_pci_driver);
	vfio_pci_virqfd_exit();
	vfio_pci_uninit_perm_bits();
}

static void __init vfio_pci_fill_ids(void)
{
	char *p, *id;
	int rc;

	/* no ids passed actually */
	if (ids[0] == '\0')
		return;

	/* add ids specified in the module parameter */
	p = ids;
	while ((id = strsep(&p, ","))) {
		unsigned int vendor, device, subvendor = PCI_ANY_ID,
			subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
		int fields;

		if (!strlen(id))
			continue;

		fields = sscanf(id, "%x:%x:%x:%x:%x:%x",
				&vendor, &device, &subvendor, &subdevice,
				&class, &class_mask);

		if (fields < 2) {
			pr_warn("invalid id string \"%s\"\n", id);
			continue;
		}

		rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
				   subvendor, subdevice, class, class_mask, 0);
		if (rc)
			pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n",
				vendor, device, subvendor, subdevice,
				class, class_mask, rc);
		else
			pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n",
				vendor, device, subvendor, subdevice,
				class, class_mask);
	}
}

static int __init vfio_pci_init(void)
{
	int ret;
@@ -1043,21 +1186,16 @@ static int __init vfio_pci_init(void)
	if (ret)
		return ret;

	/* Start the virqfd cleanup handler */
	ret = vfio_pci_virqfd_init();
	if (ret)
		goto out_virqfd;

	/* Register and scan for devices */
	ret = pci_register_driver(&vfio_pci_driver);
	if (ret)
		goto out_driver;

	vfio_pci_fill_ids();

	return 0;

out_driver:
	vfio_pci_virqfd_exit();
out_virqfd:
	vfio_pci_uninit_perm_bits();
	return ret;
}
+14 −224
Original line number Diff line number Diff line
@@ -19,230 +19,19 @@
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

#include "vfio_pci_private.h"

/*
 * IRQfd - generic
 */
struct virqfd {
	struct vfio_pci_device	*vdev;
	struct eventfd_ctx	*eventfd;
	int			(*handler)(struct vfio_pci_device *, void *);
	void			(*thread)(struct vfio_pci_device *, void *);
	void			*data;
	struct work_struct	inject;
	wait_queue_t		wait;
	poll_table		pt;
	struct work_struct	shutdown;
	struct virqfd		**pvirqfd;
};

static struct workqueue_struct *vfio_irqfd_cleanup_wq;

int __init vfio_pci_virqfd_init(void)
{
	vfio_irqfd_cleanup_wq =
		create_singlethread_workqueue("vfio-irqfd-cleanup");
	if (!vfio_irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void vfio_pci_virqfd_exit(void)
{
	destroy_workqueue(vfio_irqfd_cleanup_wq);
}

static void virqfd_deactivate(struct virqfd *virqfd)
{
	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
}

static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
	unsigned long flags = (unsigned long)key;

	if (flags & POLLIN) {
		/* An event has been signaled, call function */
		if ((!virqfd->handler ||
		     virqfd->handler(virqfd->vdev, virqfd->data)) &&
		    virqfd->thread)
			schedule_work(&virqfd->inject);
	}

	if (flags & POLLHUP) {
		unsigned long flags;
		spin_lock_irqsave(&virqfd->vdev->irqlock, flags);

		/*
		 * The eventfd is closing, if the virqfd has not yet been
		 * queued for release, as determined by testing whether the
		 * vdev pointer to it is still valid, queue it now.  As
		 * with kvm irqfds, we know we won't race against the virqfd
		 * going away because we hold wqh->lock to get here.
		 */
		if (*(virqfd->pvirqfd) == virqfd) {
			*(virqfd->pvirqfd) = NULL;
			virqfd_deactivate(virqfd);
		}

		spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags);
	}

	return 0;
}

static void virqfd_ptable_queue_proc(struct file *file,
				     wait_queue_head_t *wqh, poll_table *pt)
{
	struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
	add_wait_queue(wqh, &virqfd->wait);
}

static void virqfd_shutdown(struct work_struct *work)
{
	struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
	u64 cnt;

	eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
	flush_work(&virqfd->inject);
	eventfd_ctx_put(virqfd->eventfd);

	kfree(virqfd);
}

static void virqfd_inject(struct work_struct *work)
{
	struct virqfd *virqfd = container_of(work, struct virqfd, inject);
	if (virqfd->thread)
		virqfd->thread(virqfd->vdev, virqfd->data);
}

static int virqfd_enable(struct vfio_pci_device *vdev,
			 int (*handler)(struct vfio_pci_device *, void *),
			 void (*thread)(struct vfio_pci_device *, void *),
			 void *data, struct virqfd **pvirqfd, int fd)
{
	struct fd irqfd;
	struct eventfd_ctx *ctx;
	struct virqfd *virqfd;
	int ret = 0;
	unsigned int events;

	virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
	if (!virqfd)
		return -ENOMEM;

	virqfd->pvirqfd = pvirqfd;
	virqfd->vdev = vdev;
	virqfd->handler = handler;
	virqfd->thread = thread;
	virqfd->data = data;

	INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
	INIT_WORK(&virqfd->inject, virqfd_inject);

	irqfd = fdget(fd);
	if (!irqfd.file) {
		ret = -EBADF;
		goto err_fd;
	}

	ctx = eventfd_ctx_fileget(irqfd.file);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto err_ctx;
	}

	virqfd->eventfd = ctx;

	/*
	 * virqfds can be released by closing the eventfd or directly
	 * through ioctl.  These are both done through a workqueue, so
	 * we update the pointer to the virqfd under lock to avoid
	 * pushing multiple jobs to release the same virqfd.
	 */
	spin_lock_irq(&vdev->irqlock);

	if (*pvirqfd) {
		spin_unlock_irq(&vdev->irqlock);
		ret = -EBUSY;
		goto err_busy;
	}
	*pvirqfd = virqfd;

	spin_unlock_irq(&vdev->irqlock);

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd.
	 */
	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);

	events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered and trigger it as if we didn't miss it.
	 */
	if (events & POLLIN) {
		if ((!handler || handler(vdev, data)) && thread)
			schedule_work(&virqfd->inject);
	}

	/*
	 * Do not drop the file until the irqfd is fully initialized,
	 * otherwise we might race against the POLLHUP.
	 */
	fdput(irqfd);

	return 0;
err_busy:
	eventfd_ctx_put(ctx);
err_ctx:
	fdput(irqfd);
err_fd:
	kfree(virqfd);

	return ret;
}

static void virqfd_disable(struct vfio_pci_device *vdev,
			   struct virqfd **pvirqfd)
{
	unsigned long flags;

	spin_lock_irqsave(&vdev->irqlock, flags);

	if (*pvirqfd) {
		virqfd_deactivate(*pvirqfd);
		*pvirqfd = NULL;
	}

	spin_unlock_irqrestore(&vdev->irqlock, flags);

	/*
	 * Block until we know all outstanding shutdown jobs have completed.
	 * Even if we don't queue the job, flush the wq to be sure it's
	 * been released.
	 */
	flush_workqueue(vfio_irqfd_cleanup_wq);
}

/*
 * INTx
 */
static void vfio_send_intx_eventfd(struct vfio_pci_device *vdev, void *unused)
static void vfio_send_intx_eventfd(void *opaque, void *unused)
{
	struct vfio_pci_device *vdev = opaque;

	if (likely(is_intx(vdev) && !vdev->virq_disabled))
		eventfd_signal(vdev->ctx[0].trigger, 1);
}
@@ -285,9 +74,9 @@ void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
 * a signal is necessary, which can then be handled via a work queue
 * or directly depending on the caller.
 */
static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev,
					void *unused)
static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
{
	struct vfio_pci_device *vdev = opaque;
	struct pci_dev *pdev = vdev->pdev;
	unsigned long flags;
	int ret = 0;
@@ -440,8 +229,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
static void vfio_intx_disable(struct vfio_pci_device *vdev)
{
	vfio_intx_set_signal(vdev, -1);
	virqfd_disable(vdev, &vdev->ctx[0].unmask);
	virqfd_disable(vdev, &vdev->ctx[0].mask);
	vfio_virqfd_disable(&vdev->ctx[0].unmask);
	vfio_virqfd_disable(&vdev->ctx[0].mask);
	vdev->irq_type = VFIO_PCI_NUM_IRQS;
	vdev->num_ctx = 0;
	kfree(vdev->ctx);
@@ -605,8 +394,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);

	for (i = 0; i < vdev->num_ctx; i++) {
		virqfd_disable(vdev, &vdev->ctx[i].unmask);
		virqfd_disable(vdev, &vdev->ctx[i].mask);
		vfio_virqfd_disable(&vdev->ctx[i].unmask);
		vfio_virqfd_disable(&vdev->ctx[i].mask);
	}

	if (msix) {
@@ -639,11 +428,12 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int32_t fd = *(int32_t *)data;
		if (fd >= 0)
			return virqfd_enable(vdev, vfio_pci_intx_unmask_handler,
			return vfio_virqfd_enable((void *) vdev,
						  vfio_pci_intx_unmask_handler,
						  vfio_send_intx_eventfd, NULL,
						  &vdev->ctx[0].unmask, fd);

		virqfd_disable(vdev, &vdev->ctx[0].unmask);
		vfio_virqfd_disable(&vdev->ctx[0].unmask);
	}

	return 0;
Loading