Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8a98f664 authored by Xiantao Zhang, committed by Avi Kivity
Browse files

KVM: Move device assignment logic to common code



To share with other archs, this patch moves device assignment
logic to common parts.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
parent 371c01b2
Loading
Loading
Loading
Loading
+0 −255
Original line number Diff line number Diff line
@@ -30,7 +30,6 @@
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
@@ -107,238 +106,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ NULL }
};

static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *ptr;
	struct kvm_assigned_dev_kernel *match;

	list_for_each(ptr, head) {
		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}

static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
{
	struct kvm_assigned_dev_kernel *assigned_dev;

	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
				    interrupt_work);

	/* This is taken to safely inject irq inside the guest. When
	 * the interrupt injection (or the ioapic code) uses a
	 * finer-grained lock, update this
	 */
	mutex_lock(&assigned_dev->kvm->lock);
	kvm_set_irq(assigned_dev->kvm,
		    assigned_dev->guest_irq, 1);
	mutex_unlock(&assigned_dev->kvm->lock);
	kvm_put_kvm(assigned_dev->kvm);
}

/*
 * Interrupt handler installed with request_irq() for an assigned device;
 * @dev_id is the device's struct kvm_assigned_dev_kernel.
 *
 * FIXME: Implement the OR logic needed to make shared interrupts on
 * this line behave properly
 */
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *adev = dev_id;

	/* Hold a VM reference until the queued work item has run. */
	kvm_get_kvm(adev->kvm);
	schedule_work(&adev->interrupt_work);

	/* Keep the line quiet; kvm_assigned_dev_ack_irq() calls enable_irq(). */
	disable_irq_nosync(irq);

	return IRQ_HANDLED;
}

/*
 * Ack the irq line for an assigned device: lower the guest irq and
 * re-enable the host irq. Notifiers whose gsi is -1 are ignored.
 */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *adev;

	if (kian->gsi != -1) {
		adev = container_of(kian, struct kvm_assigned_dev_kernel,
				    ack_notifier);
		kvm_set_irq(adev->kvm, adev->guest_irq, 0);
		enable_irq(adev->host_irq);
	}
}

/*
 * Tear down one assigned device and free it.
 *
 * Releases the host irq (if one was requested), unregisters the irq ack
 * notifier, cancels the interrupt work item, releases the PCI device,
 * unlinks @assigned_dev from its list and frees it. @assigned_dev must
 * not be used after this returns.
 *
 * NOTE(review): the ack notifier is unregistered and the work item is
 * cancelled unconditionally, but both are only set up in
 * kvm_vm_ioctl_assign_irq() -- confirm this is safe for a device that
 * never had an irq assigned.
 */
static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	/* Only free the host irq when request_irq() was actually issued. */
	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
		free_irq(assigned_dev->host_irq, (void *)assigned_dev);

	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);

	if (cancel_work_sync(&assigned_dev->interrupt_work))
		/* We had pending work. That means the work handler will
		 * never run to drop the reference taken by the interrupt
		 * handler, so we have to take care of kvm_put_kvm here.
		 */
		kvm_put_kvm(kvm);

	/* Undo pci_request_regions / pci_enable_device / the device get. */
	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}

/* Free every device linked on kvm->arch.assigned_dev_head. */
static void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *pos, *next;

	/*
	 * The _safe iterator is required: kvm_free_assigned_device()
	 * unlinks and frees the entry that contains pos.
	 */
	list_for_each_safe(pos, next, &kvm->arch.assigned_dev_head) {
		kvm_free_assigned_device(kvm,
					 list_entry(pos,
						    struct kvm_assigned_dev_kernel,
						    list));
	}
}

static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq
				   *assigned_irq)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match) {
		mutex_unlock(&kvm->lock);
		return -EINVAL;
	}

	if (match->irq_requested) {
		match->guest_irq = assigned_irq->guest_irq;
		match->ack_notifier.gsi = assigned_irq->guest_irq;
		mutex_unlock(&kvm->lock);
		return 0;
	}

	INIT_WORK(&match->interrupt_work,
		  kvm_assigned_dev_interrupt_work_handler);

	if (irqchip_in_kernel(kvm)) {
		if (!capable(CAP_SYS_RAWIO)) {
			r = -EPERM;
			goto out_release;
		}

		if (assigned_irq->host_irq)
			match->host_irq = assigned_irq->host_irq;
		else
			match->host_irq = match->dev->irq;
		match->guest_irq = assigned_irq->guest_irq;
		match->ack_notifier.gsi = assigned_irq->guest_irq;
		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);

		/* Even though this is PCI, we don't want to use shared
		 * interrupts. Sharing host devices with guest-assigned devices
		 * on the same interrupt line is not a happy situation: there
		 * are going to be long delays in accepting, acking, etc.
		 */
		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
				"kvm_assigned_device", (void *)match)) {
			r = -EIO;
			goto out_release;
		}
	}

	match->irq_requested = true;
	mutex_unlock(&kvm->lock);
	return r;
out_release:
	mutex_unlock(&kvm->lock);
	kvm_free_assigned_device(kvm, match);
	return r;
}

/*
 * KVM_ASSIGN_PCI_DEVICE: hand a host PCI device over to the guest.
 *
 * @kvm:          VM receiving the device.
 * @assigned_dev: request copied from userspace (assigned_dev_id, busnr,
 *                devfn, flags).
 *
 * Under kvm->lock: rejects an id that is already assigned, then
 * allocates the bookkeeping structure, looks up and enables the PCI
 * device, claims its regions, links the entry on
 * kvm->arch.assigned_dev_head and, when KVM_DEV_ASSIGN_ENABLE_IOMMU is
 * set in flags, calls kvm_iommu_map_guest().
 *
 * Returns 0 on success; -EINVAL (id already assigned or host device not
 * found), -ENOMEM, -EBUSY (device could not be enabled), or the error
 * returned by pci_request_regions() / kvm_iommu_map_guest(). Every
 * failure unwinds whatever was acquired so far.
 */
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	struct kvm_assigned_dev_kernel *adev;
	struct pci_dev *pdev;
	int r = 0;

	mutex_lock(&kvm->lock);

	if (kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				  assigned_dev->assigned_dev_id)) {
		/* device already assigned */
		r = -EINVAL;
		goto out;
	}

	adev = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (!adev) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}

	pdev = pci_get_bus_and_slot(assigned_dev->busnr,
				    assigned_dev->devfn);
	if (!pdev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}

	if (pci_enable_device(pdev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}

	r = pci_request_regions(pdev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}

	adev->assigned_dev_id = assigned_dev->assigned_dev_id;
	adev->host_busnr = assigned_dev->busnr;
	adev->host_devfn = assigned_dev->devfn;
	adev->dev = pdev;
	adev->kvm = kvm;

	list_add(&adev->list, &kvm->arch.assigned_dev_head);

	/* No IOMMU requested: done, r is still 0 from the call above. */
	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
		goto out;

	r = kvm_iommu_map_guest(kvm, adev);
	if (!r)
		goto out;

	/* IOMMU mapping failed: unwind in reverse order of acquisition. */
	list_del(&adev->list);
	pci_release_regions(pdev);
out_disable:
	pci_disable_device(pdev);
out_put:
	pci_dev_put(pdev);
out_free:
	kfree(adev);
out:
	mutex_unlock(&kvm->lock);
	return r;
}

unsigned long segment_base(u16 selector)
{
	struct descriptor_table gdt;
@@ -2030,28 +1797,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
			goto out;
		break;
	}
	case KVM_ASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
	case KVM_GET_PIT: {
		r = -EFAULT;
		if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
+2 −0
Original line number Diff line number Diff line
@@ -383,7 +383,9 @@ struct kvm_trace_rec {
#define KVM_CAP_MP_STATE 14
#define KVM_CAP_COALESCED_MMIO 15
#define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
#ifdef CONFIG_X86
#define KVM_CAP_DEVICE_ASSIGNMENT 17
#endif
#define KVM_CAP_IOMMU 18

/*
+1 −0
Original line number Diff line number Diff line
@@ -281,6 +281,7 @@ void kvm_free_physmem(struct kvm *kvm);

struct  kvm *kvm_arch_create_vm(void);
void kvm_arch_destroy_vm(struct kvm *kvm);
void kvm_free_all_assigned_devices(struct kvm *kvm);

int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
+266 −2
Original line number Diff line number Diff line
@@ -51,6 +51,12 @@
#include "coalesced_mmio.h"
#endif

#ifdef KVM_CAP_DEVICE_ASSIGNMENT
#include <linux/pci.h>
#include <linux/interrupt.h>
#include "irq.h"
#endif

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

@@ -71,6 +77,240 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,

bool kvm_rebooting;

#ifdef KVM_CAP_DEVICE_ASSIGNMENT
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *ptr;
	struct kvm_assigned_dev_kernel *match;

	list_for_each(ptr, head) {
		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}

static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
{
	struct kvm_assigned_dev_kernel *assigned_dev;

	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
				    interrupt_work);

	/* This is taken to safely inject irq inside the guest. When
	 * the interrupt injection (or the ioapic code) uses a
	 * finer-grained lock, update this
	 */
	mutex_lock(&assigned_dev->kvm->lock);
	kvm_set_irq(assigned_dev->kvm,
		    assigned_dev->guest_irq, 1);
	mutex_unlock(&assigned_dev->kvm->lock);
	kvm_put_kvm(assigned_dev->kvm);
}

/*
 * Interrupt handler installed with request_irq() for an assigned device;
 * @dev_id is the device's struct kvm_assigned_dev_kernel.
 *
 * FIXME: Implement the OR logic needed to make shared interrupts on
 * this line behave properly
 */
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *adev = dev_id;

	/* Hold a VM reference until the queued work item has run. */
	kvm_get_kvm(adev->kvm);
	schedule_work(&adev->interrupt_work);

	/* Keep the line quiet; kvm_assigned_dev_ack_irq() calls enable_irq(). */
	disable_irq_nosync(irq);

	return IRQ_HANDLED;
}

/*
 * Ack the irq line for an assigned device: lower the guest irq and
 * re-enable the host irq. Notifiers whose gsi is -1 are ignored.
 */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *adev;

	if (kian->gsi != -1) {
		adev = container_of(kian, struct kvm_assigned_dev_kernel,
				    ack_notifier);
		kvm_set_irq(adev->kvm, adev->guest_irq, 0);
		enable_irq(adev->host_irq);
	}
}

/*
 * Tear down one assigned device and free it.
 *
 * Releases the host irq (if one was requested), unregisters the irq ack
 * notifier, cancels the interrupt work item, releases the PCI device,
 * unlinks @assigned_dev from its list and frees it. @assigned_dev must
 * not be used after this returns.
 *
 * NOTE(review): the ack notifier is unregistered and the work item is
 * cancelled unconditionally, but both are only set up in
 * kvm_vm_ioctl_assign_irq() -- confirm this is safe for a device that
 * never had an irq assigned.
 */
static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	/* Only free the host irq when request_irq() was actually issued. */
	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
		free_irq(assigned_dev->host_irq, (void *)assigned_dev);

	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);

	if (cancel_work_sync(&assigned_dev->interrupt_work))
		/* We had pending work. That means the work handler will
		 * never run to drop the reference taken by the interrupt
		 * handler, so we have to take care of kvm_put_kvm here.
		 */
		kvm_put_kvm(kvm);

	/* Undo pci_request_regions / pci_enable_device / the device get. */
	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}

/* Free every device linked on kvm->arch.assigned_dev_head. */
void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *pos, *next;

	/*
	 * The _safe iterator is required: kvm_free_assigned_device()
	 * unlinks and frees the entry that contains pos.
	 */
	list_for_each_safe(pos, next, &kvm->arch.assigned_dev_head) {
		kvm_free_assigned_device(kvm,
					 list_entry(pos,
						    struct kvm_assigned_dev_kernel,
						    list));
	}
}

static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq
				   *assigned_irq)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match) {
		mutex_unlock(&kvm->lock);
		return -EINVAL;
	}

	if (match->irq_requested) {
		match->guest_irq = assigned_irq->guest_irq;
		match->ack_notifier.gsi = assigned_irq->guest_irq;
		mutex_unlock(&kvm->lock);
		return 0;
	}

	INIT_WORK(&match->interrupt_work,
		  kvm_assigned_dev_interrupt_work_handler);

	if (irqchip_in_kernel(kvm)) {
		if (!capable(CAP_SYS_RAWIO)) {
			r = -EPERM;
			goto out_release;
		}

		if (assigned_irq->host_irq)
			match->host_irq = assigned_irq->host_irq;
		else
			match->host_irq = match->dev->irq;
		match->guest_irq = assigned_irq->guest_irq;
		match->ack_notifier.gsi = assigned_irq->guest_irq;
		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);

		/* Even though this is PCI, we don't want to use shared
		 * interrupts. Sharing host devices with guest-assigned devices
		 * on the same interrupt line is not a happy situation: there
		 * are going to be long delays in accepting, acking, etc.
		 */
		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
				"kvm_assigned_device", (void *)match)) {
			r = -EIO;
			goto out_release;
		}
	}

	match->irq_requested = true;
	mutex_unlock(&kvm->lock);
	return r;
out_release:
	mutex_unlock(&kvm->lock);
	kvm_free_assigned_device(kvm, match);
	return r;
}

/*
 * KVM_ASSIGN_PCI_DEVICE: hand a host PCI device over to the guest.
 *
 * @kvm:          VM receiving the device.
 * @assigned_dev: request copied from userspace (assigned_dev_id, busnr,
 *                devfn, flags).
 *
 * Under kvm->lock: rejects an id that is already assigned, then
 * allocates the bookkeeping structure, looks up and enables the PCI
 * device, claims its regions, links the entry on
 * kvm->arch.assigned_dev_head and, when KVM_DEV_ASSIGN_ENABLE_IOMMU is
 * set in flags, calls kvm_iommu_map_guest().
 *
 * Returns 0 on success; -EINVAL (id already assigned or host device not
 * found), -ENOMEM, -EBUSY (device could not be enabled), or the error
 * returned by pci_request_regions() / kvm_iommu_map_guest(). Every
 * failure unwinds whatever was acquired so far.
 */
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	struct kvm_assigned_dev_kernel *adev;
	struct pci_dev *pdev;
	int r = 0;

	mutex_lock(&kvm->lock);

	if (kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				  assigned_dev->assigned_dev_id)) {
		/* device already assigned */
		r = -EINVAL;
		goto out;
	}

	adev = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (!adev) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}

	pdev = pci_get_bus_and_slot(assigned_dev->busnr,
				    assigned_dev->devfn);
	if (!pdev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}

	if (pci_enable_device(pdev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}

	r = pci_request_regions(pdev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}

	adev->assigned_dev_id = assigned_dev->assigned_dev_id;
	adev->host_busnr = assigned_dev->busnr;
	adev->host_devfn = assigned_dev->devfn;
	adev->dev = pdev;
	adev->kvm = kvm;

	list_add(&adev->list, &kvm->arch.assigned_dev_head);

	/* No IOMMU requested: done, r is still 0 from the call above. */
	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
		goto out;

	r = kvm_iommu_map_guest(kvm, adev);
	if (!r)
		goto out;

	/* IOMMU mapping failed: unwind in reverse order of acquisition. */
	list_del(&adev->list);
	pci_release_regions(pdev);
out_disable:
	pci_disable_device(pdev);
out_put:
	pci_dev_put(pdev);
out_free:
	kfree(adev);
out:
	mutex_unlock(&kvm->lock);
	return r;
}
#endif

static inline int valid_vcpu(int n)
{
	return likely(n >= 0 && n < KVM_MAX_VCPUS);
@@ -578,12 +818,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
	}

	kvm_free_physmem_slot(&old, &new);

#ifdef CONFIG_DMAR
	/* map the pages in iommu page table */
	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
	if (r)
		goto out;

#endif
	return 0;

out_free:
@@ -1382,6 +1622,30 @@ static long kvm_vm_ioctl(struct file *filp,
		r = 0;
		break;
	}
#endif
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
	case KVM_ASSIGN_PCI_DEVICE: {
		struct kvm_assigned_pci_dev assigned_dev;

		r = -EFAULT;
		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
			goto out;
		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
		if (r)
			goto out;
		break;
	}
	case KVM_ASSIGN_IRQ: {
		struct kvm_assigned_irq assigned_irq;

		r = -EFAULT;
		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
			goto out;
		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
		if (r)
			goto out;
		break;
	}
#endif
	default:
		r = kvm_arch_vm_ioctl(filp, ioctl, arg);