Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5975a2e0 authored by Paul Mackerras, committed by Alexander Graf
Browse files

KVM: PPC: Book3S: Add API for in-kernel XICS emulation



This adds the API for userspace to instantiate an XICS device in a VM
and connect VCPUs to it.  The API consists of a new device type for
the KVM_CREATE_DEVICE ioctl, a new capability KVM_CAP_IRQ_XICS, which
functions similarly to KVM_CAP_IRQ_MPIC, and the KVM_IRQ_LINE ioctl,
which is used to assert and deassert interrupt inputs of the XICS.

The XICS device has one attribute group, KVM_DEV_XICS_GRP_SOURCES.
Each attribute within this group corresponds to the state of one
interrupt source.  The attribute number is the same as the interrupt
source number.

This does not support irq routing or irqfd yet.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
parent d133b40f
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -2772,3 +2772,11 @@ Parameters: args[0] is the MPIC device fd
            args[1] is the MPIC CPU number for this vcpu

This capability connects the vcpu to an in-kernel MPIC device.

6.7 KVM_CAP_IRQ_XICS

Architectures: ppc
Parameters: args[0] is the XICS device fd
            args[1] is the XICS CPU number (server ID) for this vcpu

This capability connects the vcpu to an in-kernel XICS device.
+66 −0
Original line number Diff line number Diff line
XICS interrupt controller

Device type supported: KVM_DEV_TYPE_XICS

Groups:
  KVM_DEV_XICS_GRP_SOURCES
  Attributes: One per interrupt source, indexed by the source number.

This device emulates the XICS (eXternal Interrupt Controller
Specification) defined in PAPR.  The XICS has a set of interrupt
sources, each identified by a 20-bit source number, and a set of
Interrupt Control Presentation (ICP) entities, also called "servers",
each associated with a virtual CPU.

The ICP entities are created by enabling the KVM_CAP_IRQ_XICS
capability for each vcpu, specifying the XICS device fd in args[0] and
the interrupt server number (i.e. the vcpu number from the XICS's
point of view) in args[1] of the kvm_enable_cap struct.  Each ICP has
64 bits of state which can be read and written using the
KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu.  The 64 bit
state word has the following bitfields, starting at the
least-significant end of the word:

* Unused, 16 bits

* Pending interrupt priority, 8 bits
  Zero is the highest priority, 255 means no interrupt is pending.

* Pending IPI (inter-processor interrupt) priority, 8 bits
  Zero is the highest priority, 255 means no IPI is pending.

* Pending interrupt source number, 24 bits
  Zero means no interrupt pending, 2 means an IPI is pending

* Current processor priority, 8 bits
  Zero is the highest priority, meaning no interrupts can be
  delivered, and 255 is the lowest priority.

Each source has 64 bits of state that can be read and written using
the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
KVM_DEV_XICS_GRP_SOURCES attribute group, with the attribute number
being the interrupt source number.  The 64 bit state word has the
following bitfields, starting from the least-significant end of the
word:

* Destination (server number), 32 bits
  This specifies where the interrupt should be sent, and is the
  interrupt server number specified for the destination vcpu.

* Priority, 8 bits
  This is the priority specified for this interrupt source, where 0 is
  the highest priority and 255 is the lowest.  An interrupt with a
  priority of 255 will never be delivered.

* Level sensitive flag, 1 bit
  This bit is 1 for a level-sensitive interrupt source, or 0 for
  edge-sensitive (or MSI).

* Masked flag, 1 bit
  This bit is set to 1 if the interrupt is masked (cannot be delivered
  regardless of its priority), for example by the ibm,int-off RTAS
  call, or 0 if it is not masked.

* Pending flag, 1 bit
  This bit is 1 if the source has a pending interrupt, otherwise 0.

Only one XICS instance may be created per VM.
+2 −0
Original line number Diff line number Diff line
@@ -315,6 +315,8 @@ extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
			struct kvm_vcpu *vcpu, u32 cpu);
#else
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
	{ return 0; }
+12 −0
Original line number Diff line number Diff line
@@ -499,4 +499,16 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_TLB3PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
#define KVM_REG_PPC_EPTCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)

/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES	1	/* 64-bit source attributes */

/*
 * Layout of 64-bit source attribute values, as implied by the shifts
 * and masks below:
 *   bits  0-31  destination (interrupt server number)
 *   bits 32-39  priority
 *   bit  40     level-sensitive flag
 *   bit  41     masked flag
 *   bit  42     pending flag
 */
#define  KVM_XICS_DESTINATION_SHIFT	0
#define  KVM_XICS_DESTINATION_MASK	0xffffffffULL
#define  KVM_XICS_PRIORITY_SHIFT	32
#define  KVM_XICS_PRIORITY_MASK		0xff	/* applies after shifting right by KVM_XICS_PRIORITY_SHIFT */
#define  KVM_XICS_LEVEL_SENSITIVE	(1ULL << 40)
#define  KVM_XICS_MASKED		(1ULL << 41)
#define  KVM_XICS_PENDING		(1ULL << 42)

#endif /* __LINUX_KVM_POWERPC_H */
+165 −25
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/anon_inodes.h>

#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
@@ -55,8 +56,6 @@
 *
 * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
 *   locks array to improve scalability
 *
 * - ioctl's to save/restore the entire state for snapshot & migration
 */

/* -- ICS routines -- */
@@ -64,7 +63,8 @@
static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
			    u32 new_irq);

static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
			   bool report_status)
{
	struct ics_irq_state *state;
	struct kvmppc_ics *ics;
@@ -81,6 +81,9 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
	if (!state->exists)
		return -EINVAL;

	if (report_status)
		return state->asserted;

	/*
	 * We set state->asserted locklessly. This should be fine as
	 * we are the only setter, thus concurrent access is undefined
@@ -96,7 +99,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
	/* Attempt delivery */
	icp_deliver_irq(xics, NULL, irq);

	return 0;
	return state->asserted;
}

static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
@@ -891,7 +894,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics)
	kfree(name);
}

struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
					struct kvmppc_xics *xics, int irq)
{
	struct kvmppc_ics *ics;
@@ -1044,34 +1047,138 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
	return 0;
}

/* -- ioctls -- */
/*
 * Read the state of interrupt source @irq and store it, encoded as a
 * 64-bit source attribute word (KVM_XICS_* layout), at the user address
 * @addr.
 *
 * Returns 0 on success, -ENOENT if no ICS is found for @irq or the
 * source is not marked as existing, and -EFAULT if the value cannot be
 * written to user memory.
 */
static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
{
	int ret;
	struct kvmppc_ics *ics;
	struct ics_irq_state *irqp;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 idx;
	u64 val, prio;

	ics = kvmppc_xics_find_ics(xics, irq, &idx);
	if (!ics)
		return -ENOENT;

	irqp = &ics->irq_state[idx];
	mutex_lock(&ics->lock);
	ret = -ENOENT;
	if (irqp->exists) {
		/* Destination server occupies the low bits of the word */
		val = irqp->server;
		prio = irqp->priority;
		if (prio == MASKED) {
			/*
			 * A masked source reports the MASKED flag together
			 * with its saved priority instead of MASKED itself.
			 */
			val |= KVM_XICS_MASKED;
			prio = irqp->saved_priority;
		}
		val |= prio << KVM_XICS_PRIORITY_SHIFT;
		/*
		 * An asserted source is reported as level-sensitive and
		 * pending; otherwise only PENDING is reported, when a
		 * masked_pending or resend is outstanding.
		 */
		if (irqp->asserted)
			val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
		else if (irqp->masked_pending || irqp->resend)
			val |= KVM_XICS_PENDING;
		ret = 0;
	}
	mutex_unlock(&ics->lock);

	/* The copy to user space happens after the ICS lock is dropped */
	if (!ret && put_user(val, ubufp))
		ret = -EFAULT;

	/*
	 * NOTE(review): the next line looks like residue of the removed
	 * kvm_vm_ioctl_xics_irq() definition left by the diff rendering,
	 * not part of this function - confirm against the real file.
	 */
int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args)
	return ret;
}

/*
 * Set the state of interrupt source @irq from the 64-bit source
 * attribute word (KVM_XICS_* layout) read from the user address @addr.
 *
 * Returns 0 on success, -ENOENT if @irq is outside
 * [KVMPPC_XICS_FIRST_IRQ, KVMPPC_XICS_NR_IRQS), -ENOMEM if a missing ICS
 * cannot be created, -EFAULT if the value cannot be read from user
 * memory, and -EINVAL if the priority is not MASKED and the destination
 * server cannot be found.
 *
 * NOTE(review): this span interleaves lines of the removed
 * kvm_vm_ioctl_xics_irq() (the second 'xics' declaration, 'int r', the
 * kvm->arch.xics check and the switch on args->level) with the new
 * function body, apparently an artifact of the diff rendering - confirm
 * against the real file before relying on this text.
 */
static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
{
	struct kvmppc_xics *xics;
	int r;
	struct kvmppc_ics *ics;
	struct ics_irq_state *irqp;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 idx;
	u64 val;
	u8 prio;
	u32 server;

	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
		return -ENOENT;

	/* locking against multiple callers? */
	ics = kvmppc_xics_find_ics(xics, irq, &idx);
	if (!ics) {
		/* No ICS covers this source yet: create one on demand */
		ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
		if (!ics)
			return -ENOMEM;
	}
	irqp = &ics->irq_state[idx];
	if (get_user(val, ubufp))
		return -EFAULT;

	server = val & KVM_XICS_DESTINATION_MASK;
	/* prio is a u8, so the assignment keeps only the 8 priority bits */
	prio = val >> KVM_XICS_PRIORITY_SHIFT;
	if (prio != MASKED &&
	    kvmppc_xics_find_server(xics->kvm, server) == NULL)
		return -EINVAL;

	xics = kvm->arch.xics;
	if (!xics)
		return -ENODEV;
	mutex_lock(&ics->lock);
	irqp->server = server;
	/* Keep the requested priority even when the source is masked */
	irqp->saved_priority = prio;
	if (val & KVM_XICS_MASKED)
		prio = MASKED;
	irqp->priority = prio;
	irqp->resend = 0;
	irqp->masked_pending = 0;
	irqp->asserted = 0;
	/* Only a pending, level-sensitive source is restored as asserted */
	if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
		irqp->asserted = 1;
	irqp->exists = 1;
	mutex_unlock(&ics->lock);

	switch (args->level) {
	case KVM_INTERRUPT_SET:
	case KVM_INTERRUPT_SET_LEVEL:
	case KVM_INTERRUPT_UNSET:
		r = ics_deliver_irq(xics, args->irq, args->level);
		break;
	default:
		r = -EINVAL;
	/* A pending source is handed to icp_deliver_irq() after unlocking */
	if (val & KVM_XICS_PENDING)
		icp_deliver_irq(xics, NULL, irqp->number);

	return 0;
}

	return r;
/*
 * Forward an interrupt line change to the VM's in-kernel XICS.
 *
 * @kvm:           VM whose XICS instance (kvm->arch.xics) is targeted
 * @irq_source_id: not used by this implementation
 * @irq:           XICS interrupt source number
 * @level:         requested line level, passed through unchanged
 * @line_status:   passed as the report_status argument of
 *                 ics_deliver_irq()
 *
 * Returns -ENODEV if the VM has no XICS instance, otherwise the result
 * of ics_deliver_irq().
 */
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
		bool line_status)
{
	struct kvmppc_xics *xics = kvm->arch.xics;

	/*
	 * The VM may not have an XICS: fail cleanly rather than handing a
	 * NULL controller to ics_deliver_irq().
	 */
	if (!xics)
		return -ENODEV;

	return ics_deliver_irq(xics, irq, level, line_status);
}

/*
 * .set_attr handler of the XICS KVM device.
 *
 * Only the KVM_DEV_XICS_GRP_SOURCES group is handled: the attribute
 * number is used as the interrupt source number and attr->addr as the
 * user address of the 64-bit value.  Any other group gives -ENXIO.
 */
static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct kvmppc_xics *ctrl = dev->private;

	if (attr->group != KVM_DEV_XICS_GRP_SOURCES)
		return -ENXIO;

	return xics_set_source(ctrl, attr->attr, attr->addr);
}

/*
 * .get_attr handler of the XICS KVM device.
 *
 * Only the KVM_DEV_XICS_GRP_SOURCES group is handled: the attribute
 * number is used as the interrupt source number and attr->addr as the
 * user address receiving the 64-bit value.  Any other group gives
 * -ENXIO.
 */
static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct kvmppc_xics *ctrl = dev->private;

	if (attr->group != KVM_DEV_XICS_GRP_SOURCES)
		return -ENXIO;

	return xics_get_source(ctrl, attr->attr, attr->addr);
}

void kvmppc_xics_free(struct kvmppc_xics *xics)
/*
 * .has_attr handler of the XICS KVM device.
 *
 * Returns 0 when the attribute is in the KVM_DEV_XICS_GRP_SOURCES group
 * and its number lies in [KVMPPC_XICS_FIRST_IRQ, KVMPPC_XICS_NR_IRQS);
 * returns -ENXIO for anything else.
 */
static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	if (attr->group != KVM_DEV_XICS_GRP_SOURCES)
		return -ENXIO;

	if (attr->attr < KVMPPC_XICS_FIRST_IRQ ||
	    attr->attr >= KVMPPC_XICS_NR_IRQS)
		return -ENXIO;

	return 0;
}

static void kvmppc_xics_free(struct kvm_device *dev)
{
	struct kvmppc_xics *xics = dev->private;
	int i;
	struct kvm *kvm = xics->kvm;

@@ -1083,17 +1190,21 @@ void kvmppc_xics_free(struct kvmppc_xics *xics)
	for (i = 0; i <= xics->max_icsid; i++)
		kfree(xics->ics[i]);
	kfree(xics);
	kfree(dev);
}

int kvm_xics_create(struct kvm *kvm, u32 type)
static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xics *xics;
	struct kvm *kvm = dev->kvm;
	int ret = 0;

	xics = kzalloc(sizeof(*xics), GFP_KERNEL);
	if (!xics)
		return -ENOMEM;

	dev->private = xics;
	xics->dev = dev;
	xics->kvm = kvm;

	/* Already there ? */
@@ -1120,6 +1231,35 @@ int kvm_xics_create(struct kvm *kvm, u32 type)
	return 0;
}

/*
 * Device operations table for the XICS device: lifetime callbacks first,
 * then the attribute accessors.
 */
struct kvm_device_ops kvm_xics_ops = {
	.name		= "kvm-xics",

	.create		= kvmppc_xics_create,
	.destroy	= kvmppc_xics_free,

	.has_attr	= xics_has_attr,
	.get_attr	= xics_get_attr,
	.set_attr	= xics_set_attr,
};

/*
 * Connect @vcpu to the XICS device @dev, creating its ICP with @xcpu as
 * the number passed to kvmppc_xics_create_icp().
 *
 * Returns -EPERM if @dev is not an XICS device or belongs to a different
 * VM than @vcpu, -EBUSY if the vcpu already has an irq_type set, and
 * otherwise the result of kvmppc_xics_create_icp().  On success the
 * vcpu's irq_type becomes KVMPPC_IRQ_XICS.
 */
int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
			     u32 xcpu)
{
	struct kvmppc_xics *xics = dev->private;
	int err;

	/* dev->private is only dereferenced once the ops check has passed */
	if (dev->ops != &kvm_xics_ops || xics->kvm != vcpu->kvm)
		return -EPERM;

	if (vcpu->arch.irq_type != 0)
		return -EBUSY;

	err = kvmppc_xics_create_icp(vcpu, xcpu);
	if (err)
		return err;

	vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
	return 0;
}

void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
{
	if (!vcpu->arch.icp)
Loading