Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 411b44ba authored by Suravee Suthikulpanit's avatar Suravee Suthikulpanit Committed by Paolo Bonzini
Browse files

svm: Implements update_pi_irte hook to setup posted interrupt



This patch implements update_pi_irte function hook to allow SVM
communicate to IOMMU driver regarding how to set up IRTE for handling
posted interrupt.

In case AVIC is enabled, during vcpu_load/unload, SVM needs to update
IOMMU IRTE with appropriate host physical APIC ID. Also, when
vcpu_blocking/unblocking, SVM needs to update the is-running bit in
the IOMMU IRTE. Both are achieved via calling amd_iommu_update_ga().

However, if GA mode is not enabled for the pass-through device,
IOMMU driver will simply just return when calling amd_iommu_update_ga.

Signed-off-by: default avatarSuravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Reviewed-by: default avatarRadim Krčmář <rkrcmar@redhat.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 5881f737
Loading
Loading
Loading
Loading
+266 −19
Original line number Original line Diff line number Diff line
@@ -43,6 +43,7 @@
#include <asm/desc.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>


#include <asm/virtext.h>
#include <asm/virtext.h>
#include "trace.h"
#include "trace.h"
@@ -200,6 +201,23 @@ struct vcpu_svm {
	struct page *avic_backing_page;
	struct page *avic_backing_page;
	u64 *avic_physical_id_cache;
	u64 *avic_physical_id_cache;
	bool avic_is_running;
	bool avic_is_running;

	/*
	 * Per-vcpu list of struct amd_svm_iommu_ir:
	 * This is used mainly to store interrupt remapping information used
	 * when update the vcpu affinity. This avoids the need to scan for
	 * IRTE and try to match ga_tag in the IOMMU driver.
	 */
	struct list_head ir_list;
	spinlock_t ir_list_lock;
};

/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Storing pointer to struct amd_ir_data */
};
};


#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK	(0xFF)
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK	(0xFF)
@@ -1440,31 +1458,34 @@ static int avic_vm_init(struct kvm *kvm)
	return err;
	return err;
}
}


/**
static inline int
 * This function is called during VCPU halt/unhalt.
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 */
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
{
	u64 entry;
	int ret = 0;
	int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu);
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vcpu_svm *svm = to_svm(vcpu);


	if (!kvm_vcpu_apicv_active(vcpu))
	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return;
		return 0;

	svm->avic_is_running = is_run;


	/* ID = 0xff (broadcast), ID > 0xff (reserved) */
	/*
	if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
	 * Here, we go through the per-vcpu ir_list to update all existing
		return;
	 * interrupt remapping table entry targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);


	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	if (list_empty(&svm->ir_list))
	WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
		goto out;


	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	list_for_each_entry(ir, &svm->ir_list, node) {
	if (is_run)
		ret = amd_iommu_update_ga(cpu, r, ir->data);
		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
		if (ret)
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}
}


static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1491,6 +1512,8 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;


	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
					svm->avic_is_running);
}
}


static void avic_vcpu_put(struct kvm_vcpu *vcpu)
static void avic_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1502,10 +1525,27 @@ static void avic_vcpu_put(struct kvm_vcpu *vcpu)
		return;
		return;


	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
		avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
}


/**
 * This function is called during VCPU halt/unhalt.
 */
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->avic_is_running = is_run;
	if (is_run)
		avic_vcpu_load(vcpu, vcpu->cpu);
	else
		avic_vcpu_put(vcpu);
}

static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1567,6 +1607,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
		err = avic_init_backing_page(&svm->vcpu);
		err = avic_init_backing_page(&svm->vcpu);
		if (err)
		if (err)
			goto free_page4;
			goto free_page4;

		INIT_LIST_HEAD(&svm->ir_list);
		spin_lock_init(&svm->ir_list_lock);
	}
	}


	/* We initialize this flag to true to make sure that the is_running
	/* We initialize this flag to true to make sure that the is_running
@@ -4363,6 +4406,209 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
		kvm_vcpu_wake_up(vcpu);
		kvm_vcpu_wake_up(vcpu);
}
}


static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	unsigned long flags;
	struct amd_svm_iommu_ir *cur;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_for_each_entry(cur, &svm->ir_list, node) {
		if (cur->data != pi->ir_data)
			continue;
		list_del(&cur->node);
		kfree(cur);
		break;
	}
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}

static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;

	/**
	 * In some cases, the existing irte is updaed and re-set,
	 * so we need to check here if it's already been * added
	 * to the ir_list.
	 */
	if (pi->ir_data && (pi->prev_ga_tag != 0)) {
		struct kvm *kvm = svm->vcpu.kvm;
		u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
		struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
		struct vcpu_svm *prev_svm;

		if (!prev_vcpu) {
			ret = -EINVAL;
			goto out;
		}

		prev_svm = to_svm(prev_vcpu);
		svm_ir_list_del(prev_svm, pi);
	}

	/**
	 * Allocating new amd_iommu_pi_data, which will get
	 * add to the per-vcpu ir_list.
	 */
	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
	if (!ir) {
		ret = -ENOMEM;
		goto out;
	}
	ir->data = pi->ir_data;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_add(&ir->node, &svm->ir_list);
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
	return ret;
}

/**
 * Note:
 * The HW cannot support posting multicast/broadcast
 * interrupts to a vCPU. So, we still use legacy interrupt
 * remapping for these kind of interrupts.
 *
 * For lowest-priority interrupts, we only support
 * those with single CPU as the destination, e.g. user
 * configures the interrupts via /proc/irq or uses
 * irqbalance to make the interrupts single-CPU.
 */
static int
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
		 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
{
	struct kvm_lapic_irq irq;
	struct kvm_vcpu *vcpu = NULL;

	kvm_set_msi_irq(kvm, e, &irq);

	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
			 __func__, irq.vector);
		return -1;
	}

	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
		 irq.vector);
	*svm = to_svm(vcpu);
	vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
	vcpu_info->vector = irq.vector;

	return 0;
}

/*
 * svm_update_pi_irte - set IRTE for Posted-Interrupts
 *
 * @kvm: kvm
 * @host_irq: host irq of the interrupt
 * @guest_irq: gsi of the interrupt
 * @set: set or unset PI
 * returns 0 on success, < 0 on failure
 */
static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
			      uint32_t guest_irq, bool set)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_irq_routing_table *irq_rt;
	int idx, ret = -EINVAL;

	if (!kvm_arch_has_assigned_device(kvm) ||
	    !irq_remapping_cap(IRQ_POSTING_CAP))
		return 0;

	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
		 __func__, host_irq, guest_irq, set);

	idx = srcu_read_lock(&kvm->irq_srcu);
	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
	WARN_ON(guest_irq >= irq_rt->nr_rt_entries);

	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
		struct vcpu_data vcpu_info;
		struct vcpu_svm *svm = NULL;

		if (e->type != KVM_IRQ_ROUTING_MSI)
			continue;

		/**
		 * Here, we setup with legacy mode in the following cases:
		 * 1. When cannot target interrupt to a specific vcpu.
		 * 2. Unsetting posted interrupt.
		 * 3. APIC virtialization is disabled for the vcpu.
		 */
		if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
		    kvm_vcpu_apicv_active(&svm->vcpu)) {
			struct amd_iommu_pi_data pi;

			/* Try to enable guest_mode in IRTE */
			pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
			pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
						     svm->vcpu.vcpu_id);
			pi.is_guest_mode = true;
			pi.vcpu_data = &vcpu_info;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/**
			 * Here, we successfully setting up vcpu affinity in
			 * IOMMU guest mode. Now, we need to store the posted
			 * interrupt information in a per-vcpu ir_list so that
			 * we can reference to them directly when we update vcpu
			 * scheduling information in IOMMU irte.
			 */
			if (!ret && pi.is_guest_mode)
				svm_ir_list_add(svm, &pi);
		} else {
			/* Use legacy mode in IRTE */
			struct amd_iommu_pi_data pi;

			/**
			 * Here, pi is used to:
			 * - Tell IOMMU to use legacy mode for this interrupt.
			 * - Retrieve ga_tag of prior interrupt remapping data.
			 */
			pi.is_guest_mode = false;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/**
			 * Check if the posted interrupt was previously
			 * setup with the guest_mode by checking if the ga_tag
			 * was cached. If so, we need to clean up the per-vcpu
			 * ir_list.
			 */
			if (!ret && pi.prev_ga_tag) {
				int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
				struct kvm_vcpu *vcpu;

				vcpu = kvm_get_vcpu_by_id(kvm, id);
				if (vcpu)
					svm_ir_list_del(to_svm(vcpu), &pi);
			}
		}

		if (!ret && svm) {
			trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
						 host_irq, e->gsi,
						 vcpu_info.vector,
						 vcpu_info.pi_desc_addr, set);
		}

		if (ret < 0) {
			pr_err("%s: failed to update PI IRTE\n", __func__);
			goto out;
		}
	}

	ret = 0;
out:
	srcu_read_unlock(&kvm->irq_srcu, idx);
	return ret;
}

static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vcpu_svm *svm = to_svm(vcpu);
@@ -5195,6 +5441,7 @@ static struct kvm_x86_ops svm_x86_ops = {


	.pmu_ops = &amd_pmu_ops,
	.pmu_ops = &amd_pmu_ops,
	.deliver_posted_interrupt = svm_deliver_avic_intr,
	.deliver_posted_interrupt = svm_deliver_avic_intr,
	.update_pi_irte = svm_update_pi_irte,
};
};


static int __init svm_init(void)
static int __init svm_init(void)