svm: Implements update_pi_irte hook to setup posted interrupt (411b44ba) · Commits · e / devices / android_kernel_fairphone_FP3

arch/x86/kvm/svm.c

+266 −19

Original line number	Diff line number	Diff line
		@@ -43,6 +43,7 @@
		#include <asm/desc.h>
		#include <asm/debugreg.h>
		#include <asm/kvm_para.h>
		#include <asm/irq_remapping.h>

		#include <asm/virtext.h>
		#include "trace.h"
		@@ -200,6 +201,23 @@ struct vcpu_svm {
		struct page *avic_backing_page;
		u64 *avic_physical_id_cache;
		bool avic_is_running;

		/*
		* Per-vcpu list of struct amd_svm_iommu_ir:
		* This is used mainly to store interrupt remapping information used
		* when update the vcpu affinity. This avoids the need to scan for
		* IRTE and try to match ga_tag in the IOMMU driver.
		*/
		struct list_head ir_list;
		spinlock_t ir_list_lock;
		};

		/*
		* This is a wrapper of struct amd_iommu_ir_data.
		*/
		struct amd_svm_iommu_ir {
		struct list_head node; /* Used by SVM for per-vcpu ir_list */
		void data; / Storing pointer to struct amd_ir_data */
		};

		#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
		@@ -1440,31 +1458,34 @@ static int avic_vm_init(struct kvm *kvm)
		return err;
		}

		/**
		* This function is called during VCPU halt/unhalt.
		*/
		static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
		static inline int
		avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
		{
		u64 entry;
		int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu);
		int ret = 0;
		unsigned long flags;
		struct amd_svm_iommu_ir *ir;
		struct vcpu_svm *svm = to_svm(vcpu);

		if (!kvm_vcpu_apicv_active(vcpu))
		return;

		svm->avic_is_running = is_run;
		if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

		/* ID = 0xff (broadcast), ID > 0xff (reserved) */
		if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
		return;
		/*
		* Here, we go through the per-vcpu ir_list to update all existing
		* interrupt remapping table entry targeting this vcpu.
		*/
		spin_lock_irqsave(&svm->ir_list_lock, flags);

		entry = READ_ONCE(*(svm->avic_physical_id_cache));
		WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
		if (list_empty(&svm->ir_list))
		goto out;

		entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
		if (is_run)
		entry \|= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
		WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
		list_for_each_entry(ir, &svm->ir_list, node) {
		ret = amd_iommu_update_ga(cpu, r, ir->data);
		if (ret)
		break;
		}
		out:
		spin_unlock_irqrestore(&svm->ir_list_lock, flags);
		return ret;
		}

		static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
		@@ -1491,6 +1512,8 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
		entry \|= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;

		WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
		avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
		svm->avic_is_running);
		}

		static void avic_vcpu_put(struct kvm_vcpu *vcpu)
		@@ -1502,10 +1525,27 @@ static void avic_vcpu_put(struct kvm_vcpu *vcpu)
		return;

		entry = READ_ONCE(*(svm->avic_physical_id_cache));
		if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
		avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

		entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
		WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
		}

		/**
		* This function is called during VCPU halt/unhalt.
		*/
		static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
		{
		struct vcpu_svm *svm = to_svm(vcpu);

		svm->avic_is_running = is_run;
		if (is_run)
		avic_vcpu_load(vcpu, vcpu->cpu);
		else
		avic_vcpu_put(vcpu);
		}

		static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
		{
		struct vcpu_svm *svm = to_svm(vcpu);
		@@ -1567,6 +1607,9 @@ static struct kvm_vcpu svm_create_vcpu(struct kvm kvm, unsigned int id)
		err = avic_init_backing_page(&svm->vcpu);
		if (err)
		goto free_page4;

		INIT_LIST_HEAD(&svm->ir_list);
		spin_lock_init(&svm->ir_list_lock);
		}

		/* We initialize this flag to true to make sure that the is_running
		@@ -4363,6 +4406,209 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
		kvm_vcpu_wake_up(vcpu);
		}

		static void svm_ir_list_del(struct vcpu_svm svm, struct amd_iommu_pi_data pi)
		{
		unsigned long flags;
		struct amd_svm_iommu_ir *cur;

		spin_lock_irqsave(&svm->ir_list_lock, flags);
		list_for_each_entry(cur, &svm->ir_list, node) {
		if (cur->data != pi->ir_data)
		continue;
		list_del(&cur->node);
		kfree(cur);
		break;
		}
		spin_unlock_irqrestore(&svm->ir_list_lock, flags);
		}

		static int svm_ir_list_add(struct vcpu_svm svm, struct amd_iommu_pi_data pi)
		{
		int ret = 0;
		unsigned long flags;
		struct amd_svm_iommu_ir *ir;

		/**
		* In some cases, the existing irte is updaed and re-set,
		* so we need to check here if it's already been * added
		* to the ir_list.
		*/
		if (pi->ir_data && (pi->prev_ga_tag != 0)) {
		struct kvm *kvm = svm->vcpu.kvm;
		u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
		struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
		struct vcpu_svm *prev_svm;

		if (!prev_vcpu) {
		ret = -EINVAL;
		goto out;
		}

		prev_svm = to_svm(prev_vcpu);
		svm_ir_list_del(prev_svm, pi);
		}

		/**
		* Allocating new amd_iommu_pi_data, which will get
		* add to the per-vcpu ir_list.
		*/
		ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
		if (!ir) {
		ret = -ENOMEM;
		goto out;
		}
		ir->data = pi->ir_data;

		spin_lock_irqsave(&svm->ir_list_lock, flags);
		list_add(&ir->node, &svm->ir_list);
		spin_unlock_irqrestore(&svm->ir_list_lock, flags);
		out:
		return ret;
		}

		/**
		* Note:
		* The HW cannot support posting multicast/broadcast
		* interrupts to a vCPU. So, we still use legacy interrupt
		* remapping for these kind of interrupts.
		*
		* For lowest-priority interrupts, we only support
		* those with single CPU as the destination, e.g. user
		* configures the interrupts via /proc/irq or uses
		* irqbalance to make the interrupts single-CPU.
		*/
		static int
		get_pi_vcpu_info(struct kvm kvm, struct kvm_kernel_irq_routing_entry e,
		struct vcpu_data vcpu_info, struct vcpu_svm *svm)
		{
		struct kvm_lapic_irq irq;
		struct kvm_vcpu *vcpu = NULL;

		kvm_set_msi_irq(kvm, e, &irq);

		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
		__func__, irq.vector);
		return -1;
		}

		pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
		irq.vector);
		*svm = to_svm(vcpu);
		vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
		vcpu_info->vector = irq.vector;

		return 0;
		}

		/*
		* svm_update_pi_irte - set IRTE for Posted-Interrupts
		*
		* @kvm: kvm
		* @host_irq: host irq of the interrupt
		* @guest_irq: gsi of the interrupt
		* @set: set or unset PI
		* returns 0 on success, < 0 on failure
		*/
		static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
		uint32_t guest_irq, bool set)
		{
		struct kvm_kernel_irq_routing_entry *e;
		struct kvm_irq_routing_table *irq_rt;
		int idx, ret = -EINVAL;

		if (!kvm_arch_has_assigned_device(kvm) \|\|
		!irq_remapping_cap(IRQ_POSTING_CAP))
		return 0;

		pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
		__func__, host_irq, guest_irq, set);

		idx = srcu_read_lock(&kvm->irq_srcu);
		irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
		WARN_ON(guest_irq >= irq_rt->nr_rt_entries);

		hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
		struct vcpu_data vcpu_info;
		struct vcpu_svm *svm = NULL;

		if (e->type != KVM_IRQ_ROUTING_MSI)
		continue;

		/**
		* Here, we setup with legacy mode in the following cases:
		* 1. When cannot target interrupt to a specific vcpu.
		* 2. Unsetting posted interrupt.
		* 3. APIC virtialization is disabled for the vcpu.
		*/
		if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
		kvm_vcpu_apicv_active(&svm->vcpu)) {
		struct amd_iommu_pi_data pi;

		/* Try to enable guest_mode in IRTE */
		pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
		pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
		svm->vcpu.vcpu_id);
		pi.is_guest_mode = true;
		pi.vcpu_data = &vcpu_info;
		ret = irq_set_vcpu_affinity(host_irq, &pi);

		/**
		* Here, we successfully setting up vcpu affinity in
		* IOMMU guest mode. Now, we need to store the posted
		* interrupt information in a per-vcpu ir_list so that
		* we can reference to them directly when we update vcpu
		* scheduling information in IOMMU irte.
		*/
		if (!ret && pi.is_guest_mode)
		svm_ir_list_add(svm, &pi);
		} else {
		/* Use legacy mode in IRTE */
		struct amd_iommu_pi_data pi;

		/**
		* Here, pi is used to:
		* - Tell IOMMU to use legacy mode for this interrupt.
		* - Retrieve ga_tag of prior interrupt remapping data.
		*/
		pi.is_guest_mode = false;
		ret = irq_set_vcpu_affinity(host_irq, &pi);

		/**
		* Check if the posted interrupt was previously
		* setup with the guest_mode by checking if the ga_tag
		* was cached. If so, we need to clean up the per-vcpu
		* ir_list.
		*/
		if (!ret && pi.prev_ga_tag) {
		int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
		struct kvm_vcpu *vcpu;

		vcpu = kvm_get_vcpu_by_id(kvm, id);
		if (vcpu)
		svm_ir_list_del(to_svm(vcpu), &pi);
		}
		}

		if (!ret && svm) {
		trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
		host_irq, e->gsi,
		vcpu_info.vector,
		vcpu_info.pi_desc_addr, set);
		}

		if (ret < 0) {
		pr_err("%s: failed to update PI IRTE\n", __func__);
		goto out;
		}
		}

		ret = 0;
		out:
		srcu_read_unlock(&kvm->irq_srcu, idx);
		return ret;
		}

		static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
		{
		struct vcpu_svm *svm = to_svm(vcpu);
		@@ -5195,6 +5441,7 @@ static struct kvm_x86_ops svm_x86_ops = {

		.pmu_ops = &amd_pmu_ops,
		.deliver_posted_interrupt = svm_deliver_avic_intr,
		.update_pi_irte = svm_update_pi_irte,
		};

		static int __init svm_init(void)