Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bf9f6ac8 authored by Feng Wu's avatar Feng Wu Committed by Paolo Bonzini
Browse files

KVM: Update Posted-Interrupts Descriptor when vCPU is blocked



This patch updates the Posted-Interrupts Descriptor when vCPU
is blocked.

pre-block:
- Add the vCPU to the blocked per-CPU list
- Set 'NV' to POSTED_INTR_WAKEUP_VECTOR

post-block:
- Remove the vCPU from the per-CPU list

Signed-off-by: default avatarFeng Wu <feng.wu@intel.com>
[Concentrate invocation of pre/post-block hooks to vcpu_block. - Paolo]
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 28b835d6
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -166,3 +166,15 @@ Comment: The srcu read lock must be held while accessing memslots (e.g.
		MMIO/PIO address->device structure mapping (kvm->buses).
		The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
		if it is needed by multiple functions.

Name:		blocked_vcpu_on_cpu_lock
Type:		spinlock_t
Arch:		x86
Protects:	blocked_vcpu_on_cpu
Comment:	This is a per-CPU lock and it is used for VT-d posted-interrupts.
		When VT-d posted-interrupts is supported and the VM has assigned
		devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
		protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
		wakeup notification event since external interrupts from the
		assigned devices happens, we will find the vCPU on the list to
		wakeup.
+11 −0
Original line number Diff line number Diff line
@@ -899,6 +899,17 @@ struct kvm_x86_ops {
	/* pmu operations of sub-arch */
	const struct kvm_pmu_ops *pmu_ops;

	/*
	 * Architecture specific hooks for vCPU blocking due to
	 * HLT instruction.
	 * Returns for .pre_block():
	 *    - 0 means continue to block the vCPU.
	 *    - 1 means we cannot block the vCPU since some event
	 *        happens during this period, such as, 'ON' bit in
	 *        posted-interrupts descriptor is set.
	 */
	int (*pre_block)(struct kvm_vcpu *vcpu);
	void (*post_block)(struct kvm_vcpu *vcpu);
	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
			      uint32_t guest_irq, bool set);
};
+153 −0
Original line number Diff line number Diff line
@@ -878,6 +878,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
static DEFINE_PER_CPU(struct desc_ptr, host_gdt);

/*
 * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
 * can find which vCPU should be waken up.
 */
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);

static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
static unsigned long *vmx_msr_bitmap_legacy;
@@ -2986,6 +2993,8 @@ static int hardware_enable(void)
		return -EBUSY;

	INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
	INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
	spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));

	/*
	 * Now we can enable the vmclear operation in kdump
@@ -6045,6 +6054,25 @@ static void update_ple_window_actual_max(void)
			                    ple_window_grow, INT_MIN);
}

/*
 * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
 */
static void wakeup_handler(void)
{
	struct kvm_vcpu *vcpu;
	int cpu = smp_processor_id();

	spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
	list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
			blocked_vcpu_list) {
		struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);

		if (pi_test_on(pi_desc) == 1)
			kvm_vcpu_kick(vcpu);
	}
	spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
}

static __init int hardware_setup(void)
{
	int r = -ENOMEM, i, msr;
@@ -6231,6 +6259,8 @@ static __init int hardware_setup(void)
		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
	}

	kvm_set_posted_intr_wakeup_handler(wakeup_handler);

	return alloc_kvm_area();

out8:
@@ -10431,6 +10461,126 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
}

/*
 * This routine does the following things for vCPU which is going
 * to be blocked if VT-d PI is enabled.
 * - Store the vCPU to the wakeup list, so when interrupts happen
 *   we can find the right vCPU to wake up.
 * - Change the Posted-interrupt descriptor as below:
 *      'NDST' <-- vcpu->pre_pcpu
 *      'NV' <-- POSTED_INTR_WAKEUP_VECTOR
 * - If 'ON' is set during this process, which means at least one
 *   interrupt is posted for this vCPU, we cannot block it, in
 *   this case, return 1, otherwise, return 0.
 *
 */
static int vmx_pre_block(struct kvm_vcpu *vcpu)
{
	unsigned long flags;
	unsigned int dest;
	struct pi_desc old, new;
	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
		!irq_remapping_cap(IRQ_POSTING_CAP))
		return 0;

	vcpu->pre_pcpu = vcpu->cpu;
	spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
			  vcpu->pre_pcpu), flags);
	list_add_tail(&vcpu->blocked_vcpu_list,
		      &per_cpu(blocked_vcpu_on_cpu,
		      vcpu->pre_pcpu));
	spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
			       vcpu->pre_pcpu), flags);

	do {
		old.control = new.control = pi_desc->control;

		/*
		 * We should not block the vCPU if
		 * an interrupt is posted for it.
		 */
		if (pi_test_on(pi_desc) == 1) {
			spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
					  vcpu->pre_pcpu), flags);
			list_del(&vcpu->blocked_vcpu_list);
			spin_unlock_irqrestore(
					&per_cpu(blocked_vcpu_on_cpu_lock,
					vcpu->pre_pcpu), flags);
			vcpu->pre_pcpu = -1;

			return 1;
		}

		WARN((pi_desc->sn == 1),
		     "Warning: SN field of posted-interrupts "
		     "is set before blocking\n");

		/*
		 * Since vCPU can be preempted during this process,
		 * vcpu->cpu could be different with pre_pcpu, we
		 * need to set pre_pcpu as the destination of wakeup
		 * notification event, then we can find the right vCPU
		 * to wakeup in wakeup handler if interrupts happen
		 * when the vCPU is in blocked state.
		 */
		dest = cpu_physical_id(vcpu->pre_pcpu);

		if (x2apic_enabled())
			new.ndst = dest;
		else
			new.ndst = (dest << 8) & 0xFF00;

		/* set 'NV' to 'wakeup vector' */
		new.nv = POSTED_INTR_WAKEUP_VECTOR;
	} while (cmpxchg(&pi_desc->control, old.control,
			new.control) != old.control);

	return 0;
}

static void vmx_post_block(struct kvm_vcpu *vcpu)
{
	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
	struct pi_desc old, new;
	unsigned int dest;
	unsigned long flags;

	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
		!irq_remapping_cap(IRQ_POSTING_CAP))
		return;

	do {
		old.control = new.control = pi_desc->control;

		dest = cpu_physical_id(vcpu->cpu);

		if (x2apic_enabled())
			new.ndst = dest;
		else
			new.ndst = (dest << 8) & 0xFF00;

		/* Allow posting non-urgent interrupts */
		new.sn = 0;

		/* set 'NV' to 'notification vector' */
		new.nv = POSTED_INTR_VECTOR;
	} while (cmpxchg(&pi_desc->control, old.control,
			new.control) != old.control);

	if(vcpu->pre_pcpu != -1) {
		spin_lock_irqsave(
			&per_cpu(blocked_vcpu_on_cpu_lock,
			vcpu->pre_pcpu), flags);
		list_del(&vcpu->blocked_vcpu_list);
		spin_unlock_irqrestore(
			&per_cpu(blocked_vcpu_on_cpu_lock,
			vcpu->pre_pcpu), flags);
		vcpu->pre_pcpu = -1;
	}
}

/*
 * vmx_update_pi_irte - set IRTE for Posted-Interrupts
 *
@@ -10622,6 +10772,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
	.flush_log_dirty = vmx_flush_log_dirty,
	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,

	.pre_block = vmx_pre_block,
	.post_block = vmx_post_block,

	.pmu_ops = &intel_pmu_ops,

	.update_pi_irte = vmx_update_pi_irte,
+24 −10
Original line number Diff line number Diff line
@@ -6335,6 +6335,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
		}
	}

	/*
	 * KVM_REQ_EVENT is not set when posted interrupts are set by
	 * VT-d hardware, so we have to update RVI unconditionally.
	 */
	if (kvm_lapic_enabled(vcpu)) {
		/*
		 * Update architecture specific hints for APIC
		 * virtual interrupt delivery.
		 */
		if (kvm_x86_ops->hwapic_irr_update)
			kvm_x86_ops->hwapic_irr_update(vcpu,
				kvm_lapic_find_highest_irr(vcpu));
	}

	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
		kvm_apic_accept_events(vcpu);
		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
@@ -6351,13 +6365,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
			kvm_x86_ops->enable_irq_window(vcpu);

		if (kvm_lapic_enabled(vcpu)) {
			/*
			 * Update architecture specific hints for APIC
			 * virtual interrupt delivery.
			 */
			if (kvm_x86_ops->hwapic_irr_update)
				kvm_x86_ops->hwapic_irr_update(vcpu,
					kvm_lapic_find_highest_irr(vcpu));
			update_cr8_intercept(vcpu);
			kvm_lapic_sync_to_vapic(vcpu);
		}
@@ -6493,10 +6500,15 @@ out:

static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
	if (!kvm_arch_vcpu_runnable(vcpu)) {
	if (!kvm_arch_vcpu_runnable(vcpu) &&
	    (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
		kvm_vcpu_block(vcpu);
		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);

		if (kvm_x86_ops->post_block)
			kvm_x86_ops->post_block(vcpu);

		if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
			return 1;
	}
@@ -6528,10 +6540,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)

	for (;;) {
		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
		    !vcpu->arch.apf.halted)
		    !vcpu->arch.apf.halted) {
			r = vcpu_enter_guest(vcpu);
		else
		} else {
			r = vcpu_block(kvm, vcpu);
		}

		if (r <= 0)
			break;

+3 −0
Original line number Diff line number Diff line
@@ -234,6 +234,9 @@ struct kvm_vcpu {
	unsigned long requests;
	unsigned long guest_debug;

	int pre_pcpu;
	struct list_head blocked_vcpu_list;

	struct mutex mutex;
	struct kvm_run *run;

Loading