KVM: Update Posted-Interrupts Descriptor when vCPU is blocked (bf9f6ac8) · Commits · e / devices / android_kernel_sony_msm8998

Documentation/virtual/kvm/locking.txt

+12 −0

Original line number	Diff line number	Diff line
		@@ -166,3 +166,15 @@ Comment: The srcu read lock must be held while accessing memslots (e.g.
		MMIO/PIO address->device structure mapping (kvm->buses).
		The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
		if it is needed by multiple functions.

		Name: blocked_vcpu_on_cpu_lock
		Type: spinlock_t
		Arch: x86
		Protects: blocked_vcpu_on_cpu
		Comment: This is a per-CPU lock used for VT-d posted-interrupts.
		When VT-d posted-interrupts are supported and the VM has assigned
		devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu,
		which is protected by blocked_vcpu_on_cpu_lock. When VT-d hardware
		issues a wakeup notification event because an external interrupt
		from an assigned device has arrived, we find the vCPU on the list
		and wake it up.

arch/x86/include/asm/kvm_host.h

+11 −0

Original line number	Diff line number	Diff line
		@@ -899,6 +899,17 @@ struct kvm_x86_ops {
		/* pmu operations of sub-arch */
		const struct kvm_pmu_ops *pmu_ops;

		/*
		 * Architecture specific hooks for vCPU blocking due to
		 * HLT instruction.
		 *
		 * .pre_block() is invoked before the vCPU is blocked and returns:
		 * - 0 means continue to block the vCPU.
		 * - 1 means we cannot block the vCPU since some event
		 *   happened during this period, such as the 'ON' bit in
		 *   the posted-interrupts descriptor being set.
		 *
		 * .post_block() is invoked once kvm_vcpu_block() has returned.
		 * It is not invoked when .pre_block() refused to block.
		 */
		int (*pre_block)(struct kvm_vcpu *vcpu);
		void (*post_block)(struct kvm_vcpu *vcpu);
		int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
				      uint32_t guest_irq, bool set);
		};

arch/x86/kvm/vmx.c

+153 −0

Original line number	Diff line number	Diff line
		@@ -878,6 +878,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
		static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
		static DEFINE_PER_CPU(struct desc_ptr, host_gdt);

		/*
		* We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we
		* can find which vCPU should be woken up.
		*/
		static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
		static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);

		static unsigned long *vmx_io_bitmap_a;
		static unsigned long *vmx_io_bitmap_b;
		static unsigned long *vmx_msr_bitmap_legacy;
		@@ -2986,6 +2993,8 @@ static int hardware_enable(void)
		return -EBUSY;

		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
		INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
		spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));

		/*
		* Now we can enable the vmclear operation in kdump
		@@ -6045,6 +6054,25 @@ static void update_ple_window_actual_max(void)
		ple_window_grow, INT_MIN);
		}

		/*
		* Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
		*/
		static void wakeup_handler(void)
		{
		struct kvm_vcpu *vcpu;
		int cpu = smp_processor_id();

		spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
		list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
		blocked_vcpu_list) {
		struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);

		if (pi_test_on(pi_desc) == 1)
		kvm_vcpu_kick(vcpu);
		}
		spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
		}

		static __init int hardware_setup(void)
		{
		int r = -ENOMEM, i, msr;
		@@ -6231,6 +6259,8 @@ static __init int hardware_setup(void)
		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
		}

		kvm_set_posted_intr_wakeup_handler(wakeup_handler);

		return alloc_kvm_area();

		out8:
		@@ -10431,6 +10461,126 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
		kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
		}

		/*
		* This routine does the following things for vCPU which is going
		* to be blocked if VT-d PI is enabled.
		* - Store the vCPU to the wakeup list, so when interrupts happen
		* we can find the right vCPU to wake up.
		* - Change the Posted-interrupt descriptor as below:
		* 'NDST' <-- vcpu->pre_pcpu
		* 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
		* - If 'ON' is set during this process, which means at least one
		* interrupt is posted for this vCPU, we cannot block it, in
		* this case, return 1, otherwise, return 0.
		*
		*/
		static int vmx_pre_block(struct kvm_vcpu *vcpu)
		{
		unsigned long flags;
		unsigned int dest;
		struct pi_desc old, new;
		struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);

		if (!kvm_arch_has_assigned_device(vcpu->kvm) \|\|
		!irq_remapping_cap(IRQ_POSTING_CAP))
		return 0;

		vcpu->pre_pcpu = vcpu->cpu;
		spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
		vcpu->pre_pcpu), flags);
		list_add_tail(&vcpu->blocked_vcpu_list,
		&per_cpu(blocked_vcpu_on_cpu,
		vcpu->pre_pcpu));
		spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
		vcpu->pre_pcpu), flags);

		do {
		old.control = new.control = pi_desc->control;

		/*
		* We should not block the vCPU if
		* an interrupt is posted for it.
		*/
		if (pi_test_on(pi_desc) == 1) {
		spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
		vcpu->pre_pcpu), flags);
		list_del(&vcpu->blocked_vcpu_list);
		spin_unlock_irqrestore(
		&per_cpu(blocked_vcpu_on_cpu_lock,
		vcpu->pre_pcpu), flags);
		vcpu->pre_pcpu = -1;

		return 1;
		}

		WARN((pi_desc->sn == 1),
		"Warning: SN field of posted-interrupts "
		"is set before blocking\n");

		/*
		* Since vCPU can be preempted during this process,
		* vcpu->cpu could be different with pre_pcpu, we
		* need to set pre_pcpu as the destination of wakeup
		* notification event, then we can find the right vCPU
		* to wakeup in wakeup handler if interrupts happen
		* when the vCPU is in blocked state.
		*/
		dest = cpu_physical_id(vcpu->pre_pcpu);

		if (x2apic_enabled())
		new.ndst = dest;
		else
		new.ndst = (dest << 8) & 0xFF00;

		/* set 'NV' to 'wakeup vector' */
		new.nv = POSTED_INTR_WAKEUP_VECTOR;
		} while (cmpxchg(&pi_desc->control, old.control,
		new.control) != old.control);

		return 0;
		}

		static void vmx_post_block(struct kvm_vcpu *vcpu)
		{
		struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
		struct pi_desc old, new;
		unsigned int dest;
		unsigned long flags;

		if (!kvm_arch_has_assigned_device(vcpu->kvm) \|\|
		!irq_remapping_cap(IRQ_POSTING_CAP))
		return;

		do {
		old.control = new.control = pi_desc->control;

		dest = cpu_physical_id(vcpu->cpu);

		if (x2apic_enabled())
		new.ndst = dest;
		else
		new.ndst = (dest << 8) & 0xFF00;

		/* Allow posting non-urgent interrupts */
		new.sn = 0;

		/* set 'NV' to 'notification vector' */
		new.nv = POSTED_INTR_VECTOR;
		} while (cmpxchg(&pi_desc->control, old.control,
		new.control) != old.control);

		if(vcpu->pre_pcpu != -1) {
		spin_lock_irqsave(
		&per_cpu(blocked_vcpu_on_cpu_lock,
		vcpu->pre_pcpu), flags);
		list_del(&vcpu->blocked_vcpu_list);
		spin_unlock_irqrestore(
		&per_cpu(blocked_vcpu_on_cpu_lock,
		vcpu->pre_pcpu), flags);
		vcpu->pre_pcpu = -1;
		}
		}

		/*
		* vmx_update_pi_irte - set IRTE for Posted-Interrupts
		*
		@@ -10622,6 +10772,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
		.flush_log_dirty = vmx_flush_log_dirty,
		.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,

		.pre_block = vmx_pre_block,
		.post_block = vmx_post_block,

		.pmu_ops = &intel_pmu_ops,

		.update_pi_irte = vmx_update_pi_irte,

arch/x86/kvm/x86.c

+24 −10

Original line number	Diff line number	Diff line
		@@ -6335,6 +6335,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
		}
		}

		/*
		* KVM_REQ_EVENT is not set when posted interrupts are set by
		* VT-d hardware, so we have to update RVI unconditionally.
		*/
		if (kvm_lapic_enabled(vcpu)) {
		/*
		* Update architecture specific hints for APIC
		* virtual interrupt delivery.
		*/
		if (kvm_x86_ops->hwapic_irr_update)
		kvm_x86_ops->hwapic_irr_update(vcpu,
		kvm_lapic_find_highest_irr(vcpu));
		}

		if (kvm_check_request(KVM_REQ_EVENT, vcpu) \|\| req_int_win) {
		kvm_apic_accept_events(vcpu);
		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
		@@ -6351,13 +6365,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
		kvm_x86_ops->enable_irq_window(vcpu);

		if (kvm_lapic_enabled(vcpu)) {
		/*
		* Update architecture specific hints for APIC
		* virtual interrupt delivery.
		*/
		if (kvm_x86_ops->hwapic_irr_update)
		kvm_x86_ops->hwapic_irr_update(vcpu,
		kvm_lapic_find_highest_irr(vcpu));
		update_cr8_intercept(vcpu);
		kvm_lapic_sync_to_vapic(vcpu);
		}
		@@ -6493,10 +6500,15 @@ out:

		static inline int vcpu_block(struct kvm kvm, struct kvm_vcpu vcpu)
		{
		if (!kvm_arch_vcpu_runnable(vcpu)) {
		if (!kvm_arch_vcpu_runnable(vcpu) &&
		(!kvm_x86_ops->pre_block \|\| kvm_x86_ops->pre_block(vcpu) == 0)) {
		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
		kvm_vcpu_block(vcpu);
		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);

		if (kvm_x86_ops->post_block)
		kvm_x86_ops->post_block(vcpu);

		if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
		return 1;
		}
		@@ -6528,10 +6540,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)

		for (;;) {
		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
		!vcpu->arch.apf.halted)
		!vcpu->arch.apf.halted) {
		r = vcpu_enter_guest(vcpu);
		else
		} else {
		r = vcpu_block(kvm, vcpu);
		}

		if (r <= 0)
		break;

include/linux/kvm_host.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -234,6 +234,9 @@ struct kvm_vcpu {
		unsigned long requests;
		unsigned long guest_debug;

		int pre_pcpu;
		struct list_head blocked_vcpu_list;

		struct mutex mutex;
		struct kvm_run *run;