Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit caa057a2 authored by Wanpeng Li's avatar Wanpeng Li Committed by Paolo Bonzini
Browse files

KVM: X86: Provide a capability to disable HLT intercepts



If host CPUs are dedicated to a VM, we can avoid VM exits on HLT.
This patch adds the per-VM capability to disable them.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Jan H. Schönherr <jschoenh@amazon.de>
Signed-off-by: default avatarWanpeng Li <wanpengli@tencent.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 4d5422ce
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -4367,6 +4367,7 @@ Returns: 0 on success, -EINVAL when args[0] contains invalid exits
Valid bits in args[0] are

#define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
#define KVM_X86_DISABLE_EXITS_HLT              (1 << 1)

Enabling this capability on a VM provides userspace with a way to no
longer intercept some instructions for improved latency in some
@@ -4375,6 +4376,7 @@ physical CPUs. More bits can be added in the future; userspace can
just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable
all such vmexits.

Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.

8. Other capabilities.
----------------------
+1 −0
Original line number Diff line number Diff line
@@ -812,6 +812,7 @@ struct kvm_arch {
	gpa_t wall_clock;

	bool mwait_in_guest;
	bool hlt_in_guest;

	bool ept_identity_pagetable_done;
	gpa_t ept_identity_map_addr;
+5 −0
Original line number Diff line number Diff line
@@ -135,6 +135,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
			return -EINVAL;
	}

	best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);

	/* Update physical-address width */
	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
	kvm_mmu_reset_context(vcpu);
+3 −1
Original line number Diff line number Diff line
@@ -1380,7 +1380,6 @@ static void init_vmcb(struct vcpu_svm *svm)
	set_intercept(svm, INTERCEPT_RDPMC);
	set_intercept(svm, INTERCEPT_CPUID);
	set_intercept(svm, INTERCEPT_INVD);
	set_intercept(svm, INTERCEPT_HLT);
	set_intercept(svm, INTERCEPT_INVLPG);
	set_intercept(svm, INTERCEPT_INVLPGA);
	set_intercept(svm, INTERCEPT_IOIO_PROT);
@@ -1403,6 +1402,9 @@ static void init_vmcb(struct vcpu_svm *svm)
		set_intercept(svm, INTERCEPT_MWAIT);
	}

	if (!kvm_hlt_in_guest(svm->vcpu.kvm))
		set_intercept(svm, INTERCEPT_HLT);

	control->iopm_base_pa = __sme_set(iopm_base);
	control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
	control->int_ctl = V_INTR_MASKING_MASK;
+24 −0
Original line number Diff line number Diff line
@@ -2556,6 +2556,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
	return 0;
}

static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
{
	/*
	 * Ensure that we clear the HLT state in the VMCS.  We don't need to
	 * explicitly skip the instruction because if the HLT state is set,
	 * then the instruction is already executing and RIP has already been
	 * advanced.
	 */
	if (kvm_hlt_in_guest(vcpu->kvm) &&
			vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
		vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
}

static void vmx_queue_exception(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2586,6 +2599,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
		intr_info |= INTR_TYPE_HARD_EXCEPTION;

	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);

	vmx_clear_hlt(vcpu);
}

static bool vmx_rdtscp_supported(void)
@@ -5545,6 +5560,8 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
	if (kvm_mwait_in_guest(vmx->vcpu.kvm))
		exec_control &= ~(CPU_BASED_MWAIT_EXITING |
				CPU_BASED_MONITOR_EXITING);
	if (kvm_hlt_in_guest(vmx->vcpu.kvm))
		exec_control &= ~CPU_BASED_HLT_EXITING;
	return exec_control;
}

@@ -5906,6 +5923,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
	update_exception_bitmap(vcpu);

	vpid_sync_context(vmx->vpid);
	if (init_event)
		vmx_clear_hlt(vcpu);
}

/*
@@ -5976,6 +5995,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
	} else
		intr |= INTR_TYPE_EXT_INTR;
	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);

	vmx_clear_hlt(vcpu);
}

static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -6006,6 +6027,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)

	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);

	vmx_clear_hlt(vcpu);
}

static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -12347,6 +12370,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)

	vmx->nested.smm.vmxon = vmx->nested.vmxon;
	vmx->nested.vmxon = false;
	vmx_clear_hlt(vcpu);
	return 0;
}

Loading