
Commit d048c098 authored by Radim Krčmář, committed by Paolo Bonzini

KVM: nVMX: fix msr bitmaps to prevent L2 from accessing L0 x2APIC



The MSR bitmap can be used to avoid a VM exit (interception) on guest MSR
accesses.  In some configurations of VMX controls, the guest can even
directly access the host's x2APIC MSRs.  See SDM 29.5 VIRTUALIZING
MSR-BASED APIC ACCESSES.
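
For orientation: the bitmap is one 4 KiB page split into four 1 KiB
quarters (read-low, read-high, write-low, write-high), where a set bit
requests a VM exit.  A minimal lookup sketch — the function name here is
made up for illustration; the kernel's real helpers are
__vmx_enable_intercept_for_msr() and __vmx_disable_intercept_for_msr():

	#include <stdbool.h>

	/* Sketch of the SDM 29.5 bitmap layout, not kernel code. */
	static bool msr_access_intercepted(const unsigned char *bitmap,
					   unsigned int msr, bool write)
	{
		unsigned int base;

		if (msr <= 0x1fff)		/* low MSRs, incl. x2APIC 0x800..0x8ff */
			base = write ? 2048 : 0;
		else if (msr - 0xc0000000 <= 0x1fff) {	/* high MSRs */
			msr &= 0x1fff;
			base = write ? 3072 : 1024;
		} else
			return true;		/* outside both ranges: always exits */

		return bitmap[base + msr / 8] & (1 << (msr % 8));
	}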

L2 could read all of L0's x2APIC MSRs and write TPR, EOI, and SELF_IPI.
To do so, L1 would first trick KVM into disabling all possible
interceptions by enabling APICv features, and would then turn those
features off; nested_vmx_merge_msr_bitmap() only ever disabled
interceptions, so VMX would not intercept previously enabled MSRs even
though they were no longer safe with the new configuration.
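
The sequence, sketched from L1's side — the vmwrite()/vmlaunch() wrappers
are hypothetical, the control names are the real VMCS field values:

	/* 1. Advertise APICv features so the merge clears the x2APIC
	 *    intercepts in the (then-shared) bitmap used for L2.
	 */
	vmwrite(SECONDARY_VM_EXEC_CONTROL,
		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
		SECONDARY_EXEC_APIC_REGISTER_VIRT);
	vmlaunch();

	/* 2. Drop the features again.  The old merge code only ever
	 *    cleared intercept bits, so the stale "allow" bits survive
	 *    and L2 can access L0's x2APIC MSRs without a VM exit.
	 */
	vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
	vmlaunch();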

Correctly re-enabling interceptions is not enough, as a second bug would
still allow L1+L2 to access the host's MSRs: the MSR bitmap was shared by
all VMCSs, so L1 could trigger a race to get the desired combination of
MSR bitmap and VMX controls.
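
Roughly, the losing interleaving (illustrative only):

	vCPU A (vmcs12 without APICv)      vCPU B (vmcs12 with APICv)
	-----------------------------      -----------------------------
	merges the shared bitmap for
	its own restrictive controls
	                                   re-merges for APICv controls,
	                                   clearing x2APIC intercepts in
	                                   the one vmx_msr_bitmap_nested
	enters L2 with a bitmap that no
	longer intercepts x2APIC MSRs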

This fix allocates an MSR bitmap for every L1 vCPU, allows only the safe
x2APIC MSRs from L1's MSR bitmap, and disables MSR bitmaps entirely if
they would have to intercept everything anyway.
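
The resulting merge is default-deny: L0's per-vCPU bitmap starts as all
ones (intercept everything), and a bit is cleared only when the MSR is one
of the known-safe x2APIC MSRs and L1's own bitmap does not want the exit
either.  The pattern, reduced to one read bit of a low MSR (sketch; the
real helper is nested_vmx_disable_intercept_for_msr()):

	memset(msr_bitmap_l0, 0xff, PAGE_SIZE);	/* deny by default */

	/* In the read-low quarter the bit index is the MSR number. */
	if (!test_bit(msr, msr_bitmap_l1))	/* L1 allows the access... */
		clear_bit(msr, msr_bitmap_l0);	/* ...and L0 deems it safe */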

Fixes: 3af18d9c ("KVM: nVMX: Prepare for using hardware MSR bitmap")
Reported-by: Jim Mattson <jmattson@google.com>
Suggested-by: Wincy Van <fanwenyi0529@gmail.com>
Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
parent 184ca823
arch/x86/kvm/vmx.c  +44 −63
@@ -435,6 +435,8 @@ struct nested_vmx {
 	bool pi_pending;
 	u16 posted_intr_nv;
 
+	unsigned long *msr_bitmap;
+
 	struct hrtimer preemption_timer;
 	bool preemption_timer_expired;
 
@@ -924,7 +926,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;
 
@@ -2508,7 +2509,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
 	unsigned long *msr_bitmap;
 
 	if (is_guest_mode(vcpu))
-		msr_bitmap = vmx_msr_bitmap_nested;
+		msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
 	else if (cpu_has_secondary_exec_ctrls() &&
 		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
 		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
@@ -6363,13 +6364,6 @@ static __init int hardware_setup(void)
 	if (!vmx_msr_bitmap_longmode_x2apic)
 		goto out4;
 
-	if (nested) {
-		vmx_msr_bitmap_nested =
-			(unsigned long *)__get_free_page(GFP_KERNEL);
-		if (!vmx_msr_bitmap_nested)
-			goto out5;
-	}
-
 	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_vmread_bitmap)
 		goto out6;
@@ -6392,8 +6386,6 @@ static __init int hardware_setup(void)
 
 	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
 	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-	if (nested)
-		memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
 
 	if (setup_vmcs_config(&vmcs_config) < 0) {
 		r = -EIO;
@@ -6529,9 +6521,6 @@ static __init int hardware_setup(void)
 out7:
 	free_page((unsigned long)vmx_vmread_bitmap);
 out6:
-	if (nested)
-		free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
 	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6557,8 +6546,6 @@ static __exit void hardware_unsetup(void)
 	free_page((unsigned long)vmx_io_bitmap_a);
 	free_page((unsigned long)vmx_vmwrite_bitmap);
 	free_page((unsigned long)vmx_vmread_bitmap);
-	if (nested)
-		free_page((unsigned long)vmx_msr_bitmap_nested);
 
 	free_kvm_area();
 }
@@ -6995,16 +6982,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
+	if (cpu_has_vmx_msr_bitmap()) {
+		vmx->nested.msr_bitmap =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+		if (!vmx->nested.msr_bitmap)
+			goto out_msr_bitmap;
+	}
+
 	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
 	if (!vmx->nested.cached_vmcs12)
-		return -ENOMEM;
+		goto out_cached_vmcs12;
 
 	if (enable_shadow_vmcs) {
 		shadow_vmcs = alloc_vmcs();
-		if (!shadow_vmcs) {
-			kfree(vmx->nested.cached_vmcs12);
-			return -ENOMEM;
-		}
+		if (!shadow_vmcs)
+			goto out_shadow_vmcs;
 		/* mark vmcs as shadow */
 		shadow_vmcs->revision_id |= (1u << 31);
 		/* init shadow vmcs */
@@ -7024,6 +7016,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 	skip_emulated_instruction(vcpu);
 	nested_vmx_succeed(vcpu);
 	return 1;
+
+out_shadow_vmcs:
+	kfree(vmx->nested.cached_vmcs12);
+
+out_cached_vmcs12:
+	free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+	return -ENOMEM;
 }
 
 /*
@@ -7098,6 +7099,10 @@ static void free_nested(struct vcpu_vmx *vmx)
 	vmx->nested.vmxon = false;
 	free_vpid(vmx->nested.vpid02);
 	nested_release_vmcs12(vmx);
+	if (vmx->nested.msr_bitmap) {
+		free_page((unsigned long)vmx->nested.msr_bitmap);
+		vmx->nested.msr_bitmap = NULL;
+	}
 	if (enable_shadow_vmcs)
 		free_vmcs(vmx->nested.current_shadow_vmcs);
 	kfree(vmx->nested.cached_vmcs12);
@@ -9472,8 +9477,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 {
 	int msr;
 	struct page *page;
-	unsigned long *msr_bitmap;
+	unsigned long *msr_bitmap_l1;
+	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
 
+	/* This shortcut is ok because we support only x2APIC MSRs so far. */
 	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
 		return false;
 
@@ -9482,63 +9489,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 		WARN_ON(1);
 		return false;
 	}
-	msr_bitmap = (unsigned long *)kmap(page);
-	if (!msr_bitmap) {
+	msr_bitmap_l1 = (unsigned long *)kmap(page);
+	if (!msr_bitmap_l1) {
 		nested_release_page_clean(page);
 		WARN_ON(1);
 		return false;
 	}
 
+	memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
 	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
 		if (nested_cpu_has_apic_reg_virt(vmcs12))
 			for (msr = 0x800; msr <= 0x8ff; msr++)
 				nested_vmx_disable_intercept_for_msr(
-					msr_bitmap,
-					vmx_msr_bitmap_nested,
+					msr_bitmap_l1, msr_bitmap_l0,
 					msr, MSR_TYPE_R);
-		/* TPR is allowed */
-		nested_vmx_disable_intercept_for_msr(msr_bitmap,
-				vmx_msr_bitmap_nested,
+
+		nested_vmx_disable_intercept_for_msr(
+				msr_bitmap_l1, msr_bitmap_l0,
 				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
 				MSR_TYPE_R | MSR_TYPE_W);
+
 		if (nested_cpu_has_vid(vmcs12)) {
-			/* EOI and self-IPI are allowed */
 			nested_vmx_disable_intercept_for_msr(
-				msr_bitmap,
-				vmx_msr_bitmap_nested,
+				msr_bitmap_l1, msr_bitmap_l0,
 				APIC_BASE_MSR + (APIC_EOI >> 4),
 				MSR_TYPE_W);
 			nested_vmx_disable_intercept_for_msr(
-				msr_bitmap,
-				vmx_msr_bitmap_nested,
+				msr_bitmap_l1, msr_bitmap_l0,
 				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
 				MSR_TYPE_W);
 		}
-	} else {
-		/*
-		 * Enable reading intercept of all the x2apic
-		 * MSRs. We should not rely on vmcs12 to do any
-		 * optimizations here, it may have been modified
-		 * by L1.
-		 */
-		for (msr = 0x800; msr <= 0x8ff; msr++)
-			__vmx_enable_intercept_for_msr(
-				vmx_msr_bitmap_nested,
-				msr,
-				MSR_TYPE_R);
-
-		__vmx_enable_intercept_for_msr(
-				vmx_msr_bitmap_nested,
-				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
-				MSR_TYPE_W);
-		__vmx_enable_intercept_for_msr(
-				vmx_msr_bitmap_nested,
-				APIC_BASE_MSR + (APIC_EOI >> 4),
-				MSR_TYPE_W);
-		__vmx_enable_intercept_for_msr(
-				vmx_msr_bitmap_nested,
-				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
-				MSR_TYPE_W);
-	}
 	kunmap(page);
 	nested_release_page_clean(page);
@@ -9957,10 +9938,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	}
 
 	if (cpu_has_vmx_msr_bitmap() &&
-	    exec_control & CPU_BASED_USE_MSR_BITMAPS) {
-		nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
-		/* MSR_BITMAP will be set by following vmx_set_efer. */
-	} else
+	    exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+	    nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+		; /* MSR_BITMAP will be set by following vmx_set_efer. */
+	else
 		exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
 
 	/*