Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 855feb67 authored by Yu Zhang, committed by Paolo Bonzini
Browse files

KVM: MMU: Add 5 level EPT & Shadow page table support.



Extends the shadow paging code, so that 5 level shadow page
table can be constructed if VM is running in 5 level paging
mode.

Also extends the ept code, so that 5 level ept table can be
constructed if maxphysaddr of VM exceeds 48 bits. Unlike the
shadow logic, KVM should still use 4 level ept table for a VM
whose physical address width is less than 48 bits, even when
the VM is running in 5 level paging mode.

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
[Unconditionally reset the MMU context in kvm_cpuid_update.
 Changing MAXPHYADDR invalidates the reserved bit bitmasks.
 - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 2a7266a8
Loading
Loading
Loading
Loading
+5 −5
Original line number Original line Diff line number Diff line
@@ -315,7 +315,7 @@ struct kvm_pio_request {
	int size;
	int size;
};
};


#define PT64_ROOT_MAX_LEVEL 4
#define PT64_ROOT_MAX_LEVEL 5


struct rsvd_bits_validate {
struct rsvd_bits_validate {
	u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL];
	u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL];
@@ -323,9 +323,9 @@ struct rsvd_bits_validate {
};
};


/*
/*
 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
 * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
 * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
 * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
 * mode.
 * current mmu mode.
 */
 */
struct kvm_mmu {
struct kvm_mmu {
	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
@@ -982,7 +982,7 @@ struct kvm_x86_ops {
	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
	int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
	int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
	int (*get_tdp_level)(void);
	int (*get_tdp_level)(struct kvm_vcpu *vcpu);
	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
	int (*get_lpage_level)(void);
	int (*get_lpage_level)(void);
	bool (*rdtscp_supported)(void);
	bool (*rdtscp_supported)(void);
+2 −0
Original line number Original line Diff line number Diff line
@@ -453,6 +453,7 @@ enum vmcs_field {


#define VMX_EPT_EXECUTE_ONLY_BIT		(1ull)
#define VMX_EPT_EXECUTE_ONLY_BIT		(1ull)
#define VMX_EPT_PAGE_WALK_4_BIT			(1ull << 6)
#define VMX_EPT_PAGE_WALK_4_BIT			(1ull << 6)
#define VMX_EPT_PAGE_WALK_5_BIT			(1ull << 7)
#define VMX_EPTP_UC_BIT				(1ull << 8)
#define VMX_EPTP_UC_BIT				(1ull << 8)
#define VMX_EPTP_WB_BIT				(1ull << 14)
#define VMX_EPTP_WB_BIT				(1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT			(1ull << 16)
#define VMX_EPT_2MB_PAGE_BIT			(1ull << 16)
@@ -471,6 +472,7 @@ enum vmcs_field {
#define VMX_EPT_MT_EPTE_SHIFT			3
#define VMX_EPT_MT_EPTE_SHIFT			3
#define VMX_EPTP_PWL_MASK			0x38ull
#define VMX_EPTP_PWL_MASK			0x38ull
#define VMX_EPTP_PWL_4				0x18ull
#define VMX_EPTP_PWL_4				0x18ull
#define VMX_EPTP_PWL_5				0x20ull
#define VMX_EPTP_AD_ENABLE_BIT			(1ull << 6)
#define VMX_EPTP_AD_ENABLE_BIT			(1ull << 6)
#define VMX_EPTP_MT_MASK			0x7ull
#define VMX_EPTP_MT_MASK			0x7ull
#define VMX_EPTP_MT_WB				0x6ull
#define VMX_EPTP_MT_WB				0x6ull
+1 −0
Original line number Original line Diff line number Diff line
@@ -136,6 +136,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)


	/* Update physical-address width */
	/* Update physical-address width */
	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
	kvm_mmu_reset_context(vcpu);


	kvm_pmu_refresh(vcpu);
	kvm_pmu_refresh(vcpu);
	return 0;
	return 0;
+29 −14
Original line number Original line Diff line number Diff line
@@ -3322,8 +3322,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
		return;
		return;


	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL &&
	if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL &&
	    (vcpu->arch.mmu.root_level == PT64_ROOT_4LEVEL ||
	    (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
	     vcpu->arch.mmu.direct_map)) {
	     vcpu->arch.mmu.direct_map)) {
		hpa_t root = vcpu->arch.mmu.root_hpa;
		hpa_t root = vcpu->arch.mmu.root_hpa;


@@ -3375,13 +3375,14 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
	struct kvm_mmu_page *sp;
	struct kvm_mmu_page *sp;
	unsigned i;
	unsigned i;


	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) {
	if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
		spin_lock(&vcpu->kvm->mmu_lock);
		spin_lock(&vcpu->kvm->mmu_lock);
		if(make_mmu_pages_available(vcpu) < 0) {
		if(make_mmu_pages_available(vcpu) < 0) {
			spin_unlock(&vcpu->kvm->mmu_lock);
			spin_unlock(&vcpu->kvm->mmu_lock);
			return 1;
			return 1;
		}
		}
		sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_4LEVEL, 1, ACC_ALL);
		sp = kvm_mmu_get_page(vcpu, 0, 0,
				vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
		++sp->root_count;
		++sp->root_count;
		spin_unlock(&vcpu->kvm->mmu_lock);
		spin_unlock(&vcpu->kvm->mmu_lock);
		vcpu->arch.mmu.root_hpa = __pa(sp->spt);
		vcpu->arch.mmu.root_hpa = __pa(sp->spt);
@@ -3425,7 +3426,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
	 * Do we shadow a long mode page table? If so we need to
	 * Do we shadow a long mode page table? If so we need to
	 * write-protect the guests page table root.
	 * write-protect the guests page table root.
	 */
	 */
	if (vcpu->arch.mmu.root_level == PT64_ROOT_4LEVEL) {
	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
		hpa_t root = vcpu->arch.mmu.root_hpa;
		hpa_t root = vcpu->arch.mmu.root_hpa;


		MMU_WARN_ON(VALID_PAGE(root));
		MMU_WARN_ON(VALID_PAGE(root));
@@ -3435,8 +3436,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
			spin_unlock(&vcpu->kvm->mmu_lock);
			spin_unlock(&vcpu->kvm->mmu_lock);
			return 1;
			return 1;
		}
		}
		sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_4LEVEL,
		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
				      0, ACC_ALL);
				vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
		root = __pa(sp->spt);
		root = __pa(sp->spt);
		++sp->root_count;
		++sp->root_count;
		spin_unlock(&vcpu->kvm->mmu_lock);
		spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3531,7 +3532,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)


	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
	if (vcpu->arch.mmu.root_level == PT64_ROOT_4LEVEL) {
	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
		hpa_t root = vcpu->arch.mmu.root_hpa;
		hpa_t root = vcpu->arch.mmu.root_hpa;
		sp = page_header(root);
		sp = page_header(root);
		mmu_sync_children(vcpu, sp);
		mmu_sync_children(vcpu, sp);
@@ -4057,6 +4058,12 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
		rsvd_check->rsvd_bits_mask[1][0] =
		rsvd_check->rsvd_bits_mask[1][0] =
			rsvd_check->rsvd_bits_mask[0][0];
			rsvd_check->rsvd_bits_mask[0][0];
		break;
		break;
	case PT64_ROOT_5LEVEL:
		rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd |
			nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
			rsvd_bits(maxphyaddr, 51);
		rsvd_check->rsvd_bits_mask[1][4] =
			rsvd_check->rsvd_bits_mask[0][4];
	case PT64_ROOT_4LEVEL:
	case PT64_ROOT_4LEVEL:
		rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
		rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
			nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
			nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
@@ -4098,6 +4105,8 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
{
{
	u64 bad_mt_xwr;
	u64 bad_mt_xwr;


	rsvd_check->rsvd_bits_mask[0][4] =
		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
	rsvd_check->rsvd_bits_mask[0][3] =
	rsvd_check->rsvd_bits_mask[0][3] =
		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
	rsvd_check->rsvd_bits_mask[0][2] =
	rsvd_check->rsvd_bits_mask[0][2] =
@@ -4107,6 +4116,7 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
	rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
	rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);


	/* large page */
	/* large page */
	rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4];
	rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
	rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
	rsvd_check->rsvd_bits_mask[1][2] =
	rsvd_check->rsvd_bits_mask[1][2] =
		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
@@ -4367,7 +4377,10 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
static void paging64_init_context(struct kvm_vcpu *vcpu,
static void paging64_init_context(struct kvm_vcpu *vcpu,
				  struct kvm_mmu *context)
				  struct kvm_mmu *context)
{
{
	paging64_init_context_common(vcpu, context, PT64_ROOT_4LEVEL);
	int root_level = is_la57_mode(vcpu) ?
			 PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;

	paging64_init_context_common(vcpu, context, root_level);
}
}


static void paging32_init_context(struct kvm_vcpu *vcpu,
static void paging32_init_context(struct kvm_vcpu *vcpu,
@@ -4408,7 +4421,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
	context->sync_page = nonpaging_sync_page;
	context->sync_page = nonpaging_sync_page;
	context->invlpg = nonpaging_invlpg;
	context->invlpg = nonpaging_invlpg;
	context->update_pte = nonpaging_update_pte;
	context->update_pte = nonpaging_update_pte;
	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
	context->root_hpa = INVALID_PAGE;
	context->root_hpa = INVALID_PAGE;
	context->direct_map = true;
	context->direct_map = true;
	context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
	context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
@@ -4422,7 +4435,8 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
		context->root_level = 0;
		context->root_level = 0;
	} else if (is_long_mode(vcpu)) {
	} else if (is_long_mode(vcpu)) {
		context->nx = is_nx(vcpu);
		context->nx = is_nx(vcpu);
		context->root_level = PT64_ROOT_4LEVEL;
		context->root_level = is_la57_mode(vcpu) ?
				PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
		reset_rsvds_bits_mask(vcpu, context);
		reset_rsvds_bits_mask(vcpu, context);
		context->gva_to_gpa = paging64_gva_to_gpa;
		context->gva_to_gpa = paging64_gva_to_gpa;
	} else if (is_pae(vcpu)) {
	} else if (is_pae(vcpu)) {
@@ -4479,7 +4493,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,


	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
	MMU_WARN_ON(VALID_PAGE(context->root_hpa));


	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
	context->shadow_root_level = PT64_ROOT_4LEVEL;


	context->nx = true;
	context->nx = true;
	context->ept_ad = accessed_dirty;
	context->ept_ad = accessed_dirty;
@@ -4488,7 +4502,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
	context->sync_page = ept_sync_page;
	context->sync_page = ept_sync_page;
	context->invlpg = ept_invlpg;
	context->invlpg = ept_invlpg;
	context->update_pte = ept_update_pte;
	context->update_pte = ept_update_pte;
	context->root_level = context->shadow_root_level;
	context->root_level = PT64_ROOT_4LEVEL;
	context->root_hpa = INVALID_PAGE;
	context->root_hpa = INVALID_PAGE;
	context->direct_map = false;
	context->direct_map = false;
	context->base_role.ad_disabled = !accessed_dirty;
	context->base_role.ad_disabled = !accessed_dirty;
@@ -4533,7 +4547,8 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
		g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
		g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
	} else if (is_long_mode(vcpu)) {
	} else if (is_long_mode(vcpu)) {
		g_context->nx = is_nx(vcpu);
		g_context->nx = is_nx(vcpu);
		g_context->root_level = PT64_ROOT_4LEVEL;
		g_context->root_level = is_la57_mode(vcpu) ?
					PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
		reset_rsvds_bits_mask(vcpu, g_context);
		reset_rsvds_bits_mask(vcpu, g_context);
		g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
		g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
	} else if (is_pae(vcpu)) {
	} else if (is_pae(vcpu)) {
+1 −0
Original line number Original line Diff line number Diff line
@@ -37,6 +37,7 @@
#define PT32_DIR_PSE36_MASK \
#define PT32_DIR_PSE36_MASK \
	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)


#define PT64_ROOT_5LEVEL 5
#define PT64_ROOT_4LEVEL 4
#define PT64_ROOT_4LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
#define PT32E_ROOT_LEVEL 3
Loading