nEPT: Add nEPT violation/misconfiguration support (25d92081) · Commits · e / devices / android_kernel_xiaomi_markw

arch/x86/include/asm/kvm_host.h

+4 −0

Original line number	Diff line number	Diff line
		@@ -286,6 +286,7 @@ struct kvm_mmu {
		u64 *pae_root;
		u64 *lm_root;
		u64 rsvd_bits_mask[2][4];
		u64 bad_mt_xwr;

		/*
		* Bitmap: bit set = last pte in walk
		@@ -512,6 +513,9 @@ struct kvm_vcpu_arch {
		* instruction.
		*/
		bool write_fault_to_shadow_pgtable;

		/* set at EPT violation at this point */
		unsigned long exit_qualification;
		};

		struct kvm_lpage_info {

arch/x86/kvm/mmu.c

+50 −11

Original line number	Diff line number	Diff line
		@@ -3519,6 +3519,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
		int maxphyaddr = cpuid_maxphyaddr(vcpu);
		u64 exb_bit_rsvd = 0;

		context->bad_mt_xwr = 0;

		if (!context->nx)
		exb_bit_rsvd = rsvd_bits(63, 63);
		switch (context->root_level) {
		@@ -3574,7 +3576,40 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
		}
		}

		static void update_permission_bitmask(struct kvm_vcpu vcpu, struct kvm_mmu mmu)
		/*
		 * reset_rsvds_bits_mask_ept - rebuild the reserved-bit state of @context
		 * for EPT-format page table entries.
		 *
		 * @vcpu:     vcpu passed to cpuid_maxphyaddr() to bound the address bits
		 * @context:  MMU context whose rsvd_bits_mask[][] and bad_mt_xwr are rewritten
		 * @execonly: when false, the low-three-bit combination 100b is also
		 *            recorded as bad; when true it is accepted
		 *
		 * rsvd_bits_mask is consumed as [bit 7 of the entry][level - 1] and
		 * bad_mt_xwr as a 64-bit bitmap indexed by the low six bits of the entry
		 * (see is_rsvd_bits_set()).
		 *
		 * Both outputs are fully recomputed on every call: bad_mt_xwr is built in
		 * a local and assigned, so bits left over from an earlier initialisation
		 * (for instance with a different @execonly) cannot leak through.
		 */
		static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
				struct kvm_mmu *context, bool execonly)
		{
			int maxphyaddr = cpuid_maxphyaddr(vcpu);
			u64 bad_mt_xwr = 0;
			int pte;

			/* Entries with bit 7 clear, from level 4 down to level 1. */
			context->rsvd_bits_mask[0][3] =
				rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
			context->rsvd_bits_mask[0][2] =
				rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
			context->rsvd_bits_mask[0][1] =
				rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
			context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);

			/* large page */
			context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
			context->rsvd_bits_mask[1][2] =
				rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
			context->rsvd_bits_mask[1][1] =
				rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
			context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];

			/*
			 * Enumerate every value of the low six bits of an entry: bits 2:0
			 * are the rwx permissions, bits 5:3 are "mt" (presumably the EPT
			 * memory type -- confirm against the SDM).  Set the bitmap bit for
			 * each combination that must be treated as reserved.
			 */
			for (pte = 0; pte < 64; pte++) {
				int rwx_bits = pte & 7;
				int mt = pte >> 3;

				if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
				    rwx_bits == 0x2 || rwx_bits == 0x6 ||
				    (rwx_bits == 0x4 && !execonly))
					bad_mt_xwr |= (1ull << pte);
			}

			context->bad_mt_xwr = bad_mt_xwr;
		}

		static void update_permission_bitmask(struct kvm_vcpu *vcpu,
		struct kvm_mmu *mmu, bool ept)
		{
		unsigned bit, byte, pfec;
		u8 map;
		@@ -3592,12 +3627,16 @@ static void update_permission_bitmask(struct kvm_vcpu vcpu, struct kvm_mmu mmu
		w = bit & ACC_WRITE_MASK;
		u = bit & ACC_USER_MASK;

		if (!ept) {
		/* Not really needed: !nx will cause pte.nx to fault */
		x \|= !mmu->nx;
		/* Allow supervisor writes if !cr0.wp */
		w \|= !is_write_protection(vcpu) && !uf;
		/* Disallow supervisor fetches of user code if cr4.smep */
		x &= !(smep && u && !uf);
		} else
		/* Not really needed: no U/S accesses on ept */
		u = 1;

		fault = (ff && !x) \|\| (uf && !u) \|\| (wf && !w);
		map \|= fault << bit;
		@@ -3632,7 +3671,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
		context->root_level = level;

		reset_rsvds_bits_mask(vcpu, context);
		update_permission_bitmask(vcpu, context);
		update_permission_bitmask(vcpu, context, false);
		update_last_pte_bitmap(vcpu, context);

		ASSERT(is_pae(vcpu));
		@@ -3662,7 +3701,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
		context->root_level = PT32_ROOT_LEVEL;

		reset_rsvds_bits_mask(vcpu, context);
		update_permission_bitmask(vcpu, context);
		update_permission_bitmask(vcpu, context, false);
		update_last_pte_bitmap(vcpu, context);

		context->new_cr3 = paging_new_cr3;
		@@ -3724,7 +3763,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
		context->gva_to_gpa = paging32_gva_to_gpa;
		}

		update_permission_bitmask(vcpu, context);
		update_permission_bitmask(vcpu, context, false);
		update_last_pte_bitmap(vcpu, context);

		return 0;
		@@ -3803,7 +3842,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
		}

		update_permission_bitmask(vcpu, g_context);
		update_permission_bitmask(vcpu, g_context, false);
		update_last_pte_bitmap(vcpu, g_context);

		return 0;

arch/x86/kvm/paging_tmpl.h

+22 −3

Original line number	Diff line number	Diff line
		@@ -129,10 +129,10 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)

		static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
		{
		int bit7;
		int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f;

		bit7 = (gpte >> 7) & 1;
		return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
		return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) \|
		((mmu->bad_mt_xwr & (1ull << low6)) != 0);
		}

		static inline int FNAME(is_present_gpte)(unsigned long pte)
		@@ -386,6 +386,25 @@ error:
		walker->fault.vector = PF_VECTOR;
		walker->fault.error_code_valid = true;
		walker->fault.error_code = errcode;

		#if PTTYPE == PTTYPE_EPT
		/*
		* Use PFERR_RSVD_MASK in error_code to tell whether an EPT
		* misconfiguration needs to be injected. The detection is
		* done by is_rsvd_bits_set() above.
		*
		* We set up the value of exit_qualification to inject:
		* [2:0] - Derived from [2:0] of the real exit_qualification at EPT violation
		* [5:3] - Calculated by the page walk of the guest EPT page tables
		* [8:7] - Derived from [8:7] of the real exit_qualification
		*
		* The other bits are set to 0.
		*/
		if (!(errcode & PFERR_RSVD_MASK)) {
		vcpu->arch.exit_qualification &= 0x187;
		vcpu->arch.exit_qualification \|= ((pt_access & pte) & 0x7) << 3;
		}
		#endif
		walker->fault.address = addr;
		walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;

arch/x86/kvm/vmx.c

+19 −0

Original line number	Diff line number	Diff line
		@@ -5317,9 +5317,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)

		/* Is it a write fault? */
		error_code = exit_qualification & (1U << 1);
		/* Is it a fetch fault? */
		error_code \|= (exit_qualification & (1U << 2)) << 2;
		/* Is the EPT page table present? */
		error_code \|= (exit_qualification >> 3) & 0x1;

		vcpu->arch.exit_qualification = exit_qualification;

		return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
		}

		@@ -7348,6 +7352,21 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
		entry->ecx \|= bit(X86_FEATURE_VMX);
		}

		/*
		 * nested_ept_inject_page_fault - report a fault found while walking
		 * EPT-format tables by filling in the vmcs12 exit fields.
		 *
		 * @vcpu:  vcpu on which the fault was detected
		 * @fault: fault description; only error_code and address are read here
		 *
		 * nested_vmx_vmexit() runs first and the vmcs12 fields are written
		 * afterwards, so the values stored below are the ones left in vmcs12.
		 * PFERR_RSVD_MASK in the error code selects an EPT misconfiguration exit;
		 * anything else is reported as an EPT violation.
		 */
		static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
				struct x86_exception *fault)
		{
			struct vmcs12 *l1_vmcs;

			nested_vmx_vmexit(vcpu);
			l1_vmcs = get_vmcs12(vcpu);

			l1_vmcs->vm_exit_reason = (fault->error_code & PFERR_RSVD_MASK) ?
				EXIT_REASON_EPT_MISCONFIG : EXIT_REASON_EPT_VIOLATION;

			/* Qualification as saved in vcpu->arch by the EPT violation path. */
			l1_vmcs->exit_qualification = vcpu->arch.exit_qualification;
			l1_vmcs->guest_physical_address = fault->address;
		}

		/*
		* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
		* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it