Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2d344105 authored by Huaitong Han's avatar Huaitong Han Committed by Paolo Bonzini
Browse files

KVM, pkeys: introduce pkru_mask to cache conditions



PKEYS defines a new status bit in the PFEC. PFEC.PK (bit 5), if some
conditions is true, the fault is considered as a PKU violation.
pkru_mask indicates if we need to check PKRU.ADi and PKRU.WDi, and
does cache some conditions for permission_fault.

[ Huaitong: Xiao helps to modify many sections. ]

Signed-off-by: default avatarHuaitong Han <huaitong.han@intel.com>
Signed-off-by: default avatarXiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 1be0e61c
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -335,6 +335,14 @@ struct kvm_mmu {
	 */
	u8 permissions[16];

	/*
	* The pkru_mask indicates if protection key checks are needed.  It
	* consists of 16 domains indexed by page fault error code bits [4:1],
	* with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
	* Each domain has 2 bits which are ANDed with AD and WD from PKRU.
	*/
	u32 pkru_mask;

	u64 *pae_root;
	u64 *lm_root;

+80 −0
Original line number Diff line number Diff line
@@ -3923,6 +3923,81 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
	}
}

/*
* PKU is an additional mechanism by which the paging controls access to
* user-mode addresses based on the value in the PKRU register.  Protection
* key violations are reported through a bit in the page fault error code.
* Unlike other bits of the error code, the PK bit is not known at the
* call site of e.g. gva_to_gpa; it must be computed directly in
* permission_fault based on two bits of PKRU, on some machine state (CR4,
* CR0, EFER, CPL), and on other bits of the error code and the page tables.
*
* In particular the following conditions come from the error code, the
* page tables and the machine state:
* - PK is always zero unless CR4.PKE=1 and EFER.LMA=1
* - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch)
* - PK is always zero if U=0 in the page tables
* - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access.
*
* The PKRU bitmask caches the result of these four conditions.  The error
* code (minus the P bit) and the page table's U bit form an index into the
* PKRU bitmask.  Two bits of the PKRU bitmask are then extracted and ANDed
* with the two bits of the PKRU register corresponding to the protection key.
* For the first three conditions above the bits will be 00, thus masking
* away both AD and WD.  For all reads or if the last condition holds, WD
* only will be masked away.
*/
static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
				bool ept)
{
	unsigned bit;
	bool wp;

	if (ept) {
		mmu->pkru_mask = 0;
		return;
	}

	/* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */
	if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) {
		mmu->pkru_mask = 0;
		return;
	}

	wp = is_write_protection(vcpu);

	for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) {
		unsigned pfec, pkey_bits;
		bool check_pkey, check_write, ff, uf, wf, pte_user;

		pfec = bit << 1;
		ff = pfec & PFERR_FETCH_MASK;
		uf = pfec & PFERR_USER_MASK;
		wf = pfec & PFERR_WRITE_MASK;

		/* PFEC.RSVD is replaced by ACC_USER_MASK. */
		pte_user = pfec & PFERR_RSVD_MASK;

		/*
		 * Only need to check the access which is not an
		 * instruction fetch and is to a user page.
		 */
		check_pkey = (!ff && pte_user);
		/*
		 * write access is controlled by PKRU if it is a
		 * user access or CR0.WP = 1.
		 */
		check_write = check_pkey && wf && (uf || wp);

		/* PKRU.AD stops both read and write access. */
		pkey_bits = !!check_pkey;
		/* PKRU.WD stops write access. */
		pkey_bits |= (!!check_write) << 1;

		mmu->pkru_mask |= (pkey_bits & 3) << pfec;
	}
}

static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
{
	unsigned root_level = mmu->root_level;
@@ -3941,6 +4016,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,

	reset_rsvds_bits_mask(vcpu, context);
	update_permission_bitmask(vcpu, context, false);
	update_pkru_bitmask(vcpu, context, false);
	update_last_nonleaf_level(vcpu, context);

	MMU_WARN_ON(!is_pae(vcpu));
@@ -3968,6 +4044,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,

	reset_rsvds_bits_mask(vcpu, context);
	update_permission_bitmask(vcpu, context, false);
	update_pkru_bitmask(vcpu, context, false);
	update_last_nonleaf_level(vcpu, context);

	context->page_fault = paging32_page_fault;
@@ -4026,6 +4103,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
	}

	update_permission_bitmask(vcpu, context, false);
	update_pkru_bitmask(vcpu, context, false);
	update_last_nonleaf_level(vcpu, context);
	reset_tdp_shadow_zero_bits_mask(vcpu, context);
}
@@ -4078,6 +4156,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
	context->direct_map = false;

	update_permission_bitmask(vcpu, context, true);
	update_pkru_bitmask(vcpu, context, true);
	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
@@ -4132,6 +4211,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
	}

	update_permission_bitmask(vcpu, g_context, false);
	update_pkru_bitmask(vcpu, g_context, false);
	update_last_nonleaf_level(vcpu, g_context);
}