Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit edc90b7d authored by Xiao Guangrong, committed by Paolo Bonzini
Browse files

KVM: MMU: fix SMAP virtualization



KVM may turn a user page into a kernel page when the kernel writes to a
read-only user page while CR0.WP = 0. That shadow page entry may then be
reused after SMAP is enabled, wrongly allowing the kernel to access the
user page.

Fix it by recording CR4.SMAP && !CR0.WP in the shadow page's role and
resetting the MMU whenever CR4.SMAP is updated.

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 428e3d08
Loading
Loading
Loading
Loading
+14 −4
Original line number Original line Diff line number Diff line
@@ -169,6 +169,10 @@ Shadow pages contain the following information:
    Contains the value of cr4.smep && !cr0.wp for which the page is valid
    Contains the value of cr4.smep && !cr0.wp for which the page is valid
    (pages for which this is true are different from other pages; see the
    (pages for which this is true are different from other pages; see the
    treatment of cr0.wp=0 below).
    treatment of cr0.wp=0 below).
  role.smap_andnot_wp:
    Contains the value of cr4.smap && !cr0.wp for which the page is valid
    (pages for which this is true are different from other pages; see the
    treatment of cr0.wp=0 below).
  gfn:
  gfn:
    Either the guest page table containing the translations shadowed by this
    Either the guest page table containing the translations shadowed by this
    page, or the base page frame for linear translations.  See role.direct.
    page, or the base page frame for linear translations.  See role.direct.
@@ -344,10 +348,16 @@ on fault type:


(user write faults generate a #PF)
(user write faults generate a #PF)


In the first case there is an additional complication if CR4.SMEP is
In the first case there are two additional complications:
enabled: since we've turned the page into a kernel page, the kernel may now
- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
execute it.  We handle this by also setting spte.nx.  If we get a user
  the kernel may now execute it.  We handle this by also setting spte.nx.
fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back.
  If we get a user fetch or read fault, we'll change spte.u=1 and
  spte.nx=gpte.nx back.
- if CR4.SMAP is disabled: since the page has been changed to a kernel
  page, it cannot be reused once CR4.SMAP is enabled. We record
  CR4.SMAP && !CR0.WP in the shadow page's role to avoid this case. Note
  that we do not need to handle the case where CR4.SMAP is already
  enabled, since KVM will then inject a #PF directly into the guest
  because the permission check fails.


To prevent an spte that was converted into a kernel page with cr0.wp=0
To prevent an spte that was converted into a kernel page with cr0.wp=0
from being written by the kernel after cr0.wp has changed to 1, we make
from being written by the kernel after cr0.wp has changed to 1, we make
+1 −0
Original line number Original line Diff line number Diff line
@@ -207,6 +207,7 @@ union kvm_mmu_page_role {
		unsigned nxe:1;
		unsigned nxe:1;
		unsigned cr0_wp:1;
		unsigned cr0_wp:1;
		unsigned smep_andnot_wp:1;
		unsigned smep_andnot_wp:1;
		unsigned smap_andnot_wp:1;
	};
	};
};
};


+12 −4
Original line number Original line Diff line number Diff line
@@ -3736,7 +3736,7 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
	}
	}
}
}


void update_permission_bitmask(struct kvm_vcpu *vcpu,
static void update_permission_bitmask(struct kvm_vcpu *vcpu,
				      struct kvm_mmu *mmu, bool ept)
				      struct kvm_mmu *mmu, bool ept)
{
{
	unsigned bit, byte, pfec;
	unsigned bit, byte, pfec;
@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
{
{
	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
	bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
	struct kvm_mmu *context = &vcpu->arch.mmu;
	struct kvm_mmu *context = &vcpu->arch.mmu;


	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
	context->base_role.cr0_wp  = is_write_protection(vcpu);
	context->base_role.cr0_wp  = is_write_protection(vcpu);
	context->base_role.smep_andnot_wp
	context->base_role.smep_andnot_wp
		= smep && !is_write_protection(vcpu);
		= smep && !is_write_protection(vcpu);
	context->base_role.smap_andnot_wp
		= smap && !is_write_protection(vcpu);
}
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);


@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		       const u8 *new, int bytes)
		       const u8 *new, int bytes)
{
{
	gfn_t gfn = gpa >> PAGE_SHIFT;
	gfn_t gfn = gpa >> PAGE_SHIFT;
	union kvm_mmu_page_role mask = { .word = 0 };
	struct kvm_mmu_page *sp;
	struct kvm_mmu_page *sp;
	LIST_HEAD(invalid_list);
	LIST_HEAD(invalid_list);
	u64 entry, gentry, *spte;
	u64 entry, gentry, *spte;
	int npte;
	int npte;
	bool remote_flush, local_flush, zap_page;
	bool remote_flush, local_flush, zap_page;
	union kvm_mmu_page_role mask = (union kvm_mmu_page_role) {
		.cr0_wp = 1,
		.cr4_pae = 1,
		.nxe = 1,
		.smep_andnot_wp = 1,
		.smap_andnot_wp = 1,
	};


	/*
	/*
	 * If we don't have indirect shadow pages, it means no page is
	 * If we don't have indirect shadow pages, it means no page is
@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
	++vcpu->kvm->stat.mmu_pte_write;
	++vcpu->kvm->stat.mmu_pte_write;
	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);


	mask.cr0_wp = mask.cr4_pae = mask.nxe = mask.smep_andnot_wp = 1;
	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
		if (detect_write_misaligned(sp, gpa, bytes) ||
		if (detect_write_misaligned(sp, gpa, bytes) ||
		      detect_write_flooding(sp)) {
		      detect_write_flooding(sp)) {
+0 −2
Original line number Original line Diff line number Diff line
@@ -71,8 +71,6 @@ enum {
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
		bool ept);


static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
{
+3 −5
Original line number Original line Diff line number Diff line
@@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
{
	unsigned long old_cr4 = kvm_read_cr4(vcpu);
	unsigned long old_cr4 = kvm_read_cr4(vcpu);
	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
				   X86_CR4_PAE | X86_CR4_SMEP;
				   X86_CR4_SMEP | X86_CR4_SMAP;

	if (cr4 & CR4_RESERVED_BITS)
	if (cr4 & CR4_RESERVED_BITS)
		return 1;
		return 1;


@@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
		kvm_mmu_reset_context(vcpu);
		kvm_mmu_reset_context(vcpu);


	if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
		update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);

	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
		kvm_update_cpuid(vcpu);
		kvm_update_cpuid(vcpu);