
Commit 852e3c19 authored by Joerg Roedel, committed by Avi Kivity

KVM: MMU: make direct mapping paths aware of mapping levels



Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent d25797b2
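
Not part of the commit itself: a minimal, self-contained C sketch of the arithmetic the patch threads through the direct-map paths. The boolean largepage flag is replaced by an explicit mapping level, and the guest frame number is aligned to the region covered by that level before __direct_map() is called. The constants and KVM_PAGES_PER_HPAGE() are re-declared locally here and assumed to mirror the kernel's x86 definitions of the time (4k, 2M and 1G pages).

#include <stdio.h>

#define PT_PAGE_TABLE_LEVEL	1	/* 4k pages */
#define PT_DIRECTORY_LEVEL	2	/* 2M pages */
#define PT_PDPE_LEVEL		3	/* 1G pages */

/* pages covered by one mapping at a given level: 1, 512, 262144 */
#define KVM_PAGES_PER_HPAGE(level)	(1UL << (((level) - 1) * 9))

int main(void)
{
	unsigned long gfn = 0x12345;
	int level;

	for (level = PT_PAGE_TABLE_LEVEL; level <= PT_PDPE_LEVEL; level++) {
		/* the alignment nonpaging_map()/tdp_page_fault() now apply per level */
		unsigned long base = gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);

		printf("level %d: gfn 0x%lx -> base 0x%lx, %lu pages per mapping\n",
		       level, gfn, base, KVM_PAGES_PER_HPAGE(level));
	}
	return 0;
}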
arch/x86/include/asm/kvm_host.h  +1 −1
@@ -315,7 +315,7 @@ struct kvm_vcpu_arch {
 	struct {
 		gfn_t gfn;	/* presumed gfn during guest pte update */
 		pfn_t pfn;	/* pfn corresponding to that gfn */
-		int largepage;
+		int level;
 		unsigned long mmu_seq;
 	} update_pte;
 
arch/x86/kvm/mmu.c  +49 −34
@@ -257,7 +257,7 @@ static int is_last_spte(u64 pte, int level)
 {
 	if (level == PT_PAGE_TABLE_LEVEL)
 		return 1;
-	if (level == PT_DIRECTORY_LEVEL && is_large_pte(pte))
+	if (is_large_pte(pte))
 		return 1;
 	return 0;
 }
@@ -753,7 +753,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
 {
-	int i;
+	int i, j;
 	int retval = 0;
 
 	/*
@@ -772,11 +772,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
-			int idx = gfn_offset /
-			          KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL);
+
 			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
-			retval |= handler(kvm,
-					&memslot->lpage_info[0][idx].rmap_pde);
+
+			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
+				int idx = gfn_offset;
+				idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
+				retval |= handler(kvm,
+					&memslot->lpage_info[j][idx].rmap_pde);
+			}
 		}
 	}
 
@@ -814,12 +818,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
 
 #define RMAP_RECYCLE_THRESHOLD 1000
 
-static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
+static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
 	unsigned long *rmapp;
+	struct kvm_mmu_page *sp;
+
+	sp = page_header(__pa(spte));
 
 	gfn = unalias_gfn(vcpu->kvm, gfn);
-	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
 	kvm_unmap_rmapp(vcpu->kvm, rmapp);
 	kvm_flush_remote_tlbs(vcpu->kvm);
@@ -1734,7 +1741,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int user_fault,
-		    int write_fault, int dirty, int largepage,
+		    int write_fault, int dirty, int level,
 		    gfn_t gfn, pfn_t pfn, bool speculative,
 		    bool can_unsync)
 {
@@ -1757,7 +1764,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= shadow_nx_mask;
 	if (pte_access & ACC_USER_MASK)
 		spte |= shadow_user_mask;
-	if (largepage)
+	if (level > PT_PAGE_TABLE_LEVEL)
 		spte |= PT_PAGE_SIZE_MASK;
 	if (tdp_enabled)
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
@@ -1768,7 +1775,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	if ((pte_access & ACC_WRITE_MASK)
 	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
 
-		if (largepage && has_wrprotected_page(vcpu->kvm, gfn, 1)) {
+		if (level > PT_PAGE_TABLE_LEVEL &&
+		    has_wrprotected_page(vcpu->kvm, gfn, level)) {
 			ret = 1;
 			spte = shadow_trap_nonpresent_pte;
 			goto set_pte;
@@ -1806,7 +1814,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
-			 int *ptwrite, int largepage, gfn_t gfn,
+			 int *ptwrite, int level, gfn_t gfn,
 			 pfn_t pfn, bool speculative)
 {
 	int was_rmapped = 0;
@@ -1823,7 +1831,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		 * If we overwrite a PTE page pointer with a 2MB PMD, unlink
 		 * the parent of the now unreachable PTE.
 		 */
-		if (largepage && !is_large_pte(*sptep)) {
+		if (level > PT_PAGE_TABLE_LEVEL &&
+		    !is_large_pte(*sptep)) {
 			struct kvm_mmu_page *child;
 			u64 pte = *sptep;
 
@@ -1836,8 +1845,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		} else
 			was_rmapped = 1;
 	}
+
 	if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative, true)) {
+		      dirty, level, gfn, pfn, speculative, true)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1857,7 +1867,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		if (!is_rmap_spte(*sptep))
 			kvm_release_pfn_clean(pfn);
 		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
-			rmap_recycle(vcpu, gfn, largepage);
+			rmap_recycle(vcpu, sptep, gfn);
 	} else {
 		if (was_writeble)
 			kvm_release_pfn_dirty(pfn);
@@ -1875,7 +1885,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-			int largepage, gfn_t gfn, pfn_t pfn)
+			int level, gfn_t gfn, pfn_t pfn)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
@@ -1883,11 +1893,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 	gfn_t pseudo_gfn;
 
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
-		if (iterator.level == PT_PAGE_TABLE_LEVEL
-		    || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
+		if (iterator.level == level) {
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
 				     0, write, 1, &pt_write,
-				     largepage, gfn, pfn, false);
+				     level, gfn, pfn, false);
 			++vcpu->stat.pf_fixed;
 			break;
 		}
@@ -1915,14 +1924,20 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
 	int r;
-	int largepage = 0;
+	int level;
 	pfn_t pfn;
 	unsigned long mmu_seq;
 
-	if (mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL) {
-		gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
-		largepage = 1;
-	}
+	level = mapping_level(vcpu, gfn);
+
+	/*
+	 * This path builds a PAE pagetable - so we can map 2mb pages at
+	 * maximum. Therefore check if the level is larger than that.
+	 */
+	if (level > PT_DIRECTORY_LEVEL)
+		level = PT_DIRECTORY_LEVEL;
+
+	gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
@@ -1938,7 +1953,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, v, write, largepage, gfn, pfn);
+	r = __direct_map(vcpu, v, write, level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -2114,7 +2129,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 {
 	pfn_t pfn;
 	int r;
-	int largepage = 0;
+	int level;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
 
@@ -2125,10 +2140,10 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	if (r)
 		return r;
 
-	if (mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL) {
-		gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
-		largepage = 1;
-	}
+	level = mapping_level(vcpu, gfn);
+
+	gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
@@ -2141,7 +2156,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 largepage, gfn, pfn);
+			 level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -2448,7 +2463,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 				  const void *new)
 {
 	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
-		if (!vcpu->arch.update_pte.largepage ||
+		if (vcpu->arch.update_pte.level == PT_PAGE_TABLE_LEVEL ||
 		    sp->role.glevels == PT32_ROOT_LEVEL) {
 			++vcpu->kvm->stat.mmu_pde_zapped;
 			return;
@@ -2498,7 +2513,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	u64 gpte = 0;
 	pfn_t pfn;
 
-	vcpu->arch.update_pte.largepage = 0;
+	vcpu->arch.update_pte.level = PT_PAGE_TABLE_LEVEL;
 
 	if (bytes != 4 && bytes != 8)
 		return;
@@ -2530,7 +2545,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	if (is_large_pte(gpte) &&
 	    (mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL)) {
 		gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
-		vcpu->arch.update_pte.largepage = 1;
+		vcpu->arch.update_pte.level = PT_DIRECTORY_LEVEL;
 	}
 	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
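
A side note on the kvm_handle_hva() hunk above, not taken from the kernel tree: the new loop visits every supported large-page size and scales the gfn offset down to an index into that size's lpage_info rmap array. A self-contained sketch of the index arithmetic, with the constants re-declared locally and assumed to match the kernel's:

#include <stdio.h>

#define PT_DIRECTORY_LEVEL	2
#define KVM_NR_PAGE_SIZES	3	/* 4k, 2M, 1G */
#define KVM_PAGES_PER_HPAGE(level)	(1UL << (((level) - 1) * 9))

int main(void)
{
	unsigned long gfn_offset = 0x40123;	/* offset of the faulting gfn within its slot */
	int j;

	for (j = 0; j < KVM_NR_PAGE_SIZES - 1; j++) {
		/* lpage_info[0] has one entry per 2M region, lpage_info[1] one per 1G region */
		unsigned long idx = gfn_offset / KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);

		printf("lpage_info[%d]: entry %lu\n", j, idx);
	}
	return 0;
}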
arch/x86/kvm/paging_tmpl.h  +3 −3
@@ -253,7 +253,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	pt_element_t gpte;
 	unsigned pte_access;
 	pfn_t pfn;
-	int largepage = vcpu->arch.update_pte.largepage;
+	int level = vcpu->arch.update_pte.level;
 
 	gpte = *(const pt_element_t *)pte;
 	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
@@ -272,7 +272,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 		return;
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
-		     gpte & PT_DIRTY_MASK, NULL, largepage,
+		     gpte & PT_DIRTY_MASK, NULL, level,
 		     gpte_to_gfn(gpte), pfn, true);
 }
 
@@ -306,7 +306,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 				     gw->pte_access & access,
 				     user_fault, write_fault,
 				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
-				     ptwrite, largepage,
+				     ptwrite, level,
 				     gw->gfn, pfn, false);
 			break;
 		}
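
One behavioural detail from the mmu.c portion worth spelling out, again as a standalone sketch rather than kernel code: with the relaxed is_last_spte() check, any PTE with the page-size bit set now ends the shadow walk, whether it maps 2M at PT_DIRECTORY_LEVEL or 1G at PT_PDPE_LEVEL; before the patch only the 2M case was treated as a leaf. The PS-bit constant below is re-declared locally and assumed to match the kernel's.

#include <stdint.h>
#include <stdio.h>

#define PT_PAGE_TABLE_LEVEL	1
#define PT_PAGE_SIZE_MASK	(1ULL << 7)	/* the x86 PS bit */

static int is_large_pte(uint64_t pte)
{
	return (pte & PT_PAGE_SIZE_MASK) != 0;
}

/* the relaxed check: terminal at level 1, or wherever the PS bit is set */
static int is_last_spte(uint64_t pte, int level)
{
	if (level == PT_PAGE_TABLE_LEVEL)
		return 1;
	if (is_large_pte(pte))
		return 1;
	return 0;
}

int main(void)
{
	/* a 1G mapping at level 3 is now treated as a leaf entry */
	printf("level 3, PS set:   %d\n", is_last_spte(PT_PAGE_SIZE_MASK, 3));
	printf("level 3, PS clear: %d\n", is_last_spte(0, 3));
	return 0;
}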