
Commit 640d9b0d authored by Xiao Guangrong, committed by Avi Kivity

KVM: MMU: optimize to handle dirty bit



If the dirty bit is not set, we can make the pte access read-only to avoid handling the dirty bit everywhere

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent bebb106a
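
The core idea of the patch: fold the dirty-bit check into the guest-pte permission computation itself. When a last-level gpte has its dirty bit clear, FNAME(gpte_access)() now returns a read-only access mask up front, so set_spte() and mmu_set_spte() no longer need a separate dirty flag threaded through every call chain; the first guest write to such a page faults, and the dirty bit is set at that point. The following standalone C sketch illustrates the mechanism; the bit positions and mask values are simplified stand-ins chosen for illustration, not the kernel's actual definitions.

/* Sketch of the dirty-bit folding; illustrative only, not kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the guest-PTE bits and ACC_* masks. */
#define PT_WRITABLE_MASK (1u << 1)
#define PT_USER_MASK     (1u << 2)
#define PT_DIRTY_MASK    (1u << 6)

#define ACC_EXEC_MASK    1u
#define ACC_WRITE_MASK   PT_WRITABLE_MASK

static bool is_dirty_gpte(uint64_t gpte)
{
	return gpte & PT_DIRTY_MASK;
}

/*
 * Mirrors the reworked FNAME(gpte_access)(): for a last-level gpte whose
 * dirty bit is clear, write permission is dropped here, once, so the
 * resulting shadow pte is read-only and the first guest write faults.
 */
static unsigned gpte_access(uint64_t gpte, bool last)
{
	unsigned access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) |
			  ACC_EXEC_MASK;

	if (last && !is_dirty_gpte(gpte))
		access &= ~ACC_WRITE_MASK;

	return access;
}

int main(void)
{
	uint64_t clean = PT_WRITABLE_MASK | PT_USER_MASK;
	uint64_t dirty = clean | PT_DIRTY_MASK;

	/* A clean leaf gpte loses write access; a dirty one keeps it. */
	printf("clean gpte writable: %d\n",
	       !!(gpte_access(clean, true) & ACC_WRITE_MASK));
	printf("dirty gpte writable: %d\n",
	       !!(gpte_access(dirty, true) & ACC_WRITE_MASK));
	return 0;
}

Run standalone, this prints 0 for the clean gpte and 1 for the dirty one, which is exactly the effect of the "if (last && !is_dirty_gpte(gpte))" check the patch adds.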
arch/x86/kvm/mmu.c  +6 −7

@@ -1923,7 +1923,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int user_fault,
-		    int write_fault, int dirty, int level,
+		    int write_fault, int level,
 		    gfn_t gfn, pfn_t pfn, bool speculative,
 		    bool can_unsync, bool host_writable)
 {
@@ -1938,8 +1938,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	spte = PT_PRESENT_MASK;
 	if (!speculative)
 		spte |= shadow_accessed_mask;
-	if (!dirty)
-		pte_access &= ~ACC_WRITE_MASK;
+
 	if (pte_access & ACC_EXEC_MASK)
 		spte |= shadow_x_mask;
 	else
@@ -2023,7 +2022,7 @@ done:
 
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			 unsigned pt_access, unsigned pte_access,
-			 int user_fault, int write_fault, int dirty,
+			 int user_fault, int write_fault,
 			 int *ptwrite, int level, gfn_t gfn,
 			 pfn_t pfn, bool speculative,
 			 bool host_writable)
@@ -2059,7 +2058,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	}
 
 	if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
-		      dirty, level, gfn, pfn, speculative, true,
+		      level, gfn, pfn, speculative, true,
 		      host_writable)) {
 		if (write_fault)
 			*ptwrite = 1;
@@ -2129,7 +2128,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
 
 	for (i = 0; i < ret; i++, gfn++, start++)
 		mmu_set_spte(vcpu, start, ACC_ALL,
-			     access, 0, 0, 1, NULL,
+			     access, 0, 0, NULL,
 			     sp->role.level, gfn,
 			     page_to_pfn(pages[i]), true, true);
 
@@ -2193,7 +2192,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			unsigned pte_access = ACC_ALL;
 
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
-				     0, write, 1, &pt_write,
+				     0, write, &pt_write,
 				     level, gfn, pfn, prefault, map_writable);
 			direct_pte_prefetch(vcpu, iterator.sptep);
 			++vcpu->stat.pf_fixed;
arch/x86/kvm/paging_tmpl.h  +19 −27

@@ -101,11 +101,15 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return (ret != orig_pte);
 }
 
-static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
+static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
+				   bool last)
 {
 	unsigned access;
 
 	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+	if (last && !is_dirty_gpte(gpte))
+		access &= ~ACC_WRITE_MASK;
+
 #if PTTYPE == 64
 	if (vcpu->arch.mmu.nx)
 		access &= ~(gpte >> PT64_NX_SHIFT);
@@ -232,8 +236,6 @@ retry_walk:
 			pte |= PT_ACCESSED_MASK;
 		}
 
-		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
-
 		walker->ptes[walker->level - 1] = pte;
 
 		if (FNAME(is_last_gpte)(walker, vcpu, mmu, pte)) {
@@ -268,7 +270,7 @@ retry_walk:
 			break;
 		}
 
-		pt_access = pte_access;
+		pt_access &= FNAME(gpte_access)(vcpu, pte, false);
 		--walker->level;
 	}
 
@@ -293,6 +295,7 @@ retry_walk:
 		walker->ptes[walker->level - 1] = pte;
 	}
 
+	pte_access = pt_access & FNAME(gpte_access)(vcpu, pte, true);
 	walker->pt_access = pt_access;
 	walker->pte_access = pte_access;
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
@@ -367,7 +370,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		return;
 
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
 	if (is_error_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
@@ -379,7 +382,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
 	 */
 	mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
-		     is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL,
+		     NULL, PT_PAGE_TABLE_LEVEL,
 		     gpte_to_gfn(gpte), pfn, true, true);
 }
 
@@ -430,7 +433,6 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		unsigned pte_access;
 		gfn_t gfn;
 		pfn_t pfn;
-		bool dirty;
 
 		if (spte == sptep)
 			continue;
@@ -443,18 +445,18 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 			continue;
 
-		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
+								  true);
 		gfn = gpte_to_gfn(gpte);
-		dirty = is_dirty_gpte(gpte);
 		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
-				      (pte_access & ACC_WRITE_MASK) && dirty);
+				      pte_access & ACC_WRITE_MASK);
 		if (is_error_pfn(pfn)) {
 			kvm_release_pfn_clean(pfn);
 			break;
 		}
 
 		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
-			     dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn,
+			     NULL, PT_PAGE_TABLE_LEVEL, gfn,
 			     pfn, true, true);
 	}
 }
@@ -470,7 +472,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 {
 	unsigned access = gw->pt_access;
 	struct kvm_mmu_page *sp = NULL;
-	bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]);
 	int top_level;
 	unsigned direct_access;
 	struct kvm_shadow_walk_iterator it;
@@ -479,8 +480,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		return NULL;
 
 	direct_access = gw->pt_access & gw->pte_access;
-	if (!dirty)
-		direct_access &= ~ACC_WRITE_MASK;
 
 	top_level = vcpu->arch.mmu.root_level;
 	if (top_level == PT32E_ROOT_LEVEL)
@@ -539,7 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	}
 
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
-		     user_fault, write_fault, dirty, ptwrite, it.level,
+		     user_fault, write_fault, ptwrite, it.level,
 		     gw->gfn, pfn, prefault, map_writable);
 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
@@ -622,17 +621,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 		return 0;
 
 	/* mmio */
-	if (is_error_pfn(pfn)) {
-		unsigned access = walker.pte_access;
-		bool dirty = is_dirty_gpte(walker.ptes[walker.level - 1]);
-
-		if (!dirty)
-			access &= ~ACC_WRITE_MASK;
-
+	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu, mmu_is_nested(vcpu) ? 0 :
-					   addr, access, walker.gfn, pfn);
-	}
-
+				      addr, walker.pte_access, walker.gfn, pfn);
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
@@ -849,11 +840,12 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		}
 
 		nr_present++;
-		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
+								  true);
 		host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
 
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
-			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
+			 PT_PAGE_TABLE_LEVEL, gfn,
 			 spte_to_pfn(sp->spt[i]), true, false,
 			 host_writable);
 	}