
Commit b94742c9 authored by Junaid Shahid, committed by Paolo Bonzini

kvm: x86: Add multi-entry LRU cache for previous CR3s



Adds support for storing multiple previous CR3/root_hpa pairs maintained
as an LRU cache, so that the lockless CR3 switch path can be used when
switching back to any of them.

Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent faff8758
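
The heart of the patch is how the cache stays ordered: each lookup walks the array with swap(), so entries shift one slot toward the tail, the current root lands in front, and on a miss the last (least recently used) entry is what gets evicted. Below is a minimal standalone C sketch of that discipline; struct root_info, cache_lookup() and friends are illustrative names with simplified types, not the kernel's kvm_mmu structures (the real code is cached_root_available() in the mmu.c hunks further down).

#include <stdbool.h>
#include <stdint.h>

#define NUM_PREV_ROOTS	3
#define INVALID_VAL	((uint64_t)-1)

struct root_info {
	uint64_t cr3;
	uint64_t hpa;
};

static struct root_info prev_roots[NUM_PREV_ROOTS];

static void cache_init(void)
{
	for (unsigned int i = 0; i < NUM_PREV_ROOTS; i++)
		prev_roots[i] = (struct root_info){ INVALID_VAL, INVALID_VAL };
}

static void swap_roots(struct root_info *a, struct root_info *b)
{
	struct root_info tmp = *a;
	*a = *b;
	*b = tmp;
}

/*
 * Mirrors the loop in cached_root_available() below: the current root
 * is pushed into the cache while we scan for an entry matching new_cr3.
 * Entries we pass over move one slot toward the back, keeping the array
 * in MRU-first order; on a miss the LRU entry falls out via *out.
 */
static bool cache_lookup(struct root_info current, uint64_t new_cr3,
			 struct root_info *out)
{
	unsigned int i;
	struct root_info root = current;

	for (i = 0; i < NUM_PREV_ROOTS; i++) {
		swap_roots(&root, &prev_roots[i]);
		if (root.cr3 == new_cr3 && root.hpa != INVALID_VAL)
			break;
	}

	*out = root;	/* hit: the matching root; miss: the evicted LRU root */
	return i < NUM_PREV_ROOTS;
}

(The kernel version additionally compares the page role and checks page_header() before declaring a hit; the sketch keeps only the CR3/validity match.)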
arch/x86/include/asm/kvm_host.h  +6 −4
@@ -335,6 +335,8 @@ struct kvm_mmu_root_info {
 #define KVM_MMU_ROOT_INFO_INVALID \
 	((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
 
+#define KVM_MMU_NUM_PREV_ROOTS 3
+
 /*
  * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
  * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
@@ -363,7 +365,7 @@ struct kvm_mmu {
 	u8 shadow_root_level;
 	u8 ept_ad;
 	bool direct_map;
-	struct kvm_mmu_root_info prev_root;
+	struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
 
 	/*
 	 * Bitmap; bit set = permission fault
@@ -1296,7 +1298,7 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
 }
 
 #define KVM_MMU_ROOT_CURRENT		BIT(0)
-#define KVM_MMU_ROOT_PREVIOUS		BIT(1)
+#define KVM_MMU_ROOT_PREVIOUS(i)	BIT(1+i)
 #define KVM_MMU_ROOTS_ALL		(~0UL)
 
 int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
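
A side effect of turning KVM_MMU_ROOT_PREVIOUS into a per-index macro is that roots_to_free becomes a plain bitmask: bit 0 selects the active root and bits 1..KVM_MMU_NUM_PREV_ROOTS select the cached ones, which is why the mmu.c hunk below adds a BUILD_BUG_ON against BITS_PER_LONG. A small self-contained C restatement of the encoding (the macros below simply re-declare what the hunk above defines, outside the kernel headers):

#include <stdio.h>

#define BIT(n)				(1UL << (n))
#define KVM_MMU_NUM_PREV_ROOTS		3
#define KVM_MMU_ROOT_CURRENT		BIT(0)
#define KVM_MMU_ROOT_PREVIOUS(i)	BIT(1 + (i))
#define KVM_MMU_ROOTS_ALL		(~0UL)

int main(void)
{
	unsigned long roots_to_free = 0;
	unsigned int i;

	/* Select every cached previous root, as handle_invpcid() below
	 * does when each root's PCID matches the INVPCID operand. */
	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
		roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);

	printf("prev-roots mask: %#lx\n", roots_to_free);	/* 0xe */
	printf("with current:    %#lx\n",
	       roots_to_free | KVM_MMU_ROOT_CURRENT);		/* 0xf */
	return 0;
}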
arch/x86/kvm/mmu.c  +78 −33
@@ -3445,17 +3445,25 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
 	LIST_HEAD(invalid_list);
 	struct kvm_mmu *mmu = &vcpu->arch.mmu;
 	bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
-	bool free_prev_root = roots_to_free & KVM_MMU_ROOT_PREVIOUS;
+
+	BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
 
 	/* Before acquiring the MMU lock, see if we need to do any real work. */
-	if (!(free_active_root && VALID_PAGE(mmu->root_hpa)) &&
-	    !(free_prev_root && VALID_PAGE(mmu->prev_root.hpa)))
-		return;
+	if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
+			    VALID_PAGE(mmu->prev_roots[i].hpa))
+				break;
+
+		if (i == KVM_MMU_NUM_PREV_ROOTS)
+			return;
+	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
-	if (free_prev_root)
-		mmu_free_root_page(vcpu->kvm, &mmu->prev_root.hpa,
-				   &invalid_list);
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
+			mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
+					   &invalid_list);
 
 	if (free_active_root) {
@@ -4064,6 +4072,38 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->nx = false;
 }
 
+/*
+ * Find out if a previously cached root matching the new CR3/role is available.
+ * The current root is also inserted into the cache.
+ * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is
+ * returned.
+ * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and
+ * false is returned. This root should now be freed by the caller.
+ */
+static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+				  union kvm_mmu_page_role new_role)
+{
+	uint i;
+	struct kvm_mmu_root_info root;
+	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+	root.cr3 = mmu->get_cr3(vcpu);
+	root.hpa = mmu->root_hpa;
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		swap(root, mmu->prev_roots[i]);
+
+		if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) &&
+		    page_header(root.hpa) != NULL &&
+		    new_role.word == page_header(root.hpa)->role.word)
+			break;
+	}
+
+	mmu->root_hpa = root.hpa;
+
+	return i < KVM_MMU_NUM_PREV_ROOTS;
+}
+
 static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 			    union kvm_mmu_page_role new_role,
 			    bool skip_tlb_flush)
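
Continuing the hypothetical standalone sketch near the top of this page, here is a short trace of how that swap loop behaves across a miss and a hit (cache_lookup() is the illustrative stand-in, not kernel code):

int main(void)
{
	struct root_info cur = { .cr3 = 0xA000, .hpa = 0x1000 };
	struct root_info out;
	bool hit;

	cache_init();

	/* Miss: 0xB000 was never cached. The current root (0xA000)
	 * still becomes the MRU entry, and "out" holds the evicted
	 * (here: invalid) LRU slot, which the caller would free. */
	hit = cache_lookup(cur, 0xB000, &out);	/* hit == false */

	/* Switching back: 0xA000 now sits in slot 0 and is found
	 * again, so no new root has to be allocated. */
	cur = (struct root_info){ .cr3 = 0xB000, .hpa = 0x2000 };
	hit = cache_lookup(cur, 0xA000, &out);	/* hit == true */
	(void)hit;
	return 0;
}

This one-call interface is what lets fast_cr3_switch() in the next hunk collapse its open-coded single-slot check into cached_root_available().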
@@ -4077,18 +4117,10 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 	 */
 	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
 	    mmu->root_level >= PT64_ROOT_4LEVEL) {
-		gpa_t prev_cr3 = mmu->prev_root.cr3;
-
 		if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
 			return false;
 
-		swap(mmu->root_hpa, mmu->prev_root.hpa);
-		mmu->prev_root.cr3 = mmu->get_cr3(vcpu);
-
-		if (new_cr3 == prev_cr3 &&
-		    VALID_PAGE(mmu->root_hpa) &&
-		    page_header(mmu->root_hpa) != NULL &&
-		    new_role.word == page_header(mmu->root_hpa)->role.word) {
+		if (cached_root_available(vcpu, new_cr3, new_role)) {
 			/*
 			 * It is possible that the cached previous root page is
 			 * obsolete because of a change in the MMU
@@ -4854,8 +4886,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
 {
 	if (reset_roots) {
+		uint i;
+
 		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-		vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
+
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 	}
 
 	if (mmu_is_nested(vcpu))
@@ -5225,6 +5261,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	int i;
 
 	/* INVLPG on a non-canonical address is a NOP according to the SDM.  */
 	if (is_noncanonical_address(gva, vcpu))
@@ -5235,16 +5272,17 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 	/*
 	 * INVLPG is required to invalidate any global mappings for the VA,
 	 * irrespective of PCID. Since it would take us roughly similar amount
-	 * of work to determine whether the prev_root mapping of the VA is
-	 * marked global, or to just sync it blindly, so we might as well just
-	 * always sync it.
+	 * of work to determine whether any of the prev_root mappings of the VA
+	 * is marked global, or to just sync it blindly, so we might as well
+	 * just always sync it.
 	 *
-	 * Mappings not reachable via the current cr3 or the prev_root.cr3 will
-	 * be synced when switching to that cr3, so nothing needs to be done
-	 * here for them.
+	 * Mappings not reachable via the current cr3 or the prev_roots will be
+	 * synced when switching to that cr3, so nothing needs to be done here
+	 * for them.
 	 */
-	if (VALID_PAGE(mmu->prev_root.hpa))
-		mmu->invlpg(vcpu, gva, mmu->prev_root.hpa);
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		if (VALID_PAGE(mmu->prev_roots[i].hpa))
+			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
 
 	kvm_x86_ops->tlb_flush_gva(vcpu, gva);
 	++vcpu->stat.invlpg;
@@ -5255,17 +5293,20 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 {
 	struct kvm_mmu *mmu = &vcpu->arch.mmu;
 	bool tlb_flush = false;
+	uint i;
 
 	if (pcid == kvm_get_active_pcid(vcpu)) {
 		mmu->invlpg(vcpu, gva, mmu->root_hpa);
 		tlb_flush = true;
 	}
 
-	if (VALID_PAGE(mmu->prev_root.hpa) &&
-	    pcid == kvm_get_pcid(vcpu, mmu->prev_root.cr3)) {
-		mmu->invlpg(vcpu, gva, mmu->prev_root.hpa);
-		tlb_flush = true;
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
+		    pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) {
+			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+			tlb_flush = true;
+		}
 	}
 
 	if (tlb_flush)
 		kvm_x86_ops->tlb_flush_gva(vcpu, gva);
@@ -5273,9 +5314,9 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 	++vcpu->stat.invlpg;
 
 	/*
-	 * Mappings not reachable via the current cr3 or the prev_root.cr3 will
-	 * be synced when switching to that cr3, so nothing needs to be done
-	 * here for them.
+	 * Mappings not reachable via the current cr3 or the prev_roots will be
+	 * synced when switching to that cr3, so nothing needs to be done here
+	 * for them.
 	 */
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
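
The PCID comparison in the loops above relies on kvm_get_pcid(), which is not part of this diff; to a first approximation it reads the low 12 bits of CR3 when CR4.PCIDE is set, and returns 0 otherwise. A hedged restatement in standalone C (the constant name and helper below are illustrative, not the kernel's exact definitions):

#include <stdint.h>
#include <stdio.h>

#define CR3_PCID_MASK	0xFFFull	/* PCID lives in CR3 bits 11:0 */

static uint64_t get_pcid(uint64_t cr3, int pcide_enabled)
{
	return pcide_enabled ? (cr3 & CR3_PCID_MASK) : 0;
}

int main(void)
{
	/* A cached root whose CR3 carries PCID 5. */
	uint64_t prev_root_cr3 = 0x1234567000ull | 0x005;

	/* kvm_mmu_invpcid_gva() only syncs cached roots whose PCID
	 * matches the one being invalidated. */
	printf("pcid 5 matches: %d\n", get_pcid(prev_root_cr3, 1) == 5);
	printf("pcid 6 matches: %d\n", get_pcid(prev_root_cr3, 1) == 6);
	return 0;
}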
@@ -5321,12 +5362,16 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
+	uint i;
+
 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-	vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	vcpu->arch.mmu.translate_gpa = translate_gpa;
 	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+
 	return alloc_mmu_pages(vcpu);
 }

arch/x86/kvm/vmx.c  +8 −4
@@ -8788,6 +8788,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
 	bool pcid_enabled;
 	gva_t gva;
 	struct x86_exception e;
+	unsigned i;
+	unsigned long roots_to_free = 0;
 	struct {
 		u64 pcid;
 		u64 gla;
@@ -8846,12 +8848,14 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
 			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		}
 
-		if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_root.cr3)
-		    == operand.pcid)
-			kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_PREVIOUS);
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3)
+			    == operand.pcid)
+				roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
 
+		kvm_mmu_free_roots(vcpu, roots_to_free);
 		/*
-		 * If neither the current cr3 nor the prev_root.cr3 use the
+		 * If neither the current cr3 nor any of the prev_roots use the
 		 * given PCID, then nothing needs to be done here because a
 		 * resync will happen anyway before switching to any other CR3.
 		 */