Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f8f55942 authored by Xiao Guangrong's avatar Xiao Guangrong Committed by Gleb Natapov
Browse files

KVM: MMU: fast invalidate all mmio sptes



This patch introduces a very simple and scalable way to invalidate
all mmio sptes - it does not need to walk any shadow pages or hold the mmu-lock.

KVM maintains a global mmio valid generation-number, which is stored in
kvm->memslots.generation, and every mmio spte stores the current global
generation-number in its available bits when it is created.

When KVM needs to zap all mmio sptes, it simply increases the global
generation-number. When a guest does an mmio access, KVM intercepts the MMIO #PF,
then walks the shadow page table and gets the mmio spte. If the
generation-number on the spte does not equal the global generation-number,
it goes to the normal #PF handler to update the mmio spte.

Since 19 bits are used to store the generation-number in an mmio spte, we zap all
mmio sptes when the number wraps around.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent b37fbea6
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -773,7 +773,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
				     struct kvm_memory_slot *slot,
				     struct kvm_memory_slot *slot,
				     gfn_t gfn_offset, unsigned long mask);
				     gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);


+46 −8
Original line number Original line Diff line number Diff line
@@ -205,9 +205,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
#define MMIO_SPTE_GEN_LOW_SHIFT		3
#define MMIO_SPTE_GEN_LOW_SHIFT		3
#define MMIO_SPTE_GEN_HIGH_SHIFT	52
#define MMIO_SPTE_GEN_HIGH_SHIFT	52


#define MMIO_GEN_SHIFT			19
#define MMIO_GEN_LOW_SHIFT		9
#define MMIO_GEN_LOW_SHIFT		9
#define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 1)
#define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 1)
#define MMIO_MAX_GEN			((1 << 19) - 1)
#define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)
#define MMIO_MAX_GEN			((1 << MMIO_GEN_SHIFT) - 1)


static u64 generation_mmio_spte_mask(unsigned int gen)
static u64 generation_mmio_spte_mask(unsigned int gen)
{
{
@@ -231,17 +233,23 @@ static unsigned int get_mmio_spte_generation(u64 spte)
	return gen;
	return gen;
}
}


/*
 * Return the current mmio generation for @kvm: the memslots generation
 * counter masked with MMIO_GEN_MASK, so only the bits that fit in the
 * mmio generation field are kept.
 */
static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
{
	return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
}

static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
			   unsigned access)
			   unsigned access)
{
{
	struct kvm_mmu_page *sp =  page_header(__pa(sptep));
	struct kvm_mmu_page *sp =  page_header(__pa(sptep));
	u64 mask = generation_mmio_spte_mask(0);
	unsigned int gen = kvm_current_mmio_generation(kvm);
	u64 mask = generation_mmio_spte_mask(gen);


	access &= ACC_WRITE_MASK | ACC_USER_MASK;
	access &= ACC_WRITE_MASK | ACC_USER_MASK;
	mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
	mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
	sp->mmio_cached = true;
	sp->mmio_cached = true;


	trace_mark_mmio_spte(sptep, gfn, access, 0);
	trace_mark_mmio_spte(sptep, gfn, access, gen);
	mmu_spte_set(sptep, mask);
	mmu_spte_set(sptep, mask);
}
}


@@ -273,6 +281,12 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
	return false;
	return false;
}
}


/*
 * Tell whether an mmio spte is still valid.
 *
 * Returns true when the generation number recorded in @spte equals the
 * current mmio generation of @kvm; a match is hinted as the likely case.
 */
static bool check_mmio_spte(struct kvm *kvm, u64 spte)
{
	unsigned int spte_gen = get_mmio_spte_generation(spte);
	unsigned int kvm_gen = kvm_current_mmio_generation(kvm);

	return likely(spte_gen == kvm_gen);
}

static inline u64 rsvd_bits(int s, int e)
static inline u64 rsvd_bits(int s, int e)
{
{
	return ((1ULL << (e - s + 1)) - 1) << s;
	return ((1ULL << (e - s + 1)) - 1) << s;
@@ -3237,6 +3251,9 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
		gfn_t gfn = get_mmio_spte_gfn(spte);
		gfn_t gfn = get_mmio_spte_gfn(spte);
		unsigned access = get_mmio_spte_access(spte);
		unsigned access = get_mmio_spte_access(spte);


		if (!check_mmio_spte(vcpu->kvm, spte))
			return RET_MMIO_PF_INVALID;

		if (direct)
		if (direct)
			addr = 0;
			addr = 0;


@@ -3278,8 +3295,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,


	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);


	if (unlikely(error_code & PFERR_RSVD_MASK))
	if (unlikely(error_code & PFERR_RSVD_MASK)) {
		return handle_mmio_page_fault(vcpu, gva, error_code, true);
		r = handle_mmio_page_fault(vcpu, gva, error_code, true);

		if (likely(r != RET_MMIO_PF_INVALID))
			return r;
	}


	r = mmu_topup_memory_caches(vcpu);
	r = mmu_topup_memory_caches(vcpu);
	if (r)
	if (r)
@@ -3355,8 +3376,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
	ASSERT(vcpu);
	ASSERT(vcpu);
	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));


	if (unlikely(error_code & PFERR_RSVD_MASK))
	if (unlikely(error_code & PFERR_RSVD_MASK)) {
		return handle_mmio_page_fault(vcpu, gpa, error_code, true);
		r = handle_mmio_page_fault(vcpu, gpa, error_code, true);

		if (likely(r != RET_MMIO_PF_INVALID))
			return r;
	}


	r = mmu_topup_memory_caches(vcpu);
	r = mmu_topup_memory_caches(vcpu);
	if (r)
	if (r)
@@ -4329,7 +4354,7 @@ void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
	spin_unlock(&kvm->mmu_lock);
	spin_unlock(&kvm->mmu_lock);
}
}


void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
static void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
{
{
	struct kvm_mmu_page *sp, *node;
	struct kvm_mmu_page *sp, *node;
	LIST_HEAD(invalid_list);
	LIST_HEAD(invalid_list);
@@ -4352,6 +4377,19 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
	return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
	return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
}
}


/*
 * Invalidate the mmio sptes of @kvm.
 *
 * In the common case nothing is done here. Only when the current mmio
 * generation has reached MMIO_MAX_GEN - 1 or beyond (the generation
 * number is about to wrap) do we fall back to kvm_mmu_zap_mmio_sptes().
 *
 * NOTE(review): the threshold is MMIO_MAX_GEN - 1 rather than
 * MMIO_MAX_GEN; the original comment attributes this to the function
 * not being called when a memslot is marked invalid - confirm against
 * the callers.
 */
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
{
	/* Common case: the generation number has not wrapped yet. */
	if (likely(kvm_current_mmio_generation(kvm) < (MMIO_MAX_GEN - 1)))
		return;

	/* Very rare case: the generation number is wrapping. */
	kvm_mmu_zap_mmio_sptes(kvm);
}

static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
{
	struct kvm *kvm;
	struct kvm *kvm;
+4 −1
Original line number Original line Diff line number Diff line
@@ -57,11 +57,14 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
 * Return values of handle_mmio_page_fault_common:
 * Return values of handle_mmio_page_fault_common:
 * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
 * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
 *			directly.
 *			directly.
 * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
 *			fault path update the mmio spte.
 * RET_MMIO_PF_RETRY: let CPU fault again on the address.
 * RET_MMIO_PF_RETRY: let CPU fault again on the address.
 * RET_MMIO_PF_BUG: bug is detected.
 * RET_MMIO_PF_BUG: bug is detected.
 */
 */
enum {
enum {
	RET_MMIO_PF_EMULATE = 1,
	RET_MMIO_PF_EMULATE = 1,
	RET_MMIO_PF_INVALID = 2,
	RET_MMIO_PF_RETRY = 0,
	RET_MMIO_PF_RETRY = 0,
	RET_MMIO_PF_BUG = -1
	RET_MMIO_PF_BUG = -1
};
};
+5 −2
Original line number Original line Diff line number Diff line
@@ -552,9 +552,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,


	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);


	if (unlikely(error_code & PFERR_RSVD_MASK))
	if (unlikely(error_code & PFERR_RSVD_MASK)) {
		return handle_mmio_page_fault(vcpu, addr, error_code,
		r = handle_mmio_page_fault(vcpu, addr, error_code,
					      mmu_is_nested(vcpu));
					      mmu_is_nested(vcpu));
		if (likely(r != RET_MMIO_PF_INVALID))
			return r;
	};


	r = mmu_topup_memory_caches(vcpu);
	r = mmu_topup_memory_caches(vcpu);
	if (r)
	if (r)
+4 −0
Original line number Original line Diff line number Diff line
@@ -5369,6 +5369,10 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
	if (likely(ret == RET_MMIO_PF_EMULATE))
	if (likely(ret == RET_MMIO_PF_EMULATE))
		return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
		return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
					      EMULATE_DONE;
					      EMULATE_DONE;

	if (unlikely(ret == RET_MMIO_PF_INVALID))
		return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0);

	if (unlikely(ret == RET_MMIO_PF_RETRY))
	if (unlikely(ret == RET_MMIO_PF_RETRY))
		return 1;
		return 1;


Loading