
Commit 612819c3 authored by Marcelo Tosatti, committed by Avi Kivity

KVM: propagate fault r/w information to gup(), allow read-only memory



As suggested by Andrea, pass r/w error code to gup(), upgrading read fault
to writable if host pte allows it.
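
To illustrate the resulting calling convention (a minimal sketch, not code from this commit: example_map_fault() is a hypothetical wrapper, while gfn_to_pfn_prot(), is_error_pfn(), kvm_handle_bad_page() and __direct_map() are the functions touched by the patch below; locking and mmu-notifier retry are omitted):

	/*
	 * Sketch of a direct-map fault path after this change: the fault's
	 * r/w intent is passed down to gup(), and the returned map_writable
	 * tells the MMU whether the spte may be created writable.
	 */
	static int example_map_fault(struct kvm_vcpu *vcpu, gpa_t v, int write,
				     int level, gfn_t gfn)
	{
		bool map_writable;
		pfn_t pfn;

		/* write comes from the fault error code (PFERR_WRITE_MASK) */
		pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, &map_writable);
		if (is_error_pfn(pfn))
			return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);

		/*
		 * A read fault on a host pte that is not writable yields
		 * map_writable == false; __direct_map() then masks
		 * ACC_WRITE_MASK out of the new spte.
		 */
		return __direct_map(vcpu, v, write, map_writable, level, gfn, pfn);
	}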

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent 7905d9a5
arch/x86/kvm/mmu.c  +17 −10
@@ -2216,7 +2216,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-			int level, gfn_t gfn, pfn_t pfn)
+			int map_writable, int level, gfn_t gfn, pfn_t pfn)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
@@ -2225,9 +2225,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 		if (iterator.level == level) {
-			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+			unsigned pte_access = ACC_ALL;
+
+			if (!map_writable)
+				pte_access &= ~ACC_WRITE_MASK;
+			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
 				     0, write, 1, &pt_write,
-				     level, gfn, pfn, false, true);
+				     level, gfn, pfn, false, map_writable);
 			direct_pte_prefetch(vcpu, iterator.sptep);
 			++vcpu->stat.pf_fixed;
 			break;
@@ -2288,6 +2292,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	int level;
 	pfn_t pfn;
 	unsigned long mmu_seq;
+	bool map_writable;
 
 	level = mapping_level(vcpu, gfn);
 
@@ -2302,7 +2307,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, &map_writable);
 
 	/* mmio */
 	if (is_error_pfn(pfn))
@@ -2312,7 +2317,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, v, write, level, gfn, pfn);
+	r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -2611,11 +2616,11 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
-			 gva_t gva, pfn_t *pfn)
+			 gva_t gva, pfn_t *pfn, bool write, bool *writable)
 {
 	bool async;
 
-	*pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async);
+	*pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
 
 	if (!async)
 		return false; /* *pfn has correct page already */
@@ -2632,7 +2637,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn,
 			return true;
 	}
 
-	*pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	*pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
 
 	return false;
 }
@@ -2645,6 +2650,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	int level;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
+	int write = error_code & PFERR_WRITE_MASK;
+	bool map_writable;
 
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -2660,7 +2667,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn))
+	if (try_async_pf(vcpu, no_apf, gfn, gpa, &pfn, write, &map_writable))
 		return 0;
 
 	/* mmio */
@@ -2670,7 +2677,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
+	r = __direct_map(vcpu, gpa, write, map_writable,
 			 level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
arch/x86/kvm/paging_tmpl.h  +9 −4
@@ -427,7 +427,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *gw,
 			 int user_fault, int write_fault, int hlevel,
-			 int *ptwrite, pfn_t pfn)
+			 int *ptwrite, pfn_t pfn, bool map_writable)
 {
 	unsigned access = gw->pt_access;
 	struct kvm_mmu_page *sp = NULL;
@@ -501,7 +501,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
 		     user_fault, write_fault, dirty, ptwrite, it.level,
-		     gw->gfn, pfn, false, true);
+		     gw->gfn, pfn, false, map_writable);
 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
 	return it.sptep;
@@ -539,6 +539,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	pfn_t pfn;
 	int level = PT_PAGE_TABLE_LEVEL;
 	unsigned long mmu_seq;
+	bool map_writable;
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 
@@ -569,13 +570,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn))
+	if (try_async_pf(vcpu, no_apf, walker.gfn, addr, &pfn, write_fault,
+			 &map_writable))
 		return 0;
 
 	/* mmio */
 	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
 
+	if (!map_writable)
+		walker.pte_access &= ~ACC_WRITE_MASK;
+
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
@@ -583,7 +588,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
 	kvm_mmu_free_some_pages(vcpu);
 	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-			     level, &write_pt, pfn);
+			     level, &write_pt, pfn, map_writable);
 	(void)sptep;
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
 		 sptep, *sptep, write_pt);
include/linux/kvm_host.h  +4 −1
@@ -334,8 +334,11 @@ void kvm_set_page_accessed(struct page *page);
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async);
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
+		       bool write_fault, bool *writable);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+		      bool *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn);
 int memslot_id(struct kvm *kvm, gfn_t gfn);
virt/kvm/kvm_main.c  +41 −10
@@ -959,7 +959,7 @@ static pfn_t get_fault_pfn(void)
 }
 
 static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-			bool *async)
+			bool *async, bool write_fault, bool *writable)
 {
 	struct page *page[1];
 	int npages = 0;
@@ -968,12 +968,34 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
 	/* we can do it either atomically or asynchronously, not both */
 	BUG_ON(atomic && async);
 
+	BUG_ON(!write_fault && !writable);
+
+	if (writable)
+		*writable = true;
+
 	if (atomic || async)
 		npages = __get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1) && !atomic) {
 		might_sleep();
-		npages = get_user_pages_fast(addr, 1, 1, page);
+
+		if (writable)
+			*writable = write_fault;
+
+		npages = get_user_pages_fast(addr, 1, write_fault, page);
+
+		/* map read fault as writable if possible */
+		if (unlikely(!write_fault) && npages == 1) {
+			struct page *wpage[1];
+
+			npages = __get_user_pages_fast(addr, 1, 1, wpage);
+			if (npages == 1) {
+				*writable = true;
+				put_page(page[0]);
+				page[0] = wpage[0];
+			}
+			npages = 1;
+		}
 	}
 
 	if (unlikely(npages != 1)) {
@@ -1011,11 +1033,12 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
 {
-	return hva_to_pfn(kvm, addr, true, NULL);
+	return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async)
+static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
+			  bool write_fault, bool *writable)
 {
 	unsigned long addr;
 
@@ -1028,32 +1051,40 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async)
 		return page_to_pfn(bad_page);
 	}
 
-	return hva_to_pfn(kvm, addr, atomic, async);
+	return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
 {
-	return __gfn_to_pfn(kvm, gfn, true, NULL);
+	return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
 
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async)
+pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
+		       bool write_fault, bool *writable)
 {
-	return __gfn_to_pfn(kvm, gfn, false, async);
+	return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
 
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
-	return __gfn_to_pfn(kvm, gfn, false, NULL);
+	return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
+		      bool *writable)
+{
+	return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
+
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(kvm, addr, false, NULL);
+	return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
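
Usage note (a sketch, not part of this commit; example_pfn_lookup() is hypothetical): the new hva_to_pfn() contract allows writable to be NULL only for write faults, which is what the BUG_ON(!write_fault && !writable) above enforces and what the legacy wrappers rely on by passing (true, NULL).

	/*
	 * Hypothetical caller showing both sides of the new contract
	 * (page reference handling omitted).
	 */
	static void example_pfn_lookup(struct kvm *kvm, gfn_t gfn)
	{
		bool writable;
		pfn_t pfn;

		/* Write fault: the writable out-parameter may be NULL. */
		pfn = gfn_to_pfn_prot(kvm, gfn, true, NULL);

		/*
		 * Read fault: writable must be non-NULL; it reports whether
		 * the read fault was upgraded to a writable host mapping,
		 * i.e. whether a writable spte may be installed for this gfn.
		 */
		pfn = gfn_to_pfn_prot(kvm, gfn, false, &writable);
		if (!is_error_pfn(pfn) && writable)
			pr_debug("gfn %llx can be mapped writable\n",
				 (unsigned long long)gfn);
	}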