Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bad3b507 authored by Paul Mackerras, committed by Avi Kivity
Browse files

KVM: PPC: Book3s HV: Maintain separate guest and host views of R and C bits



This allows both the guest and the host to use the referenced (R) and
changed (C) bits in the guest hashed page table.  The guest has a view
of R and C that is maintained in the guest_rpte field of the revmap
entry for the HPTE, and the host has a view that is maintained in the
rmap entry for the associated gfn.

Both views are updated from the guest HPT.  If a bit (R or C) is zero
in either view, it will be initially set to zero in the HPTE (or HPTEs),
until set to 1 by hardware.  When an HPTE is removed for any reason,
the R and C bits from the HPTE are ORed into both views.  We have to
be careful to read the R and C bits from the HPTE after invalidating
it, but before unlocking it, in case of any late updates by the hardware.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent a92bce95
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -200,8 +200,9 @@ struct revmap_entry {
 * index in the guest HPT of a HPTE that points to the page.
 */
#define KVMPPC_RMAP_LOCK_BIT	63
#define KVMPPC_RMAP_REF_BIT	33
#define KVMPPC_RMAP_REFERENCED	(1ul << KVMPPC_RMAP_REF_BIT)
#define KVMPPC_RMAP_RC_SHIFT	32
#define KVMPPC_RMAP_REFERENCED	(HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHANGED	(HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_PRESENT	0x100000000ul
#define KVMPPC_RMAP_INDEX	0xfffffffful

+30 −18
Original line number Diff line number Diff line
@@ -505,6 +505,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
	unsigned long is_io;
	unsigned int writing, write_ok;
	struct vm_area_struct *vma;
	unsigned long rcbits;

	/*
	 * Real-mode code has already searched the HPT and found the
@@ -640,11 +641,17 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
		goto out_unlock;
	}

	/* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
	rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
	r &= rcbits | ~(HPTE_R_R | HPTE_R_C);

	if (hptep[0] & HPTE_V_VALID) {
		/* HPTE was previously valid, so we need to invalidate it */
		unlock_rmap(rmap);
		hptep[0] |= HPTE_V_ABSENT;
		kvmppc_invalidate_hpte(kvm, hptep, index);
		/* don't lose previous R and C bits */
		r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
	} else {
		kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
	}
@@ -701,50 +708,55 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
	struct revmap_entry *rev = kvm->arch.revmap;
	unsigned long h, i, j;
	unsigned long *hptep;
	unsigned long ptel, psize;
	unsigned long ptel, psize, rcbits;

	for (;;) {
		while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
			cpu_relax();
		lock_rmap(rmapp);
		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
			__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
			unlock_rmap(rmapp);
			break;
		}

		/*
		 * To avoid an ABBA deadlock with the HPTE lock bit,
		 * we have to unlock the rmap chain before locking the HPTE.
		 * Thus we remove the first entry, unlock the rmap chain,
		 * lock the HPTE and then check that it is for the
		 * page we're unmapping before changing it to non-present.
		 * we can't spin on the HPTE lock while holding the
		 * rmap chain lock.
		 */
		i = *rmapp & KVMPPC_RMAP_INDEX;
		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (hptep[0] & HPTE_V_HVLOCK)
				cpu_relax();
			continue;
		}
		j = rev[i].forw;
		if (j == i) {
			/* chain is now empty */
			j = 0;
			*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
		} else {
			/* remove i from chain */
			h = rev[i].back;
			rev[h].forw = j;
			rev[j].back = h;
			rev[i].forw = rev[i].back = i;
			j |= KVMPPC_RMAP_PRESENT;
			*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
		}
		smp_wmb();
		*rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT);

		/* Now lock, check and modify the HPTE */
		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
		while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
			cpu_relax();
		/* Now check and modify the HPTE */
		ptel = rev[i].guest_rpte;
		psize = hpte_page_size(hptep[0], ptel);
		if ((hptep[0] & HPTE_V_VALID) &&
		    hpte_rpn(ptel, psize) == gfn) {
			kvmppc_invalidate_hpte(kvm, hptep, i);
			hptep[0] |= HPTE_V_ABSENT;
			kvmppc_invalidate_hpte(kvm, hptep, i);
			/* Harvest R and C */
			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
			rev[i].guest_rpte = ptel | rcbits;
		}
		unlock_rmap(rmapp);
		hptep[0] &= ~HPTE_V_HVLOCK;
	}
	return 0;
@@ -767,7 +779,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
	kvm_unmap_rmapp(kvm, rmapp, gfn);
	while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
		cpu_relax();
	__clear_bit(KVMPPC_RMAP_REF_BIT, rmapp);
	*rmapp &= ~KVMPPC_RMAP_REFERENCED;
	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
	return 1;
}
+26 −19
Original line number Diff line number Diff line
@@ -87,15 +87,17 @@ EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
				unsigned long hpte_v)
				struct revmap_entry *rev,
				unsigned long hpte_v, unsigned long hpte_r)
{
	struct revmap_entry *rev, *next, *prev;
	struct revmap_entry *next, *prev;
	unsigned long gfn, ptel, head;
	struct kvm_memory_slot *memslot;
	unsigned long *rmap;
	unsigned long rcbits;

	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	ptel = rev->guest_rpte;
	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
	ptel = rev->guest_rpte |= rcbits;
	gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
	memslot = builtin_gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
@@ -116,6 +118,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
		else
			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
	}
	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
	unlock_rmap(rmap);
}

@@ -162,6 +165,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
	pte_t pte;
	unsigned int writing;
	unsigned long mmu_seq;
	unsigned long rcbits;
	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;

	psize = hpte_page_size(pteh, ptel);
@@ -320,6 +324,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		} else {
			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
						realmode);
			/* Only set R/C in real HPTE if already set in *rmap */
			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
		}
	}

@@ -394,7 +401,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
			asm volatile("tlbiel %0" : : "r" (rb));
			asm volatile("ptesync" : : : "memory");
		}
		remove_revmap_chain(kvm, pte_index, v);
		/* Read PTE low word after tlbie to get final R/C values */
		remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
	}
	r = rev->guest_rpte;
	unlock_hpte(hpte, 0);
@@ -469,12 +477,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)

			args[j] = ((0x80 | flags) << 56) + pte_index;
			rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
			/* insert R and C bits from guest PTE */

			if (!(hp[0] & HPTE_V_VALID)) {
				/* insert R and C bits from PTE */
				rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
				args[j] |= rcbits << (56 - 5);

			if (!(hp[0] & HPTE_V_VALID))
				continue;
			}

			hp[0] &= ~HPTE_V_VALID;		/* leave it locked */
			tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
@@ -505,13 +514,16 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
			asm volatile("ptesync" : : : "memory");
		}

		/* Read PTE low words after tlbie to get final R/C values */
		for (k = 0; k < n; ++k) {
			j = indexes[k];
			pte_index = args[j] & ((1ul << 56) - 1);
			hp = hptes[k];
			rev = revs[k];
			remove_revmap_chain(kvm, pte_index, hp[0]);
			unlock_hpte(hp, 0);
			remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
			args[j] |= rcbits << (56 - 5);
			hp[0] = 0;
		}
	}

@@ -595,7 +607,6 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
		pte_index &= ~3;
		n = 4;
	}
	if (flags & H_R_XLATE)
	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
	for (i = 0; i < n; ++i, ++pte_index) {
		hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
@@ -605,12 +616,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
			v &= ~HPTE_V_ABSENT;
			v |= HPTE_V_VALID;
		}
		if (v & HPTE_V_VALID) {
			if (rev)
				r = rev[i].guest_rpte;
			else
				r = hpte[1] | HPTE_R_RPN;
		}
		if (v & HPTE_V_VALID)
			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
		vcpu->arch.gpr[4 + i * 2] = v;
		vcpu->arch.gpr[5 + i * 2] = r;
	}