Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 35307b9a authored by Marc Zyngier's avatar Marc Zyngier Committed by Christoffer Dall
Browse files

arm/arm64: KVM: Implement Stage-2 page aging



Until now, KVM/arm didn't care much for page aging (who was swapping
anyway?), and simply provided empty hooks to the core KVM code. With
server-type systems now being available, things are quite different.

This patch implements very simple support for page aging, by clearing
the Access flag in the Stage-2 page tables. On access fault, the current
fault handling will write the PTE or PMD again, putting the Access flag
back on.

It should be possible to implement a much faster handling for Access
faults, but that's left for a later patch.

With this in place, performance in VMs is degraded much more gracefully.

Signed-off-by: default avatarMarc Zyngier <marc.zyngier@arm.com>
Acked-by: default avatarChristoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: default avatarChristoffer Dall <christoffer.dall@linaro.org>
parent 1d2ebacc
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -185,6 +185,7 @@
#define HSR_COND	(0xfU << HSR_COND_SHIFT)

#define FSC_FAULT	(0x04)
#define FSC_ACCESS	(0x08)
#define FSC_PERM	(0x0c)

/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
+2 −11
Original line number Diff line number Diff line
@@ -167,19 +167,10 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);

unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);

/* We do not have shadow page tables, hence the empty hooks */
static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
			      unsigned long end)
{
	return 0;
}

static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	return 0;
}

static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
							 unsigned long address)
{
+64 −1
Original line number Diff line number Diff line
@@ -1299,6 +1299,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_set_pfn_accessed(pfn);
	kvm_release_pfn_clean(pfn);
	return ret;
}
@@ -1333,7 +1334,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)

	/* Check the stage-2 fault is trans. fault or write fault */
	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
	    fault_status != FSC_ACCESS) {
		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu),
			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1475,6 +1477,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}

static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pmd_t *pmd;
	pte_t *pte;

	pmd = stage2_get_pmd(kvm, NULL, gpa);
	if (!pmd || pmd_none(*pmd))	/* Nothing there */
		return 0;

	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
		if (pmd_young(*pmd)) {
			*pmd = pmd_mkold(*pmd);
			return 1;
		}

		return 0;
	}

	pte = pte_offset_kernel(pmd, gpa);
	if (pte_none(*pte))
		return 0;

	if (pte_young(*pte)) {
		*pte = pte_mkold(*pte);	/* Just a page... */
		return 1;
	}

	return 0;
}

static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pmd_t *pmd;
	pte_t *pte;

	pmd = stage2_get_pmd(kvm, NULL, gpa);
	if (!pmd || pmd_none(*pmd))	/* Nothing there */
		return 0;

	if (kvm_pmd_huge(*pmd))		/* THP, HugeTLB */
		return pmd_young(*pmd);

	pte = pte_offset_kernel(pmd, gpa);
	if (!pte_none(*pte))		/* Just a page... */
		return pte_young(*pte);

	return 0;
}

int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
	trace_kvm_age_hva(start, end);
	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	trace_kvm_test_age_hva(hva);
	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
}

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+33 −0
Original line number Diff line number Diff line
@@ -210,6 +210,39 @@ TRACE_EVENT(kvm_set_spte_hva,
	TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
);

TRACE_EVENT(kvm_age_hva,
	TP_PROTO(unsigned long start, unsigned long end),
	TP_ARGS(start, end),

	TP_STRUCT__entry(
		__field(	unsigned long,	start		)
		__field(	unsigned long,	end		)
	),

	TP_fast_assign(
		__entry->start		= start;
		__entry->end		= end;
	),

	TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
		  __entry->start, __entry->end)
);

TRACE_EVENT(kvm_test_age_hva,
	TP_PROTO(unsigned long hva),
	TP_ARGS(hva),

	TP_STRUCT__entry(
		__field(	unsigned long,	hva		)
	),

	TP_fast_assign(
		__entry->hva		= hva;
	),

	TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
);

TRACE_EVENT(kvm_hvc,
	TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
	TP_ARGS(vcpu_pc, r0, imm),
+1 −0
Original line number Diff line number Diff line
@@ -90,6 +90,7 @@
#define ESR_ELx_FSC		(0x3F)
#define ESR_ELx_FSC_TYPE	(0x3C)
#define ESR_ELx_FSC_EXTABT	(0x10)
#define ESR_ELx_FSC_ACCESS	(0x08)
#define ESR_ELx_FSC_FAULT	(0x04)
#define ESR_ELx_FSC_PERM	(0x0C)
#define ESR_ELx_CV		(UL(1) << 24)
Loading