Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f7ed45be authored by Christoffer Dall's avatar Christoffer Dall
Browse files

KVM: ARM: World-switch implementation



Provides complete world-switch implementation to switch to other guests
running in non-secure modes. Includes Hyp exception handlers that
capture necessary exception information and stores the information on
the VCPU and KVM structures.

The following Hyp-ABI is also documented in the code:

Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
   Switching to Hyp mode is done through a simple HVC #0 instruction. The
   exception vector code will check that the HVC comes from VMID==0 and if
   so will push the necessary state (SPSR, lr_usr) on the Hyp stack.
   - r0 contains a pointer to a HYP function
   - r1, r2, and r3 contain arguments to the above function.
   - The HYP function will be called with its arguments in r0, r1 and r2.
   On HYP function return, we return directly to SVC.

A call to a function executing in Hyp mode is performed like the following:

        <svc code>
        ldr     r0, =BSYM(my_hyp_fn)
        ldr     r1, =my_param
        hvc #0  ; Call my_hyp_fn(my_param) from HYP mode
        <svc code>

Otherwise, the world-switch is pretty straight-forward. All state that
can be modified by the guest is first backed up on the Hyp stack and the
VCPU values is loaded onto the hardware. State, which is not loaded, but
theoretically modifiable by the guest is protected through the
virtualiation features to generate a trap and cause software emulation.
Upon guest returns, all state is restored from hardware onto the VCPU
struct and the original state is restored from the Hyp-stack onto the
hardware.

SMP support using the VMPIDR calculated on the basis of the host MPIDR
and overriding the low bits with KVM vcpu_id contributed by Marc Zyngier.

Reuse of VMIDs has been implemented by Antonios Motakis and adapated from
a separate patch into the appropriate patches introducing the
functionality. Note that the VMIDs are stored per VM as required by the ARM
architecture reference manual.

To support VFP/NEON we trap those instructions using the HPCTR. When
we trap, we switch the FPU.  After a guest exit, the VFP state is
returned to the host.  When disabling access to floating point
instructions, we also mask FPEXC_EN in order to avoid the guest
receiving Undefined instruction exceptions before we have a chance to
switch back the floating point state.  We are reusing vfp_hard_struct,
so we depend on VFPv3 being enabled in the host kernel, if not, we still
trap cp10 and cp11 in order to inject an undefined instruction exception
whenever the guest tries to use VFP/NEON. VFP/NEON developed by
Antionios Motakis and Rusty Russell.

Aborts that are permission faults, and not stage-1 page table walk, do
not report the faulting address in the HPFAR.  We have to resolve the
IPA, and store it just like the HPFAR register on the VCPU struct. If
the IPA cannot be resolved, it means another CPU is playing with the
page tables, and we simply restart the guest.  This quirk was fixed by
Marc Zyngier.

Reviewed-by: default avatarWill Deacon <will.deacon@arm.com>
Reviewed-by: default avatarMarcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: default avatarRusty Russell <rusty@rustcorp.com.au>
Signed-off-by: default avatarAntonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: default avatarMarc Zyngier <marc.zyngier@arm.com>
Signed-off-by: default avatarChristoffer Dall <c.dall@virtualopensystems.com>
parent 86ce8535
Loading
Loading
Loading
Loading
+51 −0
Original line number Diff line number Diff line
@@ -98,6 +98,18 @@
#define TTBCR_T0SZ	3
#define HTCR_MASK	(TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)

/* Hyp System Trap Register */
#define HSTR_T(x)	(1 << x)
#define HSTR_TTEE	(1 << 16)
#define HSTR_TJDBX	(1 << 17)

/* Hyp Coprocessor Trap Register */
#define HCPTR_TCP(x)	(1 << x)
#define HCPTR_TCP_MASK	(0x3fff)
#define HCPTR_TASE	(1 << 15)
#define HCPTR_TTA	(1 << 20)
#define HCPTR_TCPAC	(1 << 31)

/* Hyp Debug Configuration Register bits */
#define HDCR_TDRA	(1 << 11)
#define HDCR_TDOSA	(1 << 10)
@@ -144,6 +156,45 @@
#else
#define VTTBR_X		(5 - KVM_T0SZ)
#endif
#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
#define VTTBR_VMID_SHIFT  (48LLU)
#define VTTBR_VMID_MASK	  (0xffLLU << VTTBR_VMID_SHIFT)

/* Hyp Syndrome Register (HSR) bits */
#define HSR_EC_SHIFT	(26)
#define HSR_EC		(0x3fU << HSR_EC_SHIFT)
#define HSR_IL		(1U << 25)
#define HSR_ISS		(HSR_IL - 1)
#define HSR_ISV_SHIFT	(24)
#define HSR_ISV		(1U << HSR_ISV_SHIFT)
#define HSR_FSC		(0x3f)
#define HSR_FSC_TYPE	(0x3c)
#define HSR_WNR		(1 << 6)

#define FSC_FAULT	(0x04)
#define FSC_PERM	(0x0c)

/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK	(~0xf)

#define HSR_EC_UNKNOWN	(0x00)
#define HSR_EC_WFI	(0x01)
#define HSR_EC_CP15_32	(0x03)
#define HSR_EC_CP15_64	(0x04)
#define HSR_EC_CP14_MR	(0x05)
#define HSR_EC_CP14_LS	(0x06)
#define HSR_EC_CP_0_13	(0x07)
#define HSR_EC_CP10_ID	(0x08)
#define HSR_EC_JAZELLE	(0x09)
#define HSR_EC_BXJ	(0x0A)
#define HSR_EC_CP14_64	(0x0C)
#define HSR_EC_SVC_HYP	(0x11)
#define HSR_EC_HVC	(0x12)
#define HSR_EC_SMC	(0x13)
#define HSR_EC_IABT	(0x20)
#define HSR_EC_IABT_HYP	(0x21)
#define HSR_EC_DABT	(0x24)
#define HSR_EC_DABT_HYP	(0x25)

#endif /* __ARM_KVM_ARM_H__ */
+13 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@

#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/fpstate.h>

#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
#define KVM_MEMORY_SLOTS 32
@@ -85,6 +86,14 @@ struct kvm_vcpu_arch {
	u32 hxfar;		/* Hyp Data/Inst Fault Address Register */
	u32 hpfar;		/* Hyp IPA Fault Address Register */

	/* Floating point registers (VFP and Advanced SIMD/NEON) */
	struct vfp_hard_struct vfp_guest;
	struct vfp_hard_struct *vfp_host;

	/*
	 * Anything that is not used directly from assembly code goes
	 * here.
	 */
	/* Interrupt related fields */
	u32 irq_lines;		/* IRQ and FIQ levels */

@@ -93,6 +102,9 @@ struct kvm_vcpu_arch {

	/* Cache some mmu pages needed inside spinlock regions */
	struct kvm_mmu_memory_cache mmu_page_cache;

	/* Detect first run of a vcpu */
	bool has_run_once;
};

struct kvm_vm_stat {
@@ -112,6 +124,7 @@ struct kvm_one_reg;
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
u64 kvm_call_hyp(void *hypfn, ...);
void force_vm_exit(const cpumask_t *mask);

#define KVM_ARCH_WANT_MMU_NOTIFIER
struct kvm;
+25 −0
Original line number Diff line number Diff line
@@ -13,6 +13,9 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#ifdef CONFIG_KVM_ARM_HOST
#include <linux/kvm_host.h>
#endif
#include <asm/cacheflush.h>
#include <asm/glue-df.h>
#include <asm/glue-pf.h>
@@ -146,5 +149,27 @@ int main(void)
  DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
  DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
  DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
#ifdef CONFIG_KVM_ARM_HOST
  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
  DEFINE(VCPU_MIDR,		offsetof(struct kvm_vcpu, arch.midr));
  DEFINE(VCPU_CP15,		offsetof(struct kvm_vcpu, arch.cp15));
  DEFINE(VCPU_VFP_GUEST,	offsetof(struct kvm_vcpu, arch.vfp_guest));
  DEFINE(VCPU_VFP_HOST,		offsetof(struct kvm_vcpu, arch.vfp_host));
  DEFINE(VCPU_REGS,		offsetof(struct kvm_vcpu, arch.regs));
  DEFINE(VCPU_USR_REGS,		offsetof(struct kvm_vcpu, arch.regs.usr_regs));
  DEFINE(VCPU_SVC_REGS,		offsetof(struct kvm_vcpu, arch.regs.svc_regs));
  DEFINE(VCPU_ABT_REGS,		offsetof(struct kvm_vcpu, arch.regs.abt_regs));
  DEFINE(VCPU_UND_REGS,		offsetof(struct kvm_vcpu, arch.regs.und_regs));
  DEFINE(VCPU_IRQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.irq_regs));
  DEFINE(VCPU_FIQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
  DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
  DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
  DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.hsr));
  DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.hxfar));
  DEFINE(VCPU_HPFAR,		offsetof(struct kvm_vcpu, arch.hpfar));
  DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.hyp_pc));
  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
#endif
  return 0; 
}
+199 −1
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_emulate.h>

#ifdef REQUIRES_VIRT
__asm__(".arch_extension	virt");
@@ -49,6 +50,10 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
static struct vfp_hard_struct __percpu *kvm_host_vfp_state;
static unsigned long hyp_default_vectors;

/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u8 kvm_next_vmid;
static DEFINE_SPINLOCK(kvm_vmid_lock);

int kvm_arch_hardware_enable(void *garbage)
{
@@ -276,6 +281,8 @@ int __attribute_const__ kvm_target_cpu(void)

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	/* Force users to call KVM_ARM_VCPU_INIT */
	vcpu->arch.target = -1;
	return 0;
}

@@ -286,6 +293,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	vcpu->cpu = cpu;
	vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -316,9 +324,199 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
	return 0;
}

/* Just ensure a guest exit from a particular CPU */
static void exit_vm_noop(void *info)
{
}

void force_vm_exit(const cpumask_t *mask)
{
	smp_call_function_many(mask, exit_vm_noop, NULL, true);
}

/**
 * need_new_vmid_gen - check that the VMID is still valid
 * @kvm: The VM's VMID to checkt
 *
 * return true if there is a new generation of VMIDs being used
 *
 * The hardware supports only 256 values with the value zero reserved for the
 * host, so we check if an assigned value belongs to a previous generation,
 * which which requires us to assign a new value. If we're the first to use a
 * VMID for the new generation, we must flush necessary caches and TLBs on all
 * CPUs.
 */
static bool need_new_vmid_gen(struct kvm *kvm)
{
	return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
}

/**
 * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
 * @kvm	The guest that we are about to run
 *
 * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
 * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
 * caches and TLBs.
 */
static void update_vttbr(struct kvm *kvm)
{
	phys_addr_t pgd_phys;
	u64 vmid;

	if (!need_new_vmid_gen(kvm))
		return;

	spin_lock(&kvm_vmid_lock);

	/*
	 * We need to re-check the vmid_gen here to ensure that if another vcpu
	 * already allocated a valid vmid for this vm, then this vcpu should
	 * use the same vmid.
	 */
	if (!need_new_vmid_gen(kvm)) {
		spin_unlock(&kvm_vmid_lock);
		return;
	}

	/* First user of a new VMID generation? */
	if (unlikely(kvm_next_vmid == 0)) {
		atomic64_inc(&kvm_vmid_gen);
		kvm_next_vmid = 1;

		/*
		 * On SMP we know no other CPUs can use this CPU's or each
		 * other's VMID after force_vm_exit returns since the
		 * kvm_vmid_lock blocks them from reentry to the guest.
		 */
		force_vm_exit(cpu_all_mask);
		/*
		 * Now broadcast TLB + ICACHE invalidation over the inner
		 * shareable domain to make sure all data structures are
		 * clean.
		 */
		kvm_call_hyp(__kvm_flush_vm_context);
	}

	kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
	kvm->arch.vmid = kvm_next_vmid;
	kvm_next_vmid++;

	/* update vttbr to be used with the new vmid */
	pgd_phys = virt_to_phys(kvm->arch.pgd);
	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
	kvm->arch.vttbr |= vmid;

	spin_unlock(&kvm_vmid_lock);
}

/*
 * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
 * proper exit to QEMU.
 */
static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
		       int exception_index)
{
	run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
	return 0;
}

static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
	if (likely(vcpu->arch.has_run_once))
		return 0;

	vcpu->arch.has_run_once = true;
	return 0;
}

/**
 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
 * @vcpu:	The VCPU pointer
 * @run:	The kvm_run structure pointer used for userspace state exchange
 *
 * This function is called through the VCPU_RUN ioctl called from user space. It
 * will execute VM code in a loop until the time slice for the process is used
 * or some emulation is needed from user space in which case the function will
 * return with return value 0 and with the kvm_run structure filled in with the
 * required data for the requested emulation.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	return -EINVAL;
	int ret;
	sigset_t sigsaved;

	/* Make sure they initialize the vcpu with KVM_ARM_VCPU_INIT */
	if (unlikely(vcpu->arch.target < 0))
		return -ENOEXEC;

	ret = kvm_vcpu_first_run_init(vcpu);
	if (ret)
		return ret;

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/*
		 * Check conditions before entering the guest
		 */
		cond_resched();

		update_vttbr(vcpu->kvm);

		local_irq_disable();

		/*
		 * Re-check atomic conditions
		 */
		if (signal_pending(current)) {
			ret = -EINTR;
			run->exit_reason = KVM_EXIT_INTR;
		}

		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
			local_irq_enable();
			continue;
		}

		/**************************************************************
		 * Enter the guest
		 */
		trace_kvm_entry(*vcpu_pc(vcpu));
		kvm_guest_enter();
		vcpu->mode = IN_GUEST_MODE;

		ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		kvm_guest_exit();
		trace_kvm_exit(*vcpu_pc(vcpu));
		/*
		 * We may have taken a host interrupt in HYP mode (ie
		 * while executing the guest). This interrupt is still
		 * pending, as we haven't serviced it yet!
		 *
		 * We're now back in SVC mode, with interrupts
		 * disabled.  Enabling the interrupts now will have
		 * the effect of taking the interrupt again, in SVC
		 * mode this time.
		 */
		local_irq_enable();

		/*
		 * Back from guest
		 *************************************************************/

		ret = handle_exit(vcpu, run, ret);
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
	return ret;
}

static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
+393 −3
Original line number Diff line number Diff line
@@ -20,9 +20,12 @@
#include <linux/const.h>
#include <asm/unified.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
#include <asm/vfpmacros.h>
#include "interrupts_head.S"

	.text

@@ -31,23 +34,164 @@ __kvm_hyp_code_start:

/********************************************************************
 * Flush per-VMID TLBs
 *
 * void __kvm_tlb_flush_vmid(struct kvm *kvm);
 *
 * We rely on the hardware to broadcast the TLB invalidation to all CPUs
 * inside the inner-shareable domain (which is the case for all v7
 * implementations).  If we come across a non-IS SMP implementation, we'll
 * have to use an IPI based mechanism. Until then, we stick to the simple
 * hardware assisted version.
 */
ENTRY(__kvm_tlb_flush_vmid)
	push	{r2, r3}

	add	r0, r0, #KVM_VTTBR
	ldrd	r2, r3, [r0]
	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
	isb
	mcr     p15, 0, r0, c8, c3, 0	@ TLBIALLIS (rt ignored)
	dsb
	isb
	mov	r2, #0
	mov	r3, #0
	mcrr	p15, 6, r2, r3, c2	@ Back to VMID #0
	isb				@ Not necessary if followed by eret

	pop	{r2, r3}
	bx	lr
ENDPROC(__kvm_tlb_flush_vmid)

/********************************************************************
 * Flush TLBs and instruction caches of current CPU for all VMIDs
 * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
 * domain, for all VMIDs
 *
 * void __kvm_flush_vm_context(void);
 */
ENTRY(__kvm_flush_vm_context)
	mov	r0, #0			@ rn parameter for c15 flushes is SBZ

	/* Invalidate NS Non-Hyp TLB Inner Shareable (TLBIALLNSNHIS) */
	mcr     p15, 4, r0, c8, c3, 4
	/* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
	mcr     p15, 0, r0, c7, c1, 0
	dsb
	isb				@ Not necessary if followed by eret

	bx	lr
ENDPROC(__kvm_flush_vm_context)


/********************************************************************
 *  Hypervisor world-switch code
 *
 *
 * int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 */
ENTRY(__kvm_vcpu_run)
	bx	lr
	@ Save the vcpu pointer
	mcr	p15, 4, vcpu, c13, c0, 2	@ HTPIDR

	save_host_regs

	@ Store hardware CP15 state and load guest state
	read_cp15_state store_to_vcpu = 0
	write_cp15_state read_from_vcpu = 1

	@ If the host kernel has not been configured with VFPv3 support,
	@ then it is safer if we deny guests from using it as well.
#ifdef CONFIG_VFPv3
	@ Set FPEXC_EN so the guest doesn't trap floating point instructions
	VFPFMRX r2, FPEXC		@ VMRS
	push	{r2}
	orr	r2, r2, #FPEXC_EN
	VFPFMXR FPEXC, r2		@ VMSR
#endif

	@ Configure Hyp-role
	configure_hyp_role vmentry

	@ Trap coprocessor CRx accesses
	set_hstr vmentry
	set_hcptr vmentry, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
	set_hdcr vmentry

	@ Write configured ID register into MIDR alias
	ldr	r1, [vcpu, #VCPU_MIDR]
	mcr	p15, 4, r1, c0, c0, 0

	@ Write guest view of MPIDR into VMPIDR
	ldr	r1, [vcpu, #CP15_OFFSET(c0_MPIDR)]
	mcr	p15, 4, r1, c0, c0, 5

	@ Set up guest memory translation
	ldr	r1, [vcpu, #VCPU_KVM]
	add	r1, r1, #KVM_VTTBR
	ldrd	r2, r3, [r1]
	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR

	@ We're all done, just restore the GPRs and go to the guest
	restore_guest_regs
	clrex				@ Clear exclusive monitor
	eret

__kvm_vcpu_return:
	/*
	 * return convention:
	 * guest r0, r1, r2 saved on the stack
	 * r0: vcpu pointer
	 * r1: exception code
	 */
	save_guest_regs

	@ Set VMID == 0
	mov	r2, #0
	mov	r3, #0
	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR

	@ Don't trap coprocessor accesses for host kernel
	set_hstr vmexit
	set_hdcr vmexit
	set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))

#ifdef CONFIG_VFPv3
	@ Save floating point registers we if let guest use them.
	tst	r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
	bne	after_vfp_restore

	@ Switch VFP/NEON hardware state to the host's
	add	r7, vcpu, #VCPU_VFP_GUEST
	store_vfp_state r7
	add	r7, vcpu, #VCPU_VFP_HOST
	ldr	r7, [r7]
	restore_vfp_state r7

after_vfp_restore:
	@ Restore FPEXC_EN which we clobbered on entry
	pop	{r2}
	VFPFMXR FPEXC, r2
#endif

	@ Reset Hyp-role
	configure_hyp_role vmexit

	@ Let host read hardware MIDR
	mrc	p15, 0, r2, c0, c0, 0
	mcr	p15, 4, r2, c0, c0, 0

	@ Back to hardware MPIDR
	mrc	p15, 0, r2, c0, c0, 5
	mcr	p15, 4, r2, c0, c0, 5

	@ Store guest CP15 state and restore host state
	read_cp15_state store_to_vcpu = 1
	write_cp15_state read_from_vcpu = 0

	restore_host_regs
	clrex				@ Clear exclusive monitor
	mov	r0, r1			@ Return the return code
	mov	r1, #0			@ Clear upper bits in return value
	bx	lr			@ return to IOCTL

/********************************************************************
 *  Call function in Hyp mode
@@ -77,12 +221,258 @@ ENTRY(kvm_call_hyp)

/********************************************************************
 * Hypervisor exception vector and handlers
 *
 *
 * The KVM/ARM Hypervisor ABI is defined as follows:
 *
 * Entry to Hyp mode from the host kernel will happen _only_ when an HVC
 * instruction is issued since all traps are disabled when running the host
 * kernel as per the Hyp-mode initialization at boot time.
 *
 * HVC instructions cause a trap to the vector page + offset 0x18 (see hyp_hvc
 * below) when the HVC instruction is called from SVC mode (i.e. a guest or the
 * host kernel) and they cause a trap to the vector page + offset 0xc when HVC
 * instructions are called from within Hyp-mode.
 *
 * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
 *    Switching to Hyp mode is done through a simple HVC #0 instruction. The
 *    exception vector code will check that the HVC comes from VMID==0 and if
 *    so will push the necessary state (SPSR, lr_usr) on the Hyp stack.
 *    - r0 contains a pointer to a HYP function
 *    - r1, r2, and r3 contain arguments to the above function.
 *    - The HYP function will be called with its arguments in r0, r1 and r2.
 *    On HYP function return, we return directly to SVC.
 *
 * Note that the above is used to execute code in Hyp-mode from a host-kernel
 * point of view, and is a different concept from performing a world-switch and
 * executing guest code SVC mode (with a VMID != 0).
 */

/* Handle undef, svc, pabt, or dabt by crashing with a user notice */
.macro bad_exception exception_code, panic_str
	push	{r0-r2}
	mrrc	p15, 6, r0, r1, c2	@ Read VTTBR
	lsr	r1, r1, #16
	ands	r1, r1, #0xff
	beq	99f

	load_vcpu			@ Load VCPU pointer
	.if \exception_code == ARM_EXCEPTION_DATA_ABORT
	mrc	p15, 4, r2, c5, c2, 0	@ HSR
	mrc	p15, 4, r1, c6, c0, 0	@ HDFAR
	str	r2, [vcpu, #VCPU_HSR]
	str	r1, [vcpu, #VCPU_HxFAR]
	.endif
	.if \exception_code == ARM_EXCEPTION_PREF_ABORT
	mrc	p15, 4, r2, c5, c2, 0	@ HSR
	mrc	p15, 4, r1, c6, c0, 2	@ HIFAR
	str	r2, [vcpu, #VCPU_HSR]
	str	r1, [vcpu, #VCPU_HxFAR]
	.endif
	mov	r1, #\exception_code
	b	__kvm_vcpu_return

	@ We were in the host already. Let's craft a panic-ing return to SVC.
99:	mrs	r2, cpsr
	bic	r2, r2, #MODE_MASK
	orr	r2, r2, #SVC_MODE
THUMB(	orr	r2, r2, #PSR_T_BIT	)
	msr	spsr_cxsf, r2
	mrs	r1, ELR_hyp
	ldr	r2, =BSYM(panic)
	msr	ELR_hyp, r2
	ldr	r0, =\panic_str
	eret
.endm

	.text

	.align 5
__kvm_hyp_vector:
	.globl __kvm_hyp_vector
	nop

	@ Hyp-mode exception vector
	W(b)	hyp_reset
	W(b)	hyp_undef
	W(b)	hyp_svc
	W(b)	hyp_pabt
	W(b)	hyp_dabt
	W(b)	hyp_hvc
	W(b)	hyp_irq
	W(b)	hyp_fiq

	.align
hyp_reset:
	b	hyp_reset

	.align
hyp_undef:
	bad_exception ARM_EXCEPTION_UNDEFINED, und_die_str

	.align
hyp_svc:
	bad_exception ARM_EXCEPTION_HVC, svc_die_str

	.align
hyp_pabt:
	bad_exception ARM_EXCEPTION_PREF_ABORT, pabt_die_str

	.align
hyp_dabt:
	bad_exception ARM_EXCEPTION_DATA_ABORT, dabt_die_str

	.align
hyp_hvc:
	/*
	 * Getting here is either becuase of a trap from a guest or from calling
	 * HVC from the host kernel, which means "switch to Hyp mode".
	 */
	push	{r0, r1, r2}

	@ Check syndrome register
	mrc	p15, 4, r1, c5, c2, 0	@ HSR
	lsr	r0, r1, #HSR_EC_SHIFT
#ifdef CONFIG_VFPv3
	cmp	r0, #HSR_EC_CP_0_13
	beq	switch_to_guest_vfp
#endif
	cmp	r0, #HSR_EC_HVC
	bne	guest_trap		@ Not HVC instr.

	/*
	 * Let's check if the HVC came from VMID 0 and allow simple
	 * switch to Hyp mode
	 */
	mrrc    p15, 6, r0, r2, c2
	lsr     r2, r2, #16
	and     r2, r2, #0xff
	cmp     r2, #0
	bne	guest_trap		@ Guest called HVC

host_switch_to_hyp:
	pop	{r0, r1, r2}

	push	{lr}
	mrs	lr, SPSR
	push	{lr}

	mov	lr, r0
	mov	r0, r1
	mov	r1, r2
	mov	r2, r3

THUMB(	orr	lr, #1)
	blx	lr			@ Call the HYP function

	pop	{lr}
	msr	SPSR_csxf, lr
	pop	{lr}
	eret

guest_trap:
	load_vcpu			@ Load VCPU pointer to r0
	str	r1, [vcpu, #VCPU_HSR]

	@ Check if we need the fault information
	lsr	r1, r1, #HSR_EC_SHIFT
	cmp	r1, #HSR_EC_IABT
	mrceq	p15, 4, r2, c6, c0, 2	@ HIFAR
	beq	2f
	cmp	r1, #HSR_EC_DABT
	bne	1f
	mrc	p15, 4, r2, c6, c0, 0	@ HDFAR

2:	str	r2, [vcpu, #VCPU_HxFAR]

	/*
	 * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode:
	 *
	 * Abort on the stage 2 translation for a memory access from a
	 * Non-secure PL1 or PL0 mode:
	 *
	 * For any Access flag fault or Translation fault, and also for any
	 * Permission fault on the stage 2 translation of a memory access
	 * made as part of a translation table walk for a stage 1 translation,
	 * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR
	 * is UNKNOWN.
	 */

	/* Check for permission fault, and S1PTW */
	mrc	p15, 4, r1, c5, c2, 0	@ HSR
	and	r0, r1, #HSR_FSC_TYPE
	cmp	r0, #FSC_PERM
	tsteq	r1, #(1 << 7)		@ S1PTW
	mrcne	p15, 4, r2, c6, c0, 4	@ HPFAR
	bne	3f

	/* Resolve IPA using the xFAR */
	mcr	p15, 0, r2, c7, c8, 0	@ ATS1CPR
	isb
	mrrc	p15, 0, r0, r1, c7	@ PAR
	tst	r0, #1
	bne	4f			@ Failed translation
	ubfx	r2, r0, #12, #20
	lsl	r2, r2, #4
	orr	r2, r2, r1, lsl #24

3:	load_vcpu			@ Load VCPU pointer to r0
	str	r2, [r0, #VCPU_HPFAR]

1:	mov	r1, #ARM_EXCEPTION_HVC
	b	__kvm_vcpu_return

4:	pop	{r0, r1, r2}		@ Failed translation, return to guest
	eret

/*
 * If VFPv3 support is not available, then we will not switch the VFP
 * registers; however cp10 and cp11 accesses will still trap and fallback
 * to the regular coprocessor emulation code, which currently will
 * inject an undefined exception to the guest.
 */
#ifdef CONFIG_VFPv3
switch_to_guest_vfp:
	load_vcpu			@ Load VCPU pointer to r0
	push	{r3-r7}

	@ NEON/VFP used.  Turn on VFP access.
	set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11))

	@ Switch VFP/NEON hardware state to the guest's
	add	r7, r0, #VCPU_VFP_HOST
	ldr	r7, [r7]
	store_vfp_state r7
	add	r7, r0, #VCPU_VFP_GUEST
	restore_vfp_state r7

	pop	{r3-r7}
	pop	{r0-r2}
	eret
#endif

	.align
hyp_irq:
	push	{r0, r1, r2}
	mov	r1, #ARM_EXCEPTION_IRQ
	load_vcpu			@ Load VCPU pointer to r0
	b	__kvm_vcpu_return

	.align
hyp_fiq:
	b	hyp_fiq

	.ltorg

__kvm_hyp_code_end:
	.globl	__kvm_hyp_code_end

	.section ".rodata"

und_die_str:
	.ascii	"unexpected undefined exception in Hyp mode at: %#08x"
pabt_die_str:
	.ascii	"unexpected prefetch abort in Hyp mode at: %#08x"
dabt_die_str:
	.ascii	"unexpected data abort in Hyp mode at: %#08x"
svc_die_str:
	.ascii	"unexpected HVC/SVC trap in Hyp mode at: %#08x"
Loading