Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit de56a948 authored by Paul Mackerras, committed by Avi Kivity
Browse files

KVM: PPC: Add support for Book3S processors in hypervisor mode



This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode.  Using hypervisor mode means
that the guest can use the processor's supervisor mode.  That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host.  This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.

This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses.  That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification.  In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.

Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.

This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.

With the guest running in supervisor mode, most exceptions go straight
to the guest.  We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest.  Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.

We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.

In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount.  Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.

The POWER7 processor has a restriction that all threads in a core have
to be in the same partition.  MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest.  At present we require the host and guest to run
in single-thread mode because of this hardware restriction.

This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA).  We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management.  This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.

This also adds a few new exports needed by the book3s_hv code.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
parent 3c42bf8a
Loading
Loading
Loading
Loading
+17 −0
Original line number Original line Diff line number Diff line
@@ -1532,6 +1532,23 @@ Userspace can now handle the hypercall and when it's done modify the gprs as
necessary. Upon guest entry all guest GPRs will then be replaced by the values
necessary. Upon guest entry all guest GPRs will then be replaced by the values
in this struct.
in this struct.


		/* KVM_EXIT_PAPR_HCALL */
		struct {
			__u64 nr;
			__u64 ret;
			__u64 args[9];
		} papr_hcall;

This is used on 64-bit PowerPC when emulating a pSeries partition,
e.g. with the 'pseries' machine type in qemu.  It occurs when the
guest does a hypercall using the 'sc 1' instruction.  The 'nr' field
contains the hypercall number (from the guest R3), and 'args' contains
the arguments (from the guest R4 - R12).  Userspace should put the
return code in 'ret' and any extra returned values in args[].
The possible hypercalls are defined in the Power Architecture Platform
Requirements (PAPR) document available from www.power.org (free
developer registration required to access it).

		/* Fix the size of the union. */
		/* Fix the size of the union. */
		char padding[256];
		char padding[256];
	};
	};
+15 −4
Original line number Original line Diff line number Diff line
@@ -134,6 +134,17 @@ do_kvm_##n: \
#define KVM_HANDLER_SKIP(area, h, n)
#define KVM_HANDLER_SKIP(area, h, n)
#endif
#endif


/*
 * _PR flavours of the KVM entry-test and handler macros.  They forward to
 * __KVMTEST / __KVM_HANDLER / __KVM_HANDLER_SKIP only when
 * CONFIG_KVM_BOOK3S_PR is defined; otherwise they expand to nothing, so
 * exception entry paths that use them carry no KVM test at all.
 */
#ifdef CONFIG_KVM_BOOK3S_PR
#define KVMTEST_PR(n)			__KVMTEST(n)
#define KVM_HANDLER_PR(area, h, n)	__KVM_HANDLER(area, h, n)
#define KVM_HANDLER_PR_SKIP(area, h, n)	__KVM_HANDLER_SKIP(area, h, n)

#else
/* Not a PR build: the _PR variants compile away entirely. */
#define KVMTEST_PR(n)
#define KVM_HANDLER_PR(area, h, n)
#define KVM_HANDLER_PR_SKIP(area, h, n)
#endif

#define NOTEST(n)
#define NOTEST(n)


/*
/*
@@ -210,7 +221,7 @@ label##_pSeries: \
	HMT_MEDIUM;					\
	HMT_MEDIUM;					\
	SET_SCRATCH0(r13);		/* save r13 */		\
	SET_SCRATCH0(r13);		/* save r13 */		\
	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common,	\
	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common,	\
				 EXC_STD, KVMTEST, vec)
				 EXC_STD, KVMTEST_PR, vec)


#define STD_EXCEPTION_HV(loc, vec, label)		\
#define STD_EXCEPTION_HV(loc, vec, label)		\
	. = loc;					\
	. = loc;					\
@@ -227,8 +238,8 @@ label##_hv: \
	beq	masked_##h##interrupt
	beq	masked_##h##interrupt
#define _SOFTEN_TEST(h)	__SOFTEN_TEST(h)
#define _SOFTEN_TEST(h)	__SOFTEN_TEST(h)


#define SOFTEN_TEST(vec)						\
#define SOFTEN_TEST_PR(vec)						\
	KVMTEST(vec);							\
	KVMTEST_PR(vec);						\
	_SOFTEN_TEST(EXC_STD)
	_SOFTEN_TEST(EXC_STD)


#define SOFTEN_TEST_HV(vec)						\
#define SOFTEN_TEST_HV(vec)						\
@@ -248,7 +259,7 @@ label##_hv: \
	.globl label##_pSeries;						\
	.globl label##_pSeries;						\
label##_pSeries:							\
label##_pSeries:							\
	_MASKABLE_EXCEPTION_PSERIES(vec, label,				\
	_MASKABLE_EXCEPTION_PSERIES(vec, label,				\
				    EXC_STD, SOFTEN_TEST)
				    EXC_STD, SOFTEN_TEST_PR)


#define MASKABLE_EXCEPTION_HV(loc, vec, label)				\
#define MASKABLE_EXCEPTION_HV(loc, vec, label)				\
	. = loc;							\
	. = loc;							\
+4 −0
Original line number Original line Diff line number Diff line
@@ -64,8 +64,12 @@
#define BOOK3S_INTERRUPT_PROGRAM	0x700
#define BOOK3S_INTERRUPT_PROGRAM	0x700
#define BOOK3S_INTERRUPT_FP_UNAVAIL	0x800
#define BOOK3S_INTERRUPT_FP_UNAVAIL	0x800
#define BOOK3S_INTERRUPT_DECREMENTER	0x900
#define BOOK3S_INTERRUPT_DECREMENTER	0x900
#define BOOK3S_INTERRUPT_HV_DECREMENTER	0x980
#define BOOK3S_INTERRUPT_SYSCALL	0xc00
#define BOOK3S_INTERRUPT_SYSCALL	0xc00
#define BOOK3S_INTERRUPT_TRACE		0xd00
#define BOOK3S_INTERRUPT_TRACE		0xd00
#define BOOK3S_INTERRUPT_H_DATA_STORAGE	0xe00
#define BOOK3S_INTERRUPT_H_INST_STORAGE	0xe20
#define BOOK3S_INTERRUPT_H_EMUL_ASSIST	0xe40
#define BOOK3S_INTERRUPT_PERFMON	0xf00
#define BOOK3S_INTERRUPT_PERFMON	0xf00
#define BOOK3S_INTERRUPT_ALTIVEC	0xf20
#define BOOK3S_INTERRUPT_ALTIVEC	0xf20
#define BOOK3S_INTERRUPT_VSX		0xf40
#define BOOK3S_INTERRUPT_VSX		0xf40
+119 −18
Original line number Original line Diff line number Diff line
@@ -116,6 +116,7 @@ extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr);
extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr);
extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr);
extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
@@ -127,10 +128,12 @@ extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
extern int kvmppc_mmu_hpte_sysinit(void);
extern int kvmppc_mmu_hpte_sysinit(void);
extern void kvmppc_mmu_hpte_sysexit(void);
extern void kvmppc_mmu_hpte_sysexit(void);
extern int kvmppc_mmu_hv_init(void);


extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
			   bool upper, u32 val);
			   bool upper, u32 val);
extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
@@ -140,6 +143,7 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
extern void kvmppc_handler_lowmem_trampoline(void);
extern void kvmppc_handler_lowmem_trampoline(void);
extern void kvmppc_handler_trampoline_enter(void);
extern void kvmppc_handler_trampoline_enter(void);
extern void kvmppc_rmcall(ulong srr0, ulong srr1);
extern void kvmppc_rmcall(ulong srr0, ulong srr1);
extern void kvmppc_hv_entry_trampoline(void);
extern void kvmppc_load_up_fpu(void);
extern void kvmppc_load_up_fpu(void);
extern void kvmppc_load_up_altivec(void);
extern void kvmppc_load_up_altivec(void);
extern void kvmppc_load_up_vsx(void);
extern void kvmppc_load_up_vsx(void);
@@ -151,6 +155,19 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
	return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu);
	return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu);
}
}


extern void kvm_return_point(void);

/* Also add subarch specific defines */

#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
#include <asm/kvm_book3s_32.h>
#endif
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#include <asm/kvm_book3s_64.h>
#endif

#ifdef CONFIG_KVM_BOOK3S_PR

static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
{
{
	return to_book3s(vcpu)->hior;
	return to_book3s(vcpu)->hior;
@@ -165,16 +182,6 @@ static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
		vcpu->arch.shared->int_pending = 0;
		vcpu->arch.shared->int_pending = 0;
}
}


static inline ulong dsisr(void)
{
	ulong r;
	asm ( "mfdsisr %0 " : "=r" (r) );
	return r;
}

extern void kvm_return_point(void);
static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu);

static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
{
	if ( num < 14 ) {
	if ( num < 14 ) {
@@ -281,6 +288,108 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)


	return crit;
	return crit;
}
}
#else /* CONFIG_KVM_BOOK3S_PR */

/*
 * Non-PR variant: the interrupt offset is always reported as 0, and the
 * vcpu argument is not consulted.
 * NOTE(review): presumably this is the base added to guest interrupt
 * vectors - confirm against the callers.
 */
static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
{
	return 0;
}

/*
 * Keep the MER bit of the vcpu's saved LPCR in step with the pending
 * interrupt mask: MER is set while either external-interrupt priority
 * bit is present in pending_now and cleared otherwise.
 *
 * old_pending is accepted for interface compatibility but is not used
 * by this variant.
 */
static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
			unsigned long pending_now, unsigned long old_pending)
{
	/* No external interrupt of either kind pending: drop MER. */
	if (!test_bit(BOOK3S_IRQPRIO_EXTERNAL, &pending_now) &&
	    !test_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &pending_now)) {
		vcpu->arch.lpcr &= ~((u64)LPCR_MER);
		return;
	}

	/* At least one external interrupt is pending: raise MER. */
	vcpu->arch.lpcr |= LPCR_MER;
}

/*
 * Store val into guest general-purpose register slot num of vcpu->arch.
 * num is used as an array index without any range check.
 */
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
	vcpu->arch.gpr[num] = val;
}

/*
 * Return the value held in guest general-purpose register slot num of
 * vcpu->arch.  num is used as an array index without any range check.
 */
static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
{
	return vcpu->arch.gpr[num];
}

/* Store val as the guest CR value kept in vcpu->arch. */
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
	vcpu->arch.cr = val;
}

/* Return the guest CR value kept in vcpu->arch. */
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.cr;
}

/* Store val as the guest XER value kept in vcpu->arch. */
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
{
	vcpu->arch.xer = val;
}

/*
 * Return the guest XER value kept in vcpu->arch, as a u32.
 * NOTE(review): the field's declared width is not visible here; if it is
 * wider than 32 bits the return truncates - confirm.
 */
static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.xer;
}

/* Store val as the guest CTR value kept in vcpu->arch. */
static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
{
	vcpu->arch.ctr = val;
}

/* Return the guest CTR value kept in vcpu->arch. */
static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.ctr;
}

/* Store val as the guest LR value kept in vcpu->arch. */
static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
{
	vcpu->arch.lr = val;
}

/* Return the guest LR value kept in vcpu->arch. */
static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.lr;
}

/* Store val as the guest program counter kept in vcpu->arch. */
static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
{
	vcpu->arch.pc = val;
}

/* Return the guest program counter kept in vcpu->arch. */
static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pc;
}

/*
 * Return the instruction word recorded in vcpu->arch.last_inst.
 *
 * If the recorded value is the KVM_INST_FETCH_FAILED marker, the word is
 * first re-read from the guest at the current pc via kvmppc_ld(), which
 * writes straight into vcpu->arch.last_inst.  The result of kvmppc_ld()
 * is not checked, so whatever last_inst holds afterwards is returned.
 */
static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
{
	ulong pc;

	/* Common case: the instruction was already captured. */
	if (vcpu->arch.last_inst != KVM_INST_FETCH_FAILED)
		return vcpu->arch.last_inst;

	/* The earlier fetch failed, so load the word by hand now. */
	pc = kvmppc_get_pc(vcpu);
	kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);

	return vcpu->arch.last_inst;
}

/* Return the fault_dar value kept in vcpu->arch. */
static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.fault_dar;
}

/*
 * Non-PR variant: the vcpu is never reported as being in a critical
 * section; the argument is not consulted.
 */
static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
{
	return false;
}
#endif


/* Magic register values loaded into r3 and r4 before the 'sc' assembly
/* Magic register values loaded into r3 and r4 before the 'sc' assembly
 * instruction for the OSI hypercalls */
 * instruction for the OSI hypercalls */
@@ -289,12 +398,4 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)


#define INS_DCBZ			0x7c0007ec
#define INS_DCBZ			0x7c0007ec


/* Also add subarch specific defines */

#ifdef CONFIG_PPC_BOOK3S_32
#include <asm/kvm_book3s_32.h>
#else
#include <asm/kvm_book3s_64.h>
#endif

#endif /* __ASM_KVM_BOOK3S_H__ */
#endif /* __ASM_KVM_BOOK3S_H__ */
+2 −0
Original line number Original line Diff line number Diff line
@@ -20,9 +20,11 @@
#ifndef __ASM_KVM_BOOK3S_64_H__
#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__


#ifdef CONFIG_KVM_BOOK3S_PR
static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
{
{
	return &get_paca()->shadow_vcpu;
	return &get_paca()->shadow_vcpu;
}
}
#endif


#endif /* __ASM_KVM_BOOK3S_64_H__ */
#endif /* __ASM_KVM_BOOK3S_64_H__ */
Loading