Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6d6ab940 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge branch 'kvm-ppc-next' of...

Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

 into HEAD

Apart from various bugfixes and code cleanups, the major new feature
is the ability to run guests using the hashed page table (HPT) MMU
mode on a host that is using the radix MMU mode.  Because of limitations
in the current POWER9 chip (all SMT threads in each core must use the
same MMU mode, HPT or radix), this requires the host to be configured
to run similar to POWER8: the host runs in single-threaded mode (only
thread 0 of each core online), and have KVM be able to wake up the other
threads when a KVM guest is to be run, and use the other threads for
running guest VCPUs.  A new module parameter, called "indep_threads_mode",
is normally Y on POWER9 but must be set to N before any HPT guests can
be run on a radix host:

    # echo N >/sys/module/kvm_hv/parameters/indep_threads_mode
    # ppc64_cpu --smt=off

Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parents 9ffd986c c0101509
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -216,7 +216,8 @@ extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa,
			bool writing, bool *writable);
extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
			unsigned long *rmap, long pte_index, int realmode);
extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize);
extern void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot,
			unsigned long gfn, unsigned long psize);
extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
			unsigned long pte_index);
void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
+119 −21
Original line number Diff line number Diff line
@@ -20,6 +20,8 @@
#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__

#include <linux/string.h>
#include <asm/bitops.h>
#include <asm/book3s/64/mmu-hash.h>

/* Power architecture requires HPT is at least 256kiB, at most 64TiB */
@@ -107,18 +109,96 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
	hpte[0] = cpu_to_be64(hpte_v);
}

/*
 * These functions encode knowledge of the POWER7/8/9 hardware
 * interpretations of the HPTE LP (large page size) field.
 */
static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
{
	unsigned int lphi;

	if (!(h & HPTE_V_LARGE))
		return 12;	/* 4kB */
	lphi = (l >> 16) & 0xf;
	switch ((l >> 12) & 0xf) {
	case 0:
		return !lphi ? 24 : -1;		/* 16MB */
		break;
	case 1:
		return 16;			/* 64kB */
		break;
	case 3:
		return !lphi ? 34 : -1;		/* 16GB */
		break;
	case 7:
		return (16 << 8) + 12;		/* 64kB in 4kB */
		break;
	case 8:
		if (!lphi)
			return (24 << 8) + 16;	/* 16MB in 64kkB */
		if (lphi == 3)
			return (24 << 8) + 12;	/* 16MB in 4kB */
		break;
	}
	return -1;
}

static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
{
	return kvmppc_hpte_page_shifts(h, l) & 0xff;
}

static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l)
{
	int tmp = kvmppc_hpte_page_shifts(h, l);

	if (tmp >= 0x100)
		tmp >>= 8;
	return tmp;
}

static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
{
	return 1ul << kvmppc_hpte_actual_page_shift(v, r);
}

static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
{
	switch (base_shift) {
	case 12:
		switch (actual_shift) {
		case 12:
			return 0;
		case 16:
			return 7;
		case 24:
			return 0x38;
		}
		break;
	case 16:
		switch (actual_shift) {
		case 16:
			return 1;
		case 24:
			return 8;
		}
		break;
	case 24:
		return 0;
	}
	return -1;
}

static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
					     unsigned long pte_index)
{
	int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
	unsigned int penc;
	int a_pgshift, b_pgshift;
	unsigned long rb = 0, va_low, sllp;
	unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (v & HPTE_V_LARGE) {
		i = hpte_page_sizes[lp];
		b_psize = i & 0xf;
		a_psize = i >> 4;
	b_pgshift = a_pgshift = kvmppc_hpte_page_shifts(v, r);
	if (a_pgshift >= 0x100) {
		b_pgshift &= 0xff;
		a_pgshift >>= 8;
	}

	/*
@@ -152,37 +232,33 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
		va_low ^= v >> (SID_SHIFT_1T - 16);
	va_low &= 0x7ff;

	switch (b_psize) {
	case MMU_PAGE_4K:
		sllp = get_sllp_encoding(a_psize);
	if (b_pgshift == 12) {
		if (a_pgshift > 12) {
			sllp = (a_pgshift == 16) ? 5 : 4;
			rb |= sllp << 5;	/*  AP field */
		}
		rb |= (va_low & 0x7ff) << 12;	/* remaining 11 bits of AVA */
		break;
	default:
	{
	} else {
		int aval_shift;
		/*
		 * remaining bits of AVA/LP fields
		 * Also contain the rr bits of LP
		 */
		rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000;
		rb |= (va_low << b_pgshift) & 0x7ff000;
		/*
		 * Now clear not needed LP bits based on actual psize
		 */
		rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
		rb &= ~((1ul << a_pgshift) - 1);
		/*
		 * AVAL field 58..77 - base_page_shift bits of va
		 * we have space for 58..64 bits, Missing bits should
		 * be zero filled. +1 is to take care of L bit shift
		 */
		aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
		aval_shift = 64 - (77 - b_pgshift) + 1;
		rb |= ((va_low << aval_shift) & 0xfe);

		rb |= 1;		/* L field */
		penc = mmu_psize_defs[b_psize].penc[a_psize];
		rb |= penc << 12;	/* LP field */
		break;
	}
		rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */
	}
	rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;	/* B field */
	return rb;
@@ -370,6 +446,28 @@ static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt)
	return (1UL << (hpt->order - 7)) - 1;
}

/* Set bits in a dirty bitmap, which is in LE format */
static inline void set_dirty_bits(unsigned long *map, unsigned long i,
				  unsigned long npages)
{

	if (npages >= 8)
		memset((char *)map + i / 8, 0xff, npages / 8);
	else
		for (; npages; ++i, --npages)
			__set_bit_le(i, map);
}

static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
					 unsigned long npages)
{
	if (npages >= 8)
		memset((char *)map + i / 8, 0xff, npages / 8);
	else
		for (; npages; ++i, --npages)
			set_bit_le(i, map);
}

#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

#endif /* __ASM_KVM_BOOK3S_64_H__ */
+12 −5
Original line number Diff line number Diff line
@@ -82,6 +82,16 @@ struct kvm_split_mode {
	u8		do_nap;
	u8		napped[MAX_SMT_THREADS];
	struct kvmppc_vcore *vc[MAX_SUBCORES];
	/* Bits for changing lpcr on P9 */
	unsigned long	lpcr_req;
	unsigned long	lpidr_req;
	unsigned long	host_lpcr;
	u32		do_set;
	u32		do_restore;
	union {
		u32	allphases;
		u8	phase[4];
	} lpcr_sync;
};

/*
@@ -104,14 +114,11 @@ struct kvmppc_host_state {
	u8 napping;

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	/*
	 * hwthread_req/hwthread_state pair is used to pull sibling threads
	 * out of guest on pre-ISAv3.0B CPUs where threads share MMU.
	 */
	u8 hwthread_req;
	u8 hwthread_state;
	u8 host_ipi;
	u8 ptid;
	u8 ptid;		/* thread number within subcore when split */
	u8 tid;			/* thread number within whole core */
	struct kvm_vcpu *kvm_vcpu;
	struct kvmppc_vcore *kvm_vcore;
	void __iomem *xics_phys;
+2 −4
Original line number Diff line number Diff line
@@ -235,10 +235,7 @@ struct revmap_entry {
 */
#define KVMPPC_RMAP_LOCK_BIT	63
#define KVMPPC_RMAP_RC_SHIFT	32
#define KVMPPC_RMAP_CHG_SHIFT	48
#define KVMPPC_RMAP_REFERENCED	(HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHANGED	(HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_CHG_ORDER	(0x3ful << KVMPPC_RMAP_CHG_SHIFT)
#define KVMPPC_RMAP_PRESENT	0x100000000ul
#define KVMPPC_RMAP_INDEX	0xfffffffful

@@ -276,7 +273,7 @@ struct kvm_arch {
	int tlbie_lock;
	unsigned long lpcr;
	unsigned long vrma_slb_v;
	int hpte_setup_done;
	int mmu_ready;
	atomic_t vcpus_running;
	u32 online_vcores;
	atomic_t hpte_mod_interest;
@@ -284,6 +281,7 @@ struct kvm_arch {
	cpumask_t cpu_in_guest;
	u8 radix;
	u8 fwnmi_enabled;
	bool threads_indep;
	pgd_t *pgtable;
	u64 process_table;
	struct dentry *debugfs_dir;
+3 −0
Original line number Diff line number Diff line
@@ -168,6 +168,7 @@ extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
extern void kvmppc_rmap_reset(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem);
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
@@ -177,6 +178,8 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
		struct iommu_group *grp);
extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
		struct iommu_group *grp);
extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);

extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
				struct kvm_create_spapr_tce_64 *args);
Loading