Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8a5de182 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm

Pull second batch of changes for KVM/{arm,arm64} from Marc Zyngier:
 "The most obvious thing is the sizeable MMU changes to support 48bit
  VAs on arm64.

  Summary:

   - support for 48bit IPA and VA (EL2)
   - a number of fixes for devices mapped into guests
   - yet another VGIC fix for BE
   - a fix for CPU hotplug
   - a few compile fixes (disabled VGIC, strict mm checks)"

[ I'm pulling directly from Marc at the request of Paolo Bonzini, whose
  backpack was stolen at Düsseldorf airport and will do new keys and
  rebuild his web of trust.    - Linus ]

* tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm:
  arm/arm64: KVM: Fix BE accesses to GICv2 EISR and ELRSR regs
  arm: kvm: STRICT_MM_TYPECHECKS fix for user_mem_abort
  arm/arm64: KVM: Ensure memslots are within KVM_PHYS_SIZE
  arm64: KVM: Implement 48 VA support for KVM EL2 and Stage-2
  arm/arm64: KVM: map MMIO regions at creation time
  arm64: kvm: define PAGE_S2_DEVICE as read-only by default
  ARM: kvm: define PAGE_S2_DEVICE as read-only by default
  arm/arm64: KVM: add 'writable' parameter to kvm_phys_addr_ioremap
  arm/arm64: KVM: fix potential NULL dereference in user_mem_abort()
  arm/arm64: KVM: use __GFP_ZERO not memset() to get zeroed pages
  ARM: KVM: fix vgic-disabled build
  arm: kvm: fix CPU hotplug
parents 857b50f5 2df36a5d
Loading
Loading
Loading
Loading
+27 −4
Original line number Diff line number Diff line
@@ -37,6 +37,11 @@
 */
#define TRAMPOLINE_VA		UL(CONFIG_VECTORS_BASE)

/*
 * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
 */
#define KVM_MMU_CACHE_MIN_PAGES	2

#ifndef __ASSEMBLY__

#include <asm/cacheflush.h>
@@ -50,7 +55,7 @@ void free_hyp_pgds(void);
int kvm_alloc_stage2_pgd(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm *kvm);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size);
			  phys_addr_t pa, unsigned long size, bool writable);

int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);

@@ -83,6 +88,11 @@ static inline void kvm_clean_pgd(pgd_t *pgd)
	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
}

static inline void kvm_clean_pmd(pmd_t *pmd)
{
	clean_dcache_area(pmd, PTRS_PER_PMD * sizeof(pmd_t));
}

static inline void kvm_clean_pmd_entry(pmd_t *pmd)
{
	clean_pmd_entry(pmd);
@@ -123,10 +133,23 @@ static inline bool kvm_page_empty(void *ptr)
}


#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
#define kvm_pud_table_empty(pudp) (0)
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
#define kvm_pud_table_empty(kvm, pudp) (0)

#define KVM_PREALLOC_LEVEL	0

static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
{
	return 0;
}

static inline void kvm_free_hwpgd(struct kvm *kvm) { }

static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
	return kvm->arch.pgd;
}

struct kvm;

+1 −1
Original line number Diff line number Diff line
@@ -100,7 +100,7 @@ extern pgprot_t pgprot_s2_device;
#define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_HYP)
#define PAGE_HYP_DEVICE		_MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
#define PAGE_S2			_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR)
#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY)

#define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
#define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
+3 −2
Original line number Diff line number Diff line
@@ -409,7 +409,7 @@ static void update_vttbr(struct kvm *kvm)
	kvm_next_vmid++;

	/* update vttbr to be used with the new vmid */
	pgd_phys = virt_to_phys(kvm->arch.pgd);
	pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
	kvm->arch.vttbr = pgd_phys | vmid;
@@ -808,6 +808,7 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
	switch (action) {
	case CPU_STARTING:
	case CPU_STARTING_FROZEN:
		if (__hyp_get_vectors() == hyp_default_vectors)
			cpu_init_hyp_mode(NULL);
		break;
	}
+7 −0
Original line number Diff line number Diff line
@@ -433,10 +433,17 @@ ARM_BE8(rev r10, r10 )
	str	r3, [r11, #VGIC_V2_CPU_HCR]
	str	r4, [r11, #VGIC_V2_CPU_VMCR]
	str	r5, [r11, #VGIC_V2_CPU_MISR]
#ifdef CONFIG_CPU_ENDIAN_BE8
	str	r6, [r11, #(VGIC_V2_CPU_EISR + 4)]
	str	r7, [r11, #VGIC_V2_CPU_EISR]
	str	r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
	str	r9, [r11, #VGIC_V2_CPU_ELRSR]
#else
	str	r6, [r11, #VGIC_V2_CPU_EISR]
	str	r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
	str	r8, [r11, #VGIC_V2_CPU_ELRSR]
	str	r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
#endif
	str	r10, [r11, #VGIC_V2_CPU_APR]

	/* Clear GICH_HCR */
+197 −38
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))

#define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))

@@ -134,7 +134,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (kvm_pte_table_empty(start_pte))
	if (kvm_pte_table_empty(kvm, start_pte))
		clear_pmd_entry(kvm, pmd, start_addr);
}

@@ -158,7 +158,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
		}
	} while (pmd++, addr = next, addr != end);

	if (kvm_pmd_table_empty(start_pmd))
	if (kvm_pmd_table_empty(kvm, start_pmd))
		clear_pud_entry(kvm, pud, start_addr);
}

@@ -182,7 +182,7 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
		}
	} while (pud++, addr = next, addr != end);

	if (kvm_pud_table_empty(start_pud))
	if (kvm_pud_table_empty(kvm, start_pud))
		clear_pgd_entry(kvm, pgd, start_addr);
}

@@ -306,7 +306,7 @@ void free_boot_hyp_pgd(void)
	if (boot_hyp_pgd) {
		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
		free_pages((unsigned long)boot_hyp_pgd, pgd_order);
		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
		boot_hyp_pgd = NULL;
	}

@@ -343,7 +343,7 @@ void free_hyp_pgds(void)
		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);

		free_pages((unsigned long)hyp_pgd, pgd_order);
		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
		hyp_pgd = NULL;
	}

@@ -401,13 +401,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
	return 0;
}

static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int ret;

	addr = start;
	do {
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				return -ENOMEM;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
		}

		next = pud_addr_end(addr, end);
		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		if (ret)
			return ret;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int __create_hyp_mappings(pgd_t *pgdp,
				 unsigned long start, unsigned long end,
				 unsigned long pfn, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int err = 0;

@@ -416,22 +449,21 @@ static int __create_hyp_mappings(pgd_t *pgdp,
	end = PAGE_ALIGN(end);
	do {
		pgd = pgdp + pgd_index(addr);
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
		if (pgd_none(*pgd)) {
			pud = pud_alloc_one(NULL, addr);
			if (!pud) {
				kvm_err("Cannot allocate Hyp pud\n");
				err = -ENOMEM;
				goto out;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
			pgd_populate(NULL, pgd, pud);
			get_page(virt_to_page(pgd));
			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
		if (err)
			goto out;
		pfn += (next - addr) >> PAGE_SHIFT;
@@ -521,6 +553,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	int ret;
	pgd_t *pgd;

	if (kvm->arch.pgd != NULL) {
@@ -528,15 +561,38 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
		return -EINVAL;
	}

	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
	if (KVM_PREALLOC_LEVEL > 0) {
		/*
		 * Allocate fake pgd for the page table manipulation macros to
		 * work.  This is not used by the hardware and we have no
		 * alignment requirement for this allocation.
		 */
		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
				       GFP_KERNEL | __GFP_ZERO);
	} else {
		/*
		 * Allocate actual first-level Stage-2 page table used by the
		 * hardware for Stage-2 page table walks.
		 */
		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
	}

	if (!pgd)
		return -ENOMEM;

	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
	ret = kvm_prealloc_hwpgd(kvm, pgd);
	if (ret)
		goto out_err;

	kvm_clean_pgd(pgd);
	kvm->arch.pgd = pgd;

	return 0;
out_err:
	if (KVM_PREALLOC_LEVEL > 0)
		kfree(pgd);
	else
		free_pages((unsigned long)pgd, S2_PGD_ORDER);
	return ret;
}

/**
@@ -572,19 +628,39 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
		return;

	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
	kvm_free_hwpgd(kvm);
	if (KVM_PREALLOC_LEVEL > 0)
		kfree(kvm->arch.pgd);
	else
		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
	kvm->arch.pgd = NULL;
}

static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	pgd = kvm->arch.pgd + pgd_index(addr);
	pud = pud_offset(pgd, addr);
	if (WARN_ON(pgd_none(*pgd))) {
		if (!cache)
			return NULL;
		pud = mmu_memory_cache_alloc(cache);
		pgd_populate(NULL, pgd, pud);
		get_page(virt_to_page(pgd));
	}

	return pud_offset(pgd, addr);
}

static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = stage2_get_pud(kvm, cache, addr);
	if (pud_none(*pud)) {
		if (!cache)
			return NULL;
@@ -630,7 +706,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
	pmd_t *pmd;
	pte_t *pte, old_pte;

	/* Create stage-2 page table mapping - Level 1 */
	/* Create stage-2 page table mapping - Levels 0 and 1 */
	pmd = stage2_get_pmd(kvm, cache, addr);
	if (!pmd) {
		/*
@@ -675,7 +751,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 * @size:	The size of the mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size)
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr, end;
	int ret = 0;
@@ -688,7 +764,11 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);

		ret = mmu_topup_memory_cache(&cache, 2, 2);
		if (writable)
			kvm_set_s2pte_writable(&pte);

		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
						KVM_NR_MEM_OBJS);
		if (ret)
			goto out;
		spin_lock(&kvm->mmu_lock);
@@ -777,6 +857,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	/* Let's check if we will get back a huge page backed by hugetlbfs */
	down_read(&current->mm->mmap_sem);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		up_read(&current->mm->mmap_sem);
		return -EFAULT;
	}

	if (is_vm_hugetlb_page(vma)) {
		hugetlb = true;
		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
@@ -797,7 +883,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	up_read(&current->mm->mmap_sem);

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
				     KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

@@ -843,7 +930,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		}
		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
				     mem_type == PAGE_S2_DEVICE);
			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
	}


@@ -916,6 +1003,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
		goto out_unlock;
	}

	/* Userspace should not be able to register out-of-bounds IPAs */
	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == 0)
		ret = 1;
@@ -1072,8 +1162,8 @@ int kvm_mmu_init(void)
			 (unsigned long)phys_base);
	}

	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);

	if (!hyp_pgd || !boot_hyp_pgd) {
		kvm_err("Hyp mode PGD not allocated\n");
@@ -1126,13 +1216,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_memory_slot *old,
				   enum kvm_mr_change change)
{
	gpa_t gpa = old->base_gfn << PAGE_SHIFT;
	phys_addr_t size = old->npages << PAGE_SHIFT;
	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
		spin_lock(&kvm->mmu_lock);
		unmap_stage2_range(kvm, gpa, size);
		spin_unlock(&kvm->mmu_lock);
	}
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1140,7 +1223,77 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	hva_t hva = mem->userspace_addr;
	hva_t reg_end = hva + mem->memory_size;
	bool writable = !(mem->flags & KVM_MEM_READONLY);
	int ret = 0;

	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
		return 0;

	/*
	 * Prevent userspace from creating a memory region outside of the IPA
	 * space addressable by the KVM guest IPA space.
	 */
	if (memslot->base_gfn + memslot->npages >=
	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
		return -EFAULT;

	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we can map
	 * any of them right now.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Mapping a read-only VMA is only allowed if the
		 * memory region is configured as read-only.
		 */
		if (writable && !(vma->vm_flags & VM_WRITE)) {
			ret = -EPERM;
			break;
		}

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (vma->vm_flags & VM_PFNMAP) {
			gpa_t gpa = mem->guest_phys_addr +
				    (vm_start - mem->userspace_addr);
			phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
					 vm_start - vma->vm_start;

			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
						    vm_end - vm_start,
						    writable);
			if (ret)
				break;
		}
		hva = vm_end;
	} while (hva < reg_end);

	if (ret) {
		spin_lock(&kvm->mmu_lock);
		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
		spin_unlock(&kvm->mmu_lock);
	}
	return ret;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -1165,4 +1318,10 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = slot->npages << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	unmap_stage2_range(kvm, gpa, size);
	spin_unlock(&kvm->mmu_lock);
}
Loading