Merge branch 'kvm-arm-next-3.13-2' into kvm-arm-next (e4b3c9c2) · Commits · e / devices / android_kernel_teracube_emerald

arch/arm/include/asm/kvm_mmu.h

+14 −3

Original line number	Diff line number	Diff line
		@@ -62,6 +62,12 @@ phys_addr_t kvm_get_idmap_vector(void);
		int kvm_mmu_init(void);
		void kvm_clear_hyp_idmap(void);

		/*
		 * Store new_pmd into the entry pointed to by pmd, then call
		 * flush_pmd_entry() on that entry. The flush is issued after the
		 * store, so it acts on the value that was just written.
		 */
		static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
		{
		*pmd = new_pmd;
		flush_pmd_entry(pmd);
		}

		static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
		{
		*pte = new_pte;
		@@ -103,9 +109,15 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
		pte_val(*pte) \|= L_PTE_S2_RDWR;
		}

		/*
		 * Mark a stage-2 PMD entry writable by OR-ing the L_PMD_S2_RDWR
		 * bits into its value. The entry is modified in place; no cache or
		 * TLB maintenance is performed here.
		 */
		static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
		{
			pmd_val(*pmd) |= L_PMD_S2_RDWR;
		}

		struct kvm;

		static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
		static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
		unsigned long size)
		{
		/*
		* If we are going to insert an instruction page and the icache is
		@@ -120,8 +132,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
		* need any kind of flushing (DDI 0406C.b - Page B3-1392).
		*/
		if (icache_is_pipt()) {
		unsigned long hva = gfn_to_hva(kvm, gfn);
		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
		__cpuc_coherent_user_range(hva, hva + size);
		} else if (!icache_is_vivt_asid_tagged()) {
		/* any kind of VIPT cache */
		__flush_icache_all();

arch/arm/include/asm/pgtable-3level.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -126,6 +126,8 @@
		#define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */
		#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */

		/* PMD-level stage-2 RDWR value: 3 shifted to bit 6, i.e. bits 7:6 set. */
		#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */

		/*
		* Hyp-mode PL2 PTE definitions for LPAE.
		*/

arch/arm/kvm/mmu.c

+185 −38

Original line number	Diff line number	Diff line
		@@ -19,6 +19,7 @@
		#include <linux/mman.h>
		#include <linux/kvm_host.h>
		#include <linux/io.h>
		#include <linux/hugetlb.h>
		#include <trace/events/kvm.h>
		#include <asm/pgalloc.h>
		#include <asm/cacheflush.h>
		@@ -41,6 +42,8 @@ static unsigned long hyp_idmap_start;
		static unsigned long hyp_idmap_end;
		static phys_addr_t hyp_idmap_vector;

		/* True when _x is a huge PMD according to pmd_huge() or pmd_trans_huge(). */
		#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))

		static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
		{
		/*
		@@ -93,19 +96,29 @@ static bool page_empty(void *ptr)

		/*
		 * Tear down a single PUD entry.
		 *
		 * @kvm:  VM handed to kvm_tlb_flush_vmid_ipa()
		 * @pud:  entry to clear
		 * @addr: address passed to the TLB flush for this entry
		 *
		 * A huge PUD is simply cleared and flushed. Otherwise the PMD table
		 * the entry points to is looked up before the entry is cleared, and
		 * freed once the TLB flush has been issued. In both cases one
		 * reference is dropped on the page that holds the PUD entry.
		 */
		static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
		{
			if (pud_huge(*pud)) {
				pud_clear(pud);
				kvm_tlb_flush_vmid_ipa(kvm, addr);
			} else {
				/* Fetch the table pointer before the entry is wiped. */
				pmd_t *pmd_table = pmd_offset(pud, 0);
				pud_clear(pud);
				kvm_tlb_flush_vmid_ipa(kvm, addr);
				pmd_free(NULL, pmd_table);
			}
			put_page(virt_to_page(pud));
		}

		/*
		 * Tear down a single PMD entry.
		 *
		 * @kvm:  VM handed to kvm_tlb_flush_vmid_ipa()
		 * @pmd:  entry to clear
		 * @addr: address passed to the TLB flush for this entry
		 *
		 * A huge PMD (as reported by kvm_pmd_huge()) is simply cleared and
		 * flushed. Otherwise the PTE table the entry points to is looked up
		 * before the entry is cleared, and freed once the TLB flush has been
		 * issued. In both cases one reference is dropped on the page that
		 * holds the PMD entry.
		 */
		static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
		{
			if (kvm_pmd_huge(*pmd)) {
				pmd_clear(pmd);
				kvm_tlb_flush_vmid_ipa(kvm, addr);
			} else {
				/* Fetch the table pointer before the entry is wiped. */
				pte_t *pte_table = pte_offset_kernel(pmd, 0);
				pmd_clear(pmd);
				kvm_tlb_flush_vmid_ipa(kvm, addr);
				pte_free_kernel(NULL, pte_table);
			}
			put_page(virt_to_page(pmd));
		}

		@@ -136,18 +149,32 @@ static void unmap_range(struct kvm kvm, pgd_t pgdp,
		continue;
		}

		if (pud_huge(*pud)) {
		/*
		* If we are dealing with a huge pud, just clear it and
		* move on.
		*/
		clear_pud_entry(kvm, pud, addr);
		addr = pud_addr_end(addr, end);
		continue;
		}

		pmd = pmd_offset(pud, addr);
		if (pmd_none(*pmd)) {
		addr = pmd_addr_end(addr, end);
		continue;
		}

		if (!kvm_pmd_huge(*pmd)) {
		pte = pte_offset_kernel(pmd, addr);
		clear_pte_entry(kvm, pte, addr);
		next = addr + PAGE_SIZE;
		}

		/* If we emptied the pte, walk back up the ladder */
		if (page_empty(pte)) {
		/*
		* If the pmd entry is to be cleared, walk back up the ladder
		*/
		if (kvm_pmd_huge(*pmd) \|\| page_empty(pte)) {
		clear_pmd_entry(kvm, pmd, addr);
		next = pmd_addr_end(addr, end);
		if (page_empty(pmd) && !page_empty(pud)) {
		@@ -420,29 +447,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
		kvm->arch.pgd = NULL;
		}


		static int stage2_set_pte(struct kvm kvm, struct kvm_mmu_memory_cache cache,
		phys_addr_t addr, const pte_t *new_pte, bool iomap)
		static pmd_t stage2_get_pmd(struct kvm kvm, struct kvm_mmu_memory_cache *cache,
		phys_addr_t addr)
		{
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte, old_pte;

		/* Create 2nd stage page table mapping - Level 1 */
		pgd = kvm->arch.pgd + pgd_index(addr);
		pud = pud_offset(pgd, addr);
		if (pud_none(*pud)) {
		if (!cache)
		return 0; /* ignore calls from kvm_set_spte_hva */
		return NULL;
		pmd = mmu_memory_cache_alloc(cache);
		pud_populate(NULL, pud, pmd);
		get_page(virt_to_page(pud));
		}

		pmd = pmd_offset(pud, addr);
		return pmd_offset(pud, addr);
		}

		/*
		 * Install a huge mapping at the PMD level for @addr.
		 *
		 * @kvm:     VM passed to stage2_get_pmd() and to the TLB flush
		 * @cache:   memory cache passed to stage2_get_pmd()
		 * @addr:    address whose PMD entry is replaced
		 * @new_pmd: entry to install
		 *
		 * If the previous entry was present, the TLB is flushed for @addr;
		 * otherwise a reference is taken on the page holding the PMD entry.
		 * Always returns 0.
		 */
		static int stage2_set_pmd_huge(struct kvm *kvm,
					       struct kvm_mmu_memory_cache *cache,
					       phys_addr_t addr, const pmd_t *new_pmd)
		{
			pmd_t *pmd, old_pmd;

			pmd = stage2_get_pmd(kvm, cache, addr);
			VM_BUG_ON(!pmd);

			/*
			 * Mapping in huge pages should only happen through a fault. If a
			 * page is merged into a transparent huge page, the individual
			 * subpages of that huge page should be unmapped through MMU
			 * notifiers before we get here.
			 *
			 * Merging of CompoundPages is not supported; they should first be
			 * split, then unmapped, merged, and mapped back in on demand.
			 */
			VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));

			/* Remember the old entry so we know whether a flush is needed. */
			old_pmd = *pmd;
			kvm_set_pmd(pmd, *new_pmd);
			if (pmd_present(old_pmd))
				kvm_tlb_flush_vmid_ipa(kvm, addr);
			else
				get_page(virt_to_page(pmd));
			return 0;
		}

		/* Create 2nd stage page table mapping - Level 2 */
		static int stage2_set_pte(struct kvm kvm, struct kvm_mmu_memory_cache cache,
		phys_addr_t addr, const pte_t *new_pte, bool iomap)
		{
		pmd_t *pmd;
		pte_t *pte, old_pte;

		/* Create stage-2 page table mapping - Level 1 */
		pmd = stage2_get_pmd(kvm, cache, addr);
		if (!pmd) {
		/*
		* Ignore calls from kvm_set_spte_hva for unallocated
		* address ranges.
		*/
		return 0;
		}

		/* Create stage-2 page mappings - Level 2 */
		if (pmd_none(*pmd)) {
		if (!cache)
		return 0; /* ignore calls from kvm_set_spte_hva */
		@@ -507,16 +576,60 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
		return ret;
		}

		/*
		 * Check whether the faulting pfn is part of a transparent huge page
		 * and, if so, retarget the fault at the head of that huge page.
		 *
		 * @pfnp: in/out pfn of the faulting page
		 * @ipap: in/out faulting IPA
		 *
		 * Returns true when pfn_to_page(*pfnp) is a transparent compound
		 * page. In that case, if the pfn is not already aligned to the huge
		 * page (pfn & mask != 0), *ipap is rounded down with PMD_MASK, the
		 * reference held on the original pfn is released, a reference is
		 * taken on the aligned pfn, and *pfnp is updated to it. Returns false
		 * and leaves both values untouched otherwise.
		 */
		static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
		{
			pfn_t pfn = *pfnp;
			gfn_t gfn = *ipap >> PAGE_SHIFT;

			if (PageTransCompound(pfn_to_page(pfn))) {
				unsigned long mask;
				/*
				 * The address we faulted on is backed by a transparent huge
				 * page. However, because we map the compound huge page and
				 * not the individual tail page, we need to transfer the
				 * refcount to the head page. We have to be careful that the
				 * THP doesn't start to split while we are adjusting the
				 * refcounts.
				 *
				 * We are sure this doesn't happen, because mmu_notifier_retry
				 * was successful and we are holding the mmu_lock, so if this
				 * THP is trying to split, it will be blocked in the mmu
				 * notifier before touching any of the pages, specifically
				 * before being able to call __split_huge_page_refcount().
				 *
				 * We can therefore safely transfer the refcount from PG_tail
				 * to PG_head and switch the pfn from a tail page to the head
				 * page accordingly.
				 */
				mask = PTRS_PER_PMD - 1;
				/* gfn and pfn must share the same offset within the huge page. */
				VM_BUG_ON((gfn & mask) != (pfn & mask));
				if (pfn & mask) {
					*ipap &= PMD_MASK;
					kvm_release_pfn_clean(pfn);
					pfn &= ~mask;
					kvm_get_pfn(pfn);
					*pfnp = pfn;
				}

				return true;
			}

			return false;
		}

		static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		gfn_t gfn, struct kvm_memory_slot *memslot,
		struct kvm_memory_slot *memslot,
		unsigned long fault_status)
		{
		pte_t new_pte;
		pfn_t pfn;
		int ret;
		bool write_fault, writable;
		bool write_fault, writable, hugetlb = false, force_pte = false;
		unsigned long mmu_seq;
		gfn_t gfn = fault_ipa >> PAGE_SHIFT;
		unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
		struct kvm *kvm = vcpu->kvm;
		struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
		struct vm_area_struct *vma;
		pfn_t pfn;

		write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
		if (fault_status == FSC_PERM && !write_fault) {
		@@ -524,6 +637,26 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		return -EFAULT;
		}

		/* Let's check if we will get back a huge page backed by hugetlbfs */
		down_read(&current->mm->mmap_sem);
		vma = find_vma_intersection(current->mm, hva, hva + 1);
		if (is_vm_hugetlb_page(vma)) {
		hugetlb = true;
		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
		} else {
		/*
		* Pages belonging to VMAs not aligned to the PMD mapping
		* granularity cannot be mapped using block descriptors even
		* if the pages belong to a THP for the process, because the
		* stage-2 block descriptor will cover more than a single THP
		and we lose atomicity for unmapping, updates, and splits
		* of the THP or other pages in the stage-2 block range.
		*/
		if (vma->vm_start & ~PMD_MASK)
		force_pte = true;
		}
		up_read(&current->mm->mmap_sem);

		/* We need minimum second+third level pages */
		ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
		if (ret)
		@@ -541,26 +674,40 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		*/
		smp_rmb();

		pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
		pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
		if (is_error_pfn(pfn))
		return -EFAULT;

		new_pte = pfn_pte(pfn, PAGE_S2);
		coherent_icache_guest_page(vcpu->kvm, gfn);

		spin_lock(&vcpu->kvm->mmu_lock);
		if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
		spin_lock(&kvm->mmu_lock);
		if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;
		if (!hugetlb && !force_pte)
		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);

		if (hugetlb) {
		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
		new_pmd = pmd_mkhuge(new_pmd);
		if (writable) {
		kvm_set_s2pmd_writable(&new_pmd);
		kvm_set_pfn_dirty(pfn);
		}
		coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
		} else {
		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
		if (writable) {
		kvm_set_s2pte_writable(&new_pte);
		kvm_set_pfn_dirty(pfn);
		}
		stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
		coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
		}


		out_unlock:
		spin_unlock(&vcpu->kvm->mmu_lock);
		spin_unlock(&kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		return 0;
		return ret;
		}

		/**
		@@ -629,7 +776,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu vcpu, struct kvm_run run)

		memslot = gfn_to_memslot(vcpu->kvm, gfn);

		ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
		ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
		if (ret == 0)
		ret = 1;
		out_unlock:

arch/arm64/include/asm/kvm_mmu.h

+9 −3

Original line number	Diff line number	Diff line
		@@ -91,6 +91,7 @@ int kvm_mmu_init(void);
		void kvm_clear_hyp_idmap(void);

		/* kvm_set_pte()/kvm_set_pmd() expand directly to set_pte()/set_pmd(). */
		#define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
		#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)

		static inline bool kvm_is_write_fault(unsigned long esr)
		{
		@@ -116,13 +117,18 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
		pte_val(*pte) \|= PTE_S2_RDWR;
		}

		/*
		 * Mark a stage-2 PMD entry writable by OR-ing the PMD_S2_RDWR bits
		 * into its value. The entry is modified in place; no cache or TLB
		 * maintenance is performed here.
		 */
		static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
		{
			pmd_val(*pmd) |= PMD_S2_RDWR;
		}

		struct kvm;

		static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
		static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
		unsigned long size)
		{
		if (!icache_is_aliasing()) { /* PIPT */
		unsigned long hva = gfn_to_hva(kvm, gfn);
		flush_icache_range(hva, hva + PAGE_SIZE);
		flush_icache_range(hva, hva + size);
		} else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */
		/* any kind of VIPT cache */
		__flush_icache_all();

arch/arm64/include/asm/pgtable-hwdef.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -85,6 +85,8 @@
		#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
		#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */

		/* PMD-level stage-2 RDWR value: 3 shifted to bit 6, i.e. bits 7:6 set. */
		#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */

		/*
		* Memory Attribute override for Stage-2 (MemAttr[3:0])
		*/