Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e1a1ef84 authored by Alexey Kardashevskiy's avatar Alexey Kardashevskiy Committed by Paul Mackerras
Browse files

KVM: PPC: Book3S: Allocate guest TCEs on demand too



We already allocate hardware TCE tables in multiple levels and skip
intermediate levels when we can; now it is the turn of the KVM TCE tables.
Thankfully these are already allocated in 2 levels.

This moves the table's last level allocation from the creating helper to
kvmppc_tce_put() and kvm_spapr_tce_fault(). Since such allocation cannot
be done in real mode, this creates a virtual mode version of
kvmppc_tce_put() which handles allocations.

This adds kvmppc_rm_ioba_validate() to do an additional test of whether
the subsequent kvmppc_tce_put() needs a page which has not been allocated;
if this is the case, we bail out to the virtual mode handlers.

The allocations are protected by a new mutex as kvm->lock is not suitable
for the task because the fault handler is called with the mmap_sem held
but kvmhv_setup_mmu() locks kvm->lock and mmap_sem in the reverse order.

Signed-off-by: default avatarAlexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: default avatarPaul Mackerras <paulus@ozlabs.org>
parent 2001825e
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table {
	struct kref kref;
};

#define TCES_PER_PAGE	(PAGE_SIZE / sizeof(u64))

struct kvmppc_spapr_tce_table {
	struct list_head list;
	struct kvm *kvm;
@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table {
	u64 offset;		/* in pages */
	u64 size;		/* window size in pages */
	struct list_head iommu_tables;
	struct mutex alloc_lock;
	struct page *pages[0];
};

+0 −2
Original line number Diff line number Diff line
@@ -197,8 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
		(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
				(stt)->size, (ioba), (npages)) ?        \
				H_PARAMETER : H_SUCCESS)
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
		unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
			     unsigned long ioba, unsigned long tce);
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+58 −14
Original line number Diff line number Diff line
@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head)
	unsigned long i, npages = kvmppc_tce_pages(stt->size);

	for (i = 0; i < npages; i++)
		if (stt->pages[i])
			__free_page(stt->pages[i]);

	kfree(stt);
}

/*
 * Returns the backing page for TCE table page @sttpage, allocating it on
 * first use.
 *
 * Uses double-checked locking: the unlocked read is safe because a slot in
 * stt->pages[] only ever goes NULL -> allocated under stt->alloc_lock, and
 * pages are not freed until the whole table is released (see
 * release_spapr_tce_table()).
 *
 * Returns NULL if the allocation failed; callers must handle that.
 */
static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
		unsigned long sttpage)
{
	struct page *page = stt->pages[sttpage];

	/* Fast path: already allocated, no locking required */
	if (page)
		return page;

	mutex_lock(&stt->alloc_lock);
	/* Re-check under the lock in case another thread raced us here */
	page = stt->pages[sttpage];
	if (!page) {
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		WARN_ON_ONCE(!page);
		if (page)
			stt->pages[sttpage] = page;
	}
	mutex_unlock(&stt->alloc_lock);

	return page;
}

static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{
	struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
	if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
		return VM_FAULT_SIGBUS;

	page = stt->pages[vmf->pgoff];
	page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
	if (!page)
		return VM_FAULT_OOM;

	get_page(page);
	vmf->page = page;
	return 0;
@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
	struct kvmppc_spapr_tce_table *siter;
	unsigned long npages, size = args->size;
	int ret = -ENOMEM;
	int i;

	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
		(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
	stt->offset = args->offset;
	stt->size = size;
	stt->kvm = kvm;
	mutex_init(&stt->alloc_lock);
	INIT_LIST_HEAD_RCU(&stt->iommu_tables);

	for (i = 0; i < npages; i++) {
		stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!stt->pages[i])
			goto fail;
	}

	mutex_lock(&kvm->lock);

	/* Check this LIOBN hasn't been previously allocated */
@@ -352,11 +371,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
	if (ret >= 0)
		return ret;

 fail:
	for (i = 0; i < npages; i++)
		if (stt->pages[i])
			__free_page(stt->pages[i]);

	kfree(stt);
 fail_acct:
	kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
@@ -413,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
	return H_SUCCESS;
}

/*
 * Handles TCE requests for emulated devices.
 * Puts guest TCE values to the table and expects user space to convert them.
 * Cannot fail so kvmppc_tce_validate must be called before it.
 *
 * Virtual-mode only: may allocate the backing page on demand via
 * kvm_spapr_get_tce_page(), which is not possible in real mode.
 */
static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
		unsigned long idx, unsigned long tce)
{
	struct page *page;
	u64 *tbl;
	unsigned long sttpage;

	/* @idx is an absolute TCE index; make it relative to this window */
	idx -= stt->offset;
	sttpage = idx / TCES_PER_PAGE;
	page = stt->pages[sttpage];

	if (!page) {
		/* We allow any TCE, not just with read|write permissions */
		if (!tce)
			return;

		/*
		 * Storing a non-empty TCE into a not-yet-allocated page:
		 * allocate it now. On allocation failure we silently drop
		 * the update (this function cannot fail).
		 */
		page = kvm_spapr_get_tce_page(stt, sttpage);
		if (!page)
			return;
	}
	tbl = page_to_virt(page);

	tbl[idx % TCES_PER_PAGE] = tce;
}

static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
		unsigned long entry)
{
+49 −17
Original line number Diff line number Diff line
@@ -66,8 +66,6 @@

#endif

#define TCES_PER_PAGE	(PAGE_SIZE / sizeof(u64))

/*
 * Finds a TCE table descriptor by LIOBN.
 *
@@ -148,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,

	return H_SUCCESS;
}
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

/* Note on the use of page_address() in real mode,
 *
@@ -180,13 +177,9 @@ static u64 *kvmppc_page_address(struct page *page)
/*
 * Handles TCE requests for emulated devices.
 * Puts guest TCE values to the table and expects user space to convert them.
 * Called in both real and virtual modes.
 * Cannot fail so kvmppc_tce_validate must be called before it.
 *
 * WARNING: This will be called in real-mode on HV KVM and virtual
 *          mode on PR KVM
 * Cannot fail so kvmppc_rm_tce_validate must be called before it.
 */
void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
		unsigned long idx, unsigned long tce)
{
	struct page *page;
@@ -194,13 +187,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,

	idx -= stt->offset;
	page = stt->pages[idx / TCES_PER_PAGE];
	/*
	 * page must not be NULL in real mode,
	 * kvmppc_rm_ioba_validate() must have taken care of this.
	 */
	WARN_ON_ONCE_RM(!page);
	tbl = kvmppc_page_address(page);

	tbl[idx % TCES_PER_PAGE] = tce;
}
EXPORT_SYMBOL_GPL(kvmppc_tce_put);

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * TCE pages are allocated on demand by the virtual-mode kvmppc_tce_put();
 * kvmppc_rm_tce_put() cannot allocate in real mode.
 * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. every TCE
 * page backing [ioba, ioba + npages) is already allocated or not required
 * (when clearing tce entries).
 * Returns H_TOO_HARD so the caller bails out to the virtual-mode handler
 * which can allocate.
 */
static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
		unsigned long ioba, unsigned long npages, bool clearing)
{
	unsigned long i, idx, sttpage, sttpages;
	unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages);

	if (ret)
		return ret;
	/*
	 * clearing==true says kvmppc_rm_tce_put won't be allocating pages
	 * for empty tces.
	 */
	if (clearing)
		return H_SUCCESS;

	/* First backing page of the range, and how many pages it spans */
	idx = (ioba >> stt->page_shift) - stt->offset;
	sttpage = idx / TCES_PER_PAGE;
	sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
			TCES_PER_PAGE;
	for (i = sttpage; i < sttpage + sttpages; ++i)
		if (!stt->pages[i])
			return H_TOO_HARD;

	return H_SUCCESS;
}

static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
		unsigned long entry, unsigned long *hpa,
		enum dma_data_direction *direction)
@@ -378,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
	if (!stt)
		return H_TOO_HARD;

	ret = kvmppc_ioba_validate(stt, ioba, 1);
	ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0);
	if (ret != H_SUCCESS)
		return ret;

@@ -406,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
		}
	}

	kvmppc_tce_put(stt, entry, tce);
	kvmppc_rm_tce_put(stt, entry, tce);

	return H_SUCCESS;
}
@@ -477,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
	if (tce_list & (SZ_4K - 1))
		return H_PARAMETER;

	ret = kvmppc_ioba_validate(stt, ioba, npages);
	ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false);
	if (ret != H_SUCCESS)
		return ret;

@@ -554,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
			}
		}

		kvmppc_tce_put(stt, entry + i, tce);
		kvmppc_rm_tce_put(stt, entry + i, tce);
	}

unlock_exit:
@@ -580,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
	if (!stt)
		return H_TOO_HARD;

	ret = kvmppc_ioba_validate(stt, ioba, npages);
	ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0);
	if (ret != H_SUCCESS)
		return ret;

@@ -607,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
	}

	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
		kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);

	return H_SUCCESS;
}
@@ -632,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,

	idx = (ioba >> stt->page_shift) - stt->offset;
	page = stt->pages[idx / TCES_PER_PAGE];
	if (!page) {
		vcpu->arch.regs.gpr[4] = 0;
		return H_SUCCESS;
	}
	tbl = (u64 *)page_address(page);

	vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];