Commit c10c21ef authored by Alexey Kardashevskiy, committed by Michael Ellerman

powerpc/vfio/iommu/kvm: Do not pin device memory



This new memory does not have page structs as it is not plugged into
the host, so gup() will fail on it anyway.

This adds two helpers:
- mm_iommu_newdev() to preregister "memory device" memory so that
the rest of the API can still be used;
- mm_iommu_is_devmem() to tell whether a physical address belongs to
one of these new regions, which must not be unpinned.

This adds @mm to tce_page_is_contained() and iommu_tce_xchg() so they
can test whether the memory is device memory and avoid pfn_to_page().

This adds a check for device memory in mm_iommu_ua_mark_dirty_rm(),
which does delayed page dirtying.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent e0bf78b0
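
For orientation before the hunks, a minimal, hypothetical kernel-side sketch of how the two new helpers are meant to fit together. Only mm_iommu_newdev(), mm_iommu_is_devmem() and mm_iommu_put() are from this commit (signatures as in the hunks below); the wrapper function and its values are invented for illustration.

/*
 * Illustration only, not part of the commit: preregister a device
 * memory window (no pinning, there are no page structs behind
 * dev_hpa) and later test a host physical address against it before
 * anything like SetPageDirty(pfn_to_page(...)) is attempted.
 */
static long example_devmem_flow(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	unsigned long size = 0;
	long ret;

	/* Like mm_iommu_new(), but backed by dev_hpa, not pinned pages */
	ret = mm_iommu_newdev(mm, ua, entries, dev_hpa, &mem);
	if (ret)
		return ret;

	/* True for any hpa in [dev_hpa, dev_hpa + (entries << PAGE_SHIFT)) */
	if (mm_iommu_is_devmem(mm, dev_hpa, PAGE_SHIFT, &size))
		pr_debug("device memory, %lu usable bytes\n", size);

	return mm_iommu_put(mm, mem);
}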
arch/powerpc/include/asm/iommu.h +3 −2
@@ -218,8 +218,9 @@ extern void iommu_register_group(struct iommu_table_group *table_group,
 extern int iommu_add_device(struct device *dev);
 extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
-extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction);
+extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction);
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
 					int pci_domain_number,
arch/powerpc/include/asm/mmu_context.h +11 −0
@@ -24,6 +24,9 @@ extern bool mm_iommu_preregistered(struct mm_struct *mm);
 extern long mm_iommu_new(struct mm_struct *mm,
 		unsigned long ua, unsigned long entries,
 		struct mm_iommu_table_group_mem_t **pmem);
+extern long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+		unsigned long entries, unsigned long dev_hpa,
+		struct mm_iommu_table_group_mem_t **pmem);
 extern long mm_iommu_put(struct mm_struct *mm,
 		struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_init(struct mm_struct *mm);
@@ -39,8 +42,16 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
+extern bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+		unsigned int pageshift, unsigned long *size);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #else
+static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+		unsigned int pageshift, unsigned long *size)
+{
+	return false;
+}
 #endif
 extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
 extern void set_context(unsigned long id, pgd_t *pgd);
arch/powerpc/kernel/iommu.c +8 −3
@@ -47,6 +47,7 @@
 #include <asm/fadump.h>
 #include <asm/vio.h>
 #include <asm/tce.h>
+#include <asm/mmu_context.h>
 
 #define DBG(...)
 
@@ -993,15 +994,19 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
 }
 EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
 
-long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction)
+long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction)
 {
 	long ret;
+	unsigned long size = 0;
 
 	ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
 
 	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
-			(*direction == DMA_BIDIRECTIONAL)))
+			(*direction == DMA_BIDIRECTIONAL)) &&
+			!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
+					&size))
 		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
 
 	/* if (unlikely(ret))
arch/powerpc/kvm/book3s_64_vio.c +10 −8
@@ -397,12 +397,13 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
 	return H_SUCCESS;
 }
 
-static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
+static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
+		unsigned long entry)
 {
 	unsigned long hpa = 0;
 	enum dma_data_direction dir = DMA_NONE;
 
-	iommu_tce_xchg(tbl, entry, &hpa, &dir);
+	iommu_tce_xchg(mm, tbl, entry, &hpa, &dir);
 }
 
 static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -433,7 +434,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
 	unsigned long hpa = 0;
 	long ret;
 
-	if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
+	if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir)))
 		return H_TOO_HARD;
 
 	if (dir == DMA_NONE)
@@ -441,7 +442,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
 
 	ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
 	if (ret != H_SUCCESS)
-		iommu_tce_xchg(tbl, entry, &hpa, &dir);
+		iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
 
 	return ret;
 }
@@ -487,7 +488,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 	if (mm_iommu_mapped_inc(mem))
 		return H_TOO_HARD;
 
-	ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
+	ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
 	if (WARN_ON_ONCE(ret)) {
 		mm_iommu_mapped_dec(mem);
 		return H_TOO_HARD;
@@ -566,7 +567,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 					entry, ua, dir);
 
 		if (ret != H_SUCCESS) {
-			kvmppc_clear_tce(stit->tbl, entry);
+			kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
 			goto unlock_exit;
 		}
 	}
@@ -655,7 +656,8 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 					iommu_tce_direction(tce));
 
 			if (ret != H_SUCCESS) {
-				kvmppc_clear_tce(stit->tbl, entry);
+				kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
+						entry);
 				goto unlock_exit;
 			}
 		}
@@ -704,7 +706,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
 				return ret;
 
 			WARN_ON_ONCE(1);
-			kvmppc_clear_tce(stit->tbl, entry);
+			kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
 		}
 	}
 
arch/powerpc/mm/mmu_context_iommu.c +84 −9
@@ -36,6 +36,8 @@ struct mm_iommu_table_group_mem_t {
 	u64 ua;			/* userspace address */
 	u64 entries;		/* number of entries in hpas[] */
 	u64 *hpas;		/* vmalloc'ed */
+#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
+	u64 dev_hpa;		/* Device memory base address */
 };
 
 static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
@@ -126,7 +128,8 @@ static int mm_iommu_move_page_from_cma(struct page *page)
 	return 0;
 }
 
-long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
+		unsigned long entries, unsigned long dev_hpa,
 		struct mm_iommu_table_group_mem_t **pmem)
 {
 	struct mm_iommu_table_group_mem_t *mem;
@@ -150,11 +153,13 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 
 	}
 
-	ret = mm_iommu_adjust_locked_vm(mm, entries, true);
-	if (ret)
-		goto unlock_exit;
+	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+		ret = mm_iommu_adjust_locked_vm(mm, entries, true);
+		if (ret)
+			goto unlock_exit;
 
-	locked_entries = entries;
+		locked_entries = entries;
+	}
 
 	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 	if (!mem) {
@@ -162,6 +167,13 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 		goto unlock_exit;
 	}
 
+	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
+		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
+		mem->dev_hpa = dev_hpa;
+		goto good_exit;
+	}
+	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
+
 	/*
 	 * For a starting point for a maximum page size calculation
 	 * we use @ua and @entries natural alignment to allow IOMMU pages
@@ -230,6 +242,7 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
 	}
 
+good_exit:
 	atomic64_set(&mem->mapped, 1);
 	mem->used = 1;
 	mem->ua = ua;
@@ -246,13 +259,31 @@ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
 
 	return ret;
 }
+
+long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+		struct mm_iommu_table_group_mem_t **pmem)
+{
+	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
+			pmem);
+}
 EXPORT_SYMBOL_GPL(mm_iommu_new);
 
+long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+		unsigned long entries, unsigned long dev_hpa,
+		struct mm_iommu_table_group_mem_t **pmem)
+{
+	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_newdev);
+
 static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
 {
 	long i;
 	struct page *page = NULL;
 
+	if (!mem->hpas)
+		return;
+
 	for (i = 0; i < mem->entries; ++i) {
 		if (!mem->hpas[i])
 			continue;
@@ -294,6 +325,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
 long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
 {
 	long ret = 0;
+	unsigned long entries, dev_hpa;
 
 	mutex_lock(&mem_list_mutex);
 
@@ -315,9 +347,12 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
 	}
 
 	/* @mapped became 0 so now mappings are disabled, release the region */
+	entries = mem->entries;
+	dev_hpa = mem->dev_hpa;
 	mm_iommu_release(mem);
 
-	mm_iommu_adjust_locked_vm(mm, mem->entries, false);
+	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+		mm_iommu_adjust_locked_vm(mm, entries, false);
 
 unlock_exit:
 	mutex_unlock(&mem_list_mutex);
@@ -387,7 +422,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
 {
 	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
-	u64 *va = &mem->hpas[entry];
+	u64 *va;
 
 	if (entry >= mem->entries)
 		return -EFAULT;
@@ -395,6 +430,12 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 	if (pageshift > mem->pageshift)
 		return -EFAULT;
 
+	if (!mem->hpas) {
+		*hpa = mem->dev_hpa + (ua - mem->ua);
+		return 0;
+	}
+
+	va = &mem->hpas[entry];
 	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
 
 	return 0;
@@ -405,7 +446,6 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
 {
 	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
-	void *va = &mem->hpas[entry];
 	unsigned long *pa;
 
 	if (entry >= mem->entries)
@@ -414,7 +454,12 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 	if (pageshift > mem->pageshift)
 		return -EFAULT;
 
-	pa = (void *) vmalloc_to_phys(va);
+	if (!mem->hpas) {
+		*hpa = mem->dev_hpa + (ua - mem->ua);
+		return 0;
+	}
+
+	pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
 	if (!pa)
 		return -EFAULT;
 
@@ -434,6 +479,9 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
 	if (!mem)
 		return;
 
+	if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
+		return;
+
 	entry = (ua - mem->ua) >> PAGE_SHIFT;
 	va = &mem->hpas[entry];
 
@@ -444,6 +492,33 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
 	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
 }
 
+bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+		unsigned int pageshift, unsigned long *size)
+{
+	struct mm_iommu_table_group_mem_t *mem;
+	unsigned long end;
+
+	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+			continue;
+
+		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
+		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
+			/*
+			 * Since the IOMMU page size might be bigger than
+			 * PAGE_SIZE, the amount of preregistered memory
+			 * starting from @hpa might be smaller than 1<<pageshift
+			 * and the caller needs to distinguish this situation.
+			 */
+			*size = min(1UL << pageshift, end - hpa);
+			return true;
+		}
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
+
 long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
 {
 	if (atomic64_inc_not_zero(&mem->mapped))
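
A closing note on the two alignment computations above, since they are easy to misread. mm_iommu_is_devmem() clamps *size = min(1UL << pageshift, end - hpa) because an IOMMU page may be bigger than PAGE_SIZE: with a 64K IOMMU page (pageshift = 16) and a region ending 4K past @hpa, the caller gets size = 4096 and can tell that only part of the IOMMU page is backed by the preregistered region. Likewise, mm_iommu_do_alloc() derives mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT)): the lowest set bit of (base | size) bounds the largest natural page size both values are aligned to. A standalone userspace sketch of that computation, with hypothetical values and __builtin_ctzl() standing in for the kernel's __ffs():

#include <stdio.h>

int main(void)
{
	/* Hypothetical 64K-page system with a 1G window at 8G */
	unsigned long page_shift = 16;
	unsigned long dev_hpa = 0x200000000UL;		/* lowest set bit: 33 */
	unsigned long size = 0x4000UL << page_shift;	/* 1G, lowest set bit: 30 */

	/* Same idea as __ffs(dev_hpa | (entries << PAGE_SHIFT)) */
	unsigned long pageshift = __builtin_ctzl(dev_hpa | size);

	/* Prints 30: the window supports up to 1G IOMMU pages */
	printf("max page shift: %lu\n", pageshift);
	return 0;
}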