Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 425333bf authored by Alexey Kardashevskiy, committed by Paul Mackerras
Browse files

KVM: PPC: Avoid marking DMA-mapped pages dirty in real mode



At the moment the real mode handler of H_PUT_TCE calls iommu_tce_xchg_rm()
which in turn reads the old TCE and if it was a valid entry, marks
the physical page dirty if it was mapped for writing. Since it is in
real mode, realmode_pfn_to_page() is used instead of pfn_to_page()
to get the page struct. However SetPageDirty() itself reads the compound
page head and returns a virtual address for the head page struct and
setting dirty bit for that kills the system.

This adds additional dirty bit tracking into the MM/IOMMU API for use
in the real mode. Note that this does not change how VFIO and
KVM (in virtual mode) set this bit. The KVM (real mode) changes include:
- use the lowest bit of the cached host phys address to carry
the dirty bit;
- mark pages dirty when they are unpinned which happens when
the preregistered memory is released which always happens in virtual
mode;
- add mm_iommu_ua_mark_dirty_rm() helper to set delayed dirty bit;
- change iommu_tce_xchg_rm() to take the kvm struct for the mm to use
in the new mm_iommu_ua_mark_dirty_rm() helper;
- move iommu_tce_xchg_rm() to book3s_64_vio_hv.c (which is the only
caller anyway) to reduce the real mode KVM and IOMMU knowledge
across different subsystems.

This removes realmode_pfn_to_page() as it is not used anymore.

While we are at it, remove some EXPORT_SYMBOL_GPL() markers, as that code is
for real mode only and modules cannot call it anyway.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
parent bdf7ffc8
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -1051,7 +1051,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
	return hash__vmemmap_remove_mapping(start, page_size);
}
#endif
struct page *realmode_pfn_to_page(unsigned long pfn);

static inline pte_t pmd_pte(pmd_t pmd)
{
+0 −2
Original line number Diff line number Diff line
@@ -220,8 +220,6 @@ extern void iommu_del_device(struct device *dev);
extern int __init tce_iommu_bus_notifier_init(void);
extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
		unsigned long *hpa, enum dma_data_direction *direction);
extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
		unsigned long *hpa, enum dma_data_direction *direction);
#else
static inline void iommu_register_group(struct iommu_table_group *table_group,
					int pci_domain_number,
+1 −0
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
#endif
+0 −25
Original line number Diff line number Diff line
@@ -1013,31 +1013,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg);

#ifdef CONFIG_PPC_BOOK3S_64
/*
 * Real-mode counterpart of iommu_tce_xchg(): exchange the TCE at @entry,
 * returning the previous mapping through @hpa/@direction, and mark the old
 * backing page dirty when the old mapping allowed device writes.
 *
 * NOTE(review): this is the version REMOVED by this commit.  Per the commit
 * message, SetPageDirty() reads the compound page head and returns a virtual
 * address for the head page struct, which is unsafe to touch in real mode;
 * the replacement in book3s_64_vio_hv.c defers dirty tracking to the
 * MM/IOMMU layer via mm_iommu_ua_mark_dirty_rm().
 */
long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	long ret;

	/* Swap in the new TCE; *hpa/*direction now describe the old one */
	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);

	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
			(*direction == DMA_BIDIRECTIONAL))) {
		/* Old mapping was device-writable: the page may be dirty */
		struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);

		if (likely(pg)) {
			SetPageDirty(pg);
		} else {
			/* No usable page struct in real mode: undo the
			 * exchange and report failure to the caller */
			tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
			ret = -EFAULT;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
#endif

int iommu_take_ownership(struct iommu_table *tbl)
{
	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
+31 −8
Original line number Diff line number Diff line
@@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
/*
 * Real-mode TCE exchange (replacement moved here from iommu.c).
 *
 * Exchanges the TCE at @entry and, when the old mapping allowed device
 * writes (DMA_FROM_DEVICE or DMA_BIDIRECTIONAL), records a delayed dirty
 * bit for the old page via mm_iommu_ua_mark_dirty_rm() using the cached
 * userspace address, instead of calling SetPageDirty() — which is unsafe
 * in real mode because it dereferences the compound page head.
 *
 * @mm is the guest owner's mm (kvm->mm) used to look up the preregistered
 * memory region for the UA.
 */
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
		unsigned long entry, unsigned long *hpa,
		enum dma_data_direction *direction)
{
	long ret;

	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);

	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
				(*direction == DMA_BIDIRECTIONAL))) {
		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
		/*
		 * kvmppc_rm_tce_iommu_do_map() updates the UA cache after
		 * calling this so we still get here a valid UA.
		 */
		if (pua && *pua)
			mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua));
	}

	return ret;
}

/*
 * Invalidate the TCE at @entry by exchanging it with an empty (DMA_NONE)
 * mapping; the previous hpa/direction are discarded.
 *
 * NOTE(review): this span is a markerless diff rendering — the two adjacent
 * iommu_tce_xchg_rm() calls below are the pre-change (no mm argument) and
 * post-change (kvm->mm argument) forms of the SAME single call, not two
 * calls in the final code.
 */
static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
		unsigned long entry)
{
	unsigned long hpa = 0;
	enum dma_data_direction dir = DMA_NONE;

	iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
	iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
}

static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -224,7 +247,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
	unsigned long hpa = 0;
	long ret;

	if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
	if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
		/*
		 * real mode xchg can fail if struct page crosses
		 * a page boundary
@@ -236,7 +259,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,

	ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
	if (ret)
		iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
		iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);

	return ret;
}
@@ -282,7 +305,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
	if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
		return H_CLOSED;

	ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
	ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
	if (ret) {
		mm_iommu_mapped_dec(mem);
		/*
@@ -371,7 +394,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
			return ret;

		WARN_ON_ONCE_RM(1);
		kvmppc_rm_clear_tce(stit->tbl, entry);
		kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
	}

	kvmppc_tce_put(stt, entry, tce);
@@ -520,7 +543,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
				goto unlock_exit;

			WARN_ON_ONCE_RM(1);
			kvmppc_rm_clear_tce(stit->tbl, entry);
			kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
		}

		kvmppc_tce_put(stt, entry + i, tce);
@@ -571,7 +594,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
				return ret;

			WARN_ON_ONCE_RM(1);
			kvmppc_rm_clear_tce(stit->tbl, entry);
			kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
		}
	}

Loading