
Commit bb65a64c authored by Robin Murphy, committed by Joerg Roedel

iommu/dma: Plumb in the per-CPU IOVA caches



With IOVA allocation suitably tidied up, we are finally free to opt in
to the per-CPU caching mechanism. The caching alone can provide a modest
improvement over walking the rbtree for weedier systems (iperf3 shows
~10% more ethernet throughput on an ARM Juno r1 constrained to a single
650MHz Cortex-A53), but the real gain will be in sidestepping the rbtree
lock contention, which larger ARM-based systems with lots of parallel I/O
are starting to feel the pain of.
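
To see why the caching helps: alloc_iova_fast() first tries a small per-CPU
stash of recently freed ranges, bucketed by allocation order, and only falls
back to the lock-protected rbtree allocator on a miss, so steady-state
alloc/free traffic on a given CPU rarely touches the shared lock. Below is a
minimal userspace sketch of that pattern; the magazine layout, the size-class
count, and the bump allocator standing in for the rbtree path are illustrative
simplifications, not the kernel's iova rcache implementation.

#include <stdio.h>
#include <pthread.h>

#define MAG_SIZE	16	/* ranges cached per CPU, per size class */
#define CACHE_ORDERS	6	/* size classes: 1, 2, 4, ... 32 pages */

/* A per-CPU "magazine": a small stack of recently freed page frame numbers. */
struct magazine {
	unsigned long pfns[MAG_SIZE];
	unsigned int count;
};

/* Thread-local storage stands in for the kernel's per-CPU data. */
static _Thread_local struct magazine cpu_cache[CACHE_ORDERS];

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long next_pfn = 0x1000;

/* Trivial bump allocator standing in for the locked rbtree slow path. */
static unsigned long tree_alloc(unsigned long npages)
{
	unsigned long pfn;

	pthread_mutex_lock(&tree_lock);
	pfn = next_pfn;
	next_pfn += npages;
	pthread_mutex_unlock(&tree_lock);
	return pfn;
}

/* Fast path: try this CPU's size-class cache; no shared lock on a hit. */
static unsigned long alloc_fast(unsigned long npages)
{
	unsigned int order = __builtin_ctzl(npages); /* npages: power of two */

	if (order < CACHE_ORDERS) {
		struct magazine *mag = &cpu_cache[order];

		if (mag->count > 0)
			return mag->pfns[--mag->count]; /* hit: lock-free */
	}
	return tree_alloc(npages); /* miss: contended slow path */
}

/* Freeing refills the local cache instead of touching the shared tree. */
static void free_fast(unsigned long pfn, unsigned long npages)
{
	unsigned int order = __builtin_ctzl(npages);

	if (order < CACHE_ORDERS) {
		struct magazine *mag = &cpu_cache[order];

		if (mag->count < MAG_SIZE) {
			mag->pfns[mag->count++] = pfn;
			return;
		}
	}
	/* A real implementation spills full magazines back to the tree. */
}

int main(void)
{
	unsigned long a = alloc_fast(4);	/* miss: takes tree_lock */

	free_fast(a, 4);			/* refills this CPU's cache */
	printf("recycled without the lock: %s\n",
	       alloc_fast(4) == a ? "yes" : "no");	/* hit */
	return 0;
}

On a hit the allocation is a handful of loads and stores on CPU-local data,
which is where both the single-core iperf3 gain and the scalability win on
larger systems come from.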

Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent a44e6657
+17 −20
drivers/iommu/dma-iommu.c
@@ -361,8 +361,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 {
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
-	unsigned long shift, iova_len;
-	struct iova *iova = NULL;
+	unsigned long shift, iova_len, iova = 0;
 
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
 		cookie->msi_iova += size;
@@ -371,41 +370,39 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 
 	shift = iova_shift(iovad);
 	iova_len = size >> shift;
+	/*
+	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
+	 * will come back to bite us badly, so we have to waste a bit of space
+	 * rounding up anything cacheable to make sure that can't happen. The
+	 * order of the unadjusted size will still match upon freeing.
+	 */
+	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
+		iova_len = roundup_pow_of_two(iova_len);
 
 	if (domain->geometry.force_aperture)
 		dma_limit = min(dma_limit, domain->geometry.aperture_end);
 
 	/* Try to get PCI devices a SAC address */
 	if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
-		iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift,
-				  true);
-	/*
-	 * Enforce size-alignment to be safe - there could perhaps be an
-	 * attribute to control this per-device, or at least per-domain...
-	 */
+		iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);
+
 	if (!iova)
-		iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true);
+		iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
 
-	return (dma_addr_t)iova->pfn_lo << shift;
+	return (dma_addr_t)iova << shift;
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 		dma_addr_t iova, size_t size)
 {
 	struct iova_domain *iovad = &cookie->iovad;
-	struct iova *iova_rbnode;
+	unsigned long shift = iova_shift(iovad);
 
 	/* The MSI case is only ever cleaning up its most recent allocation */
-	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
+	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
-		return;
-	}
-
-	iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova));
-	if (WARN_ON(!iova_rbnode))
-		return;
-
-	__free_iova(iovad, iova_rbnode);
+	else
+		free_iova_fast(iovad, iova >> shift, size >> shift);
 }
 
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
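
The power-of-two rounding in iommu_dma_alloc_iova() deserves a worked example.
The per-CPU caches are bucketed by allocation order, and iommu_dma_free_iova()
passes the unadjusted size to free_iova_fast(), so a freed range must land in
the bucket its (possibly rounded-up) allocation came from. Rounding every
cacheable request up to a power of two guarantees this, because the ceiling of
log2 of the original length equals log2 of the rounded length; with
IOVA_RANGE_CACHE_MAX_SIZE at 6, that covers everything below 32 pages. The
helpers below are userspace stand-ins written to mirror the behaviour of the
kernel's roundup_pow_of_two() and order_base_2(), not the kernel code itself.

#include <stdio.h>

/* Stand-in mirroring roundup_pow_of_two(): smallest power of two >= n. */
static unsigned long roundup_pow_of_two(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

/* ceil(log2(n)): the size-class bucket a range of n pages falls into. */
static unsigned int size_to_order(unsigned long n)
{
	unsigned int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned long iova_len = 5;	/* e.g. a 20KB mapping with 4KB pages */
	unsigned long rounded = roundup_pow_of_two(iova_len);

	printf("alloc: %lu pages rounded to %lu (order %u)\n",
	       iova_len, rounded, size_to_order(rounded));
	printf("free:  unadjusted size %lu maps to order %u\n",
	       iova_len, size_to_order(iova_len));
	return 0;
}

A 5-page request is thus allocated as 8 pages (order 3), and freeing it with
the unadjusted size 5 still maps to order 3, which is exactly what the "order
of the unadjusted size will still match upon freeing" comment relies on.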