Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 88d321d1 authored by Patrick Daly's avatar Patrick Daly
Browse files

iommu: arm-smmu: Limit maximum batch size while holding spinlocks



Previously enhancements were made to io-pgtable-arm.c to loop through
an entire sg_table to prevent additional overhead from traversing back
and forth from the upper layer APIs in iommu.c to the low-level APIs in
io-pgtable-arm.c

However, with the CPU operating at minimum frequency and with
~14 MB sg_tables, the irqsoff tracer reported latencies over 1 ms.
Target a maximum latency of 500 us under these conditions by splitting
sg_tables into smaller pieces.

This change is expected to have a small negative impact on peak mapping
performance, as the measurements below show:
Before (8998):
    size        iommu_map_sg      iommu_unmap
      4K            5.672 us        10.743 us
     64K            6.690 us        10.684 us
      2M           48.981 us        19.038 us
     12M          259.648 us       154.106 us
     20M          429.331 us       158.832 us

After:
    size        iommu_map_sg      iommu_unmap
      4K            5.731 us        10.578 us
     64K            6.923 us        10.691 us
      2M           53.107 us        19.278 us
     12M          291.640 us       153.895 us
     20M          477.925 us       158.704 us

Before (sdm845):
(average over 10 iterations)
    size        iommu_map_sg      iommu_unmap
      4K           16.750 us         9.302 us
     64K           18.229 us         9.349 us
      2M           90.364 us        19.864 us
     12M          477.432 us        33.161 us
     20M          774.515 us        43.656 us

After:
(average over 10 iterations)
    size        iommu_map_sg      iommu_unmap
      4K           16.614 us         9.364 us
     64K           18.187 us         9.380 us
      2M           96.494 us        20.036 us
     12M          504.958 us        44.541 us
     20M          838.952 us        44.583 us

Change-Id: I18cf0e86b4de7183c06684129a835a8806263193
Signed-off-by: default avatarPatrick Daly <pdaly@codeaurora.org>
parent 92b0fef6
Loading
Loading
Loading
Loading
+40 −8
Original line number Diff line number Diff line
@@ -2223,14 +2223,18 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
	return ret;
}

#define MAX_MAP_SG_BATCH_SIZE (SZ_4M)
static size_t arm_smmu_map_sg(struct iommu_domain *domain, unsigned long iova,
			   struct scatterlist *sg, unsigned int nents, int prot)
{
	int ret;
	size_t size;
	size_t size, batch_size, size_to_unmap = 0;
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	unsigned int idx_start, idx_end;
	struct scatterlist *sg_start, *sg_end;
	unsigned long __saved_iova_start;

	if (!ops)
		return -ENODEV;
@@ -2239,17 +2243,45 @@ static size_t arm_smmu_map_sg(struct iommu_domain *domain, unsigned long iova,
	if (ret)
		return ret;

	__saved_iova_start = iova;
	idx_start = idx_end = 0;
	sg_start = sg_end = sg;
	while (idx_end < nents) {
		batch_size = sg_end->length;
		sg_end = sg_next(sg_end);
		idx_end++;
		while ((idx_end < nents) &&
		       (batch_size + sg_end->length < MAX_MAP_SG_BATCH_SIZE)) {

			batch_size += sg_end->length;
			sg_end = sg_next(sg_end);
			idx_end++;
		}

		spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	ret = ops->map_sg(ops, iova, sg, nents, prot, &size);
		ret = ops->map_sg(ops, iova, sg_start, idx_end - idx_start,
				  prot, &size);
		spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
		/* Returns 0 on error */
		if (!ret) {
			size_to_unmap = iova + size - __saved_iova_start;
			goto out;
		}

	if (!ret)
		arm_smmu_unmap(domain, iova, size);
		iova += batch_size;
		idx_start = idx_end;
		sg_start = sg_end;
	}

	arm_smmu_domain_power_off(domain, smmu_domain->smmu);
out:
	arm_smmu_assign_table(smmu_domain);

	return ret;
	if (size_to_unmap) {
		arm_smmu_unmap(domain, __saved_iova_start, size_to_unmap);
		iova = __saved_iova_start;
	}
	arm_smmu_domain_power_off(domain, smmu_domain->smmu);
	return iova - __saved_iova_start;
}

static phys_addr_t __arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,