Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fe427e37 authored by Joerg Roedel's avatar Joerg Roedel
Browse files

Merge branch 'for-joerg/batched-unmap' of...

Merge branch 'for-joerg/batched-unmap' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into core
parents 086f9efa 3951c41a
Loading
Loading
Loading
Loading
+16 −8
Original line number Diff line number Diff line
@@ -222,7 +222,7 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
		size_t unmapped_page;
		size_t pgsize = get_pgsize(iova, len - unmapped_len);

		unmapped_page = ops->unmap(ops, iova, pgsize);
		unmapped_page = ops->unmap(ops, iova, pgsize, NULL);
		if (!unmapped_page)
			break;

@@ -247,20 +247,28 @@ static void mmu_tlb_inv_context_s1(void *cookie)
	mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
}

/*
 * Range-invalidation hook that performs no work: @iova, @size, @granule,
 * @leaf and @cookie are all accepted and ignored.
 */
static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
				     size_t granule, bool leaf, void *cookie)
{
}

/*
 * TLB sync hook. Currently a no-op: @cookie is not used and nothing is
 * waited on.
 */
static void mmu_tlb_sync_context(void *cookie)
{
	/*
	 * TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X.
	 *
	 * The disabled starting point for that work was:
	 *	struct panfrost_device *pfdev = cookie;
	 */
}

static const struct iommu_gather_ops mmu_tlb_ops = {
/*
 * .tlb_flush_walk hook for mmu_tlb_ops.
 *
 * The range description (@iova, @size, @granule) is ignored; the only
 * action is to forward @cookie to mmu_tlb_sync_context().
 */
static void mmu_tlb_flush_walk(unsigned long iova, size_t size,
			       size_t granule, void *cookie)
{
	mmu_tlb_sync_context(cookie);
}

/*
 * .tlb_flush_leaf hook for mmu_tlb_ops.
 *
 * The range description (@iova, @size, @granule) is ignored; the only
 * action is to forward @cookie to mmu_tlb_sync_context().
 */
static void mmu_tlb_flush_leaf(unsigned long iova, size_t size,
			       size_t granule, void *cookie)
{
	mmu_tlb_sync_context(cookie);
}

static const struct iommu_flush_ops mmu_tlb_ops = {
	.tlb_flush_all	= mmu_tlb_inv_context_s1,
	.tlb_add_flush	= mmu_tlb_inv_range_nosync,
	.tlb_sync	= mmu_tlb_sync_context,
	.tlb_flush_walk = mmu_tlb_flush_walk,
	.tlb_flush_leaf = mmu_tlb_flush_leaf,
};

static const char *access_type_name(struct panfrost_device *pfdev,
+6 −5
Original line number Diff line number Diff line
@@ -3055,7 +3055,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
}

static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
			   size_t page_size)
			      size_t page_size,
			      struct iommu_iotlb_gather *gather)
{
	struct protection_domain *domain = to_pdomain(dom);
	size_t unmap_size;
@@ -3196,9 +3197,10 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
	domain_flush_complete(dom);
}

static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
				      unsigned long iova, size_t size)
/*
 * .iotlb_sync callback for amd_iommu_ops.
 *
 * @gather is not consulted: the sync is carried out by delegating to
 * amd_iommu_flush_iotlb_all() for @domain.
 */
static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
				 struct iommu_iotlb_gather *gather)
{
	amd_iommu_flush_iotlb_all(domain);
}

const struct iommu_ops amd_iommu_ops = {
@@ -3219,8 +3221,7 @@ const struct iommu_ops amd_iommu_ops = {
	.is_attach_deferred = amd_iommu_is_attach_deferred,
	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
	.flush_iotlb_all = amd_iommu_flush_iotlb_all,
	.iotlb_range_add = amd_iommu_iotlb_range_add,
	.iotlb_sync = amd_iommu_flush_iotlb_all,
	.iotlb_sync = amd_iommu_iotlb_sync,
};

/*****************************************************************************
+37 −15
Original line number Diff line number Diff line
@@ -1545,13 +1545,6 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
}

/* IO_PGTABLE API */
/*
 * Issue a command-queue sync on the SMMU that owns the domain.
 *
 * @cookie: the struct arm_smmu_domain to sync for.
 */
static void arm_smmu_tlb_sync(void *cookie)
{
	struct arm_smmu_domain *dom = cookie;

	arm_smmu_cmdq_issue_sync(dom->smmu);
}

static void arm_smmu_tlb_inv_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
@@ -1603,10 +1596,38 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
	} while (size -= granule);
}

static const struct iommu_gather_ops arm_smmu_gather_ops = {
/*
 * .tlb_add_page hook: invalidate one granule-sized leaf entry.
 *
 * Calls arm_smmu_tlb_inv_range_nosync() with size == @granule and
 * leaf == true. This function does not itself issue a sync, and @gather
 * is not used.
 */
static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
					 unsigned long iova, size_t granule,
					 void *cookie)
{
	const bool leaf = true;

	arm_smmu_tlb_inv_range_nosync(iova, granule, granule, leaf, cookie);
}

/*
 * .tlb_flush_walk hook: invalidate [@iova, @iova + @size) as a non-leaf
 * range and then synchronise.
 *
 * @cookie is the struct arm_smmu_domain. The invalidation is requested
 * through arm_smmu_tlb_inv_range_nosync() with leaf == false, followed by
 * arm_smmu_cmdq_issue_sync() on the domain's SMMU.
 */
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
	struct arm_smmu_domain *dom = cookie;
	struct arm_smmu_device *dev = dom->smmu;

	arm_smmu_tlb_inv_range_nosync(iova, size, granule, false, cookie);
	arm_smmu_cmdq_issue_sync(dev);
}

/*
 * .tlb_flush_leaf hook: invalidate [@iova, @iova + @size) as a leaf range
 * and then synchronise.
 *
 * @cookie is the struct arm_smmu_domain. The invalidation is requested
 * through arm_smmu_tlb_inv_range_nosync() with leaf == true, followed by
 * arm_smmu_cmdq_issue_sync() on the domain's SMMU.
 */
static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
	struct arm_smmu_domain *dom = cookie;
	struct arm_smmu_device *dev = dom->smmu;

	arm_smmu_tlb_inv_range_nosync(iova, size, granule, true, cookie);
	arm_smmu_cmdq_issue_sync(dev);
}

static const struct iommu_flush_ops arm_smmu_flush_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync,
	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
};

/* IOMMU API */
@@ -1796,7 +1817,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
		.tlb		= &arm_smmu_gather_ops,
		.tlb		= &arm_smmu_flush_ops,
		.iommu_dev	= smmu->dev,
	};

@@ -1985,8 +2006,8 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
	return ops->map(ops, iova, paddr, size, prot);
}

static size_t
arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size, struct iommu_iotlb_gather *gather)
{
	int ret;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -1995,7 +2016,7 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
	if (!ops)
		return 0;

	ret = ops->unmap(ops, iova, size);
	ret = ops->unmap(ops, iova, size, gather);
	if (ret && arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size))
		return 0;

@@ -2010,7 +2031,8 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
		arm_smmu_tlb_inv_context(smmu_domain);
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
				struct iommu_iotlb_gather *gather)
{
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;

+78 −25
Original line number Diff line number Diff line
@@ -248,10 +248,17 @@ enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_BYPASS,
};

/*
 * Per-domain TLB maintenance operations.
 *
 * Wraps the generic struct iommu_flush_ops together with two driver-private
 * callbacks that the generic walk/leaf/add_page helpers in this file
 * dispatch through.
 */
struct arm_smmu_flush_ops {
	/* Generic flush ops; its address is what is handed over as .tlb */
	struct iommu_flush_ops		tlb;
	/*
	 * Invalidate [iova, iova + size) at the given granule; @leaf tells
	 * the implementation whether only leaf entries are affected.
	 */
	void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule,
			      bool leaf, void *cookie);
	/* Synchronise after invalidation(s) issued through tlb_inv_range */
	void (*tlb_sync)(void *cookie);
};

struct arm_smmu_domain {
	struct arm_smmu_device		*smmu;
	struct io_pgtable_ops		*pgtbl_ops;
	const struct iommu_gather_ops	*tlb_ops;
	const struct arm_smmu_flush_ops	*flush_ops;
	struct arm_smmu_cfg		cfg;
	enum arm_smmu_domain_stage	stage;
	bool				non_strict;
@@ -533,7 +540,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
 */
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
					 size_t granule, bool leaf, void *cookie)
@@ -547,21 +554,66 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
}

static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
/*
 * .tlb_flush_walk hook: non-leaf range invalidation followed by a sync.
 *
 * @cookie is the struct arm_smmu_domain; both steps are dispatched through
 * that domain's flush_ops (tlb_inv_range with leaf == false, then tlb_sync).
 */
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
	struct arm_smmu_domain *dom = cookie;
	const struct arm_smmu_flush_ops *flush = dom->flush_ops;

	flush->tlb_inv_range(iova, size, granule, false, cookie);
	flush->tlb_sync(cookie);
}

/*
 * .tlb_flush_leaf hook: leaf range invalidation followed by a sync.
 *
 * @cookie is the struct arm_smmu_domain; both steps are dispatched through
 * that domain's flush_ops (tlb_inv_range with leaf == true, then tlb_sync).
 */
static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
	struct arm_smmu_domain *dom = cookie;
	const struct arm_smmu_flush_ops *flush = dom->flush_ops;

	flush->tlb_inv_range(iova, size, granule, true, cookie);
	flush->tlb_sync(cookie);
}

/*
 * .tlb_add_page hook: invalidate a single granule-sized leaf entry.
 *
 * @cookie is the struct arm_smmu_domain. The request goes through the
 * domain's flush_ops->tlb_inv_range with size == @granule and leaf == true;
 * tlb_sync is not called here and @gather is not used.
 */
static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
				  unsigned long iova, size_t granule,
				  void *cookie)
{
	struct arm_smmu_domain *dom = cookie;
	const struct arm_smmu_flush_ops *flush = dom->flush_ops;

	flush->tlb_inv_range(iova, granule, granule, true, cookie);
}

static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
	.tlb = {
		.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
		.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
		.tlb_add_page	= arm_smmu_tlb_add_page,
	},
	.tlb_inv_range		= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync		= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb = {
		.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
		.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
		.tlb_add_page	= arm_smmu_tlb_add_page,
	},
	.tlb_inv_range		= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync		= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb = {
		.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
		.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
		.tlb_add_page	= arm_smmu_tlb_add_page,
	},
	.tlb_inv_range		= arm_smmu_tlb_inv_vmid_nosync,
	.tlb_sync		= arm_smmu_tlb_sync_vmid,
};

@@ -842,7 +894,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
@@ -862,9 +914,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
@@ -893,7 +945,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
		.tlb		= smmu_domain->tlb_ops,
		.tlb		= &smmu_domain->flush_ops->tlb,
		.iommu_dev	= smmu->dev,
	};

@@ -1301,7 +1353,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
			     size_t size, struct iommu_iotlb_gather *gather)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1311,7 +1363,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
		return 0;

	arm_smmu_rpm_get(smmu);
	ret = ops->unmap(ops, iova, size);
	ret = ops->unmap(ops, iova, size, gather);
	arm_smmu_rpm_put(smmu);

	return ret;
@@ -1322,21 +1374,22 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
	if (smmu_domain->flush_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
		smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
				struct iommu_iotlb_gather *gather)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->tlb_ops) {
	if (smmu_domain->flush_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
		smmu_domain->flush_ops->tlb_sync(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}
+7 −2
Original line number Diff line number Diff line
@@ -444,13 +444,18 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	size_t iova_off = iova_offset(iovad, dma_addr);
	struct iommu_iotlb_gather iotlb_gather;
	size_t unmapped;

	dma_addr -= iova_off;
	size = iova_align(iovad, size + iova_off);
	iommu_iotlb_gather_init(&iotlb_gather);

	unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
	WARN_ON(unmapped != size);

	WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size);
	if (!cookie->fq_domain)
		iommu_tlb_sync(domain);
		iommu_tlb_sync(domain, &iotlb_gather);
	iommu_dma_free_iova(cookie, dma_addr, size);
}

Loading