Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 39debdc1 authored by Joerg Roedel's avatar Joerg Roedel
Browse files

Merge branch 'for-joerg/arm-smmu/updates' of...

Merge branch 'for-joerg/arm-smmu/updates' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into arm/smmu
parents 6fbc7275 9e6ea59f
Loading
Loading
Loading
Loading
+39 −19
Original line number Diff line number Diff line
@@ -191,6 +191,7 @@
#define Q_BASE_RWA			(1UL << 62)
#define Q_BASE_ADDR_MASK		GENMASK_ULL(51, 5)
#define Q_BASE_LOG2SIZE			GENMASK(4, 0)
#define Q_MAX_SZ_SHIFT			(PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)

/*
 * Stream table.
@@ -289,8 +290,9 @@
					FIELD_GET(ARM64_TCR_##fld, tcr))

/* Command queue */
#define CMDQ_ENT_DWORDS			2
#define CMDQ_MAX_SZ_SHIFT		8
#define CMDQ_ENT_SZ_SHIFT		4
#define CMDQ_ENT_DWORDS			((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
#define CMDQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)

#define CMDQ_CONS_ERR			GENMASK(30, 24)
#define CMDQ_ERR_CERROR_NONE_IDX	0
@@ -336,14 +338,16 @@
#define CMDQ_SYNC_1_MSIADDR_MASK	GENMASK_ULL(51, 2)

/* Event queue */
#define EVTQ_ENT_DWORDS			4
#define EVTQ_MAX_SZ_SHIFT		7
#define EVTQ_ENT_SZ_SHIFT		5
#define EVTQ_ENT_DWORDS			((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
#define EVTQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)

#define EVTQ_0_ID			GENMASK_ULL(7, 0)

/* PRI queue */
#define PRIQ_ENT_DWORDS			2
#define PRIQ_MAX_SZ_SHIFT		8
#define PRIQ_ENT_SZ_SHIFT		4
#define PRIQ_ENT_DWORDS			((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
#define PRIQ_MAX_SZ_SHIFT		(Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)

#define PRIQ_0_SID			GENMASK_ULL(31, 0)
#define PRIQ_0_SSID			GENMASK_ULL(51, 32)
@@ -798,7 +802,7 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
@@ -1785,13 +1789,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
		.tlb		= &arm_smmu_gather_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

@@ -2270,17 +2272,32 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
				   struct arm_smmu_queue *q,
				   unsigned long prod_off,
				   unsigned long cons_off,
				   size_t dwords)
				   size_t dwords, const char *name)
{
	size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
	size_t qsz;

	do {
		qsz = ((1 << q->max_n_shift) * dwords) << 3;
		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
					      GFP_KERNEL);
		if (q->base || qsz < PAGE_SIZE)
			break;

		q->max_n_shift--;
	} while (1);

	q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
	if (!q->base) {
		dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
			qsz);
		dev_err(smmu->dev,
			"failed to allocate queue (0x%zx bytes) for %s\n",
			qsz, name);
		return -ENOMEM;
	}

	if (!WARN_ON(q->base_dma & (qsz - 1))) {
		dev_info(smmu->dev, "allocated %u entries for %s\n",
			 1 << q->max_n_shift, name);
	}

	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
	q->ent_dwords	= dwords;
@@ -2300,13 +2317,15 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
	/* cmdq */
	spin_lock_init(&smmu->cmdq.lock);
	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
				      "cmdq");
	if (ret)
		return ret;

	/* evtq */
	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
				      "evtq");
	if (ret)
		return ret;

@@ -2315,7 +2334,8 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
		return 0;

	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
				       "priq");
}

static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
@@ -2879,7 +2899,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
		return -ENXIO;
	}

	/* Queue sizes, capped at 4k */
	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					 FIELD_GET(IDR1_CMDQS, reg));
	if (!smmu->cmdq.q.max_n_shift) {
+1 −3
Original line number Diff line number Diff line
@@ -892,13 +892,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
		.tlb		= smmu_domain->tlb_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

+10 −7
Original line number Diff line number Diff line
@@ -204,7 +204,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
		dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
		goto out_free;
	}
	if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
	if (table && !cfg->coherent_walk) {
		dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, dma))
			goto out_free;
@@ -238,7 +238,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
	struct device *dev = cfg->iommu_dev;
	size_t size = ARM_V7S_TABLE_SIZE(lvl);

	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
	if (!cfg->coherent_walk)
		dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
				 DMA_TO_DEVICE);
	if (lvl == 1)
@@ -250,7 +250,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
			       struct io_pgtable_cfg *cfg)
{
	if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
	if (cfg->coherent_walk)
		return;

	dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
@@ -716,7 +716,6 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
			    IO_PGTABLE_QUIRK_NO_PERMS |
			    IO_PGTABLE_QUIRK_TLBI_ON_MAP |
			    IO_PGTABLE_QUIRK_ARM_MTK_4GB |
			    IO_PGTABLE_QUIRK_NO_DMA |
			    IO_PGTABLE_QUIRK_NON_STRICT))
		return NULL;

@@ -779,8 +778,11 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
	/* TTBRs */
	cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) |
				   ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS |
				   ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
				   ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA);
				   (cfg->coherent_walk ?
				   (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
				    ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) :
				   (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) |
				    ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC)));
	cfg->arm_v7s_cfg.ttbr[1] = 0;
	return &data->iop;

@@ -835,7 +837,8 @@ static int __init arm_v7s_do_selftests(void)
		.tlb = &dummy_tlb_ops,
		.oas = 32,
		.ias = 32,
		.quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA,
		.coherent_walk = true,
		.quirks = IO_PGTABLE_QUIRK_ARM_NS,
		.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
	};
	unsigned int iova, size, iova_start;
+25 −15
Original line number Diff line number Diff line
@@ -156,10 +156,12 @@
#define ARM_LPAE_MAIR_ATTR_MASK		0xff
#define ARM_LPAE_MAIR_ATTR_DEVICE	0x04
#define ARM_LPAE_MAIR_ATTR_NC		0x44
#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA	0xf4
#define ARM_LPAE_MAIR_ATTR_WBRWA	0xff
#define ARM_LPAE_MAIR_ATTR_IDX_NC	0
#define ARM_LPAE_MAIR_ATTR_IDX_CACHE	1
#define ARM_LPAE_MAIR_ATTR_IDX_DEV	2
#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE	3

#define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
#define ARM_MALI_LPAE_TTBR_READ_INNER	BIT(2)
@@ -239,7 +241,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
		return NULL;

	pages = page_address(p);
	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
	if (!cfg->coherent_walk) {
		dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, dma))
			goto out_free;
@@ -265,7 +267,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
static void __arm_lpae_free_pages(void *pages, size_t size,
				  struct io_pgtable_cfg *cfg)
{
	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
	if (!cfg->coherent_walk)
		dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
				 size, DMA_TO_DEVICE);
	free_pages((unsigned long)pages, get_order(size));
@@ -283,7 +285,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
{
	*ptep = pte;

	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
	if (!cfg->coherent_walk)
		__arm_lpae_sync_pte(ptep, cfg);
}

@@ -361,8 +363,7 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,

	old = cmpxchg64_relaxed(ptep, curr, new);

	if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) ||
	    (old & ARM_LPAE_PTE_SW_SYNC))
	if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
		return old;

	/* Even if it's not ours, there's no point waiting; just kick it */
@@ -403,8 +404,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
		pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
		if (pte)
			__arm_lpae_free_pages(cptep, tblsz, cfg);
	} else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) &&
		   !(pte & ARM_LPAE_PTE_SW_SYNC)) {
	} else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
		__arm_lpae_sync_pte(ptep, cfg);
	}

@@ -459,6 +459,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
		else if (prot & IOMMU_CACHE)
			pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
		else if (prot & IOMMU_QCOM_SYS_CACHE)
			pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE
				<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
	}

	if (prot & IOMMU_NOEXEC)
@@ -783,7 +786,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
	u64 reg;
	struct arm_lpae_io_pgtable *data;

	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA |
	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
			    IO_PGTABLE_QUIRK_NON_STRICT))
		return NULL;

@@ -792,9 +795,15 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
		return NULL;

	/* TCR */
	if (cfg->coherent_walk) {
		reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
		      (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
		      (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
	} else {
		reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) |
		      (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) |
		      (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT);
	}

	switch (ARM_LPAE_GRANULE(data)) {
	case SZ_4K:
@@ -846,7 +855,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
	      (ARM_LPAE_MAIR_ATTR_WBRWA
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
	      (ARM_LPAE_MAIR_ATTR_DEVICE
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
	      (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
	       << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));

	cfg->arm_lpae_s1_cfg.mair[0] = reg;
	cfg->arm_lpae_s1_cfg.mair[1] = 0;
@@ -876,8 +887,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
	struct arm_lpae_io_pgtable *data;

	/* The NS quirk doesn't apply at stage 2 */
	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA |
			    IO_PGTABLE_QUIRK_NON_STRICT))
	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT))
		return NULL;

	data = arm_lpae_alloc_pgtable(cfg);
@@ -1212,7 +1222,7 @@ static int __init arm_lpae_do_selftests(void)
	struct io_pgtable_cfg cfg = {
		.tlb = &dummy_tlb_ops,
		.oas = 48,
		.quirks = IO_PGTABLE_QUIRK_NO_DMA,
		.coherent_walk = true,
	};

	for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
+1 −0
Original line number Diff line number Diff line
@@ -431,6 +431,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
	 * TODO: Add support for coherent walk through CCI with DVM and remove
	 * cache handling. For now, delegate it to the io-pgtable code.
	 */
	domain->cfg.coherent_walk = false;
	domain->cfg.iommu_dev = domain->mmu->root->dev;

	/*
Loading