Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit da765c6c authored by Patrick Daly's avatar Patrick Daly
Browse files

iommu: arm-smmu: Implement qsmmuv500 workaround



While a tlb flush is in progress, clients may experience suboptimal
performance. To mitigate this, add a minimum delay between tlb
operations for real-time clients. This is implemented using a global
lock between HLOS and other execution environments.

Change-Id: I7da0c3e2546268f2ef294aa82132ec95e8fdb98f
Signed-off-by: default avatarPatrick Daly <pdaly@codeaurora.org>
parent c70d827b
Loading
Loading
Loading
Loading
+4 −0
Original line number Original line Diff line number Diff line
@@ -100,6 +100,10 @@ conditions.
		  Some hardware may not have full support for atos debugging
		  Some hardware may not have full support for atos debugging
		  in tandem with other features like power collapse.
		  in tandem with other features like power collapse.


- qcom,mmu500-errata-1:
		  An array of <sid mask>.
		  Indicates the SIDs for which the workaround is required.

- qcom,deferred-regulator-disable-delay : The time delay for deferred regulator
- qcom,deferred-regulator-disable-delay : The time delay for deferred regulator
                  disable in ms. In case of unmap call, regulator is
                  disable in ms. In case of unmap call, regulator is
                  enabled/disabled. This may introduce additional delay. For
                  enabled/disabled. This may introduce additional delay. For
+177 −3
Original line number Original line Diff line number Diff line
@@ -51,6 +51,9 @@
#include <linux/of_platform.h>
#include <linux/of_platform.h>
#include <linux/msm-bus.h>
#include <linux/msm-bus.h>
#include <dt-bindings/msm/msm-bus-ids.h>
#include <dt-bindings/msm/msm-bus-ids.h>
#include <linux/remote_spinlock.h>
#include <linux/ktime.h>
#include <trace/events/iommu.h>


#include <linux/amba/bus.h>
#include <linux/amba/bus.h>


@@ -428,6 +431,7 @@ struct arm_smmu_device {
#define ARM_SMMU_OPT_3LVL_TABLES	(1 << 4)
#define ARM_SMMU_OPT_3LVL_TABLES	(1 << 4)
#define ARM_SMMU_OPT_NO_ASID_RETENTION	(1 << 5)
#define ARM_SMMU_OPT_NO_ASID_RETENTION	(1 << 5)
#define ARM_SMMU_OPT_DISABLE_ATOS	(1 << 6)
#define ARM_SMMU_OPT_DISABLE_ATOS	(1 << 6)
#define ARM_SMMU_OPT_QCOM_MMU500_ERRATA1	(1 << 7)
	u32				options;
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;
	enum arm_smmu_implementation	model;
@@ -527,6 +531,9 @@ struct arm_smmu_domain {
	struct mutex			assign_lock;
	struct mutex			assign_lock;
	struct list_head		secure_pool_list;
	struct list_head		secure_pool_list;
	struct iommu_domain		domain;
	struct iommu_domain		domain;

	bool				qsmmuv500_errata1_init;
	bool				qsmmuv500_errata1_client;
};
};


static DEFINE_SPINLOCK(arm_smmu_devices_lock);
static DEFINE_SPINLOCK(arm_smmu_devices_lock);
@@ -549,6 +556,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_3LVL_TABLES, "qcom,use-3-lvl-tables" },
	{ ARM_SMMU_OPT_3LVL_TABLES, "qcom,use-3-lvl-tables" },
	{ ARM_SMMU_OPT_NO_ASID_RETENTION, "qcom,no-asid-retention" },
	{ ARM_SMMU_OPT_NO_ASID_RETENTION, "qcom,no-asid-retention" },
	{ ARM_SMMU_OPT_DISABLE_ATOS, "qcom,disable-atos" },
	{ ARM_SMMU_OPT_DISABLE_ATOS, "qcom,disable-atos" },
	{ ARM_SMMU_OPT_QCOM_MMU500_ERRATA1, "qcom,mmu500-errata-1" },
	{ 0, NULL},
	{ 0, NULL},
};
};


@@ -574,6 +582,7 @@ static int arm_smmu_enable_s1_translations(struct arm_smmu_domain *smmu_domain);
static int arm_smmu_alloc_cb(struct iommu_domain *domain,
static int arm_smmu_alloc_cb(struct iommu_domain *domain,
				struct arm_smmu_device *smmu,
				struct arm_smmu_device *smmu,
				struct device *dev);
				struct device *dev);
static struct iommu_gather_ops qsmmuv500_errata1_smmu_gather_ops;


static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
{
@@ -1228,11 +1237,12 @@ static phys_addr_t arm_smmu_verify_fault(struct iommu_domain *domain,
{
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	const struct iommu_gather_ops *tlb = smmu_domain->pgtbl_cfg.tlb;
	phys_addr_t phys;
	phys_addr_t phys;
	phys_addr_t phys_post_tlbiall;
	phys_addr_t phys_post_tlbiall;


	phys = arm_smmu_iova_to_phys_hard(domain, iova);
	phys = arm_smmu_iova_to_phys_hard(domain, iova);
	arm_smmu_tlb_inv_context(smmu_domain);
	tlb->tlb_flush_all(smmu_domain);
	phys_post_tlbiall = arm_smmu_iova_to_phys_hard(domain, iova);
	phys_post_tlbiall = arm_smmu_iova_to_phys_hard(domain, iova);


	if (phys != phys_post_tlbiall) {
	if (phys != phys_post_tlbiall) {
@@ -1588,6 +1598,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
	bool is_fast = smmu_domain->attributes & (1 << DOMAIN_ATTR_FAST);
	bool is_fast = smmu_domain->attributes & (1 << DOMAIN_ATTR_FAST);
	unsigned long quirks = 0;
	unsigned long quirks = 0;
	bool dynamic;
	bool dynamic;
	const struct iommu_gather_ops *tlb;


	mutex_lock(&smmu_domain->init_mutex);
	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
	if (smmu_domain->smmu)
@@ -1703,6 +1714,10 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
	if (is_iommu_pt_coherent(smmu_domain))
	if (is_iommu_pt_coherent(smmu_domain))
		quirks |= IO_PGTABLE_QUIRK_PAGE_TABLE_COHERENT;
		quirks |= IO_PGTABLE_QUIRK_PAGE_TABLE_COHERENT;


	tlb = &arm_smmu_gather_ops;
	if (smmu->options & ARM_SMMU_OPT_QCOM_MMU500_ERRATA1)
		tlb = &qsmmuv500_errata1_smmu_gather_ops;

	ret = arm_smmu_alloc_cb(domain, smmu, dev);
	ret = arm_smmu_alloc_cb(domain, smmu, dev);
	if (ret < 0)
	if (ret < 0)
		goto out_unlock;
		goto out_unlock;
@@ -1720,7 +1735,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.ias		= ias,
		.oas		= oas,
		.oas		= oas,
		.tlb		= &arm_smmu_gather_ops,
		.tlb		= tlb,
		.iommu_dev	= smmu->dev,
		.iommu_dev	= smmu->dev,
	};
	};


@@ -3138,7 +3153,10 @@ static void arm_smmu_reg_write(struct iommu_domain *domain,


static void arm_smmu_tlbi_domain(struct iommu_domain *domain)
static void arm_smmu_tlbi_domain(struct iommu_domain *domain)
{
{
	arm_smmu_tlb_inv_context(to_smmu_domain(domain));
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	const struct iommu_gather_ops *tlb = smmu_domain->pgtbl_cfg.tlb;

	tlb->tlb_flush_all(smmu_domain);
}
}


static int arm_smmu_enable_config_clocks(struct iommu_domain *domain)
static int arm_smmu_enable_config_clocks(struct iommu_domain *domain)
@@ -4300,6 +4318,14 @@ struct qsmmuv500_archdata {
	struct list_head		tbus;
	struct list_head		tbus;
	void __iomem			*tcu_base;
	void __iomem			*tcu_base;
	u32				version;
	u32				version;

	struct actlr_setting		*actlrs;
	u32				actlr_tbl_size;

	struct arm_smmu_smr		*errata1_clients;
	u32				num_errata1_clients;
	remote_spinlock_t		errata1_lock;
	ktime_t				last_tlbi_ktime;
};
};
#define get_qsmmuv500_archdata(smmu)				\
#define get_qsmmuv500_archdata(smmu)				\
	((struct qsmmuv500_archdata *)(smmu->archdata))
	((struct qsmmuv500_archdata *)(smmu->archdata))
@@ -4320,6 +4346,118 @@ struct qsmmuv500_tbu_device {
	u32				halt_count;
	u32				halt_count;
};
};


/*
 * Check whether any stream-match register (SMR) entry programmed for this
 * domain's master overlaps the given <id, mask> pair.  Two SMRs overlap
 * when every ID bit that is not masked by either entry is identical.
 */
static bool arm_smmu_domain_match_smr(struct arm_smmu_domain *smmu_domain,
				      struct arm_smmu_smr *smr)
{
	struct arm_smmu_smr *cur;
	int i, idx;

	for_each_cfg_sme(smmu_domain->dev->iommu_fwspec, i, idx) {
		cur = &smmu_domain->smmu->smrs[idx];
		/* Match when no un-masked ID bit differs */
		if (!((smr->id ^ cur->id) & ~(smr->mask | cur->mask)))
			return true;
	}
	return false;
}

#define ERRATA1_REMOTE_SPINLOCK       "S:6"
#define ERRATA1_TLBI_INTERVAL_US		10
static bool
qsmmuv500_errata1_required(struct arm_smmu_domain *smmu_domain,
				 struct qsmmuv500_archdata *data)
{
	bool ret = false;
	int j;
	struct arm_smmu_smr *smr;

	if (smmu_domain->qsmmuv500_errata1_init)
		return smmu_domain->qsmmuv500_errata1_client;

	for (j = 0; j < data->num_errata1_clients; j++) {
		smr = &data->errata1_clients[j];
		if (arm_smmu_domain_match_smr(smmu_domain, smr)) {
			ret = true;
			break;
		}
	}

	smmu_domain->qsmmuv500_errata1_init = true;
	smmu_domain->qsmmuv500_errata1_client = ret;
	return ret;
}

/*
 * Issue a stage-1 TLBIALL followed by a TLBSYNC on the domain's context
 * bank and poll TLBSTATUS until the sync completes.  If it does not
 * complete within 100us, engage msm_bus_noc_throttle_wa() — presumably to
 * reduce competing bus traffic so the sync can drain (TODO confirm) — and
 * poll for up to a further 10ms before reporting failure.
 *
 * Caller must hold the errata1 remote spinlock and have clocks enabled.
 */
static void __qsmmuv500_errata1_tlbiall(struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct device *dev = smmu_domain->dev;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	void __iomem *base;
	ktime_t cur;
	u32 val;

	base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
	writel_relaxed(0, base + ARM_SMMU_CB_S1_TLBIALL);
	writel_relaxed(0, base + ARM_SMMU_CB_TLBSYNC);
	/* Fast path: sync is expected to finish within 100us */
	if (readl_poll_timeout_atomic(base + ARM_SMMU_CB_TLBSTATUS, val,
				      !(val & TLBSTATUS_SACTIVE), 0, 100)) {
		cur = ktime_get();
		trace_errata_throttle_start(dev, 0);

		/* Slow path: throttle and retry with a 10ms budget */
		msm_bus_noc_throttle_wa(true);
		if (readl_poll_timeout_atomic(base + ARM_SMMU_CB_TLBSTATUS, val,
				      !(val & TLBSTATUS_SACTIVE), 0, 10000)) {
			dev_err(smmu->dev, "ERRATA1 TLBSYNC timeout");
			trace_errata_failed(dev, 0);
		}

		msm_bus_noc_throttle_wa(false);

		/* Report how long the throttle window lasted, in us */
		trace_errata_throttle_end(
				dev, ktime_us_delta(ktime_get(), cur));
	}
}

/*
 * tlb_flush_all implementation for SMMUs with the MMU-500 errata-1
 * workaround enabled.
 *
 * All TLB invalidations are serialized through a remote spinlock shared
 * between HLOS and other execution environments.  For clients listed in
 * the errata table, a minimum spacing of ERRATA1_TLBI_INTERVAL_US is
 * additionally enforced between consecutive invalidations, to limit the
 * performance impact on real-time clients.
 *
 * Must be called with clocks/regulators enabled.
 */
static void qsmmuv500_errata1_tlb_inv_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct device *dev = smmu_domain->dev;
	struct qsmmuv500_archdata *data =
			get_qsmmuv500_archdata(smmu_domain->smmu);
	ktime_t cur;
	bool errata;

	cur = ktime_get();
	trace_errata_tlbi_start(dev, 0);

	/* Look up (or use the cached) errata verdict before taking the lock */
	errata = qsmmuv500_errata1_required(smmu_domain, data);
	remote_spin_lock(&data->errata1_lock);
	if (errata) {
		s64 delta;

		/* Enforce the minimum gap since the previous errata TLBI */
		delta = ktime_us_delta(ktime_get(), data->last_tlbi_ktime);
		if (delta < ERRATA1_TLBI_INTERVAL_US)
			udelay(ERRATA1_TLBI_INTERVAL_US - delta);

		__qsmmuv500_errata1_tlbiall(smmu_domain);

		/* Only errata clients advance the pacing timestamp */
		data->last_tlbi_ktime = ktime_get();
	} else {
		__qsmmuv500_errata1_tlbiall(smmu_domain);
	}
	remote_spin_unlock(&data->errata1_lock);

	trace_errata_tlbi_end(dev, ktime_us_delta(ktime_get(), cur));
}

/*
 * Gather ops installed for domains on SMMUs flagged with
 * ARM_SMMU_OPT_QCOM_MMU500_ERRATA1; only tlb_flush_all is routed through
 * the errata-aware path, page allocation is the same as the default ops.
 */
static struct iommu_gather_ops qsmmuv500_errata1_smmu_gather_ops = {
	.tlb_flush_all	= qsmmuv500_errata1_tlb_inv_context,
	.alloc_pages_exact = arm_smmu_alloc_pages_exact,
	.free_pages_exact = arm_smmu_free_pages_exact,
};

static int qsmmuv500_tbu_halt(struct qsmmuv500_tbu_device *tbu)
static int qsmmuv500_tbu_halt(struct qsmmuv500_tbu_device *tbu)
{
{
	unsigned long flags;
	unsigned long flags;
@@ -4592,6 +4730,38 @@ static int qsmmuv500_tbu_register(struct device *dev, void *cookie)
	return 0;
	return 0;
}
}


/*
 * Parse the optional "qcom,mmu500-errata-1" DT property: an array of
 * <sid mask> pairs naming the stream IDs that require the errata-1 TLBI
 * workaround.  On success the table is stored in the archdata and the
 * cross-EE remote spinlock is initialized.
 *
 * Returns 0 when the property is absent or malformed (the workaround is
 * simply not armed for any client), or -ENOMEM on allocation failure.
 */
static int qsmmuv500_parse_errata1(struct arm_smmu_device *smmu)
{
	int len, i;
	struct device *dev = smmu->dev;
	struct qsmmuv500_archdata *data = get_qsmmuv500_archdata(smmu);
	struct arm_smmu_smr *smrs;
	const __be32 *cell;

	cell = of_get_property(dev->of_node, "qcom,mmu500-errata-1", NULL);
	if (!cell)
		return 0;

	remote_spin_lock_init(&data->errata1_lock, ERRATA1_REMOTE_SPINLOCK);
	/* Count <sid mask> pairs; negative means the size is not a multiple */
	len = of_property_count_elems_of_size(
			dev->of_node, "qcom,mmu500-errata-1", sizeof(u32) * 2);
	/* Also skip an empty table rather than doing a zero-size alloc */
	if (len <= 0)
		return 0;

	/* devm_kcalloc checks len * sizeof(*smrs) for overflow */
	smrs = devm_kcalloc(dev, len, sizeof(*smrs), GFP_KERNEL);
	if (!smrs)
		return -ENOMEM;

	for (i = 0; i < len; i++) {
		smrs[i].id = of_read_number(cell++, 1);
		smrs[i].mask = of_read_number(cell++, 1);
	}

	data->errata1_clients = smrs;
	data->num_errata1_clients = len;
	return 0;
}

static int qsmmuv500_arch_init(struct arm_smmu_device *smmu)
static int qsmmuv500_arch_init(struct arm_smmu_device *smmu)
{
{
	struct resource *res;
	struct resource *res;
@@ -4615,6 +4785,10 @@ static int qsmmuv500_arch_init(struct arm_smmu_device *smmu)
	data->version = readl_relaxed(data->tcu_base + TCU_HW_VERSION_HLOS1);
	data->version = readl_relaxed(data->tcu_base + TCU_HW_VERSION_HLOS1);
	smmu->archdata = data;
	smmu->archdata = data;


	ret = qsmmuv500_parse_errata1(smmu);
	if (ret)
		return ret;

	ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
	ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
	if (ret)
	if (ret)
		return ret;
		return ret;
+56 −0
Original line number Original line Diff line number Diff line
@@ -161,6 +161,62 @@ DEFINE_EVENT(iommu_error, io_page_fault,


	TP_ARGS(dev, iova, flags)
	TP_ARGS(dev, iova, flags)
);
);

/*
 * Tracepoint class for the qsmmuv500 MMU-500 errata-1 TLB-invalidate
 * workaround.  Each event records the device name plus an elapsed time
 * in microseconds; the *_start events always pass 0 for the time.
 */
DECLARE_EVENT_CLASS(iommu_errata_tlbi,

	TP_PROTO(struct device *dev, u64 time),

	TP_ARGS(dev, time),

	TP_STRUCT__entry(
		__string(device, dev_name(dev))
		__field(u64, time)
	),

	TP_fast_assign(
		__assign_str(device, dev_name(dev));
		__entry->time = time;
	),

	TP_printk("IOMMU:%s %lld us",
			__get_str(device), __entry->time
	)
);

/* Entry into the errata TLB-invalidate path (time is always 0) */
DEFINE_EVENT(iommu_errata_tlbi, errata_tlbi_start,

	TP_PROTO(struct device *dev, u64 time),

	TP_ARGS(dev, time)
);

/* Exit from the errata TLB-invalidate path; time = total duration in us */
DEFINE_EVENT(iommu_errata_tlbi, errata_tlbi_end,

	TP_PROTO(struct device *dev, u64 time),

	TP_ARGS(dev, time)
);

/* Bus throttling engaged because TLBSYNC did not complete promptly */
DEFINE_EVENT(iommu_errata_tlbi, errata_throttle_start,

	TP_PROTO(struct device *dev, u64 time),

	TP_ARGS(dev, time)
);

/* Bus throttling released; time = throttled duration in us */
DEFINE_EVENT(iommu_errata_tlbi, errata_throttle_end,

	TP_PROTO(struct device *dev, u64 time),

	TP_ARGS(dev, time)
);

/* TLBSYNC timed out even with throttling applied */
DEFINE_EVENT(iommu_errata_tlbi, errata_failed,

	TP_PROTO(struct device *dev, u64 time),

	TP_ARGS(dev, time)
);
#endif /* _TRACE_IOMMU_H */
#endif /* _TRACE_IOMMU_H */


/* This part must be outside protection */
/* This part must be outside protection */