Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fb3f2510 authored by Isaac J. Manjarres's avatar Isaac J. Manjarres
Browse files

iommu/arm-smmu: Merge all IOMMU changes from msm-4.19 to msm-lahaina



This patch merges all of the IOMMU/SMMU, DMA mapping, fast, and
lazy mapping changes from msm-4.19 into msm-lahaina.

Change-Id: If7c1f641a8c836dbb799e2f3439f443ff299b299
Signed-off-by: default avatarIsaac J. Manjarres <isaacm@codeaurora.org>
parent b18bea50
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -907,6 +907,27 @@ config ARCH_WANT_HUGE_PMD_SHARE
config ARCH_HAS_CACHE_LINE_SIZE
	def_bool y

if ARM64 && IOMMU_DMA

config ARM64_DMA_IOMMU_ALIGNMENT
	int "Maximum PAGE_SIZE order of alignment for DMA IOMMU buffers"
	range 4 9
	default 9
	help
	  DMA mapping framework by default aligns all buffers to the smallest
	  PAGE_SIZE order which is greater than or equal to the requested buffer
	  size. This works well for buffers up to a few hundred kilobytes, but
	  for larger buffers it is just a waste of address space. Drivers which
	  have a relatively small addressing window (like 64MiB) might run
	  out of virtual space with just a few allocations.

	  With this parameter you can specify the maximum PAGE_SIZE order for
	  DMA IOMMU buffers. Larger buffers will be aligned only to this
	  specified order. The order is expressed as a power of two multiplied
	  by the PAGE_SIZE.

endif

config ARCH_ENABLE_SPLIT_PMD_PTLOCK
	def_bool y if PGTABLE_LEVELS > 2

+98 −47
Original line number Diff line number Diff line
@@ -16,16 +16,32 @@
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
#include <linux/dma-contiguous.h>
#include <linux/iommu.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>
#include <linux/pci.h>

#include <asm/cacheflush.h>
#include <linux/of_address.h>
#include <linux/dma-mapping-fast.h>


/*
 * Decide whether a DMA operation should be treated as coherent.
 *
 * @dev:   device the DMA operation is performed for
 * @attrs: DMA_ATTR_* flags supplied by the caller
 *
 * The per-call attributes take precedence over the device setting:
 * DMA_ATTR_FORCE_COHERENT is checked first, then
 * DMA_ATTR_FORCE_NON_COHERENT. When neither is set, the result is
 * whatever dev_is_dma_coherent() reports for @dev.
 */
static bool is_dma_coherent(struct device *dev, unsigned long attrs)
{
	/* Explicit overrides win, with "force coherent" tested first. */
	if (attrs & DMA_ATTR_FORCE_COHERENT)
		return true;

	if (attrs & DMA_ATTR_FORCE_NON_COHERENT)
		return false;

	/* No override requested: fall back to the device's own setting. */
	return dev_is_dma_coherent(dev);
}

pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
		unsigned long attrs)
{
	if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE))
	if (!is_dma_coherent(dev, attrs) || (attrs & DMA_ATTR_WRITE_COMBINE))
		return pgprot_writecombine(prot);
	return prot;
}
@@ -103,7 +119,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp,
				 unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	bool coherent = is_dma_coherent(dev, attrs);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	size_t iosize = size;
	void *addr;
@@ -117,6 +133,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
	 * Some drivers rely on this, and we probably don't want the
	 * possibility of stale kernel data being read by devices anyway.
	 */
	if (!(attrs & DMA_ATTR_SKIP_ZEROING))
		gfp |= __GFP_ZERO;

	if (!gfpflags_allow_blocking(gfp)) {
@@ -232,31 +249,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
{
	struct vm_struct *area;
	int ret;
	unsigned long pfn = 0;

	vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);

	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (!is_vmalloc_addr(cpu_addr)) {
		unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
		return __swiotlb_mmap_pfn(vma, pfn, size);
	}
	area = find_vm_area(cpu_addr);

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
	if (area && area->pages)
		return iommu_dma_mmap(area->pages, size, vma);
	else if (!is_vmalloc_addr(cpu_addr))
		pfn = page_to_pfn(virt_to_page(cpu_addr));
	else if (is_vmalloc_addr(cpu_addr))
		/*
		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
		 * hence in the vmalloc space.
		 * DMA_ATTR_FORCE_CONTIGUOUS and atomic pool allocations are
		 * always remapped, hence in the vmalloc space.
		 */
		unsigned long pfn = vmalloc_to_pfn(cpu_addr);
		pfn = vmalloc_to_pfn(cpu_addr);

	if (pfn)
		return __swiotlb_mmap_pfn(vma, pfn, size);
	}

	area = find_vm_area(cpu_addr);
	if (WARN_ON(!area || !area->pages))
	return -ENXIO;

	return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
@@ -264,27 +280,24 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
			       size_t size, unsigned long attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct page *page = NULL;
	struct vm_struct *area = find_vm_area(cpu_addr);

	if (!is_vmalloc_addr(cpu_addr)) {
		struct page *page = virt_to_page(cpu_addr);
		return __swiotlb_get_sgtable_page(sgt, page, size);
	}

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
	if (area && area->pages)
		return sg_alloc_table_from_pages(sgt, area->pages, count, 0,
					size, GFP_KERNEL);
	else if (!is_vmalloc_addr(cpu_addr))
		page = virt_to_page(cpu_addr);
	else if (is_vmalloc_addr(cpu_addr))
		/*
		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
		 * hence in the vmalloc space.
		 * DMA_ATTR_FORCE_CONTIGUOUS and atomic pool allocations
		 * are always remapped, hence in the vmalloc space.
		 */
		struct page *page = vmalloc_to_page(cpu_addr);
		return __swiotlb_get_sgtable_page(sgt, page, size);
	}
		page = vmalloc_to_page(cpu_addr);

	if (WARN_ON(!area || !area->pages))
	if (page)
		return __swiotlb_get_sgtable_page(sgt, page, size);
	return -ENXIO;

	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
					 GFP_KERNEL);
}

static void __iommu_sync_single_for_cpu(struct device *dev,
@@ -292,11 +305,12 @@ static void __iommu_sync_single_for_cpu(struct device *dev,
					enum dma_data_direction dir)
{
	phys_addr_t phys;
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (dev_is_dma_coherent(dev))
	if (!domain || iommu_is_iova_coherent(domain, dev_addr))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
	phys = iommu_iova_to_phys(domain, dev_addr);
	arch_sync_dma_for_cpu(dev, phys, size, dir);
}

@@ -305,11 +319,12 @@ static void __iommu_sync_single_for_device(struct device *dev,
					   enum dma_data_direction dir)
{
	phys_addr_t phys;
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (dev_is_dma_coherent(dev))
	if (!domain || iommu_is_iova_coherent(domain, dev_addr))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
	phys = iommu_iova_to_phys(domain, dev_addr);
	arch_sync_dma_for_device(dev, phys, size, dir);
}

@@ -318,7 +333,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	bool coherent = is_dma_coherent(dev, attrs);
	int prot = dma_info_to_prot(dir, coherent, attrs);
	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

@@ -344,9 +359,11 @@ static void __iommu_sync_sg_for_cpu(struct device *dev,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	dma_addr_t iova = sg_dma_address(sgl);
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
	int i;

	if (dev_is_dma_coherent(dev))
	if (!domain || iommu_is_iova_coherent(domain, iova))
		return;

	for_each_sg(sgl, sg, nelems, i)
@@ -358,9 +375,11 @@ static void __iommu_sync_sg_for_device(struct device *dev,
				       enum dma_data_direction dir)
{
	struct scatterlist *sg;
	dma_addr_t iova = sg_dma_address(sgl);
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
	int i;

	if (dev_is_dma_coherent(dev))
	if (!domain || iommu_is_iova_coherent(domain, iova))
		return;

	for_each_sg(sgl, sg, nelems, i)
@@ -371,13 +390,18 @@ static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				int nelems, enum dma_data_direction dir,
				unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	bool coherent = is_dma_coherent(dev, attrs);
	int ret;

	ret =  iommu_dma_map_sg(dev, sgl, nelems,
				dma_info_to_prot(dir, coherent, attrs));
	if (!ret)
		return ret;

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);

	return iommu_dma_map_sg(dev, sgl, nelems,
				dma_info_to_prot(dir, coherent, attrs));
	return ret;
}

static void __iommu_unmap_sg_attrs(struct device *dev,
@@ -414,10 +438,30 @@ static int __init __iommu_dma_init(void)
}
arch_initcall(__iommu_dma_init);

/*
 * Select and install the DMA ops for @dev based on its IOMMU domain.
 *
 * @dev:      device whose dma_ops are being configured
 * @domain:   IOMMU domain the device is attached to
 * @dma_base: base address passed to iommu_dma_init_domain()
 * @size:     size passed to iommu_dma_init_domain()
 *
 * If the domain reports DOMAIN_ATTR_FAST, the ops returned by
 * fast_smmu_get_dma_ops() are installed. Otherwise the domain is
 * initialised with iommu_dma_init_domain() and, on success,
 * iommu_dma_ops are installed.
 *
 * Returns 0 on success, or the error from iommu_dma_init_domain().
 */
static int __iommu_init_dma_resources(struct device *dev,
				      struct iommu_domain *domain, u64 dma_base,
				      u64 size)
{
	/*
	 * Start from "not fast": if the attribute query fails without
	 * writing to is_fast, the test below must not read an
	 * uninitialized value.
	 */
	int is_fast = 0;
	int ret = 0;

	iommu_domain_get_attr(domain, DOMAIN_ATTR_FAST, &is_fast);

	if (is_fast) {
		dev->dma_ops = fast_smmu_get_dma_ops();
	} else {
		ret = iommu_dma_init_domain(domain, dma_base, size, dev);
		/* Only switch the ops once the domain is fully set up. */
		if (!ret)
			dev->dma_ops = &iommu_dma_ops;
	}

	return ret;
}

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *ops)
{
	struct iommu_domain *domain;
	int s1_bypass;

	if (!ops)
		return;
@@ -431,13 +475,20 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
	if (!domain)
		goto out_err;

	if (domain->type == IOMMU_DOMAIN_DMA) {
		if (iommu_dma_init_domain(domain, dma_base, size, dev))
			goto out_err;
	iommu_domain_get_attr(domain, DOMAIN_ATTR_S1_BYPASS, &s1_bypass);
	if (s1_bypass)
		return;

		dev->dma_ops = &iommu_dma_ops;
	/* Allow iommu-debug to call arch_setup_dma_ops to reconfigure itself */
	if (domain->type != IOMMU_DOMAIN_DMA &&
	    !of_device_is_compatible(dev->of_node, "iommu-debug-test")) {
		dev_err(dev, "Invalid iommu domain type!\n");
		return;
	}

	if (__iommu_init_dma_resources(dev, domain, dma_base, size))
		goto out_err;

	return;

out_err:
+116 −0
Original line number Diff line number Diff line
@@ -63,6 +63,58 @@ config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST

	  If unsure, say N here.

config IOMMU_IO_PGTABLE_FAST
	bool "Fast ARMv7/v8 Long Descriptor Format"
	depends on (ARM || ARM64) && IOMMU_DMA
	help
          Enable support for a subset of the ARM long descriptor pagetable
	  format.  This allocator achieves fast performance by
	  pre-allocating and pre-populating page table memory up front.
	  It only supports a 32-bit virtual address space.

          This implementation is mainly optimized for use cases where the
          buffers are small (<= 64K) since it only supports 4K page sizes.

config IOMMU_IO_PGTABLE_FAST_SELFTEST
	bool "Fast IO pgtable selftests"
	depends on IOMMU_IO_PGTABLE_FAST
	help
	  Enable self-tests for "fast" page table allocator.
	  This performs a series of page-table consistency checks
	  during boot.

	  If unsure, say N here.

config IOMMU_IO_PGTABLE_FAST_PROVE_TLB
	bool "Prove correctness of TLB maintenance in the Fast DMA mapper"
	depends on IOMMU_IO_PGTABLE_FAST
	help
          Enables some debug features that help prove correctness of TLB
          maintenance routines in the Fast DMA mapper.  This option will
          slow things down considerably, so should only be used in a debug
          configuration.  This relies on the ability to set bits in an
          invalid page table entry, which is disallowed on some hardware
          due to errata.  If you're running on such a platform then this
          option can only be used with unit tests.  It will break real use
          cases.

	  If unsure, say N here.

config QCOM_IOMMU_IO_PGTABLE_QUIRKS
	bool "IO Pagetable quirks for performance"
	depends on ARM || ARM64
	depends on IOMMU_IO_PGTABLE_FAST || IOMMU_IO_PGTABLE_LPAE
	depends on ARM_SMMU
	help
	  Enables some quirks that are used when creating the IOMMU's
	  page tables for a particular domain for faster translations.
	  The quirks that are supported deal with allowing for page
	  tables to be IO-coherent, allowing for page tables to be
	  saved in the system cache, and disabling the write-allocate
	  hint when saving page tables in the system cache.

	  If unsure, say Y here.

endmenu

config IOMMU_DEBUGFS
@@ -399,6 +451,38 @@ config ARM_SMMU_V3
	  Say Y here if your system includes an IOMMU device implementing
	  the ARM SMMUv3 architecture.

config ARM_SMMU_SELFTEST
	bool "ARM SMMU self test support"
	depends on ARM_SMMU
	help
	  Enables self tests for arm smmu. Tests basic hardware
	  configurations like interrupts. Note that enabling this
	  option can marginally increase the boot time.

	  If unsure, say N here.

config IOMMU_TLBSYNC_DEBUG
	bool "TLB sync timeout debug"
	depends on ARM_SMMU
	help
	  Enables collection of the SMMU system state information right
	  after the first TLB sync timeout failure by calling BUG().
	  Note: use this only on debug builds.

	  If unsure, say N here.

config QCOM_LAZY_MAPPING
	tristate "Reference counted iommu-mapping support"
	depends on ION
	depends on IOMMU_API
	help
	  ION buffers may be shared between several software clients.
	  Reference counting the mapping may simplify coordination between
	  these clients, and decrease latency by preventing multiple
	  map/unmaps of the same region.

	  If unsure, say N here.

config S390_IOMMU
	def_bool y if S390 && PCI
	depends on S390 && PCI
@@ -454,6 +538,38 @@ config MTK_IOMMU_V1

	  if unsure, say N here.

menuconfig IOMMU_DEBUG
	bool "IOMMU Profiling and Debugging"
	help
	  This option is used to enable profiling and debugging in
	  the IOMMU framework code. IOMMU profiling and debugging
	  can be done through the debugfs nodes which this option
	  makes available.

if IOMMU_DEBUG

config IOMMU_DEBUG_TRACKING
	bool "Track key IOMMU events"
	select IOMMU_API
	help
	  Enables additional debug tracking in the IOMMU framework code.
	  Tracking information and tests can be accessed through various
	  debugfs files.

	  Say Y here if you need to debug IOMMU issues and are okay with
	  the performance penalty of the tracking.

config IOMMU_TESTS
	bool "Interactive IOMMU performance/functional tests"
	select IOMMU_API
	help
	  Enables a suite of IOMMU unit tests.  The tests are runnable
	  through debugfs.  Unlike the IOMMU_DEBUG_TRACKING option, the
	  impact of enabling this option on overall system performance
	  should be minimal.

endif # IOMMU_DEBUG

config QCOM_IOMMU
	# Note: iommu drivers cannot (yet?) be built as modules
	bool "Qualcomm IOMMU Support"
+3 −0
Original line number Diff line number Diff line
@@ -4,11 +4,14 @@ obj-$(CONFIG_IOMMU_API) += iommu-traces.o
obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o
obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
obj-$(CONFIG_QCOM_LAZY_MAPPING) += msm_dma_iommu_mapping.o
obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
obj-$(CONFIG_IOMMU_IOVA) += iova.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_FAST) += io-pgtable-fast.o dma-mapping-fast.o
obj-$(CONFIG_OF_IOMMU)	+= of_iommu.o
obj-$(CONFIG_IOMMU_DEBUG) += iommu-debug.o
obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
+28 −1
Original line number Diff line number Diff line
@@ -25,6 +25,9 @@
#define sCR0_VMID16EN			(1 << 31)
#define sCR0_BSU_SHIFT			14
#define sCR0_BSU_MASK			0x3
#define sCR0_SHCFG_SHIFT		22
#define sCR0_SHCFG_MASK			0x3
#define sCR0_SHCFG_NSH			3

/* Auxiliary Configuration register */
#define ARM_SMMU_GR0_sACR		0x10
@@ -93,6 +96,8 @@
#define ARM_SMMU_GR0_SMR(n)		(0x800 + ((n) << 2))
#define SMR_VALID			(1 << 31)
#define SMR_MASK_SHIFT			16
#define SMR_MASK_MASK			0x7FFF
#define SID_MASK			0x7FFF
#define SMR_ID_SHIFT			0

#define ARM_SMMU_GR0_S2CR(n)		(0xc00 + ((n) << 2))
@@ -101,6 +106,9 @@
#define S2CR_EXIDVALID			(1 << 10)
#define S2CR_TYPE_SHIFT			16
#define S2CR_TYPE_MASK			0x3
#define S2CR_SHCFG_SHIFT		8
#define S2CR_SHCFG_MASK			0x3
#define S2CR_SHCFG_NSH			0x3
enum arm_smmu_s2cr_type {
	S2CR_TYPE_TRANS,
	S2CR_TYPE_BYPASS,
@@ -136,6 +144,7 @@ enum arm_smmu_s2cr_privcfg {
#define CBAR_IRPTNDX_MASK		0xff

#define ARM_SMMU_GR1_CBFRSYNRA(n)	(0x400 + ((n) << 2))
#define CBFRSYNRA_SID_MASK		(0xffff)

#define ARM_SMMU_GR1_CBA2R(n)		(0x800 + ((n) << 2))
#define CBA2R_RW64_32BIT		(0 << 0)
@@ -155,20 +164,38 @@ enum arm_smmu_s2cr_privcfg {
#define ARM_SMMU_CB_S1_MAIR1		0x3c
#define ARM_SMMU_CB_PAR			0x50
#define ARM_SMMU_CB_FSR			0x58
#define ARM_SMMU_CB_FSRRESTORE		0x5c
#define ARM_SMMU_CB_FAR			0x60
#define ARM_SMMU_CB_FSYNR0		0x68
#define ARM_SMMU_CB_FSYNR1		0x6c
#define ARM_SMMU_CB_S1_TLBIVA		0x600
#define ARM_SMMU_CB_S1_TLBIASID		0x610
#define ARM_SMMU_CB_S1_TLBIALL		0x618
#define ARM_SMMU_CB_S1_TLBIVAL		0x620
#define ARM_SMMU_CB_S2_TLBIIPAS2	0x630
#define ARM_SMMU_CB_S2_TLBIIPAS2L	0x638
#define ARM_SMMU_CB_TLBSYNC		0x7f0
#define ARM_SMMU_CB_TLBSTATUS		0x7f4
#define TLBSTATUS_SACTIVE		(1 << 0)
#define ARM_SMMU_CB_ATS1PR		0x800
#define ARM_SMMU_CB_ATSR		0x8f0

#define ARM_SMMU_STATS_SYNC_INV_TBU_ACK 0x25dc
#define ARM_SMMU_TBU_PWR_STATUS         0x2204
#define ARM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR 0x2670

#define SCTLR_MEM_ATTR_SHIFT		16
#define SCTLR_SHCFG_SHIFT		22
#define SCTLR_RACFG_SHIFT		24
#define SCTLR_WACFG_SHIFT		26
#define SCTLR_SHCFG_MASK		0x3
#define SCTLR_SHCFG_NSH			0x3
#define SCTLR_RACFG_RA			0x2
#define SCTLR_WACFG_WA			0x2
#define SCTLR_MEM_ATTR_OISH_WB_CACHE	0xf
#define SCTLR_MTCFG			(1 << 20)
#define SCTLR_S1_ASIDPNE		(1 << 12)
#define SCTLR_CFCFG			(1 << 7)
#define SCTLR_HUPCF			(1 << 8)
#define SCTLR_CFIE			(1 << 6)
#define SCTLR_CFRE			(1 << 5)
#define SCTLR_E				(1 << 4)
Loading