arch/arm64/Kconfig +21 −0

```diff
@@ -907,6 +907,27 @@ config ARCH_WANT_HUGE_PMD_SHARE
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y if ARM64 && IOMMU_DMA
 
+config ARM64_DMA_IOMMU_ALIGNMENT
+	int "Maximum PAGE_SIZE order of alignment for DMA IOMMU buffers"
+	range 4 9
+	default 9
+	help
+	  The DMA mapping framework by default aligns all buffers to the
+	  smallest PAGE_SIZE order which is greater than or equal to the
+	  requested buffer size. This works well for buffers up to a few
+	  hundred kilobytes, but for larger buffers it is just a waste of
+	  address space. Drivers which have a relatively small addressing
+	  window (like 64 MiB) might run out of virtual space with just a
+	  few allocations.
+
+	  With this parameter you can specify the maximum PAGE_SIZE order
+	  for DMA IOMMU buffers. Larger buffers will be aligned only to
+	  this specified order. The order is expressed as a power of two
+	  multiplied by the PAGE_SIZE.
+
 endif
 
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	def_bool y if PGTABLE_LEVELS > 2
```
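For intuition, here is a minimal sketch of the capping behaviour the help text describes. It is not code from this patch: the helper `dma_iommu_align_order()` is hypothetical, and only `CONFIG_ARM64_DMA_IOMMU_ALIGNMENT` comes from the Kconfig entry above.

```c
#include <linux/kernel.h>	/* min_t() */
#include <linux/mm.h>		/* get_order() */

/*
 * Hypothetical sketch, not part of the patch: clamp the alignment order
 * used for an IOVA allocation. By default a buffer is aligned to the
 * smallest PAGE_SIZE order covering its size, so a 10 MiB buffer would
 * claim a 16 MiB-aligned slot; capping the order at
 * CONFIG_ARM64_DMA_IOMMU_ALIGNMENT (at most 2^9 pages) keeps a handful
 * of large buffers from exhausting a small (e.g. 64 MiB) window.
 */
static unsigned int dma_iommu_align_order(size_t size)
{
	unsigned int order = get_order(size);

	return min_t(unsigned int, order, CONFIG_ARM64_DMA_IOMMU_ALIGNMENT);
}
```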
arch/arm64/mm/dma-mapping.c +98 −47

```diff
@@ -16,16 +16,32 @@
 #include <linux/dma-direct.h>
 #include <linux/dma-noncoherent.h>
 #include <linux/dma-contiguous.h>
 #include <linux/iommu.h>
 #include <linux/vmalloc.h>
 #include <linux/swiotlb.h>
 #include <linux/pci.h>
 #include <asm/cacheflush.h>
+#include <linux/of_address.h>
+#include <linux/dma-mapping-fast.h>
+
+static bool is_dma_coherent(struct device *dev, unsigned long attrs)
+{
+	if (attrs & DMA_ATTR_FORCE_COHERENT)
+		return true;
+	else if (attrs & DMA_ATTR_FORCE_NON_COHERENT)
+		return false;
+	else if (dev_is_dma_coherent(dev))
+		return true;
+	else
+		return false;
+}
 
 pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
 		unsigned long attrs)
 {
-	if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE))
+	if (!is_dma_coherent(dev, attrs) || (attrs & DMA_ATTR_WRITE_COMBINE))
 		return pgprot_writecombine(prot);
 	return prot;
 }

@@ -103,7 +119,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 		dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
 {
-	bool coherent = dev_is_dma_coherent(dev);
+	bool coherent = is_dma_coherent(dev, attrs);
 	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
 	size_t iosize = size;
 	void *addr;

@@ -117,6 +133,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 	 * Some drivers rely on this, and we probably don't want the
 	 * possibility of stale kernel data being read by devices anyway.
 	 */
-	gfp |= __GFP_ZERO;
+	if (!(attrs & DMA_ATTR_SKIP_ZEROING))
+		gfp |= __GFP_ZERO;
 
 	if (!gfpflags_allow_blocking(gfp)) {

@@ -232,31 +249,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 {
 	struct vm_struct *area;
 	int ret;
+	unsigned long pfn = 0;
 
 	vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
 
 	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
-	if (!is_vmalloc_addr(cpu_addr)) {
-		unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
-		return __swiotlb_mmap_pfn(vma, pfn, size);
-	}
-
-	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+	area = find_vm_area(cpu_addr);
+	if (area && area->pages)
+		return iommu_dma_mmap(area->pages, size, vma);
+	else if (!is_vmalloc_addr(cpu_addr))
+		pfn = page_to_pfn(virt_to_page(cpu_addr));
+	else if (is_vmalloc_addr(cpu_addr))
 		/*
-		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
-		 * hence in the vmalloc space.
+		 * DMA_ATTR_FORCE_CONTIGUOUS and atomic pool allocations are
+		 * always remapped, hence in the vmalloc space.
 		 */
-		unsigned long pfn = vmalloc_to_pfn(cpu_addr);
+		pfn = vmalloc_to_pfn(cpu_addr);
+
+	if (pfn)
 		return __swiotlb_mmap_pfn(vma, pfn, size);
-	}
 
-	area = find_vm_area(cpu_addr);
-	if (WARN_ON(!area || !area->pages))
-		return -ENXIO;
-
-	return iommu_dma_mmap(area->pages, size, vma);
+	return -ENXIO;
 }

@@ -264,27 +280,24 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
 		size_t size, unsigned long attrs)
 {
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	struct page *page = NULL;
 	struct vm_struct *area = find_vm_area(cpu_addr);
 
-	if (!is_vmalloc_addr(cpu_addr)) {
-		struct page *page = virt_to_page(cpu_addr);
-		return __swiotlb_get_sgtable_page(sgt, page, size);
-	}
-
-	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+	if (area && area->pages)
+		return sg_alloc_table_from_pages(sgt, area->pages, count, 0,
+					size, GFP_KERNEL);
+	else if (!is_vmalloc_addr(cpu_addr))
+		page = virt_to_page(cpu_addr);
+	else if (is_vmalloc_addr(cpu_addr))
 		/*
-		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
-		 * hence in the vmalloc space.
+		 * DMA_ATTR_FORCE_CONTIGUOUS and atomic pool allocations
+		 * are always remapped, hence in the vmalloc space.
 		 */
-		struct page *page = vmalloc_to_page(cpu_addr);
-		return __swiotlb_get_sgtable_page(sgt, page, size);
-	}
+		page = vmalloc_to_page(cpu_addr);
 
-	if (WARN_ON(!area || !area->pages))
-		return -ENXIO;
+	if (page)
+		return __swiotlb_get_sgtable_page(sgt, page, size);
 
-	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
-					 GFP_KERNEL);
+	return -ENXIO;
 }

@@ -292,11 +305,12 @@ static void __iommu_sync_single_for_cpu(struct device *dev,
 		enum dma_data_direction dir)
 {
 	phys_addr_t phys;
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 
-	if (dev_is_dma_coherent(dev))
+	if (!domain || iommu_is_iova_coherent(domain, dev_addr))
 		return;
 
-	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
+	phys = iommu_iova_to_phys(domain, dev_addr);
 	arch_sync_dma_for_cpu(dev, phys, size, dir);
 }

@@ -305,11 +319,12 @@ static void __iommu_sync_single_for_device(struct device *dev,
 		enum dma_data_direction dir)
 {
 	phys_addr_t phys;
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 
-	if (dev_is_dma_coherent(dev))
+	if (!domain || iommu_is_iova_coherent(domain, dev_addr))
 		return;
 
-	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
+	phys = iommu_iova_to_phys(domain, dev_addr);
 	arch_sync_dma_for_device(dev, phys, size, dir);
 }

@@ -318,7 +333,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	bool coherent = dev_is_dma_coherent(dev);
+	bool coherent = is_dma_coherent(dev, attrs);
 	int prot = dma_info_to_prot(dir, coherent, attrs);
 	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

@@ -344,9 +359,11 @@ static void __iommu_sync_sg_for_cpu(struct device *dev,
 		enum dma_data_direction dir)
 {
 	struct scatterlist *sg;
+	dma_addr_t iova = sg_dma_address(sgl);
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 	int i;
 
-	if (dev_is_dma_coherent(dev))
+	if (!domain || iommu_is_iova_coherent(domain, iova))
 		return;
 
 	for_each_sg(sgl, sg, nelems, i)

@@ -358,9 +375,11 @@ static void __iommu_sync_sg_for_device(struct device *dev,
 		enum dma_data_direction dir)
 {
 	struct scatterlist *sg;
+	dma_addr_t iova = sg_dma_address(sgl);
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 	int i;
 
-	if (dev_is_dma_coherent(dev))
+	if (!domain || iommu_is_iova_coherent(domain, iova))
 		return;
 
 	for_each_sg(sgl, sg, nelems, i)

@@ -371,13 +390,18 @@ static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
 		int nelems, enum dma_data_direction dir,
 		unsigned long attrs)
 {
-	bool coherent = dev_is_dma_coherent(dev);
+	bool coherent = is_dma_coherent(dev, attrs);
+	int ret;
+
+	ret = iommu_dma_map_sg(dev, sgl, nelems,
+			dma_info_to_prot(dir, coherent, attrs));
+	if (!ret)
+		return ret;
 
 	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
 		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
 
-	return iommu_dma_map_sg(dev, sgl, nelems,
-			dma_info_to_prot(dir, coherent, attrs));
+	return ret;
 }

@@ -414,10 +438,30 @@ static int __init __iommu_dma_init(void)
 }
 arch_initcall(__iommu_dma_init);
 
+static int __iommu_init_dma_resources(struct device *dev,
+				      struct iommu_domain *domain,
+				      u64 dma_base, u64 size)
+{
+	int is_fast, ret = 0;
+
+	iommu_domain_get_attr(domain, DOMAIN_ATTR_FAST, &is_fast);
+
+	if (is_fast) {
+		dev->dma_ops = fast_smmu_get_dma_ops();
+	} else {
+		ret = iommu_dma_init_domain(domain, dma_base, size, dev);
+		if (!ret)
+			dev->dma_ops = &iommu_dma_ops;
+	}
+
+	return ret;
+}
+
 static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 				  const struct iommu_ops *ops)
 {
 	struct iommu_domain *domain;
+	int s1_bypass;
 
 	if (!ops)
 		return;

@@ -431,13 +475,20 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	if (!domain)
 		goto out_err;
 
-	if (domain->type == IOMMU_DOMAIN_DMA) {
-		if (iommu_dma_init_domain(domain, dma_base, size, dev))
-			goto out_err;
-
-		dev->dma_ops = &iommu_dma_ops;
+	iommu_domain_get_attr(domain, DOMAIN_ATTR_S1_BYPASS, &s1_bypass);
+	if (s1_bypass)
+		return;
+
+	/* Allow iommu-debug to call arch_setup_dma_ops to reconfigure itself */
+	if (domain->type != IOMMU_DOMAIN_DMA &&
+	    !of_device_is_compatible(dev->of_node, "iommu-debug-test")) {
+		dev_err(dev, "Invalid iommu domain type!\n");
+		return;
 	}
 
+	if (__iommu_init_dma_resources(dev, domain, dma_base, size))
+		goto out_err;
+
 	return;
 
 out_err:
```
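The net effect of `is_dma_coherent()` above is that a caller's attributes override the device's declared coherency. A hedged usage sketch follows; the wrapper function is illustrative and not part of the patch, while `DMA_ATTR_FORCE_COHERENT` is the downstream attribute the patch handles:

```c
#include <linux/dma-mapping.h>

/*
 * Illustrative only: allocate a buffer that the mapping code above will
 * treat as coherent (no cache maintenance, cacheable userspace mapping)
 * even if dev itself is not DMA-coherent. DMA_ATTR_FORCE_NON_COHERENT
 * would force the opposite decision.
 */
static void *alloc_force_coherent(struct device *dev, size_t size,
				  dma_addr_t *handle)
{
	return dma_alloc_attrs(dev, size, handle, GFP_KERNEL,
			       DMA_ATTR_FORCE_COHERENT);
}
```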
drivers/iommu/Kconfig +116 −0

```diff
@@ -63,6 +63,58 @@ config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
 
 	  If unsure, say N here.
 
+config IOMMU_IO_PGTABLE_FAST
+	bool "Fast ARMv7/v8 Long Descriptor Format"
+	depends on (ARM || ARM64) && IOMMU_DMA
+	help
+	  Enable support for a subset of the ARM long descriptor pagetable
+	  format. This allocator achieves fast performance by
+	  pre-allocating and pre-populating page table memory up front,
+	  and only supports a 32-bit virtual address space.
+
+	  This implementation is mainly optimized for use cases where the
+	  buffers are small (<= 64K) since it only supports 4K page sizes.
+
+config IOMMU_IO_PGTABLE_FAST_SELFTEST
+	bool "Fast IO pgtable selftests"
+	depends on IOMMU_IO_PGTABLE_FAST
+	help
+	  Enable self-tests for the "fast" page table allocator. This
+	  performs a series of page-table consistency checks during boot.
+
+	  If unsure, say N here.
+
+config IOMMU_IO_PGTABLE_FAST_PROVE_TLB
+	bool "Prove correctness of TLB maintenance in the Fast DMA mapper"
+	depends on IOMMU_IO_PGTABLE_FAST
+	help
+	  Enables some debug features that help prove correctness of TLB
+	  maintenance routines in the Fast DMA mapper. This option will
+	  slow things down considerably, so it should only be used in a
+	  debug configuration. This relies on the ability to set bits in
+	  an invalid page table entry, which is disallowed on some
+	  hardware due to errata. If you're running on such a platform
+	  then this option can only be used with unit tests. It will
+	  break real use cases.
+
+	  If unsure, say N here.
+
+config QCOM_IOMMU_IO_PGTABLE_QUIRKS
+	bool "IO Pagetable quirks for performance"
+	depends on ARM || ARM64
+	depends on IOMMU_IO_PGTABLE_FAST || IOMMU_IO_PGTABLE_LPAE
+	depends on ARM_SMMU
+	help
+	  Enables some quirks that are used when creating the IOMMU's page
+	  tables for a particular domain, for faster translations. The
+	  supported quirks allow page tables to be IO-coherent, allow page
+	  tables to be saved in the system cache, and disable the
+	  write-allocate hint when saving page tables in the system cache.
+
+	  If unsure, say Y here.
+
 endmenu
 
 config IOMMU_DEBUGFS

@@ -399,6 +451,38 @@ config ARM_SMMU_V3
 	  Say Y here if your system includes an IOMMU device implementing
 	  the ARM SMMUv3 architecture.
 
+config ARM_SMMU_SELFTEST
+	bool "ARM SMMU self test support"
+	depends on ARM_SMMU
+	help
+	  Enables self-tests for the ARM SMMU. These test basic hardware
+	  configurations like interrupts. Note that enabling this option
+	  can marginally increase the boot time.
+
+	  If unsure, say N here.
+
+config IOMMU_TLBSYNC_DEBUG
+	bool "TLB sync timeout debug"
+	depends on ARM_SMMU
+	help
+	  Enables collecting the SMMU system state information right
+	  after the first TLB sync timeout failure, by calling BUG().
+	  Note: use this only on debug builds.
+
+	  If unsure, say N here.
+
+config QCOM_LAZY_MAPPING
+	tristate "Reference counted iommu-mapping support"
+	depends on ION
+	depends on IOMMU_API
+	help
+	  ION buffers may be shared between several software clients.
+	  Reference counting the mapping may simplify coordination between
+	  these clients, and decrease latency by preventing multiple
+	  map/unmaps of the same region.
+
+	  If unsure, say N here.
+
 config S390_IOMMU
 	def_bool y if S390 && PCI
 	depends on S390 && PCI

@@ -454,6 +538,38 @@ config MTK_IOMMU_V1
 
 	  if unsure, say N here.
 
+menuconfig IOMMU_DEBUG
+	bool "IOMMU Profiling and Debugging"
+	help
+	  This option is used to enable profiling and debugging in the
+	  IOMMU framework code. IOMMU profiling and debugging can be done
+	  through the debugfs nodes which this option makes available.
+
+if IOMMU_DEBUG
+
+config IOMMU_DEBUG_TRACKING
+	bool "Track key IOMMU events"
+	select IOMMU_API
+	help
+	  Enables additional debug tracking in the IOMMU framework code.
+	  Tracking information and tests can be accessed through various
+	  debugfs files.
+
+	  Say Y here if you need to debug IOMMU issues and are okay with
+	  the performance penalty of the tracking.
+
+config IOMMU_TESTS
+	bool "Interactive IOMMU performance/functional tests"
+	select IOMMU_API
+	help
+	  Enables a suite of IOMMU unit tests. The tests are runnable
+	  through debugfs. Unlike the IOMMU_DEBUG_TRACKING option, the
+	  impact of enabling this option on overall system performance
+	  should be minimal.
+
+endif # IOMMU_DEBUG
+
 config QCOM_IOMMU
 	# Note: iommu drivers cannot (yet?) be built as modules
 	bool "Qualcomm IOMMU Support"
```
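The QCOM_LAZY_MAPPING help text describes reference-counted mappings. As a rough sketch of that idea (all names here are illustrative, not the actual msm_dma_iommu_mapping API):

```c
#include <linux/kref.h>
#include <linux/slab.h>

/* Illustrative reference-counted mapping record shared by all clients. */
struct lazy_map {
	struct kref ref;
	dma_addr_t iova;
	size_t size;
};

static void lazy_map_release(struct kref *kref)
{
	struct lazy_map *map = container_of(kref, struct lazy_map, ref);

	/* A real implementation would unmap map->iova from the IOMMU here. */
	kfree(map);
}

/* Each additional client takes a reference instead of remapping. */
static void lazy_map_get(struct lazy_map *map)
{
	kref_get(&map->ref);
}

/* The hardware unmap happens only when the last client drops its ref. */
static void lazy_map_put(struct lazy_map *map)
{
	kref_put(&map->ref, lazy_map_release);
}
```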
drivers/iommu/Makefile +3 −0

```diff
@@ -4,11 +4,14 @@ obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
 obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o
 obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
+obj-$(CONFIG_QCOM_LAZY_MAPPING) += msm_dma_iommu_mapping.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
 obj-$(CONFIG_IOMMU_IOVA) += iova.o
+obj-$(CONFIG_IOMMU_IO_PGTABLE_FAST) += io-pgtable-fast.o dma-mapping-fast.o
 obj-$(CONFIG_OF_IOMMU) += of_iommu.o
+obj-$(CONFIG_IOMMU_DEBUG) += iommu-debug.o
 obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
 obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
```
drivers/iommu/arm-smmu-regs.h +28 −1

```diff
@@ -25,6 +25,9 @@
 #define sCR0_VMID16EN			(1 << 31)
 #define sCR0_BSU_SHIFT			14
 #define sCR0_BSU_MASK			0x3
+#define sCR0_SHCFG_SHIFT		22
+#define sCR0_SHCFG_MASK			0x3
+#define sCR0_SHCFG_NSH			3
 
 /* Auxiliary Configuration register */
 #define ARM_SMMU_GR0_sACR		0x10

@@ -93,6 +96,8 @@
 #define ARM_SMMU_GR0_SMR(n)		(0x800 + ((n) << 2))
 #define SMR_VALID			(1 << 31)
 #define SMR_MASK_SHIFT			16
+#define SMR_MASK_MASK			0x7FFF
+#define SID_MASK			0x7FFF
 #define SMR_ID_SHIFT			0
 
 #define ARM_SMMU_GR0_S2CR(n)		(0xc00 + ((n) << 2))

@@ -101,6 +106,9 @@
 #define S2CR_EXIDVALID			(1 << 10)
 #define S2CR_TYPE_SHIFT			16
 #define S2CR_TYPE_MASK			0x3
+#define S2CR_SHCFG_SHIFT		8
+#define S2CR_SHCFG_MASK			0x3
+#define S2CR_SHCFG_NSH			0x3
 enum arm_smmu_s2cr_type {
 	S2CR_TYPE_TRANS,
 	S2CR_TYPE_BYPASS,

@@ -136,6 +144,7 @@ enum arm_smmu_s2cr_privcfg {
 #define CBAR_IRPTNDX_MASK		0xff
 
 #define ARM_SMMU_GR1_CBFRSYNRA(n)	(0x400 + ((n) << 2))
+#define CBFRSYNRA_SID_MASK		(0xffff)
 
 #define ARM_SMMU_GR1_CBA2R(n)		(0x800 + ((n) << 2))
 #define CBA2R_RW64_32BIT		(0 << 0)

@@ -155,20 +164,38 @@ enum arm_smmu_s2cr_privcfg {
 #define ARM_SMMU_CB_S1_MAIR1		0x3c
 #define ARM_SMMU_CB_PAR			0x50
 #define ARM_SMMU_CB_FSR			0x58
+#define ARM_SMMU_CB_FSRRESTORE		0x5c
 #define ARM_SMMU_CB_FAR			0x60
 #define ARM_SMMU_CB_FSYNR0		0x68
+#define ARM_SMMU_CB_FSYNR1		0x6c
 #define ARM_SMMU_CB_S1_TLBIVA		0x600
 #define ARM_SMMU_CB_S1_TLBIASID		0x610
+#define ARM_SMMU_CB_S1_TLBIALL		0x618
 #define ARM_SMMU_CB_S1_TLBIVAL		0x620
 #define ARM_SMMU_CB_S2_TLBIIPAS2	0x630
 #define ARM_SMMU_CB_S2_TLBIIPAS2L	0x638
 #define ARM_SMMU_CB_TLBSYNC		0x7f0
 #define ARM_SMMU_CB_TLBSTATUS		0x7f4
+#define TLBSTATUS_SACTIVE		(1 << 0)
 #define ARM_SMMU_CB_ATS1PR		0x800
 #define ARM_SMMU_CB_ATSR		0x8f0
+#define ARM_SMMU_STATS_SYNC_INV_TBU_ACK	0x25dc
+#define ARM_SMMU_TBU_PWR_STATUS		0x2204
+#define ARM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR	0x2670
 
+#define SCTLR_MEM_ATTR_SHIFT		16
+#define SCTLR_SHCFG_SHIFT		22
+#define SCTLR_RACFG_SHIFT		24
+#define SCTLR_WACFG_SHIFT		26
+#define SCTLR_SHCFG_MASK		0x3
+#define SCTLR_SHCFG_NSH			0x3
+#define SCTLR_RACFG_RA			0x2
+#define SCTLR_WACFG_WA			0x2
+#define SCTLR_MEM_ATTR_OISH_WB_CACHE	0xf
+#define SCTLR_MTCFG			(1 << 20)
 #define SCTLR_S1_ASIDPNE		(1 << 12)
 #define SCTLR_CFCFG			(1 << 7)
+#define SCTLR_HUPCF			(1 << 8)
 #define SCTLR_CFIE			(1 << 6)
 #define SCTLR_CFRE			(1 << 5)
 #define SCTLR_E				(1 << 4)
```
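These additions follow the SHIFT/MASK/value pattern used throughout the header. A hedged sketch of how such a field is typically programmed; the helper and its use of the non-shareable SHCFG value are illustrative (the patch only adds the definitions), and `gr0_base` is assumed to point at the SMMU's global register space:

```c
#include <linux/io.h>

/*
 * Illustrative only: set the shareability (SHCFG) field of stream-to-
 * context register S2CR(idx) to non-shareable, using the definitions
 * added above. Clear the field with the mask, then OR in the value.
 */
static void s2cr_set_shcfg_nsh(void __iomem *gr0_base, int idx)
{
	u32 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_S2CR(idx));

	reg &= ~(S2CR_SHCFG_MASK << S2CR_SHCFG_SHIFT);
	reg |= S2CR_SHCFG_NSH << S2CR_SHCFG_SHIFT;
	writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(idx));
}
```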