Loading Documentation/DMA-attributes.txt +10 −0 Original line number Diff line number Diff line Loading @@ -182,6 +182,11 @@ DMA_ATTR_FORCE_COHERENT When passed to a DMA map call the DMA_ATTR_FORCE_COHERENT DMA attribute can be used to force a buffer to be mapped as IO coherent. When the DMA_ATTR_FORCE_COHERENT attribute is set during a map call, ensure that it is also set for the matching unmap call so that the correct cache maintenance is carried out. This DMA attribute is currently only supported for arm64 stage 1 IOMMU mappings. Loading @@ -193,5 +198,10 @@ attribute can be used to force a buffer to not be mapped as IO coherent. The DMA_ATTR_FORCE_NON_COHERENT DMA attribute overrides the buffer IO coherency configuration set by making the device IO coherent. When the DMA_ATTR_FORCE_NON_COHERENT attribute is set during a map call, ensure that it is also set for the matching unmap call so that the correct cache maintenance is carried out. This DMA attribute is currently only supported for arm64 stage 1 IOMMU mappings. 
arch/arm64/mm/dma-mapping.c +1 −3 Original line number Diff line number Diff line Loading @@ -1800,10 +1800,8 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, mapping->domain, iova)); int offset = handle & ~PAGE_MASK; int len = PAGE_ALIGN(size + offset); bool iova_coherent = iommu_is_iova_coherent(mapping->domain, handle); if (!(iova_coherent || if (!(is_dma_coherent(dev, attrs) || (attrs & DMA_ATTR_SKIP_CPU_SYNC))) __dma_page_dev_to_cpu(page, offset, size, dir); Loading drivers/iommu/dma-mapping-fast.c +43 −6 Original line number Diff line number Diff line Loading @@ -25,6 +25,13 @@ #define FAST_PAGE_SIZE (1UL << FAST_PAGE_SHIFT) #define FAST_PAGE_MASK (~(PAGE_SIZE - 1)) #define FAST_PTE_ADDR_MASK ((av8l_fast_iopte)0xfffffffff000) #define FAST_MAIR_ATTR_IDX_CACHE 1 #define FAST_PTE_ATTRINDX_SHIFT 2 #define FAST_PTE_ATTRINDX_MASK 0x7 #define FAST_PTE_SH_SHIFT 8 #define FAST_PTE_SH_MASK (((av8l_fast_iopte)0x3) << FAST_PTE_SH_SHIFT) #define FAST_PTE_SH_OS (((av8l_fast_iopte)2) << FAST_PTE_SH_SHIFT) #define FAST_PTE_SH_IS (((av8l_fast_iopte)3) << FAST_PTE_SH_SHIFT) static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, bool coherent) Loading Loading @@ -56,6 +63,36 @@ static void fast_dmac_clean_range(struct dma_fast_smmu_mapping *mapping, dmac_clean_range(start, end); } static bool __fast_is_pte_coherent(av8l_fast_iopte *ptep) { int attr_idx = (*ptep & (FAST_PTE_ATTRINDX_MASK << FAST_PTE_ATTRINDX_SHIFT)) >> FAST_PTE_ATTRINDX_SHIFT; if ((attr_idx == FAST_MAIR_ATTR_IDX_CACHE) && (((*ptep & FAST_PTE_SH_MASK) == FAST_PTE_SH_IS) || (*ptep & FAST_PTE_SH_MASK) == FAST_PTE_SH_OS)) return true; return false; } static bool is_dma_coherent(struct device *dev, unsigned long attrs) { bool is_coherent; if (attrs & DMA_ATTR_FORCE_COHERENT) is_coherent = true; else if (attrs & DMA_ATTR_FORCE_NON_COHERENT) is_coherent = false; else if (is_device_dma_coherent(dev)) is_coherent = true; else is_coherent = false; return is_coherent; } /* * Checks 
if the allocated range (ending at @end) covered the upcoming * stale bit. We don't need to know exactly where the range starts since Loading Loading @@ -313,7 +350,7 @@ static dma_addr_t fast_smmu_map_page(struct device *dev, struct page *page, int nptes = len >> FAST_PAGE_SHIFT; bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC); int prot = __fast_dma_direction_to_prot(dir); bool is_coherent = is_device_dma_coherent(dev); bool is_coherent = is_dma_coherent(dev, attrs); prot = __get_iommu_pgprot(attrs, prot, is_coherent); Loading Loading @@ -357,7 +394,7 @@ static void fast_smmu_unmap_page(struct device *dev, dma_addr_t iova, int nptes = len >> FAST_PAGE_SHIFT; struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK)); bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC); bool is_coherent = is_device_dma_coherent(dev); bool is_coherent = is_dma_coherent(dev, attrs); if (!skip_sync && !is_coherent) __fast_dma_page_dev_to_cpu(page, offset, size, dir); Loading @@ -377,7 +414,7 @@ static void fast_smmu_sync_single_for_cpu(struct device *dev, unsigned long offset = iova & ~FAST_PAGE_MASK; struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK)); if (!is_device_dma_coherent(dev)) if (!__fast_is_pte_coherent(pmd)) __fast_dma_page_dev_to_cpu(page, offset, size, dir); } Loading @@ -389,7 +426,7 @@ static void fast_smmu_sync_single_for_device(struct device *dev, unsigned long offset = iova & ~FAST_PAGE_MASK; struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK)); if (!is_device_dma_coherent(dev)) if (!__fast_is_pte_coherent(pmd)) __fast_dma_page_cpu_to_dev(page, offset, size, dir); } Loading Loading @@ -469,7 +506,7 @@ static void *fast_smmu_alloc(struct device *dev, size_t size, struct sg_mapping_iter miter; unsigned int count = ALIGN(size, SZ_4K) >> PAGE_SHIFT; int prot = IOMMU_READ | IOMMU_WRITE; /* TODO: extract from attrs */ bool is_coherent = is_device_dma_coherent(dev); bool is_coherent = is_dma_coherent(dev, attrs); pgprot_t remap_prot = 
__get_dma_pgprot(attrs, PAGE_KERNEL, is_coherent); struct page **pages; Loading Loading @@ -591,7 +628,7 @@ static int fast_smmu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, unsigned long uaddr = vma->vm_start; struct page **pages; int i, nr_pages, ret = 0; bool coherent = is_device_dma_coherent(dev); bool coherent = is_dma_coherent(dev, attrs); vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, coherent); Loading drivers/iommu/io-pgtable-arm.c +4 −2 Original line number Diff line number Diff line Loading @@ -89,6 +89,7 @@ #define ARM_LPAE_PTE_TYPE_TABLE 3 #define ARM_LPAE_PTE_TYPE_PAGE 3 #define ARM_LPAE_PTE_SH_MASK (((arm_lpae_iopte)0x3) << 8) #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) Loading Loading @@ -894,8 +895,9 @@ static bool __arm_lpae_is_iova_coherent(struct arm_lpae_io_pgtable *data, ARM_LPAE_PTE_ATTRINDX_SHIFT)) >> ARM_LPAE_PTE_ATTRINDX_SHIFT; if ((attr_idx == ARM_LPAE_MAIR_ATTR_IDX_CACHE) && ((*ptep & ARM_LPAE_PTE_SH_IS) || (*ptep & ARM_LPAE_PTE_SH_OS))) (((*ptep & ARM_LPAE_PTE_SH_MASK) == ARM_LPAE_PTE_SH_IS) || (*ptep & ARM_LPAE_PTE_SH_MASK) == ARM_LPAE_PTE_SH_OS)) return true; } else { if (*ptep & ARM_LPAE_PTE_MEMATTR_OIWB) Loading drivers/iommu/io-pgtable-fast.c +5 −0 Original line number Diff line number Diff line Loading @@ -395,11 +395,16 @@ av8l_fast_prepopulate_pgtables(struct av8l_fast_io_pgtable *data, for (i = 0; i < 4; ++i) { for (j = 0; j < 512; ++j) { av8l_fast_iopte pte, *pudp; void *addr; page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) goto err_free_pages; pages[pg++] = page; addr = page_address(page); dmac_clean_range(addr, addr + SZ_4K); pte = page_to_phys(page) | AV8L_FAST_PTE_TYPE_TABLE; pudp = data->puds[i] + j; *pudp = pte; Loading Loading
Documentation/DMA-attributes.txt +10 −0 Original line number Diff line number Diff line Loading @@ -182,6 +182,11 @@ DMA_ATTR_FORCE_COHERENT When passed to a DMA map call the DMA_ATTR_FORCE_COHERENT DMA attribute can be used to force a buffer to be mapped as IO coherent. When the DMA_ATTR_FORCE_COHERENT attribute is set during a map call, ensure that it is also set for the matching unmap call so that the correct cache maintenance is carried out. This DMA attribute is currently only supported for arm64 stage 1 IOMMU mappings. Loading @@ -193,5 +198,10 @@ attribute can be used to force a buffer to not be mapped as IO coherent. The DMA_ATTR_FORCE_NON_COHERENT DMA attribute overrides the buffer IO coherency configuration set by making the device IO coherent. When the DMA_ATTR_FORCE_NON_COHERENT attribute is set during a map call, ensure that it is also set for the matching unmap call so that the correct cache maintenance is carried out. This DMA attribute is currently only supported for arm64 stage 1 IOMMU mappings.
arch/arm64/mm/dma-mapping.c +1 −3 Original line number Diff line number Diff line Loading @@ -1800,10 +1800,8 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, mapping->domain, iova)); int offset = handle & ~PAGE_MASK; int len = PAGE_ALIGN(size + offset); bool iova_coherent = iommu_is_iova_coherent(mapping->domain, handle); if (!(iova_coherent || if (!(is_dma_coherent(dev, attrs) || (attrs & DMA_ATTR_SKIP_CPU_SYNC))) __dma_page_dev_to_cpu(page, offset, size, dir); Loading
drivers/iommu/dma-mapping-fast.c +43 −6 Original line number Diff line number Diff line Loading @@ -25,6 +25,13 @@ #define FAST_PAGE_SIZE (1UL << FAST_PAGE_SHIFT) #define FAST_PAGE_MASK (~(PAGE_SIZE - 1)) #define FAST_PTE_ADDR_MASK ((av8l_fast_iopte)0xfffffffff000) #define FAST_MAIR_ATTR_IDX_CACHE 1 #define FAST_PTE_ATTRINDX_SHIFT 2 #define FAST_PTE_ATTRINDX_MASK 0x7 #define FAST_PTE_SH_SHIFT 8 #define FAST_PTE_SH_MASK (((av8l_fast_iopte)0x3) << FAST_PTE_SH_SHIFT) #define FAST_PTE_SH_OS (((av8l_fast_iopte)2) << FAST_PTE_SH_SHIFT) #define FAST_PTE_SH_IS (((av8l_fast_iopte)3) << FAST_PTE_SH_SHIFT) static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, bool coherent) Loading Loading @@ -56,6 +63,36 @@ static void fast_dmac_clean_range(struct dma_fast_smmu_mapping *mapping, dmac_clean_range(start, end); } static bool __fast_is_pte_coherent(av8l_fast_iopte *ptep) { int attr_idx = (*ptep & (FAST_PTE_ATTRINDX_MASK << FAST_PTE_ATTRINDX_SHIFT)) >> FAST_PTE_ATTRINDX_SHIFT; if ((attr_idx == FAST_MAIR_ATTR_IDX_CACHE) && (((*ptep & FAST_PTE_SH_MASK) == FAST_PTE_SH_IS) || (*ptep & FAST_PTE_SH_MASK) == FAST_PTE_SH_OS)) return true; return false; } static bool is_dma_coherent(struct device *dev, unsigned long attrs) { bool is_coherent; if (attrs & DMA_ATTR_FORCE_COHERENT) is_coherent = true; else if (attrs & DMA_ATTR_FORCE_NON_COHERENT) is_coherent = false; else if (is_device_dma_coherent(dev)) is_coherent = true; else is_coherent = false; return is_coherent; } /* * Checks if the allocated range (ending at @end) covered the upcoming * stale bit. 
We don't need to know exactly where the range starts since Loading Loading @@ -313,7 +350,7 @@ static dma_addr_t fast_smmu_map_page(struct device *dev, struct page *page, int nptes = len >> FAST_PAGE_SHIFT; bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC); int prot = __fast_dma_direction_to_prot(dir); bool is_coherent = is_device_dma_coherent(dev); bool is_coherent = is_dma_coherent(dev, attrs); prot = __get_iommu_pgprot(attrs, prot, is_coherent); Loading Loading @@ -357,7 +394,7 @@ static void fast_smmu_unmap_page(struct device *dev, dma_addr_t iova, int nptes = len >> FAST_PAGE_SHIFT; struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK)); bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC); bool is_coherent = is_device_dma_coherent(dev); bool is_coherent = is_dma_coherent(dev, attrs); if (!skip_sync && !is_coherent) __fast_dma_page_dev_to_cpu(page, offset, size, dir); Loading @@ -377,7 +414,7 @@ static void fast_smmu_sync_single_for_cpu(struct device *dev, unsigned long offset = iova & ~FAST_PAGE_MASK; struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK)); if (!is_device_dma_coherent(dev)) if (!__fast_is_pte_coherent(pmd)) __fast_dma_page_dev_to_cpu(page, offset, size, dir); } Loading @@ -389,7 +426,7 @@ static void fast_smmu_sync_single_for_device(struct device *dev, unsigned long offset = iova & ~FAST_PAGE_MASK; struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK)); if (!is_device_dma_coherent(dev)) if (!__fast_is_pte_coherent(pmd)) __fast_dma_page_cpu_to_dev(page, offset, size, dir); } Loading Loading @@ -469,7 +506,7 @@ static void *fast_smmu_alloc(struct device *dev, size_t size, struct sg_mapping_iter miter; unsigned int count = ALIGN(size, SZ_4K) >> PAGE_SHIFT; int prot = IOMMU_READ | IOMMU_WRITE; /* TODO: extract from attrs */ bool is_coherent = is_device_dma_coherent(dev); bool is_coherent = is_dma_coherent(dev, attrs); pgprot_t remap_prot = __get_dma_pgprot(attrs, PAGE_KERNEL, is_coherent); struct page **pages; Loading Loading @@ 
-591,7 +628,7 @@ static int fast_smmu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, unsigned long uaddr = vma->vm_start; struct page **pages; int i, nr_pages, ret = 0; bool coherent = is_device_dma_coherent(dev); bool coherent = is_dma_coherent(dev, attrs); vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, coherent); Loading
drivers/iommu/io-pgtable-arm.c +4 −2 Original line number Diff line number Diff line Loading @@ -89,6 +89,7 @@ #define ARM_LPAE_PTE_TYPE_TABLE 3 #define ARM_LPAE_PTE_TYPE_PAGE 3 #define ARM_LPAE_PTE_SH_MASK (((arm_lpae_iopte)0x3) << 8) #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) Loading Loading @@ -894,8 +895,9 @@ static bool __arm_lpae_is_iova_coherent(struct arm_lpae_io_pgtable *data, ARM_LPAE_PTE_ATTRINDX_SHIFT)) >> ARM_LPAE_PTE_ATTRINDX_SHIFT; if ((attr_idx == ARM_LPAE_MAIR_ATTR_IDX_CACHE) && ((*ptep & ARM_LPAE_PTE_SH_IS) || (*ptep & ARM_LPAE_PTE_SH_OS))) (((*ptep & ARM_LPAE_PTE_SH_MASK) == ARM_LPAE_PTE_SH_IS) || (*ptep & ARM_LPAE_PTE_SH_MASK) == ARM_LPAE_PTE_SH_OS)) return true; } else { if (*ptep & ARM_LPAE_PTE_MEMATTR_OIWB) Loading
drivers/iommu/io-pgtable-fast.c +5 −0 Original line number Diff line number Diff line Loading @@ -395,11 +395,16 @@ av8l_fast_prepopulate_pgtables(struct av8l_fast_io_pgtable *data, for (i = 0; i < 4; ++i) { for (j = 0; j < 512; ++j) { av8l_fast_iopte pte, *pudp; void *addr; page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) goto err_free_pages; pages[pg++] = page; addr = page_address(page); dmac_clean_range(addr, addr + SZ_4K); pte = page_to_phys(page) | AV8L_FAST_PTE_TYPE_TABLE; pudp = data->puds[i] + j; *pudp = pte; Loading