drivers/iommu/arm-smmu.c +152 −44

@@ -1740,71 +1740,169 @@ static void arm_smmu_free_asid(struct iommu_domain *domain)
 	mutex_unlock(&smmu->idr_mutex);
 }
 
-/*
- * Checks for "qcom,iommu-dma-addr-pool" property to specify the IOVA range
- * for the domain. If not present, and the domain doesn't use fastmap,
- * the domain geometry is unmodified.
- */
-static int arm_smmu_adjust_domain_geometry(struct device *dev,
-				struct iommu_domain *domain)
+static int get_range_prop(struct device *dev, const char *prop,
+			  dma_addr_t *ret_base, dma_addr_t *ret_end)
 {
 	struct device_node *np;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	int naddr, nsize, len;
-	u64 dma_base, dma_size, dma_end;
+	u64 base, end, size;
 	const __be32 *ranges;
-	dma_addr_t hw_base = domain->geometry.aperture_start;
-	dma_addr_t hw_end = domain->geometry.aperture_end;
-	bool is_fast = test_bit(DOMAIN_ATTR_FAST, smmu_domain->attributes);
 
 	if (!dev->of_node)
-		return 0;
+		return -ENOENT;
 
 	np = of_parse_phandle(dev->of_node, "qcom,iommu-group", 0);
 	if (!np)
 		np = dev->of_node;
 
-	ranges = of_get_property(np, "qcom,iommu-dma-addr-pool", &len);
-	if (!ranges && !is_fast)
-		return 0;
+	ranges = of_get_property(np, prop, &len);
+	if (!ranges)
+		return -ENOENT;
 
-	if (ranges) {
 	len /= sizeof(u32);
 	naddr = of_n_addr_cells(np);
 	nsize = of_n_size_cells(np);
 
 	if (len < naddr + nsize) {
-		dev_err(dev, "Invalid length for qcom,iommu-dma-addr-pool, expected %d cells\n",
-			naddr + nsize);
+		dev_err(dev, "Invalid length for %s, expected %d cells\n",
+			prop, naddr + nsize);
 		return -EINVAL;
 	}
 
 	if (naddr == 0 || nsize == 0) {
-		dev_err(dev, "Invalid #address-cells %d or #size-cells %d\n",
-			naddr, nsize);
+		dev_err(dev, "Invalid #address-cells %d or #size-cells %d for %s\n",
+			prop, naddr, nsize);
 		return -EINVAL;
 	}
 
-	dma_base = of_read_number(ranges, naddr);
-	dma_size = of_read_number(ranges + naddr, nsize);
-	dma_end = dma_base + dma_size - 1;
-	} else {
+	base = of_read_number(ranges, naddr);
+	size = of_read_number(ranges + naddr, nsize);
+	end = base + size - 1;
+
+	*ret_base = base;
+	*ret_end = end;
+	return 0;
+}
+
+static int arm_smmu_get_domain_dma_range(struct device *dev,
+				struct iommu_domain *domain,
+				dma_addr_t hw_base, dma_addr_t hw_end,
+				dma_addr_t *ret_base, dma_addr_t *ret_end)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	dma_addr_t dma_base, dma_end;
+	bool is_fast = test_bit(DOMAIN_ATTR_FAST, smmu_domain->attributes);
+	int ret;
+
+	ret = get_range_prop(dev, "qcom,iommu-dma-addr-pool", &dma_base,
+			     &dma_end);
+	if (ret == -ENOENT) {
+		if (is_fast) {
-		/*
-		 * This domain uses fastmap, but doesn't have any domain
-		 * geometry limitations, as implied by the absence of the
-		 * qcom,iommu-dma-addr-pool property, so impose the default
-		 * fastmap geometry requirement.
-		 */
+			/*
+			 * This domain uses fastmap, but doesn't have any domain
+			 * geometry limitations, as implied by the absence of
+			 * the qcom,iommu-dma-addr-pool property, so impose the
+			 * default fastmap geometry requirement.
+			 */
 			dma_base = 0;
 			dma_end = SZ_4G - 1;
+		} else {
+			dma_base = hw_base;
+			dma_end = hw_end;
+		}
+	} else if (ret) {
+		return ret;
 	}
 
-	/*
-	 * The original geometry describes the IOVA limitations of the hardware,
-	 * so lets make sure that the IOVA range for this device is at least
-	 * within those bounds.
-	 */
 	if (!((hw_base <= dma_base) && (dma_end <= hw_end)))
 		return -EINVAL;
 
+	*ret_base = dma_base;
+	*ret_end = dma_end;
+	return 0;
+}
+
+/*
+ * Get the supported IOVA range for the domain, this can be larger than the
+ * configured DMA layer IOVA range.
+ */
+static int arm_smmu_get_domain_iova_range(struct device *dev,
+				struct iommu_domain *domain, unsigned long ias,
+				dma_addr_t *ret_base, dma_addr_t *ret_end)
+{
+	dma_addr_t iova_base, iova_end;
+	dma_addr_t dma_base, dma_end, geometry_start, geometry_end;
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	dma_addr_t hw_base = 0;
+	dma_addr_t hw_end = (1UL << ias) - 1;
+	bool is_fast = test_bit(DOMAIN_ATTR_FAST, smmu_domain->attributes);
+	int ret;
+
+	if (!is_fast) {
+		iova_base = hw_base;
+		iova_end = hw_end;
+		goto end;
+	}
+
+	ret = arm_smmu_get_domain_dma_range(dev, domain, hw_base, hw_end,
+					    &dma_base, &dma_end);
+	if (ret)
+		return ret;
+
+	ret = get_range_prop(dev, "qcom,iommu-geometry", &geometry_start,
+			     &geometry_end);
+	if (!ret) {
+		if (geometry_start >= SZ_1G * 4ULL ||
+		    geometry_end >= SZ_1G * 4ULL) {
+			pr_err("fastmap geometry does not support IOVAs >= 4GB\n");
+			return -EINVAL;
+		}
+
+		if (geometry_start < dma_base)
+			iova_base = geometry_start;
+		else
+			iova_base = dma_base;
+
+		if (geometry_end > dma_end)
+			iova_end = geometry_end;
+		else
+			iova_end = dma_end;
+	} else if (ret == -ENOENT) {
+		iova_base = 0;
+		iova_end = SZ_4G - 1;
+	} else {
+		return ret;
+	}
+
+	if (!((hw_base <= iova_base) && (iova_end <= hw_end)))
+		return -EINVAL;
+end:
+	*ret_base = iova_base;
+	*ret_end = iova_end;
+	return 0;
+}
+
+/*
+ * Checks for "qcom,iommu-dma-addr-pool" property to specify the DMA layer IOVA
+ * range for the domain. If not present, and the domain doesn't use fastmap,
+ * the domain geometry is unmodified.
+ */
+static int arm_smmu_adjust_domain_geometry(struct device *dev,
+				struct iommu_domain *domain)
+{
+	dma_addr_t dma_base, dma_end;
+	int ret;
+
+	ret = arm_smmu_get_domain_dma_range(dev, domain,
+					    domain->geometry.aperture_start,
+					    domain->geometry.aperture_end,
+					    &dma_base, &dma_end);
+	if (ret)
+		return ret;
+
 	domain->geometry.aperture_start = dma_base;
 	domain->geometry.aperture_end = dma_end;

@@ -2060,6 +2158,14 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		goto out_clear_smmu;
 	}
 
+	ret = arm_smmu_get_domain_iova_range(dev, domain, ias,
+					     &ttbr0_pgtbl_info->iova_base,
+					     &ttbr0_pgtbl_info->iova_end);
+	if (ret) {
+		dev_err(dev, "Failed to get domain IOVA range\n");
+		goto out_clear_smmu;
+	}
+
 	ttbr0_pgtbl_info->pgtbl_cfg = (struct io_pgtable_cfg) {
 		.quirks		= quirks,
 		.pgsize_bitmap	= smmu->pgsize_bitmap,

@@ -2079,6 +2185,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		goto out_clear_smmu;
 	}
 
 	if (split_tables) {
+		ttbr1_pgtbl_info->iova_base = ttbr0_pgtbl_info->iova_base;
+		ttbr1_pgtbl_info->iova_end = ttbr0_pgtbl_info->iova_end;
 		ttbr1_pgtbl_info->pgtbl_cfg = ttbr0_pgtbl_info->pgtbl_cfg;
 		smmu_domain->pgtbl_ops[1] = alloc_io_pgtable_ops(fmt,
 					&ttbr1_pgtbl_info->pgtbl_cfg,
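Illustrative note (not part of the patch): the range-merge rule in arm_smmu_get_domain_iova_range() can be hard to see in diff form. For fastmap domains, the preallocated IOVA window is the union of the qcom,iommu-dma-addr-pool range and the qcom,iommu-geometry range, and the result must stay inside the hardware aperture. The standalone C sketch below restates that rule with hypothetical values; names and numbers are made up for illustration only.

#include <stdint.h>
#include <stdio.h>

struct range { uint64_t base, end; };

/* Union of the DMA pool and geometry ranges, checked against the HW aperture. */
static int merge_fastmap_range(struct range hw, struct range dma,
			       struct range geom, struct range *out)
{
	out->base = geom.base < dma.base ? geom.base : dma.base;
	out->end  = geom.end  > dma.end  ? geom.end  : dma.end;
	if (out->base < hw.base || out->end > hw.end)
		return -1;	/* would exceed what the SMMU can address */
	return 0;
}

int main(void)
{
	/* hypothetical 36-bit aperture, 1GB DMA pool at 1GB, 4GB geometry */
	struct range hw   = { 0, (1ULL << 36) - 1 };
	struct range dma  = { 0x40000000ULL, 0x7fffffffULL };
	struct range geom = { 0, 0xffffffffULL };
	struct range iova;

	if (!merge_fastmap_range(hw, dma, geom, &iova))
		printf("fastmap preallocates [%#llx..%#llx]\n",
		       (unsigned long long)iova.base,
		       (unsigned long long)iova.end);
	return 0;
}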
drivers/iommu/dma-mapping-fast.c +4 −1

@@ -116,6 +116,9 @@ static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
 					mapping->num_4k_pages);
 		mapping->have_stale_tlbs = false;
 		av8l_fast_clear_stale_ptes(mapping->pgtbl_ops,
+					   mapping->base,
+					   mapping->base + mapping->size - 1,
 					   skip_sync);
 
 		bit = bitmap_find_next_zero_area(
 					mapping->clean_bitmap,

drivers/iommu/io-pgtable-fast.c +81 −43

@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <linux/io-pgtable.h>
 #include <linux/io-pgtable-fast.h>
+#include <linux/mm.h>
 #include <asm/cacheflush.h>
 #include <linux/vmalloc.h>
 #include <linux/dma-mapping.h>

@@ -126,7 +127,7 @@
 #define PTE_SH_IDX(pte) (pte & AV8L_FAST_PTE_SH_MASK)
 
-#define iopte_pmd_offset(pmds, iova) (pmds + (iova >> 12))
+#define iopte_pmd_offset(pmds, base, iova) (pmds + ((iova - base) >> 12))
 
 static inline dma_addr_t av8l_dma_addr(void *addr)
 {

@@ -193,14 +194,16 @@ static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
 	}
 }
 
-void av8l_fast_clear_stale_ptes(struct io_pgtable_ops *ops, bool skip_sync)
+void av8l_fast_clear_stale_ptes(struct io_pgtable_ops *ops, u64 base,
+				u64 end, bool skip_sync)
 {
 	int i;
 	struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
 	struct io_pgtable *iop = iof_pgtable_ops_to_pgtable(ops);
-	av8l_fast_iopte *pmdp = data->pmds;
+	av8l_fast_iopte *pmdp = iopte_pmd_offset(data->pmds, data->base, base);
 
-	for (i = 0; i < ((SZ_1G * 4UL) >> AV8L_FAST_PAGE_SHIFT); ++i) {
+	for (i = base >> AV8L_FAST_PAGE_SHIFT;
+	     i <= (end >> AV8L_FAST_PAGE_SHIFT); ++i) {
 		if (!(*pmdp & AV8L_FAST_PTE_VALID)) {
 			*pmdp = 0;
 			if (!skip_sync)

@@ -247,7 +250,7 @@ static int av8l_fast_map(struct io_pgtable_ops *ops, unsigned long iova,
 {
 	struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
 	struct io_pgtable *iop = iof_pgtable_ops_to_pgtable(ops);
-	av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, iova);
+	av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, data->base, iova);
 	unsigned long i, nptes = size >> AV8L_FAST_PAGE_SHIFT;
 	av8l_fast_iopte pte;

@@ -280,7 +283,7 @@ __av8l_fast_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 		? AV8L_FAST_PTE_UNMAPPED_NEED_TLBI
 		: 0;
 
-	ptep = iopte_pmd_offset(data->pmds, iova);
+	ptep = iopte_pmd_offset(data->pmds, data->base, iova);
 	nptes = size >> AV8L_FAST_PAGE_SHIFT;
 
 	memset(ptep, val, sizeof(*ptep) * nptes);

@@ -378,7 +381,7 @@ static bool av8l_fast_iova_coherent(struct io_pgtable_ops *ops,
 					unsigned long iova)
 {
 	struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
-	av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, iova);
+	av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, data->base, iova);
 
 	return ((PTE_MAIR_IDX(*ptep) == AV8L_FAST_MAIR_ATTR_IDX_CACHE) &&
 		((PTE_SH_IDX(*ptep) == AV8L_FAST_PTE_SH_OS) ||

@@ -413,7 +416,7 @@ av8l_fast_alloc_pgtable_data(struct io_pgtable_cfg *cfg)
 }
 
 /*
- * We need 1 page for the pgd, 4 pages for puds (1GB VA per pud page) and
+ * We need max 1 page for the pgd, 4 pages for puds (1GB VA per pud page) and
  * 2048 pages for pmds (each pud page contains 512 table entries, each
  * pointing to a pmd).
  */

@@ -422,12 +425,39 @@
 #define NUM_PMD_PAGES 2048
 #define NUM_PGTBL_PAGES (NUM_PGD_PAGES + NUM_PUD_PAGES + NUM_PMD_PAGES)
 
+/* undefine arch specific definitions which depends on page table format */
+#undef pud_index
+#undef pud_mask
+#undef pud_next
+#undef pmd_index
+#undef pmd_mask
+#undef pmd_next
+#define pud_index(addr)		(((addr) >> 30) & 0x3)
+#define pud_mask(addr)		((addr) & ~((1UL << 30) - 1))
+#define pud_next(addr, end)						\
+({	unsigned long __boundary = pud_mask(addr + (1UL << 30));	\
+	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
+})
+#define pmd_index(addr)		(((addr) >> 21) & 0x1ff)
+#define pmd_mask(addr)		((addr) & ~((1UL << 21) - 1))
+#define pmd_next(addr, end)						\
+({	unsigned long __boundary = pmd_mask(addr + (1UL << 21));	\
+	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
+})
+
 static int
 av8l_fast_prepopulate_pgtables(struct av8l_fast_io_pgtable *data,
 			       struct io_pgtable_cfg *cfg, void *cookie)
 {
 	int i, j, pg = 0;
 	struct page **pages, *page;
+	dma_addr_t pud, pmd;
+	int pmd_pg_index;
+	struct msm_io_pgtable_info *pgtbl_info = to_msm_io_pgtable_info(cfg);
+	dma_addr_t base = pgtbl_info->iova_base;
+	dma_addr_t end = pgtbl_info->iova_end;
 
 	pages = kmalloc(sizeof(*pages) * NUM_PGTBL_PAGES, __GFP_NOWARN |
 							  __GFP_NORETRY);

@@ -445,10 +475,11 @@ av8l_fast_prepopulate_pgtables(struct av8l_fast_io_pgtable *data,
 	data->pgd = page_address(page);
 
 	/*
-	 * We need 2048 entries at level 2 to map 4GB of VA space. A page
-	 * can hold 512 entries, so we need 4 pages.
+	 * We need max 2048 entries at level 2 to map 4GB of VA space. A page
+	 * can hold 512 entries, so we need max 4 pages.
 	 */
-	for (i = 0; i < 4; ++i) {
+	for (i = pud_index(base), pud = base; pud < end;
+	     ++i, pud = pud_next(pud, end)) {
 		av8l_fast_iopte pte, *ptep;
 
 		page = alloc_page(GFP_KERNEL | __GFP_ZERO);

@@ -463,12 +494,15 @@ av8l_fast_prepopulate_pgtables(struct av8l_fast_io_pgtable *data,
 	av8l_clean_range(cfg, data->pgd, data->pgd + 4);
 
 	/*
-	 * We have 4 puds, each of which can point to 512 pmds, so we'll
-	 * have 2048 pmds, each of which can hold 512 ptes, for a grand
+	 * We have max 4 puds, each of which can point to 512 pmds, so we'll
+	 * have max 2048 pmds, each of which can hold 512 ptes, for a grand
 	 * total of 2048*512=1048576 PTEs.
 	 */
-	for (i = 0; i < 4; ++i) {
-		for (j = 0; j < 512; ++j) {
+	pmd_pg_index = pg;
+	for (i = pud_index(base), pud = base; pud < end;
+	     ++i, pud = pud_next(pud, end)) {
+		for (j = pmd_index(pud), pmd = pud; pmd < pud_next(pud, end);
+		     ++j, pmd = pmd_next(pmd, end)) {
 			av8l_fast_iopte pte, *pudp;
 			void *addr;

@@ -487,21 +521,21 @@ av8l_fast_prepopulate_pgtables(struct av8l_fast_io_pgtable *data,
 		av8l_clean_range(cfg, data->puds[i], data->puds[i] + 512);
 	}
 
-	if (WARN_ON(pg != NUM_PGTBL_PAGES))
-		goto err_free_pages;
-
 	/*
 	 * We map the pmds into a virtually contiguous space so that we
 	 * don't have to traverse the first two levels of the page tables
 	 * to find the appropriate pud. Instead, it will be a simple
 	 * offset from the virtual base of the pmds.
 	 */
-	data->pmds = vmap(&pages[NUM_PGD_PAGES + NUM_PUD_PAGES], NUM_PMD_PAGES,
+	data->pmds = vmap(&pages[pmd_pg_index], pg - pmd_pg_index,
 			  VM_IOREMAP, PAGE_KERNEL);
 	if (!data->pmds)
 		goto err_free_pages;
 
 	data->pages = pages;
+	data->base = base;
+	data->end = end;
+	data->nr_pages = pg;
 	return 0;
 
 err_free_pages:

@@ -605,7 +639,7 @@ static void av8l_fast_free_pgtable(struct io_pgtable *iop)
 	struct av8l_fast_io_pgtable *data = iof_pgtable_to_data(iop);
 
 	vunmap(data->pmds);
-	for (i = 0; i < NUM_PGTBL_PAGES; ++i)
+	for (i = 0; i < data->nr_pages; ++i)
 		__free_page(data->pages[i]);
 	kvfree(data->pages);
 	kfree(data);

@@ -674,12 +708,15 @@ static int __init av8l_fast_positive_testing(void)
 	int failed = 0;
 	u64 iova;
 	struct io_pgtable_ops *ops;
-	struct io_pgtable_cfg cfg;
+	struct msm_io_pgtable_info pgtable_info;
 	struct av8l_fast_io_pgtable *data;
 	av8l_fast_iopte *pmds;
 	u64 max = SZ_1G * 4ULL - 1;
+	u64 base = 0;
 
-	cfg = (struct io_pgtable_cfg) {
+	pgtable_info.iova_base = base;
+	pgtable_info.iova_end = max;
+	pgtable_info.pgtbl_cfg = (struct io_pgtable_cfg) {
 		.quirks = 0,
 		.tlb = &dummy_tlb_ops,
 		.ias = 32,

@@ -688,8 +725,9 @@ static int __init av8l_fast_positive_testing(void)
 		.coherent_walk = true,
 	};
 
-	cfg_cookie = &cfg;
-	ops = alloc_io_pgtable_ops(ARM_V8L_FAST, &cfg, &cfg);
+	cfg_cookie = &pgtable_info.pgtbl_cfg;
+	ops = alloc_io_pgtable_ops(ARM_V8L_FAST, &pgtable_info.pgtbl_cfg,
+				   &pgtable_info.pgtbl_cfg);
 	if (WARN_ON(!ops))
 		return 1;

@@ -698,81 +736,81 @@ static int __init av8l_fast_positive_testing(void)
 	pmds = data->pmds;
 
 	/* map the entire 4GB VA space with 4K map calls */
-	for (iova = 0; iova < max; iova += SZ_4K) {
+	for (iova = base; iova < max; iova += SZ_4K) {
 		if (WARN_ON(ops->map(ops, iova, iova, SZ_4K, IOMMU_READ))) {
 			failed++;
 			continue;
 		}
 	}
-	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, 0,
-							  0, max)))
+	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
+							  base, max - base)))
 		failed++;
 
 	/* unmap it all */
-	for (iova = 0; iova < max; iova += SZ_4K) {
+	for (iova = base; iova < max; iova += SZ_4K) {
 		if (WARN_ON(ops->unmap(ops, iova, SZ_4K, NULL) != SZ_4K))
 			failed++;
 	}
 
 	/* sweep up TLB proving PTEs */
-	av8l_fast_clear_stale_ptes(ops, false);
+	av8l_fast_clear_stale_ptes(ops, base, max, false);
 
 	/* map the entire 4GB VA space with 8K map calls */
-	for (iova = 0; iova < max; iova += SZ_8K) {
+	for (iova = base; iova < max; iova += SZ_8K) {
 		if (WARN_ON(ops->map(ops, iova, iova, SZ_8K, IOMMU_READ))) {
 			failed++;
 			continue;
 		}
 	}
-	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, 0,
-							  0, max)))
+	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
+							  base, max - base)))
 		failed++;
 
 	/* unmap it all with 8K unmap calls */
-	for (iova = 0; iova < max; iova += SZ_8K) {
+	for (iova = base; iova < max; iova += SZ_8K) {
 		if (WARN_ON(ops->unmap(ops, iova, SZ_8K, NULL) != SZ_8K))
 			failed++;
 	}
 
 	/* sweep up TLB proving PTEs */
-	av8l_fast_clear_stale_ptes(ops, false);
+	av8l_fast_clear_stale_ptes(ops, base, max, false);
 
 	/* map the entire 4GB VA space with 16K map calls */
-	for (iova = 0; iova < max; iova += SZ_16K) {
+	for (iova = base; iova < max; iova += SZ_16K) {
 		if (WARN_ON(ops->map(ops, iova, iova, SZ_16K, IOMMU_READ))) {
 			failed++;
 			continue;
 		}
 	}
-	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, 0,
-							  0, max)))
+	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
+							  base, max - base)))
 		failed++;
 
 	/* unmap it all */
-	for (iova = 0; iova < max; iova += SZ_16K) {
+	for (iova = base; iova < max; iova += SZ_16K) {
 		if (WARN_ON(ops->unmap(ops, iova, SZ_16K, NULL) != SZ_16K))
 			failed++;
 	}
 
 	/* sweep up TLB proving PTEs */
-	av8l_fast_clear_stale_ptes(ops, false);
+	av8l_fast_clear_stale_ptes(ops, base, max, false);
 
 	/* map the entire 4GB VA space with 64K map calls */
-	for (iova = 0; iova < max; iova += SZ_64K) {
+	for (iova = base; iova < max; iova += SZ_64K) {
 		if (WARN_ON(ops->map(ops, iova, iova, SZ_64K, IOMMU_READ))) {
 			failed++;
 			continue;
 		}
 	}
-	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, 0,
-							  0, max)))
+	if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, base,
							  base, max - base)))
		failed++;
 
 	/* unmap it all at once */
-	if (WARN_ON(ops->unmap(ops, 0, max, NULL) != max))
+	if (WARN_ON(ops->unmap(ops, base, max - base, NULL) != (max - base)))
 		failed++;
 
 	free_io_pgtable_ops(ops);
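Illustrative note (not part of the patch): with the bounded prepopulation above, the number of pud and pmd table pages now scales with the configured [iova_base, iova_end] window instead of always covering the full 4GB, which is why the comments switch to "max" wording. The standalone sketch below (assumed 4KB-granule shifts matching the code, hypothetical example ranges) counts the table pages a given window would need, mirroring the pud_next()/pmd_next() walk.

#include <stdint.h>
#include <stdio.h>

#define PUD_SHIFT 30	/* 1GB of VA per pud page, as in io-pgtable-fast.c */
#define PMD_SHIFT 21	/* 2MB of VA per pmd page */

/* Number of level-1 (pud) and level-2 (pmd) table pages needed to cover
 * the inclusive range [base, end]. */
static void count_table_pages(uint64_t base, uint64_t end,
			      uint64_t *puds, uint64_t *pmds)
{
	*puds = (end >> PUD_SHIFT) - (base >> PUD_SHIFT) + 1;
	*pmds = (end >> PMD_SHIFT) - (base >> PMD_SHIFT) + 1;
}

int main(void)
{
	uint64_t puds, pmds;

	/* full 4GB window: matches the old fixed cost (4 puds, 2048 pmds) */
	count_table_pages(0, 0xffffffffULL, &puds, &pmds);
	printf("4GB window: %llu pud pages, %llu pmd pages\n",
	       (unsigned long long)puds, (unsigned long long)pmds);

	/* hypothetical 1GB pool starting at 1GB: far fewer pages */
	count_table_pages(0x40000000ULL, 0x7fffffffULL, &puds, &pmds);
	printf("1GB window: %llu pud pages, %llu pmd pages\n",
	       (unsigned long long)puds, (unsigned long long)pmds);
	return 0;
}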
include/linux/io-pgtable-fast.h +7 −1

@@ -23,6 +23,9 @@ struct av8l_fast_io_pgtable {
 	av8l_fast_iopte	 *puds[4];
 	av8l_fast_iopte	 *pmds;
 	struct page	**pages; /* page table memory */
+	int		  nr_pages;
+	dma_addr_t	  base;
+	dma_addr_t	  end;
 };
 
 /* Struct accessors */

@@ -99,7 +102,8 @@ av8l_fast_iova_to_phys_public(struct io_pgtable_ops *ops,
  */
 #define AV8L_FAST_PTE_UNMAPPED_NEED_TLBI	0xa
 
-void av8l_fast_clear_stale_ptes(struct io_pgtable_ops *ops, bool skip_sync);
+void av8l_fast_clear_stale_ptes(struct io_pgtable_ops *ops, u64 base, u64 end,
+				bool skip_sync);
 void av8l_register_notify(struct notifier_block *nb);
 
 #else  /* !CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB */

@@ -107,6 +111,8 @@ void av8l_register_notify(struct notifier_block *nb);
 #define AV8L_FAST_PTE_UNMAPPED_NEED_TLBI	0
 
 static inline void av8l_fast_clear_stale_ptes(struct io_pgtable_ops *ops,
+					       u64 base, u64 end,
 					       bool skip_sync)
 {
 }

include/linux/io-pgtable.h +4 −0

@@ -190,6 +190,8 @@ struct io_pgtable_ops {
  *               and False if non-coherent.
  * @iova_to_pte: Translate iova to Page Table Entry (PTE).
  * @pgtbl_cfg:   The configuration for a set of page tables.
+ * @iova_base:   Configured IOVA base
+ * @iova_end:    Configured IOVA end
  */
 struct msm_io_pgtable_info {
 	int (*map_sg)(struct io_pgtable_ops *ops, unsigned long iova,

@@ -199,6 +201,8 @@ struct msm_io_pgtable_info {
 			 unsigned long iova);
 	uint64_t (*iova_to_pte)(struct io_pgtable_ops *ops, unsigned long iova);
 	struct io_pgtable_cfg pgtbl_cfg;
+	dma_addr_t iova_base;
+	dma_addr_t iova_end;
 };
 
 /**