Loading Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt +10 −2 Original line number Diff line number Diff line Loading @@ -7,7 +7,15 @@ connected to the IPMMU through a port called micro-TLB. Required Properties: - compatible: Must contain "renesas,ipmmu-vmsa". - compatible: Must contain SoC-specific and generic entries from below. - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU. - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU. - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU. - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU. - "renesas,ipmmu-r8a7794" for the R8A7794 (R-Car E2) IPMMU. - "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU. - reg: Base address and size of the IPMMU registers. - interrupts: Specifiers for the MMU fault interrupts. For instances that support secure mode two interrupts must be specified, for non-secure and Loading @@ -27,7 +35,7 @@ node with the following property: Example: R8A7791 IPMMU-MX and VSP1-D0 bus master ipmmu_mx: mmu@fe951000 { compatible = "renesas,ipmmu-vmsa"; compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>, <0 221 IRQ_TYPE_LEVEL_HIGH>; Loading drivers/iommu/Kconfig +0 −75 Original line number Diff line number Diff line Loading @@ -263,81 +263,6 @@ config EXYNOS_IOMMU_DEBUG Say N unless you need kernel log message for IOMMU debugging. config SHMOBILE_IPMMU bool config SHMOBILE_IPMMU_TLB bool config SHMOBILE_IOMMU bool "IOMMU for Renesas IPMMU/IPMMUI" default n depends on ARM && MMU depends on ARCH_SHMOBILE || COMPILE_TEST select IOMMU_API select ARM_DMA_USE_IOMMU select SHMOBILE_IPMMU select SHMOBILE_IPMMU_TLB help Support for Renesas IPMMU/IPMMUI. This option enables remapping of DMA memory accesses from all of the IP blocks on the ICB. 
Warning: Drivers (including userspace drivers of UIO devices) of the IP blocks on the ICB *must* use addresses allocated from the IPMMU (iova) for DMA with this option enabled. If unsure, say N. choice prompt "IPMMU/IPMMUI address space size" default SHMOBILE_IOMMU_ADDRSIZE_2048MB depends on SHMOBILE_IOMMU help This option sets IPMMU/IPMMUI address space size by adjusting the 1st level page table size. The page table size is calculated as follows: page table size = number of page table entries * 4 bytes number of page table entries = address space size / 1 MiB For example, when the address space size is 2048 MiB, the 1st level page table size is 8192 bytes. config SHMOBILE_IOMMU_ADDRSIZE_2048MB bool "2 GiB" config SHMOBILE_IOMMU_ADDRSIZE_1024MB bool "1 GiB" config SHMOBILE_IOMMU_ADDRSIZE_512MB bool "512 MiB" config SHMOBILE_IOMMU_ADDRSIZE_256MB bool "256 MiB" config SHMOBILE_IOMMU_ADDRSIZE_128MB bool "128 MiB" config SHMOBILE_IOMMU_ADDRSIZE_64MB bool "64 MiB" config SHMOBILE_IOMMU_ADDRSIZE_32MB bool "32 MiB" endchoice config SHMOBILE_IOMMU_L1SIZE int default 8192 if SHMOBILE_IOMMU_ADDRSIZE_2048MB default 4096 if SHMOBILE_IOMMU_ADDRSIZE_1024MB default 2048 if SHMOBILE_IOMMU_ADDRSIZE_512MB default 1024 if SHMOBILE_IOMMU_ADDRSIZE_256MB default 512 if SHMOBILE_IOMMU_ADDRSIZE_128MB default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" depends on ARM_LPAE Loading drivers/iommu/Makefile +0 −2 Original line number Diff line number Diff line Loading @@ -22,7 +22,5 @@ obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o drivers/iommu/amd_iommu.c +247 −149 Original 
line number Diff line number Diff line Loading @@ -35,6 +35,7 @@ #include <linux/msi.h> #include <linux/dma-contiguous.h> #include <linux/irqdomain.h> #include <linux/percpu.h> #include <asm/irq_remapping.h> #include <asm/io_apic.h> #include <asm/apic.h> Loading Loading @@ -114,6 +115,45 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); /* * For dynamic growth the aperture size is split into ranges of 128MB of * DMA address space each. This struct represents one such range. */ struct aperture_range { spinlock_t bitmap_lock; /* address allocation bitmap */ unsigned long *bitmap; unsigned long offset; unsigned long next_bit; /* * Array of PTE pages for the aperture. In this array we save all the * leaf pages of the domain page table used for the aperture. This way * we don't need to walk the page table to find a specific PTE. We can * just calculate its address in constant time. 
*/ u64 *pte_pages[64]; }; /* * Data container for a dma_ops specific protection domain */ struct dma_ops_domain { /* generic protection domain information */ struct protection_domain domain; /* size of the aperture for the mappings */ unsigned long aperture_size; /* aperture index we start searching for free addresses */ u32 __percpu *next_index; /* address space relevant data */ struct aperture_range *aperture[APERTURE_MAX_RANGES]; }; /**************************************************************************** * * Helper functions Loading Loading @@ -1167,11 +1207,21 @@ static u64 *alloc_pte(struct protection_domain *domain, end_lvl = PAGE_SIZE_LEVEL(page_size); while (level > end_lvl) { if (!IOMMU_PTE_PRESENT(*pte)) { u64 __pte, __npte; __pte = *pte; if (!IOMMU_PTE_PRESENT(__pte)) { page = (u64 *)get_zeroed_page(gfp); if (!page) return NULL; *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); __npte = PM_LEVEL_PDE(level, virt_to_phys(page)); if (cmpxchg64(pte, __pte, __npte)) { free_page((unsigned long)page); continue; } } /* No level skipping support yet */ Loading Loading @@ -1376,8 +1426,10 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, bool populate, gfp_t gfp) { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; struct amd_iommu *iommu; unsigned long i, old_size, pte_pgsize; struct aperture_range *range; struct amd_iommu *iommu; unsigned long flags; #ifdef CONFIG_IOMMU_STRESS populate = false; Loading @@ -1386,15 +1438,17 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, if (index >= APERTURE_MAX_RANGES) return -ENOMEM; dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); if (!dma_dom->aperture[index]) range = kzalloc(sizeof(struct aperture_range), gfp); if (!range) return -ENOMEM; dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); if (!dma_dom->aperture[index]->bitmap) range->bitmap = (void *)get_zeroed_page(gfp); if (!range->bitmap) goto out_free; dma_dom->aperture[index]->offset = 
dma_dom->aperture_size; range->offset = dma_dom->aperture_size; spin_lock_init(&range->bitmap_lock); if (populate) { unsigned long address = dma_dom->aperture_size; Loading @@ -1407,13 +1461,19 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, if (!pte) goto out_free; dma_dom->aperture[index]->pte_pages[i] = pte_page; range->pte_pages[i] = pte_page; address += APERTURE_RANGE_SIZE / 64; } } spin_lock_irqsave(&dma_dom->domain.lock, flags); /* First take the bitmap_lock and then publish the range */ spin_lock(&range->bitmap_lock); old_size = dma_dom->aperture_size; dma_dom->aperture[index] = range; dma_dom->aperture_size += APERTURE_RANGE_SIZE; /* Reserve address range used for MSI messages */ Loading Loading @@ -1461,62 +1521,123 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, update_domain(&dma_dom->domain); spin_unlock(&range->bitmap_lock); spin_unlock_irqrestore(&dma_dom->domain.lock, flags); return 0; out_free: update_domain(&dma_dom->domain); free_page((unsigned long)dma_dom->aperture[index]->bitmap); free_page((unsigned long)range->bitmap); kfree(dma_dom->aperture[index]); dma_dom->aperture[index] = NULL; kfree(range); return -ENOMEM; } static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom, struct aperture_range *range, unsigned long pages, unsigned long dma_mask, unsigned long boundary_size, unsigned long align_mask, bool trylock) { unsigned long offset, limit, flags; dma_addr_t address; bool flush = false; offset = range->offset >> PAGE_SHIFT; limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, dma_mask >> PAGE_SHIFT); if (trylock) { if (!spin_trylock_irqsave(&range->bitmap_lock, flags)) return -1; } else { spin_lock_irqsave(&range->bitmap_lock, flags); } address = iommu_area_alloc(range->bitmap, limit, range->next_bit, pages, offset, boundary_size, align_mask); if (address == -1) { /* Nothing found, retry one time */ address = iommu_area_alloc(range->bitmap, limit, 0, pages, offset, boundary_size, align_mask); 
flush = true; } if (address != -1) range->next_bit = address + pages; spin_unlock_irqrestore(&range->bitmap_lock, flags); if (flush) { domain_flush_tlb(&dom->domain); domain_flush_complete(&dom->domain); } return address; } static unsigned long dma_ops_area_alloc(struct device *dev, struct dma_ops_domain *dom, unsigned int pages, unsigned long align_mask, u64 dma_mask, unsigned long start) u64 dma_mask) { unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; int i = start >> APERTURE_RANGE_SHIFT; unsigned long boundary_size, mask; unsigned long address = -1; unsigned long limit; bool first = true; u32 start, i; next_bit >>= PAGE_SHIFT; preempt_disable(); mask = dma_get_seg_boundary(dev); again: start = this_cpu_read(*dom->next_index); /* Sanity check - is it really necessary? */ if (unlikely(start > APERTURE_MAX_RANGES)) { start = 0; this_cpu_write(*dom->next_index, 0); } boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : 1UL << (BITS_PER_LONG - PAGE_SHIFT); for (;i < max_index; ++i) { unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; for (i = 0; i < APERTURE_MAX_RANGES; ++i) { struct aperture_range *range; int index; if (dom->aperture[i]->offset >= dma_mask) break; index = (start + i) % APERTURE_MAX_RANGES; limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, dma_mask >> PAGE_SHIFT); range = dom->aperture[index]; if (!range || range->offset >= dma_mask) continue; address = iommu_area_alloc(dom->aperture[i]->bitmap, limit, next_bit, pages, 0, boundary_size, align_mask); address = dma_ops_aperture_alloc(dom, range, pages, dma_mask, boundary_size, align_mask, first); if (address != -1) { address = dom->aperture[i]->offset + (address << PAGE_SHIFT); dom->next_address = address + (pages << PAGE_SHIFT); address = range->offset + (address << PAGE_SHIFT); this_cpu_write(*dom->next_index, index); break; } } next_bit = 0; if (address == -1 && first) { first = 
false; goto again; } preempt_enable(); return address; } Loading @@ -1526,21 +1647,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, unsigned long align_mask, u64 dma_mask) { unsigned long address; #ifdef CONFIG_IOMMU_STRESS dom->next_address = 0; dom->need_flush = true; #endif unsigned long address = -1; address = dma_ops_area_alloc(dev, dom, pages, align_mask, dma_mask, dom->next_address); while (address == -1) { address = dma_ops_area_alloc(dev, dom, pages, align_mask, dma_mask); if (address == -1) { dom->next_address = 0; address = dma_ops_area_alloc(dev, dom, pages, align_mask, dma_mask, 0); dom->need_flush = true; if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC)) break; } if (unlikely(address == -1)) Loading @@ -1562,6 +1676,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, { unsigned i = address >> APERTURE_RANGE_SHIFT; struct aperture_range *range = dom->aperture[i]; unsigned long flags; BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); Loading @@ -1570,12 +1685,18 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, return; #endif if (address >= dom->next_address) dom->need_flush = true; if (amd_iommu_unmap_flush) { domain_flush_tlb(&dom->domain); domain_flush_complete(&dom->domain); } address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&range->bitmap_lock, flags); if (address + pages > range->next_bit) range->next_bit = address + pages; bitmap_clear(range->bitmap, address, pages); spin_unlock_irqrestore(&range->bitmap_lock, flags); } Loading Loading @@ -1755,6 +1876,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; free_percpu(dom->next_index); del_domain_from_list(&dom->domain); free_pagetable(&dom->domain); Loading @@ -1769,6 +1892,23 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) kfree(dom); } static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom, int max_apertures) { int ret, i, apertures; 
apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; ret = 0; for (i = apertures; i < max_apertures; ++i) { ret = alloc_new_range(dma_dom, false, GFP_KERNEL); if (ret) break; } return ret; } /* * Allocates a new protection domain usable for the dma_ops functions. * It also initializes the page table and the address allocator data Loading @@ -1777,6 +1917,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) static struct dma_ops_domain *dma_ops_domain_alloc(void) { struct dma_ops_domain *dma_dom; int cpu; dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); if (!dma_dom) Loading @@ -1785,6 +1926,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (protection_domain_init(&dma_dom->domain)) goto free_dma_dom; dma_dom->next_index = alloc_percpu(u32); if (!dma_dom->next_index) goto free_dma_dom; dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; Loading @@ -1792,8 +1937,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (!dma_dom->domain.pt_root) goto free_dma_dom; dma_dom->need_flush = false; add_domain_to_list(&dma_dom->domain); if (alloc_new_range(dma_dom, true, GFP_KERNEL)) Loading @@ -1804,8 +1947,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) * a valid dma-address. 
So we can use 0 as error value */ dma_dom->aperture[0]->bitmap[0] = 1; dma_dom->next_address = 0; for_each_possible_cpu(cpu) *per_cpu_ptr(dma_dom->next_index, cpu) = 0; return dma_dom; Loading Loading @@ -2328,7 +2472,7 @@ static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, else if (direction == DMA_BIDIRECTIONAL) __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; WARN_ON(*pte); WARN_ON_ONCE(*pte); *pte = __pte; Loading Loading @@ -2357,7 +2501,7 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom, pte += PM_LEVEL_INDEX(0, address); WARN_ON(!*pte); WARN_ON_ONCE(!*pte); *pte = 0ULL; } Loading Loading @@ -2393,27 +2537,12 @@ static dma_addr_t __map_single(struct device *dev, if (align) align_mask = (1UL << get_order(size)) - 1; retry: address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask); if (unlikely(address == DMA_ERROR_CODE)) { /* * setting next_address here will let the address * allocator only scan the new allocated range in the * first run. This is a small optimization. 
*/ dma_dom->next_address = dma_dom->aperture_size; if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) if (address == DMA_ERROR_CODE) goto out; /* * aperture was successfully enlarged by 128 MB, try * allocation again */ goto retry; } start = address; for (i = 0; i < pages; ++i) { ret = dma_ops_domain_map(dma_dom, start, paddr, dir); Loading @@ -2427,11 +2556,10 @@ static dma_addr_t __map_single(struct device *dev, ADD_STATS_COUNTER(alloced_io_mem, size); if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { domain_flush_tlb(&dma_dom->domain); dma_dom->need_flush = false; } else if (unlikely(amd_iommu_np_cache)) if (unlikely(amd_iommu_np_cache)) { domain_flush_pages(&dma_dom->domain, address, size); domain_flush_complete(&dma_dom->domain); } out: return address; Loading Loading @@ -2478,11 +2606,6 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, SUB_STATS_COUNTER(alloced_io_mem, size); dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { domain_flush_pages(&dma_dom->domain, flush_addr, size); dma_dom->need_flush = false; } } /* Loading @@ -2493,11 +2616,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; phys_addr_t paddr = page_to_phys(page) + offset; struct protection_domain *domain; dma_addr_t addr; u64 dma_mask; phys_addr_t paddr = page_to_phys(page) + offset; INC_STATS_COUNTER(cnt_map_single); Loading @@ -2509,19 +2630,8 @@ static dma_addr_t map_page(struct device *dev, struct page *page, dma_mask = *dev->dma_mask; spin_lock_irqsave(&domain->lock, flags); addr = __map_single(dev, domain->priv, paddr, size, dir, false, return __map_single(dev, domain->priv, paddr, size, dir, false, dma_mask); if (addr == DMA_ERROR_CODE) goto out; domain_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); return addr; } /* Loading @@ -2530,7 +2640,6 @@ static dma_addr_t map_page(struct device 
*dev, struct page *page, static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; struct protection_domain *domain; INC_STATS_COUNTER(cnt_unmap_single); Loading @@ -2539,13 +2648,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); __unmap_single(domain->priv, dma_addr, size, dir); domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } /* Loading @@ -2556,7 +2659,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; struct protection_domain *domain; int i; struct scatterlist *s; Loading @@ -2572,8 +2674,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, dma_mask = *dev->dma_mask; spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { paddr = sg_phys(s); Loading @@ -2588,12 +2688,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } domain_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); return mapped_elems; unmap: for_each_sg(sglist, s, mapped_elems, i) { if (s->dma_address) Loading @@ -2602,9 +2698,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } mapped_elems = 0; goto out; return 0; } /* Loading @@ -2615,7 +2709,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; struct protection_domain *domain; struct scatterlist *s; int i; Loading @@ -2626,17 +2719,11 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { __unmap_single(domain->priv, s->dma_address, 
s->dma_length, dir); s->dma_address = s->dma_length = 0; } domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } /* Loading @@ -2648,7 +2735,6 @@ static void *alloc_coherent(struct device *dev, size_t size, { u64 dma_mask = dev->coherent_dma_mask; struct protection_domain *domain; unsigned long flags; struct page *page; INC_STATS_COUNTER(cnt_alloc_coherent); Loading Loading @@ -2680,19 +2766,11 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!dma_mask) dma_mask = *dev->dma_mask; spin_lock_irqsave(&domain->lock, flags); *dma_addr = __map_single(dev, domain->priv, page_to_phys(page), size, DMA_BIDIRECTIONAL, true, dma_mask); if (*dma_addr == DMA_ERROR_CODE) { spin_unlock_irqrestore(&domain->lock, flags); if (*dma_addr == DMA_ERROR_CODE) goto out_free; } domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); return page_address(page); Loading @@ -2712,7 +2790,6 @@ static void free_coherent(struct device *dev, size_t size, struct dma_attrs *attrs) { struct protection_domain *domain; unsigned long flags; struct page *page; INC_STATS_COUNTER(cnt_free_coherent); Loading @@ -2724,14 +2801,8 @@ static void free_coherent(struct device *dev, size_t size, if (IS_ERR(domain)) goto free_mem; spin_lock_irqsave(&domain->lock, flags); __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); free_mem: if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) __free_pages(page, get_order(size)); Loading @@ -2746,6 +2817,34 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) return check_device(dev); } static int set_dma_mask(struct device *dev, u64 mask) { struct protection_domain *domain; int max_apertures = 1; domain = get_domain(dev); if (IS_ERR(domain)) return PTR_ERR(domain); if (mask == DMA_BIT_MASK(64)) max_apertures = 8; else if (mask > DMA_BIT_MASK(32)) max_apertures = 4; /* * To prevent lock contention 
it doesn't make sense to allocate more * apertures than online cpus */ if (max_apertures > num_online_cpus()) max_apertures = num_online_cpus(); if (dma_ops_domain_alloc_apertures(domain->priv, max_apertures)) dev_err(dev, "Can't allocate %d iommu apertures\n", max_apertures); return 0; } static struct dma_map_ops amd_iommu_dma_ops = { .alloc = alloc_coherent, .free = free_coherent, Loading @@ -2754,6 +2853,7 @@ static struct dma_map_ops amd_iommu_dma_ops = { .map_sg = map_sg, .unmap_sg = unmap_sg, .dma_supported = amd_iommu_dma_supported, .set_dma_mask = set_dma_mask, }; int __init amd_iommu_init_api(void) Loading Loading @@ -3757,11 +3857,9 @@ static struct irq_domain *get_irq_domain(struct irq_alloc_info *info) case X86_IRQ_ALLOC_TYPE_MSI: case X86_IRQ_ALLOC_TYPE_MSIX: devid = get_device_id(&info->msi_dev->dev); if (devid >= 0) { iommu = amd_iommu_rlookup_table[devid]; if (iommu) return iommu->msi_domain; } break; default: break; Loading drivers/iommu/amd_iommu_types.h +0 −40 Original line number Diff line number Diff line Loading @@ -424,46 +424,6 @@ struct protection_domain { void *priv; /* private data */ }; /* * For dynamic growth the aperture size is split into ranges of 128MB of * DMA address space each. This struct represents one such range. */ struct aperture_range { /* address allocation bitmap */ unsigned long *bitmap; /* * Array of PTE pages for the aperture. In this array we save all the * leaf pages of the domain page table used for the aperture. This way * we don't need to walk the page table to find a specific PTE. We can * just calculate its address in constant time. 
*/ u64 *pte_pages[64]; unsigned long offset; }; /* * Data container for a dma_ops specific protection domain */ struct dma_ops_domain { /* generic protection domain information */ struct protection_domain domain; /* size of the aperture for the mappings */ unsigned long aperture_size; /* address we start to search for free addresses */ unsigned long next_address; /* address space relevant data */ struct aperture_range *aperture[APERTURE_MAX_RANGES]; /* This will be set to true when TLB needs to be flushed */ bool need_flush; }; /* * Structure where we save information about one hardware AMD IOMMU in the * system. Loading Loading
Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt +10 −2 Original line number Diff line number Diff line Loading @@ -7,7 +7,15 @@ connected to the IPMMU through a port called micro-TLB. Required Properties: - compatible: Must contain "renesas,ipmmu-vmsa". - compatible: Must contain SoC-specific and generic entries from below. - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU. - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU. - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU. - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU. - "renesas,ipmmu-r8a7794" for the R8A7794 (R-Car E2) IPMMU. - "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU. - reg: Base address and size of the IPMMU registers. - interrupts: Specifiers for the MMU fault interrupts. For instances that support secure mode two interrupts must be specified, for non-secure and Loading @@ -27,7 +35,7 @@ node with the following property: Example: R8A7791 IPMMU-MX and VSP1-D0 bus master ipmmu_mx: mmu@fe951000 { compatible = "renesas,ipmmu-vmsa"; compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>, <0 221 IRQ_TYPE_LEVEL_HIGH>; Loading
drivers/iommu/Kconfig +0 −75 Original line number Diff line number Diff line Loading @@ -263,81 +263,6 @@ config EXYNOS_IOMMU_DEBUG Say N unless you need kernel log message for IOMMU debugging. config SHMOBILE_IPMMU bool config SHMOBILE_IPMMU_TLB bool config SHMOBILE_IOMMU bool "IOMMU for Renesas IPMMU/IPMMUI" default n depends on ARM && MMU depends on ARCH_SHMOBILE || COMPILE_TEST select IOMMU_API select ARM_DMA_USE_IOMMU select SHMOBILE_IPMMU select SHMOBILE_IPMMU_TLB help Support for Renesas IPMMU/IPMMUI. This option enables remapping of DMA memory accesses from all of the IP blocks on the ICB. Warning: Drivers (including userspace drivers of UIO devices) of the IP blocks on the ICB *must* use addresses allocated from the IPMMU (iova) for DMA with this option enabled. If unsure, say N. choice prompt "IPMMU/IPMMUI address space size" default SHMOBILE_IOMMU_ADDRSIZE_2048MB depends on SHMOBILE_IOMMU help This option sets IPMMU/IPMMUI address space size by adjusting the 1st level page table size. The page table size is calculated as follows: page table size = number of page table entries * 4 bytes number of page table entries = address space size / 1 MiB For example, when the address space size is 2048 MiB, the 1st level page table size is 8192 bytes. 
config SHMOBILE_IOMMU_ADDRSIZE_2048MB bool "2 GiB" config SHMOBILE_IOMMU_ADDRSIZE_1024MB bool "1 GiB" config SHMOBILE_IOMMU_ADDRSIZE_512MB bool "512 MiB" config SHMOBILE_IOMMU_ADDRSIZE_256MB bool "256 MiB" config SHMOBILE_IOMMU_ADDRSIZE_128MB bool "128 MiB" config SHMOBILE_IOMMU_ADDRSIZE_64MB bool "64 MiB" config SHMOBILE_IOMMU_ADDRSIZE_32MB bool "32 MiB" endchoice config SHMOBILE_IOMMU_L1SIZE int default 8192 if SHMOBILE_IOMMU_ADDRSIZE_2048MB default 4096 if SHMOBILE_IOMMU_ADDRSIZE_1024MB default 2048 if SHMOBILE_IOMMU_ADDRSIZE_512MB default 1024 if SHMOBILE_IOMMU_ADDRSIZE_256MB default 512 if SHMOBILE_IOMMU_ADDRSIZE_128MB default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" depends on ARM_LPAE Loading
drivers/iommu/Makefile +0 −2 Original line number Diff line number Diff line Loading @@ -22,7 +22,5 @@ obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
drivers/iommu/amd_iommu.c +247 −149 Original line number Diff line number Diff line Loading @@ -35,6 +35,7 @@ #include <linux/msi.h> #include <linux/dma-contiguous.h> #include <linux/irqdomain.h> #include <linux/percpu.h> #include <asm/irq_remapping.h> #include <asm/io_apic.h> #include <asm/apic.h> Loading Loading @@ -114,6 +115,45 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); /* * For dynamic growth the aperture size is split into ranges of 128MB of * DMA address space each. This struct represents one such range. */ struct aperture_range { spinlock_t bitmap_lock; /* address allocation bitmap */ unsigned long *bitmap; unsigned long offset; unsigned long next_bit; /* * Array of PTE pages for the aperture. In this array we save all the * leaf pages of the domain page table used for the aperture. This way * we don't need to walk the page table to find a specific PTE. We can * just calculate its address in constant time. 
*/ u64 *pte_pages[64]; }; /* * Data container for a dma_ops specific protection domain */ struct dma_ops_domain { /* generic protection domain information */ struct protection_domain domain; /* size of the aperture for the mappings */ unsigned long aperture_size; /* aperture index we start searching for free addresses */ u32 __percpu *next_index; /* address space relevant data */ struct aperture_range *aperture[APERTURE_MAX_RANGES]; }; /**************************************************************************** * * Helper functions Loading Loading @@ -1167,11 +1207,21 @@ static u64 *alloc_pte(struct protection_domain *domain, end_lvl = PAGE_SIZE_LEVEL(page_size); while (level > end_lvl) { if (!IOMMU_PTE_PRESENT(*pte)) { u64 __pte, __npte; __pte = *pte; if (!IOMMU_PTE_PRESENT(__pte)) { page = (u64 *)get_zeroed_page(gfp); if (!page) return NULL; *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); __npte = PM_LEVEL_PDE(level, virt_to_phys(page)); if (cmpxchg64(pte, __pte, __npte)) { free_page((unsigned long)page); continue; } } /* No level skipping support yet */ Loading Loading @@ -1376,8 +1426,10 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, bool populate, gfp_t gfp) { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; struct amd_iommu *iommu; unsigned long i, old_size, pte_pgsize; struct aperture_range *range; struct amd_iommu *iommu; unsigned long flags; #ifdef CONFIG_IOMMU_STRESS populate = false; Loading @@ -1386,15 +1438,17 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, if (index >= APERTURE_MAX_RANGES) return -ENOMEM; dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); if (!dma_dom->aperture[index]) range = kzalloc(sizeof(struct aperture_range), gfp); if (!range) return -ENOMEM; dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); if (!dma_dom->aperture[index]->bitmap) range->bitmap = (void *)get_zeroed_page(gfp); if (!range->bitmap) goto out_free; dma_dom->aperture[index]->offset = 
dma_dom->aperture_size; range->offset = dma_dom->aperture_size; spin_lock_init(&range->bitmap_lock); if (populate) { unsigned long address = dma_dom->aperture_size; Loading @@ -1407,13 +1461,19 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, if (!pte) goto out_free; dma_dom->aperture[index]->pte_pages[i] = pte_page; range->pte_pages[i] = pte_page; address += APERTURE_RANGE_SIZE / 64; } } spin_lock_irqsave(&dma_dom->domain.lock, flags); /* First take the bitmap_lock and then publish the range */ spin_lock(&range->bitmap_lock); old_size = dma_dom->aperture_size; dma_dom->aperture[index] = range; dma_dom->aperture_size += APERTURE_RANGE_SIZE; /* Reserve address range used for MSI messages */ Loading Loading @@ -1461,62 +1521,123 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, update_domain(&dma_dom->domain); spin_unlock(&range->bitmap_lock); spin_unlock_irqrestore(&dma_dom->domain.lock, flags); return 0; out_free: update_domain(&dma_dom->domain); free_page((unsigned long)dma_dom->aperture[index]->bitmap); free_page((unsigned long)range->bitmap); kfree(dma_dom->aperture[index]); dma_dom->aperture[index] = NULL; kfree(range); return -ENOMEM; } static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom, struct aperture_range *range, unsigned long pages, unsigned long dma_mask, unsigned long boundary_size, unsigned long align_mask, bool trylock) { unsigned long offset, limit, flags; dma_addr_t address; bool flush = false; offset = range->offset >> PAGE_SHIFT; limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, dma_mask >> PAGE_SHIFT); if (trylock) { if (!spin_trylock_irqsave(&range->bitmap_lock, flags)) return -1; } else { spin_lock_irqsave(&range->bitmap_lock, flags); } address = iommu_area_alloc(range->bitmap, limit, range->next_bit, pages, offset, boundary_size, align_mask); if (address == -1) { /* Nothing found, retry one time */ address = iommu_area_alloc(range->bitmap, limit, 0, pages, offset, boundary_size, align_mask); 
flush = true; } if (address != -1) range->next_bit = address + pages; spin_unlock_irqrestore(&range->bitmap_lock, flags); if (flush) { domain_flush_tlb(&dom->domain); domain_flush_complete(&dom->domain); } return address; } static unsigned long dma_ops_area_alloc(struct device *dev, struct dma_ops_domain *dom, unsigned int pages, unsigned long align_mask, u64 dma_mask, unsigned long start) u64 dma_mask) { unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; int i = start >> APERTURE_RANGE_SHIFT; unsigned long boundary_size, mask; unsigned long address = -1; unsigned long limit; bool first = true; u32 start, i; next_bit >>= PAGE_SHIFT; preempt_disable(); mask = dma_get_seg_boundary(dev); again: start = this_cpu_read(*dom->next_index); /* Sanity check - is it really necessary? */ if (unlikely(start > APERTURE_MAX_RANGES)) { start = 0; this_cpu_write(*dom->next_index, 0); } boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : 1UL << (BITS_PER_LONG - PAGE_SHIFT); for (;i < max_index; ++i) { unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; for (i = 0; i < APERTURE_MAX_RANGES; ++i) { struct aperture_range *range; int index; if (dom->aperture[i]->offset >= dma_mask) break; index = (start + i) % APERTURE_MAX_RANGES; limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, dma_mask >> PAGE_SHIFT); range = dom->aperture[index]; if (!range || range->offset >= dma_mask) continue; address = iommu_area_alloc(dom->aperture[i]->bitmap, limit, next_bit, pages, 0, boundary_size, align_mask); address = dma_ops_aperture_alloc(dom, range, pages, dma_mask, boundary_size, align_mask, first); if (address != -1) { address = dom->aperture[i]->offset + (address << PAGE_SHIFT); dom->next_address = address + (pages << PAGE_SHIFT); address = range->offset + (address << PAGE_SHIFT); this_cpu_write(*dom->next_index, index); break; } } next_bit = 0; if (address == -1 && first) { first = 
false; goto again; } preempt_enable(); return address; } Loading @@ -1526,21 +1647,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, unsigned long align_mask, u64 dma_mask) { unsigned long address; #ifdef CONFIG_IOMMU_STRESS dom->next_address = 0; dom->need_flush = true; #endif unsigned long address = -1; address = dma_ops_area_alloc(dev, dom, pages, align_mask, dma_mask, dom->next_address); while (address == -1) { address = dma_ops_area_alloc(dev, dom, pages, align_mask, dma_mask); if (address == -1) { dom->next_address = 0; address = dma_ops_area_alloc(dev, dom, pages, align_mask, dma_mask, 0); dom->need_flush = true; if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC)) break; } if (unlikely(address == -1)) Loading @@ -1562,6 +1676,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, { unsigned i = address >> APERTURE_RANGE_SHIFT; struct aperture_range *range = dom->aperture[i]; unsigned long flags; BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); Loading @@ -1570,12 +1685,18 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, return; #endif if (address >= dom->next_address) dom->need_flush = true; if (amd_iommu_unmap_flush) { domain_flush_tlb(&dom->domain); domain_flush_complete(&dom->domain); } address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&range->bitmap_lock, flags); if (address + pages > range->next_bit) range->next_bit = address + pages; bitmap_clear(range->bitmap, address, pages); spin_unlock_irqrestore(&range->bitmap_lock, flags); } Loading Loading @@ -1755,6 +1876,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; free_percpu(dom->next_index); del_domain_from_list(&dom->domain); free_pagetable(&dom->domain); Loading @@ -1769,6 +1892,23 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) kfree(dom); } static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom, int max_apertures) { int ret, i, apertures; 
apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; ret = 0; for (i = apertures; i < max_apertures; ++i) { ret = alloc_new_range(dma_dom, false, GFP_KERNEL); if (ret) break; } return ret; } /* * Allocates a new protection domain usable for the dma_ops functions. * It also initializes the page table and the address allocator data Loading @@ -1777,6 +1917,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) static struct dma_ops_domain *dma_ops_domain_alloc(void) { struct dma_ops_domain *dma_dom; int cpu; dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); if (!dma_dom) Loading @@ -1785,6 +1926,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (protection_domain_init(&dma_dom->domain)) goto free_dma_dom; dma_dom->next_index = alloc_percpu(u32); if (!dma_dom->next_index) goto free_dma_dom; dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; Loading @@ -1792,8 +1937,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (!dma_dom->domain.pt_root) goto free_dma_dom; dma_dom->need_flush = false; add_domain_to_list(&dma_dom->domain); if (alloc_new_range(dma_dom, true, GFP_KERNEL)) Loading @@ -1804,8 +1947,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) * a valid dma-address. 
So we can use 0 as error value */ dma_dom->aperture[0]->bitmap[0] = 1; dma_dom->next_address = 0; for_each_possible_cpu(cpu) *per_cpu_ptr(dma_dom->next_index, cpu) = 0; return dma_dom; Loading Loading @@ -2328,7 +2472,7 @@ static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, else if (direction == DMA_BIDIRECTIONAL) __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; WARN_ON(*pte); WARN_ON_ONCE(*pte); *pte = __pte; Loading Loading @@ -2357,7 +2501,7 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom, pte += PM_LEVEL_INDEX(0, address); WARN_ON(!*pte); WARN_ON_ONCE(!*pte); *pte = 0ULL; } Loading Loading @@ -2393,27 +2537,12 @@ static dma_addr_t __map_single(struct device *dev, if (align) align_mask = (1UL << get_order(size)) - 1; retry: address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask); if (unlikely(address == DMA_ERROR_CODE)) { /* * setting next_address here will let the address * allocator only scan the new allocated range in the * first run. This is a small optimization. 
*/ dma_dom->next_address = dma_dom->aperture_size; if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) if (address == DMA_ERROR_CODE) goto out; /* * aperture was successfully enlarged by 128 MB, try * allocation again */ goto retry; } start = address; for (i = 0; i < pages; ++i) { ret = dma_ops_domain_map(dma_dom, start, paddr, dir); Loading @@ -2427,11 +2556,10 @@ static dma_addr_t __map_single(struct device *dev, ADD_STATS_COUNTER(alloced_io_mem, size); if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { domain_flush_tlb(&dma_dom->domain); dma_dom->need_flush = false; } else if (unlikely(amd_iommu_np_cache)) if (unlikely(amd_iommu_np_cache)) { domain_flush_pages(&dma_dom->domain, address, size); domain_flush_complete(&dma_dom->domain); } out: return address; Loading Loading @@ -2478,11 +2606,6 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, SUB_STATS_COUNTER(alloced_io_mem, size); dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { domain_flush_pages(&dma_dom->domain, flush_addr, size); dma_dom->need_flush = false; } } /* Loading @@ -2493,11 +2616,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; phys_addr_t paddr = page_to_phys(page) + offset; struct protection_domain *domain; dma_addr_t addr; u64 dma_mask; phys_addr_t paddr = page_to_phys(page) + offset; INC_STATS_COUNTER(cnt_map_single); Loading @@ -2509,19 +2630,8 @@ static dma_addr_t map_page(struct device *dev, struct page *page, dma_mask = *dev->dma_mask; spin_lock_irqsave(&domain->lock, flags); addr = __map_single(dev, domain->priv, paddr, size, dir, false, return __map_single(dev, domain->priv, paddr, size, dir, false, dma_mask); if (addr == DMA_ERROR_CODE) goto out; domain_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); return addr; } /* Loading @@ -2530,7 +2640,6 @@ static dma_addr_t map_page(struct device 
*dev, struct page *page, static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; struct protection_domain *domain; INC_STATS_COUNTER(cnt_unmap_single); Loading @@ -2539,13 +2648,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); __unmap_single(domain->priv, dma_addr, size, dir); domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } /* Loading @@ -2556,7 +2659,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; struct protection_domain *domain; int i; struct scatterlist *s; Loading @@ -2572,8 +2674,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, dma_mask = *dev->dma_mask; spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { paddr = sg_phys(s); Loading @@ -2588,12 +2688,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } domain_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); return mapped_elems; unmap: for_each_sg(sglist, s, mapped_elems, i) { if (s->dma_address) Loading @@ -2602,9 +2698,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } mapped_elems = 0; goto out; return 0; } /* Loading @@ -2615,7 +2709,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; struct protection_domain *domain; struct scatterlist *s; int i; Loading @@ -2626,17 +2719,11 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { __unmap_single(domain->priv, s->dma_address, 
s->dma_length, dir); s->dma_address = s->dma_length = 0; } domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } /* Loading @@ -2648,7 +2735,6 @@ static void *alloc_coherent(struct device *dev, size_t size, { u64 dma_mask = dev->coherent_dma_mask; struct protection_domain *domain; unsigned long flags; struct page *page; INC_STATS_COUNTER(cnt_alloc_coherent); Loading Loading @@ -2680,19 +2766,11 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!dma_mask) dma_mask = *dev->dma_mask; spin_lock_irqsave(&domain->lock, flags); *dma_addr = __map_single(dev, domain->priv, page_to_phys(page), size, DMA_BIDIRECTIONAL, true, dma_mask); if (*dma_addr == DMA_ERROR_CODE) { spin_unlock_irqrestore(&domain->lock, flags); if (*dma_addr == DMA_ERROR_CODE) goto out_free; } domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); return page_address(page); Loading @@ -2712,7 +2790,6 @@ static void free_coherent(struct device *dev, size_t size, struct dma_attrs *attrs) { struct protection_domain *domain; unsigned long flags; struct page *page; INC_STATS_COUNTER(cnt_free_coherent); Loading @@ -2724,14 +2801,8 @@ static void free_coherent(struct device *dev, size_t size, if (IS_ERR(domain)) goto free_mem; spin_lock_irqsave(&domain->lock, flags); __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); free_mem: if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) __free_pages(page, get_order(size)); Loading @@ -2746,6 +2817,34 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) return check_device(dev); } static int set_dma_mask(struct device *dev, u64 mask) { struct protection_domain *domain; int max_apertures = 1; domain = get_domain(dev); if (IS_ERR(domain)) return PTR_ERR(domain); if (mask == DMA_BIT_MASK(64)) max_apertures = 8; else if (mask > DMA_BIT_MASK(32)) max_apertures = 4; /* * To prevent lock contention 
it doesn't make sense to allocate more * apertures than online cpus */ if (max_apertures > num_online_cpus()) max_apertures = num_online_cpus(); if (dma_ops_domain_alloc_apertures(domain->priv, max_apertures)) dev_err(dev, "Can't allocate %d iommu apertures\n", max_apertures); return 0; } static struct dma_map_ops amd_iommu_dma_ops = { .alloc = alloc_coherent, .free = free_coherent, Loading @@ -2754,6 +2853,7 @@ static struct dma_map_ops amd_iommu_dma_ops = { .map_sg = map_sg, .unmap_sg = unmap_sg, .dma_supported = amd_iommu_dma_supported, .set_dma_mask = set_dma_mask, }; int __init amd_iommu_init_api(void) Loading Loading @@ -3757,11 +3857,9 @@ static struct irq_domain *get_irq_domain(struct irq_alloc_info *info) case X86_IRQ_ALLOC_TYPE_MSI: case X86_IRQ_ALLOC_TYPE_MSIX: devid = get_device_id(&info->msi_dev->dev); if (devid >= 0) { iommu = amd_iommu_rlookup_table[devid]; if (iommu) return iommu->msi_domain; } break; default: break; Loading
drivers/iommu/amd_iommu_types.h +0 −40 Original line number Diff line number Diff line Loading @@ -424,46 +424,6 @@ struct protection_domain { void *priv; /* private data */ }; /* * For dynamic growth the aperture size is split into ranges of 128MB of * DMA address space each. This struct represents one such range. */ struct aperture_range { /* address allocation bitmap */ unsigned long *bitmap; /* * Array of PTE pages for the aperture. In this array we save all the * leaf pages of the domain page table used for the aperture. This way * we don't need to walk the page table to find a specific PTE. We can * just calculate its address in constant time. */ u64 *pte_pages[64]; unsigned long offset; }; /* * Data container for a dma_ops specific protection domain */ struct dma_ops_domain { /* generic protection domain information */ struct protection_domain domain; /* size of the aperture for the mappings */ unsigned long aperture_size; /* address we start to search for free addresses */ unsigned long next_address; /* address space relevant data */ struct aperture_range *aperture[APERTURE_MAX_RANGES]; /* This will be set to true when TLB needs to be flushed */ bool need_flush; }; /* * Structure where we save information about one hardware AMD IOMMU in the * system. Loading