Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 32704253 authored by Joerg Roedel's avatar Joerg Roedel
Browse files

Merge branches 's390', 'arm/renesas', 'arm/msm', 'arm/shmobile', 'arm/smmu',...

Merge branches 's390', 'arm/renesas', 'arm/msm', 'arm/shmobile', 'arm/smmu', 'x86/amd' and 'x86/vt-d' into next
Loading
+10 −2
Original line number Diff line number Diff line
@@ -7,7 +7,15 @@ connected to the IPMMU through a port called micro-TLB.

Required Properties:

  - compatible: Must contain "renesas,ipmmu-vmsa".
  - compatible: Must contain SoC-specific and generic entries from below.

    - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU.
    - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
    - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
    - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU.
    - "renesas,ipmmu-r8a7794" for the R8A7794 (R-Car E2) IPMMU.
    - "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU.

  - reg: Base address and size of the IPMMU registers.
  - interrupts: Specifiers for the MMU fault interrupts. For instances that
    support secure mode two interrupts must be specified, for non-secure and
@@ -27,7 +35,7 @@ node with the following property:
Example: R8A7791 IPMMU-MX and VSP1-D0 bus master

	ipmmu_mx: mmu@fe951000 {
		compatible = "renesas,ipmmu-vmsa";
		compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa";
		reg = <0 0xfe951000 0 0x1000>;
		interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>,
			     <0 221 IRQ_TYPE_LEVEL_HIGH>;
+0 −75
Original line number Diff line number Diff line
@@ -263,81 +263,6 @@ config EXYNOS_IOMMU_DEBUG

	  Say N unless you need kernel log message for IOMMU debugging.

config SHMOBILE_IPMMU
	bool

config SHMOBILE_IPMMU_TLB
	bool

config SHMOBILE_IOMMU
	bool "IOMMU for Renesas IPMMU/IPMMUI"
	default n
	depends on ARM && MMU
	depends on ARCH_SHMOBILE || COMPILE_TEST
	select IOMMU_API
	select ARM_DMA_USE_IOMMU
	select SHMOBILE_IPMMU
	select SHMOBILE_IPMMU_TLB
	help
	  Support for Renesas IPMMU/IPMMUI. This option enables
	  remapping of DMA memory accesses from all of the IP blocks
	  on the ICB.

	  Warning: Drivers (including userspace drivers of UIO
	  devices) of the IP blocks on the ICB *must* use addresses
	  allocated from the IPMMU (iova) for DMA with this option
	  enabled.

	  If unsure, say N.

choice
	prompt "IPMMU/IPMMUI address space size"
	default SHMOBILE_IOMMU_ADDRSIZE_2048MB
	depends on SHMOBILE_IOMMU
	help
	  This option sets IPMMU/IPMMUI address space size by
	  adjusting the 1st level page table size. The page table size
	  is calculated as follows:

	      page table size = number of page table entries * 4 bytes
	      number of page table entries = address space size / 1 MiB

	  For example, when the address space size is 2048 MiB, the
	  1st level page table size is 8192 bytes.

	config SHMOBILE_IOMMU_ADDRSIZE_2048MB
		bool "2 GiB"

	config SHMOBILE_IOMMU_ADDRSIZE_1024MB
		bool "1 GiB"

	config SHMOBILE_IOMMU_ADDRSIZE_512MB
		bool "512 MiB"

	config SHMOBILE_IOMMU_ADDRSIZE_256MB
		bool "256 MiB"

	config SHMOBILE_IOMMU_ADDRSIZE_128MB
		bool "128 MiB"

	config SHMOBILE_IOMMU_ADDRSIZE_64MB
		bool "64 MiB"

	config SHMOBILE_IOMMU_ADDRSIZE_32MB
		bool "32 MiB"

endchoice

config SHMOBILE_IOMMU_L1SIZE
	int
	default 8192 if SHMOBILE_IOMMU_ADDRSIZE_2048MB
	default 4096 if SHMOBILE_IOMMU_ADDRSIZE_1024MB
	default 2048 if SHMOBILE_IOMMU_ADDRSIZE_512MB
	default 1024 if SHMOBILE_IOMMU_ADDRSIZE_256MB
	default 512 if SHMOBILE_IOMMU_ADDRSIZE_128MB
	default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB
	default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB

config IPMMU_VMSA
	bool "Renesas VMSA-compatible IPMMU"
	depends on ARM_LPAE
+0 −2
Original line number Diff line number Diff line
@@ -22,7 +22,5 @@ obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
+247 −149
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@
#include <linux/msi.h>
#include <linux/dma-contiguous.h>
#include <linux/irqdomain.h>
#include <linux/percpu.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
@@ -114,6 +115,45 @@ struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain);
static int protection_domain_init(struct protection_domain *domain);

/*
 * For dynamic growth the aperture size is split into ranges of 128MB of
 * DMA address space each. This struct represents one such range.
 *
 * Each range carries its own lock so that allocations and frees in
 * different ranges do not have to serialize on a single lock.
 */
struct aperture_range {

	/* taken around every search, update and clear of bitmap/next_bit */
	spinlock_t bitmap_lock;

	/* address allocation bitmap */
	unsigned long *bitmap;
	/* value of the domain's aperture_size at the time the range was created */
	unsigned long offset;
	/* bit position in the bitmap at which the next search starts */
	unsigned long next_bit;

	/*
	 * Array of PTE pages for the aperture. In this array we save all the
	 * leaf pages of the domain page table used for the aperture. This way
	 * we don't need to walk the page table to find a specific PTE. We can
	 * just calculate its address in constant time.
	 */
	u64 *pte_pages[64];
};

/*
 * Data container for a dma_ops specific protection domain
 */
struct dma_ops_domain {
	/* generic protection domain information */
	struct protection_domain domain;

	/*
	 * size of the aperture for the mappings; shifting it right by
	 * APERTURE_RANGE_SHIFT yields the number of ranges allocated so far
	 */
	unsigned long aperture_size;

	/*
	 * aperture index we start searching for free addresses; kept per
	 * CPU and set to the index of the range that served the last
	 * successful allocation on that CPU
	 */
	u32 __percpu *next_index;

	/* address space relevant data; slots not yet allocated are NULL */
	struct aperture_range *aperture[APERTURE_MAX_RANGES];
};

/****************************************************************************
 *
 * Helper functions
@@ -1167,11 +1207,21 @@ static u64 *alloc_pte(struct protection_domain *domain,
	end_lvl = PAGE_SIZE_LEVEL(page_size);

	while (level > end_lvl) {
		if (!IOMMU_PTE_PRESENT(*pte)) {
		u64 __pte, __npte;

		__pte = *pte;

		if (!IOMMU_PTE_PRESENT(__pte)) {
			page = (u64 *)get_zeroed_page(gfp);
			if (!page)
				return NULL;
			*pte = PM_LEVEL_PDE(level, virt_to_phys(page));

			__npte = PM_LEVEL_PDE(level, virt_to_phys(page));

			if (cmpxchg64(pte, __pte, __npte)) {
				free_page((unsigned long)page);
				continue;
			}
		}

		/* No level skipping support yet */
@@ -1376,8 +1426,10 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
			   bool populate, gfp_t gfp)
{
	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
	struct amd_iommu *iommu;
	unsigned long i, old_size, pte_pgsize;
	struct aperture_range *range;
	struct amd_iommu *iommu;
	unsigned long flags;

#ifdef CONFIG_IOMMU_STRESS
	populate = false;
@@ -1386,15 +1438,17 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
	if (index >= APERTURE_MAX_RANGES)
		return -ENOMEM;

	dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
	if (!dma_dom->aperture[index])
	range = kzalloc(sizeof(struct aperture_range), gfp);
	if (!range)
		return -ENOMEM;

	dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
	if (!dma_dom->aperture[index]->bitmap)
	range->bitmap = (void *)get_zeroed_page(gfp);
	if (!range->bitmap)
		goto out_free;

	dma_dom->aperture[index]->offset = dma_dom->aperture_size;
	range->offset = dma_dom->aperture_size;

	spin_lock_init(&range->bitmap_lock);

	if (populate) {
		unsigned long address = dma_dom->aperture_size;
@@ -1407,13 +1461,19 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
			if (!pte)
				goto out_free;

			dma_dom->aperture[index]->pte_pages[i] = pte_page;
			range->pte_pages[i] = pte_page;

			address += APERTURE_RANGE_SIZE / 64;
		}
	}

	spin_lock_irqsave(&dma_dom->domain.lock, flags);

	/* First take the bitmap_lock and then publish the range */
	spin_lock(&range->bitmap_lock);

	old_size                 = dma_dom->aperture_size;
	dma_dom->aperture[index] = range;
	dma_dom->aperture_size  += APERTURE_RANGE_SIZE;

	/* Reserve address range used for MSI messages */
@@ -1461,62 +1521,123 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,

	update_domain(&dma_dom->domain);

	spin_unlock(&range->bitmap_lock);

	spin_unlock_irqrestore(&dma_dom->domain.lock, flags);

	return 0;

out_free:
	update_domain(&dma_dom->domain);

	free_page((unsigned long)dma_dom->aperture[index]->bitmap);
	free_page((unsigned long)range->bitmap);

	kfree(dma_dom->aperture[index]);
	dma_dom->aperture[index] = NULL;
	kfree(range);

	return -ENOMEM;
}

/*
 * Try to allocate @pages contiguous pages from a single aperture range.
 *
 * @dom:           dma_ops domain the range belongs to (used for flushing)
 * @range:         aperture range whose bitmap is searched
 * @pages:         number of pages to allocate
 * @dma_mask:      DMA mask limiting the highest usable address
 * @boundary_size: segment boundary, passed through to iommu_area_alloc()
 * @align_mask:    alignment mask, passed through to iommu_area_alloc()
 * @trylock:       if true, give up immediately when the range lock is
 *                 contended instead of spinning on it
 *
 * The search starts at range->next_bit and, if that fails, is repeated
 * once from bit 0. Whenever the second search was attempted the domain
 * TLB is flushed after the lock has been dropped, whether or not that
 * search found anything.
 * NOTE(review): the flush is presumably needed because a search from 0
 * may hand out addresses that were freed without a flush - confirm.
 *
 * Returns the value from iommu_area_alloc() on success, or -1 if nothing
 * was found or the lock could not be taken in trylock mode.
 */
static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom,
					 struct aperture_range *range,
					 unsigned long pages,
					 unsigned long dma_mask,
					 unsigned long boundary_size,
					 unsigned long align_mask,
					 bool trylock)
{
	unsigned long base_page = range->offset >> PAGE_SHIFT;
	unsigned long max_index, irq_flags;
	bool wrapped = false;
	dma_addr_t addr;

	max_index = iommu_device_max_index(APERTURE_RANGE_PAGES, base_page,
					   dma_mask >> PAGE_SHIFT);

	if (!trylock) {
		spin_lock_irqsave(&range->bitmap_lock, irq_flags);
	} else if (!spin_trylock_irqsave(&range->bitmap_lock, irq_flags)) {
		/* Contended - let the caller move on to another range */
		return -1;
	}

	/* First attempt: continue where the previous allocation stopped */
	addr = iommu_area_alloc(range->bitmap, max_index, range->next_bit,
				pages, base_page, boundary_size, align_mask);

	if (addr == -1) {
		/* Second and last attempt: scan the range from its start */
		wrapped = true;
		addr = iommu_area_alloc(range->bitmap, max_index, 0,
					pages, base_page, boundary_size,
					align_mask);
	}

	/* Remember where to continue, but only after a successful search */
	if (addr != -1)
		range->next_bit = addr + pages;

	spin_unlock_irqrestore(&range->bitmap_lock, irq_flags);

	if (!wrapped)
		return addr;

	/* The flush is done outside of the bitmap lock */
	domain_flush_tlb(&dom->domain);
	domain_flush_complete(&dom->domain);

	return addr;
}

static unsigned long dma_ops_area_alloc(struct device *dev,
					struct dma_ops_domain *dom,
					unsigned int pages,
					unsigned long align_mask,
					u64 dma_mask,
					unsigned long start)
					u64 dma_mask)
{
	unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
	int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
	int i = start >> APERTURE_RANGE_SHIFT;
	unsigned long boundary_size, mask;
	unsigned long address = -1;
	unsigned long limit;
	bool first = true;
	u32 start, i;

	next_bit >>= PAGE_SHIFT;
	preempt_disable();

	mask = dma_get_seg_boundary(dev);

again:
	start = this_cpu_read(*dom->next_index);

	/* Sanity check - is it really necessary? */
	if (unlikely(start > APERTURE_MAX_RANGES)) {
		start = 0;
		this_cpu_write(*dom->next_index, 0);
	}

	boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
				   1UL << (BITS_PER_LONG - PAGE_SHIFT);

	for (;i < max_index; ++i) {
		unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
	for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
		struct aperture_range *range;
		int index;

		if (dom->aperture[i]->offset >= dma_mask)
			break;
		index = (start + i) % APERTURE_MAX_RANGES;

		limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
					       dma_mask >> PAGE_SHIFT);
		range = dom->aperture[index];

		if (!range || range->offset >= dma_mask)
			continue;

		address = iommu_area_alloc(dom->aperture[i]->bitmap,
					   limit, next_bit, pages, 0,
					    boundary_size, align_mask);
		address = dma_ops_aperture_alloc(dom, range, pages,
						 dma_mask, boundary_size,
						 align_mask, first);
		if (address != -1) {
			address = dom->aperture[i]->offset +
				  (address << PAGE_SHIFT);
			dom->next_address = address + (pages << PAGE_SHIFT);
			address = range->offset + (address << PAGE_SHIFT);
			this_cpu_write(*dom->next_index, index);
			break;
		}
	}

		next_bit = 0;
	if (address == -1 && first) {
		first = false;
		goto again;
	}

	preempt_enable();

	return address;
}

@@ -1526,21 +1647,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
					     unsigned long align_mask,
					     u64 dma_mask)
{
	unsigned long address;

#ifdef CONFIG_IOMMU_STRESS
	dom->next_address = 0;
	dom->need_flush = true;
#endif
	unsigned long address = -1;

	address = dma_ops_area_alloc(dev, dom, pages, align_mask,
				     dma_mask, dom->next_address);
	while (address == -1) {
		address = dma_ops_area_alloc(dev, dom, pages,
					     align_mask, dma_mask);

	if (address == -1) {
		dom->next_address = 0;
		address = dma_ops_area_alloc(dev, dom, pages, align_mask,
					     dma_mask, 0);
		dom->need_flush = true;
		if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC))
			break;
	}

	if (unlikely(address == -1))
@@ -1562,6 +1676,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
{
	unsigned i = address >> APERTURE_RANGE_SHIFT;
	struct aperture_range *range = dom->aperture[i];
	unsigned long flags;

	BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);

@@ -1570,12 +1685,18 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
		return;
#endif

	if (address >= dom->next_address)
		dom->need_flush = true;
	if (amd_iommu_unmap_flush) {
		domain_flush_tlb(&dom->domain);
		domain_flush_complete(&dom->domain);
	}

	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;

	spin_lock_irqsave(&range->bitmap_lock, flags);
	if (address + pages > range->next_bit)
		range->next_bit = address + pages;
	bitmap_clear(range->bitmap, address, pages);
	spin_unlock_irqrestore(&range->bitmap_lock, flags);

}

@@ -1755,6 +1876,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
	if (!dom)
		return;

	free_percpu(dom->next_index);

	del_domain_from_list(&dom->domain);

	free_pagetable(&dom->domain);
@@ -1769,6 +1892,23 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
	kfree(dom);
}

/*
 * Grow the aperture of @dma_dom until it consists of @max_apertures
 * ranges. Ranges are added with alloc_new_range() without pre-populating
 * page tables, using GFP_KERNEL allocations.
 *
 * Returns 0 if the domain already has enough ranges or all missing ones
 * were added, otherwise the error returned by the first failing
 * alloc_new_range() call. Ranges added before the failure are kept.
 */
static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom,
					  int max_apertures)
{
	/* Number of ranges present when we start; counted locally from here */
	int count = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
	int err = 0;

	while (count < max_apertures) {
		err = alloc_new_range(dma_dom, false, GFP_KERNEL);
		if (err)
			return err;

		count++;
	}

	return err;
}

/*
 * Allocates a new protection domain usable for the dma_ops functions.
 * It also initializes the page table and the address allocator data
@@ -1777,6 +1917,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
static struct dma_ops_domain *dma_ops_domain_alloc(void)
{
	struct dma_ops_domain *dma_dom;
	int cpu;

	dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
	if (!dma_dom)
@@ -1785,6 +1926,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
	if (protection_domain_init(&dma_dom->domain))
		goto free_dma_dom;

	dma_dom->next_index = alloc_percpu(u32);
	if (!dma_dom->next_index)
		goto free_dma_dom;

	dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
	dma_dom->domain.flags = PD_DMA_OPS_MASK;
@@ -1792,8 +1937,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
	if (!dma_dom->domain.pt_root)
		goto free_dma_dom;

	dma_dom->need_flush = false;

	add_domain_to_list(&dma_dom->domain);

	if (alloc_new_range(dma_dom, true, GFP_KERNEL))
@@ -1804,8 +1947,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
	 * a valid dma-address. So we can use 0 as error value
	 */
	dma_dom->aperture[0]->bitmap[0] = 1;
	dma_dom->next_address = 0;

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(dma_dom->next_index, cpu) = 0;

	return dma_dom;

@@ -2328,7 +2472,7 @@ static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
	else if (direction == DMA_BIDIRECTIONAL)
		__pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;

	WARN_ON(*pte);
	WARN_ON_ONCE(*pte);

	*pte = __pte;

@@ -2357,7 +2501,7 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom,

	pte += PM_LEVEL_INDEX(0, address);

	WARN_ON(!*pte);
	WARN_ON_ONCE(!*pte);

	*pte = 0ULL;
}
@@ -2393,27 +2537,12 @@ static dma_addr_t __map_single(struct device *dev,
	if (align)
		align_mask = (1UL << get_order(size)) - 1;

retry:
	address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
					  dma_mask);
	if (unlikely(address == DMA_ERROR_CODE)) {
		/*
		 * setting next_address here will let the address
		 * allocator only scan the new allocated range in the
		 * first run. This is a small optimization.
		 */
		dma_dom->next_address = dma_dom->aperture_size;

		if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
	if (address == DMA_ERROR_CODE)
		goto out;

		/*
		 * aperture was successfully enlarged by 128 MB, try
		 * allocation again
		 */
		goto retry;
	}

	start = address;
	for (i = 0; i < pages; ++i) {
		ret = dma_ops_domain_map(dma_dom, start, paddr, dir);
@@ -2427,11 +2556,10 @@ static dma_addr_t __map_single(struct device *dev,

	ADD_STATS_COUNTER(alloced_io_mem, size);

	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
		domain_flush_tlb(&dma_dom->domain);
		dma_dom->need_flush = false;
	} else if (unlikely(amd_iommu_np_cache))
	if (unlikely(amd_iommu_np_cache)) {
		domain_flush_pages(&dma_dom->domain, address, size);
		domain_flush_complete(&dma_dom->domain);
	}

out:
	return address;
@@ -2478,11 +2606,6 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
	SUB_STATS_COUNTER(alloced_io_mem, size);

	dma_ops_free_addresses(dma_dom, dma_addr, pages);

	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
		domain_flush_pages(&dma_dom->domain, flush_addr, size);
		dma_dom->need_flush = false;
	}
}

/*
@@ -2493,11 +2616,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
			   enum dma_data_direction dir,
			   struct dma_attrs *attrs)
{
	unsigned long flags;
	phys_addr_t paddr = page_to_phys(page) + offset;
	struct protection_domain *domain;
	dma_addr_t addr;
	u64 dma_mask;
	phys_addr_t paddr = page_to_phys(page) + offset;

	INC_STATS_COUNTER(cnt_map_single);

@@ -2509,19 +2630,8 @@ static dma_addr_t map_page(struct device *dev, struct page *page,

	dma_mask = *dev->dma_mask;

	spin_lock_irqsave(&domain->lock, flags);

	addr = __map_single(dev, domain->priv, paddr, size, dir, false,
	return __map_single(dev, domain->priv, paddr, size, dir, false,
			    dma_mask);
	if (addr == DMA_ERROR_CODE)
		goto out;

	domain_flush_complete(domain);

out:
	spin_unlock_irqrestore(&domain->lock, flags);

	return addr;
}

/*
@@ -2530,7 +2640,6 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
		       enum dma_data_direction dir, struct dma_attrs *attrs)
{
	unsigned long flags;
	struct protection_domain *domain;

	INC_STATS_COUNTER(cnt_unmap_single);
@@ -2539,13 +2648,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
	if (IS_ERR(domain))
		return;

	spin_lock_irqsave(&domain->lock, flags);

	__unmap_single(domain->priv, dma_addr, size, dir);

	domain_flush_complete(domain);

	spin_unlock_irqrestore(&domain->lock, flags);
}

/*
@@ -2556,7 +2659,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
		  int nelems, enum dma_data_direction dir,
		  struct dma_attrs *attrs)
{
	unsigned long flags;
	struct protection_domain *domain;
	int i;
	struct scatterlist *s;
@@ -2572,8 +2674,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,

	dma_mask = *dev->dma_mask;

	spin_lock_irqsave(&domain->lock, flags);

	for_each_sg(sglist, s, nelems, i) {
		paddr = sg_phys(s);

@@ -2588,12 +2688,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
			goto unmap;
	}

	domain_flush_complete(domain);

out:
	spin_unlock_irqrestore(&domain->lock, flags);

	return mapped_elems;

unmap:
	for_each_sg(sglist, s, mapped_elems, i) {
		if (s->dma_address)
@@ -2602,9 +2698,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
		s->dma_address = s->dma_length = 0;
	}

	mapped_elems = 0;

	goto out;
	return 0;
}

/*
@@ -2615,7 +2709,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
		     int nelems, enum dma_data_direction dir,
		     struct dma_attrs *attrs)
{
	unsigned long flags;
	struct protection_domain *domain;
	struct scatterlist *s;
	int i;
@@ -2626,17 +2719,11 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
	if (IS_ERR(domain))
		return;

	spin_lock_irqsave(&domain->lock, flags);

	for_each_sg(sglist, s, nelems, i) {
		__unmap_single(domain->priv, s->dma_address,
			       s->dma_length, dir);
		s->dma_address = s->dma_length = 0;
	}

	domain_flush_complete(domain);

	spin_unlock_irqrestore(&domain->lock, flags);
}

/*
@@ -2648,7 +2735,6 @@ static void *alloc_coherent(struct device *dev, size_t size,
{
	u64 dma_mask = dev->coherent_dma_mask;
	struct protection_domain *domain;
	unsigned long flags;
	struct page *page;

	INC_STATS_COUNTER(cnt_alloc_coherent);
@@ -2680,19 +2766,11 @@ static void *alloc_coherent(struct device *dev, size_t size,
	if (!dma_mask)
		dma_mask = *dev->dma_mask;

	spin_lock_irqsave(&domain->lock, flags);

	*dma_addr = __map_single(dev, domain->priv, page_to_phys(page),
				 size, DMA_BIDIRECTIONAL, true, dma_mask);

	if (*dma_addr == DMA_ERROR_CODE) {
		spin_unlock_irqrestore(&domain->lock, flags);
	if (*dma_addr == DMA_ERROR_CODE)
		goto out_free;
	}

	domain_flush_complete(domain);

	spin_unlock_irqrestore(&domain->lock, flags);

	return page_address(page);

@@ -2712,7 +2790,6 @@ static void free_coherent(struct device *dev, size_t size,
			  struct dma_attrs *attrs)
{
	struct protection_domain *domain;
	unsigned long flags;
	struct page *page;

	INC_STATS_COUNTER(cnt_free_coherent);
@@ -2724,14 +2801,8 @@ static void free_coherent(struct device *dev, size_t size,
	if (IS_ERR(domain))
		goto free_mem;

	spin_lock_irqsave(&domain->lock, flags);

	__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);

	domain_flush_complete(domain);

	spin_unlock_irqrestore(&domain->lock, flags);

free_mem:
	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
		__free_pages(page, get_order(size));
@@ -2746,6 +2817,34 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
	return check_device(dev);
}

/*
 * dma_map_ops->set_dma_mask call-back.
 *
 * Uses the requested @mask as a hint for how many aperture ranges the
 * device's dma_ops domain should have up front: 8 for a full 64 bit
 * mask, 4 for any other mask above 32 bit and 1 otherwise, capped at the
 * number of online CPUs.
 *
 * Returns the error from get_domain() if the device has no usable
 * domain, 0 otherwise. Failing to allocate the ranges is only logged and
 * not treated as an error.
 *
 * NOTE(review): this function never stores @mask in the device. Whether
 * the caller does that when a set_dma_mask call-back is present cannot
 * be seen from here - confirm against dma_set_mask().
 */
static int set_dma_mask(struct device *dev, u64 mask)
{
	struct protection_domain *domain;
	int wanted;

	domain = get_domain(dev);
	if (IS_ERR(domain))
		return PTR_ERR(domain);

	if (mask == DMA_BIT_MASK(64))
		wanted = 8;
	else if (mask > DMA_BIT_MASK(32))
		wanted = 4;
	else
		wanted = 1;

	/*
	 * More apertures than online cpus would not reduce lock
	 * contention any further, so don't allocate them.
	 */
	if (wanted > num_online_cpus())
		wanted = num_online_cpus();

	if (dma_ops_domain_alloc_apertures(domain->priv, wanted) != 0)
		dev_err(dev, "Can't allocate %d iommu apertures\n",
			wanted);

	return 0;
}

static struct dma_map_ops amd_iommu_dma_ops = {
	.alloc		= alloc_coherent,
	.free		= free_coherent,
@@ -2754,6 +2853,7 @@ static struct dma_map_ops amd_iommu_dma_ops = {
	.map_sg		= map_sg,
	.unmap_sg	= unmap_sg,
	.dma_supported	= amd_iommu_dma_supported,
	.set_dma_mask	= set_dma_mask,
};

int __init amd_iommu_init_api(void)
@@ -3757,11 +3857,9 @@ static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
	case X86_IRQ_ALLOC_TYPE_MSI:
	case X86_IRQ_ALLOC_TYPE_MSIX:
		devid = get_device_id(&info->msi_dev->dev);
		if (devid >= 0) {
		iommu = amd_iommu_rlookup_table[devid];
		if (iommu)
			return iommu->msi_domain;
		}
		break;
	default:
		break;
+0 −40
Original line number Diff line number Diff line
@@ -424,46 +424,6 @@ struct protection_domain {
	void *priv;             /* private data */
};

/*
 * For dynamic growth the aperture size is split into ranges of 128MB of
 * DMA address space each. This struct represents one such range.
 */
struct aperture_range {

	/* address allocation bitmap */
	unsigned long *bitmap;

	/*
	 * Array of PTE pages for the aperture. In this array we save all the
	 * leaf pages of the domain page table used for the aperture. This way
	 * we don't need to walk the page table to find a specific PTE. We can
	 * just calculate its address in constant time.
	 */
	u64 *pte_pages[64];

	/* value of the domain's aperture_size at the time the range was created */
	unsigned long offset;
};

/*
 * Data container for a dma_ops specific protection domain
 */
struct dma_ops_domain {
	/* generic protection domain information */
	struct protection_domain domain;

	/*
	 * size of the aperture for the mappings; shifting it right by
	 * APERTURE_RANGE_SHIFT yields the number of ranges allocated so far
	 */
	unsigned long aperture_size;

	/* address we start to search for free addresses */
	unsigned long next_address;

	/* address space relevant data */
	struct aperture_range *aperture[APERTURE_MAX_RANGES];

	/* This will be set to true when TLB needs to be flushed */
	bool need_flush;
};

/*
 * Structure where we save information about one hardware AMD IOMMU in the
 * system.
Loading