
Commit a83f5d6a authored by David S. Miller

Merge branch 'generic-iommu-allocator'



Sowmini Varadhan says:

====================
Generic IOMMU pooled allocator

Investigation of network performance on Sparc shows a high
degree of locking contention in the IOMMU allocator, and it
was noticed that the PowerPC code has a better locking model.

This patch series tries to extract the generic parts of the
PowerPC code so that it can be shared across multiple PCI
devices and architectures.

v10: resend patch v9 without the RFC tag and with a new mail Message-Id
(the previous non-RFC attempt did not show up on the patchwork queue?)

Full revision history below:
v2 changes:
  - incorporate David Miller editorial comments: sparc specific
    fields moved from iommu-common into sparc's iommu_64.h
  - make the npools value an input parameter, for the case when
    the iommu map size is not very large
  - cookie_to_index mapping, and optimizations for the span-boundary
    check, for use cases such as LDC.

v3: eliminate iommu_sparc, rearrange the ->demap indirection to
    be invoked under the pool lock.

v4: David Miller review changes:
  - s/IOMMU_ERROR_CODE/DMA_ERROR_CODE
  - page_table_map_base and page_table_shift are unsigned long, not u32.

v5: removed ->cookie_to_index and ->demap indirection from the
    iommu_tbl_ops. The caller needs to call these functions as needed,
    before invoking the generic arena allocator functions.
    Added the "skip_span_boundary" argument to iommu_tbl_pool_init() for
    those callers like LDC which do not care about span boundary checks.

v6: removed iommu_tbl_ops, and instead pass ->flush_all as
    an indirection to iommu_tbl_pool_init(); only invoke ->flush_all
    when there is no large_pool, based on the assumption that large-pool
    usage is infrequently encountered.

v7: moved pool_hash initialization to lib/iommu-common.c and cleaned up
    code duplication from sun4v/sun4u/ldc.

v8: Addresses BenH comments with one exception: I've left the
    IOMMU_POOL_HASH as is, so that powerpc can tailor it to their
    convenience. Discarded the trylock in favor of a simple spin_lock
    to acquire the pool.

v9: Addresses latest BenH comments: need_flush checks, add support
    for dma mask and align_order.

v10: resend without RFC tag, and new mail Message-Id.
====================
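The locking improvement described in the cover letter can be illustrated with a minimal, self-contained C sketch (illustrative only: struct demo_pool, pool_hash() and demo_alloc() are hypothetical names, not the API added by this series). The map is split into several pools, each guarded by its own lock, and a hash of the calling CPU picks the starting pool, so concurrent allocations usually contend on different locks rather than on a single IOMMU-wide one.

/*
 * Minimal userspace sketch of the pooled-allocator idea (hypothetical
 * names; not the lib/iommu-common API).  Each pool has its own lock and
 * a per-caller hash selects the starting pool, spreading contention.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_POOLS   4
#define POOL_SIZE  64			/* entries per pool */

struct demo_pool {
	pthread_mutex_t lock;
	bool used[POOL_SIZE];		/* stand-in for the bitmap */
	unsigned int hint;		/* next slot to try */
};

static struct demo_pool pools[NR_POOLS];

/* Pretend per-CPU hash: spread callers across pools. */
static unsigned int pool_hash(unsigned int cpu)
{
	return cpu % NR_POOLS;
}

/* Allocate one entry, starting from the pool selected by the hash. */
static long demo_alloc(unsigned int cpu)
{
	unsigned int p, i, start = pool_hash(cpu);

	for (p = 0; p < NR_POOLS; p++) {
		unsigned int pidx = (start + p) % NR_POOLS;
		struct demo_pool *pool = &pools[pidx];

		pthread_mutex_lock(&pool->lock);
		for (i = 0; i < POOL_SIZE; i++) {
			unsigned int slot = (pool->hint + i) % POOL_SIZE;

			if (!pool->used[slot]) {
				pool->used[slot] = true;
				pool->hint = slot + 1;
				pthread_mutex_unlock(&pool->lock);
				return (long)pidx * POOL_SIZE + slot;
			}
		}
		pthread_mutex_unlock(&pool->lock);
	}
	return -1;			/* all pools exhausted */
}

static void demo_free(long entry)
{
	struct demo_pool *pool = &pools[entry / POOL_SIZE];

	pthread_mutex_lock(&pool->lock);
	pool->used[entry % POOL_SIZE] = false;
	pthread_mutex_unlock(&pool->lock);
}

int main(void)
{
	for (unsigned int i = 0; i < NR_POOLS; i++)
		pthread_mutex_init(&pools[i].lock, NULL);

	long a = demo_alloc(0), b = demo_alloc(1);

	printf("cpu0 got entry %ld, cpu1 got entry %ld\n", a, b);
	demo_free(a);
	demo_free(b);
	return 0;
}

In the series itself this role is played by iommu_tbl_pool_init(), iommu_tbl_range_alloc() and iommu_tbl_range_free(), which the diffs below switch the sun4u IOMMU and LDC code over to.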

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 497a5df7 671d7732
+3 −4
@@ -16,6 +16,7 @@
 #define IOPTE_WRITE   0x0000000000000002UL
 
 #define IOMMU_NUM_CTXS	4096
+#include <linux/iommu-common.h>
 
 struct iommu_arena {
 	unsigned long	*map;
@@ -24,11 +25,10 @@ struct iommu_arena {
 };
 
 struct iommu {
+	struct iommu_table	tbl;
 	spinlock_t		lock;
-	struct iommu_arena	arena;
-	void			(*flush_all)(struct iommu *);
+	u32			dma_addr_mask;
 	iopte_t			*page_table;
-	u32			page_table_map_base;
 	unsigned long		iommu_control;
 	unsigned long		iommu_tsbbase;
 	unsigned long		iommu_flush;
@@ -40,7 +40,6 @@ struct iommu {
 	unsigned long		dummy_page_pa;
 	unsigned long		ctx_lowest_free;
 	DECLARE_BITMAP(ctx_bitmap, IOMMU_NUM_CTXS);
-	u32			dma_addr_mask;
 };
 
 struct strbuf {
+63 −125
@@ -13,11 +13,15 @@
 #include <linux/errno.h>
 #include <linux/iommu-helper.h>
 #include <linux/bitmap.h>
+#include <linux/hash.h>
+#include <linux/iommu-common.h>
 
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
 #endif
 
+static	DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
+
 #include <asm/iommu.h>
 
 #include "iommu_common.h"
@@ -45,8 +49,9 @@
 			       "i" (ASI_PHYS_BYPASS_EC_E))
 
 /* Must be invoked under the IOMMU lock. */
-static void iommu_flushall(struct iommu *iommu)
+static void iommu_flushall(struct iommu_table *iommu_table)
 {
+	struct iommu *iommu = container_of(iommu_table, struct iommu, tbl);
 	if (iommu->iommu_flushinv) {
 		iommu_write(iommu->iommu_flushinv, ~(u64)0);
 	} else {
@@ -87,94 +92,23 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
 	iopte_val(*iopte) = val;
 }
 
-/* Based almost entirely upon the ppc64 iommu allocator.  If you use the 'handle'
- * facility it must all be done in one pass while under the iommu lock.
- *
- * On sun4u platforms, we only flush the IOMMU once every time we've passed
- * over the entire page table doing allocations.  Therefore we only ever advance
- * the hint and cannot backtrack it.
- */
-unsigned long iommu_range_alloc(struct device *dev,
-				struct iommu *iommu,
-				unsigned long npages,
-				unsigned long *handle)
-{
-	unsigned long n, end, start, limit, boundary_size;
-	struct iommu_arena *arena = &iommu->arena;
-	int pass = 0;
-
-	/* This allocator was derived from x86_64's bit string search */
-
-	/* Sanity check */
-	if (unlikely(npages == 0)) {
-		if (printk_ratelimit())
-			WARN_ON(1);
-		return DMA_ERROR_CODE;
-	}
-
-	if (handle && *handle)
-		start = *handle;
-	else
-		start = arena->hint;
-
-	limit = arena->limit;
-
-	/* The case below can happen if we have a small segment appended
-	 * to a large, or when the previous alloc was at the very end of
-	 * the available space. If so, go back to the beginning and flush.
-	 */
-	if (start >= limit) {
-		start = 0;
-		if (iommu->flush_all)
-			iommu->flush_all(iommu);
-	}
-
- again:
-
-	if (dev)
-		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-				      1 << IO_PAGE_SHIFT);
-	else
-		boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
-
-	n = iommu_area_alloc(arena->map, limit, start, npages,
-			     iommu->page_table_map_base >> IO_PAGE_SHIFT,
-			     boundary_size >> IO_PAGE_SHIFT, 0);
-	if (n == -1) {
-		if (likely(pass < 1)) {
-			/* First failure, rescan from the beginning.  */
-			start = 0;
-			if (iommu->flush_all)
-				iommu->flush_all(iommu);
-			pass++;
-			goto again;
-		} else {
-			/* Second failure, give up */
-			return DMA_ERROR_CODE;
-		}
-	}
-
-	end = n + npages;
-
-	arena->hint = end;
-
-	/* Update handle for SG allocations */
-	if (handle)
-		*handle = end;
-
-	return n;
-}
+static struct iommu_tbl_ops iommu_sparc_ops = {
+	.reset	= iommu_flushall
+};
 
-void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
+static void setup_iommu_pool_hash(void)
 {
-	struct iommu_arena *arena = &iommu->arena;
-	unsigned long entry;
+	unsigned int i;
+	static bool do_once;
 
-	entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
-
-	bitmap_clear(arena->map, entry, npages);
+	if (do_once)
+		return;
+	do_once = true;
+	for_each_possible_cpu(i)
+		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
 }
 
+
 int iommu_table_init(struct iommu *iommu, int tsbsize,
 		     u32 dma_offset, u32 dma_addr_mask,
 		     int numa_node)
@@ -187,22 +121,22 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
 	/* Setup initial software IOMMU state. */
 	spin_lock_init(&iommu->lock);
 	iommu->ctx_lowest_free = 1;
-	iommu->page_table_map_base = dma_offset;
+	iommu->tbl.page_table_map_base = dma_offset;
 	iommu->dma_addr_mask = dma_addr_mask;
 
 	/* Allocate and initialize the free area map.  */
 	sz = num_tsb_entries / 8;
 	sz = (sz + 7UL) & ~7UL;
-	iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
-	if (!iommu->arena.map) {
-		printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n");
+	iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
+	if (!iommu->tbl.map)
 		return -ENOMEM;
-	}
-	memset(iommu->arena.map, 0, sz);
-	iommu->arena.limit = num_tsb_entries;
+	memset(iommu->tbl.map, 0, sz);
 
 	if (tlb_type != hypervisor)
-		iommu->flush_all = iommu_flushall;
+		iommu_sparc_ops.reset = NULL; /* not needed on on sun4v */
+
+	setup_iommu_pool_hash();
+	iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
+			    &iommu_sparc_ops, false, 1);
 
 	/* Allocate and initialize the dummy page which we
 	 * set inactive IO PTEs to point to.
@@ -235,18 +169,20 @@ out_free_dummy_page:
 	iommu->dummy_page = 0UL;
 
 out_free_map:
-	kfree(iommu->arena.map);
-	iommu->arena.map = NULL;
+	kfree(iommu->tbl.map);
+	iommu->tbl.map = NULL;
 
 	return -ENOMEM;
 }
 
-static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
+static inline iopte_t *alloc_npages(struct device *dev,
+				    struct iommu *iommu,
 				    unsigned long npages)
 {
 	unsigned long entry;
 
-	entry = iommu_range_alloc(dev, iommu, npages, NULL);
+	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+				      __this_cpu_read(iommu_pool_hash));
 	if (unlikely(entry == DMA_ERROR_CODE))
 		return NULL;
 
@@ -284,7 +220,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
 				   dma_addr_t *dma_addrp, gfp_t gfp,
 				   struct dma_attrs *attrs)
 {
-	unsigned long flags, order, first_page;
+	unsigned long order, first_page;
 	struct iommu *iommu;
 	struct page *page;
 	int npages, nid;
@@ -306,16 +242,14 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
 
 	iommu = dev->archdata.iommu;
 
-	spin_lock_irqsave(&iommu->lock, flags);
 	iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
-	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	if (unlikely(iopte == NULL)) {
 		free_pages(first_page, order);
 		return NULL;
 	}
 
-	*dma_addrp = (iommu->page_table_map_base +
+	*dma_addrp = (iommu->tbl.page_table_map_base +
 		      ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
 	ret = (void *) first_page;
 	npages = size >> IO_PAGE_SHIFT;
@@ -336,16 +270,12 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
 				 struct dma_attrs *attrs)
 {
 	struct iommu *iommu;
-	unsigned long flags, order, npages;
+	unsigned long order, npages;
 
 	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
 	iommu = dev->archdata.iommu;
 
-	spin_lock_irqsave(&iommu->lock, flags);
-
-	iommu_range_free(iommu, dvma, npages);
-
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	iommu_tbl_range_free(&iommu->tbl, dvma, npages, false, NULL);
 
 	order = get_order(size);
 	if (order < 10)
@@ -375,8 +305,8 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
 	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 
-	spin_lock_irqsave(&iommu->lock, flags);
 	base = alloc_npages(dev, iommu, npages);
+	spin_lock_irqsave(&iommu->lock, flags);
 	ctx = 0;
 	if (iommu->iommu_ctxflush)
 		ctx = iommu_alloc_ctx(iommu);
@@ -385,7 +315,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
 	if (unlikely(!base))
 		goto bad;
 
-	bus_addr = (iommu->page_table_map_base +
+	bus_addr = (iommu->tbl.page_table_map_base +
 		    ((base - iommu->page_table) << IO_PAGE_SHIFT));
 	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
 	base_paddr = __pa(oaddr & IO_PAGE_MASK);
@@ -496,7 +426,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
 	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 	base = iommu->page_table +
-		((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+		((bus_addr - iommu->tbl.page_table_map_base) >> IO_PAGE_SHIFT);
 	bus_addr &= IO_PAGE_MASK;
 
 	spin_lock_irqsave(&iommu->lock, flags);
@@ -515,11 +445,11 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
 	for (i = 0; i < npages; i++)
 		iopte_make_dummy(iommu, base + i);
 
-	iommu_range_free(iommu, bus_addr, npages);
-
 	iommu_free_ctx(iommu, ctx);
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages,
+			     false, NULL);
 }
 
 static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -567,7 +497,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 	max_seg_size = dma_get_max_seg_size(dev);
 	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
 				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
-	base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
+	base_shift = iommu->tbl.page_table_map_base >> IO_PAGE_SHIFT;
 	for_each_sg(sglist, s, nelems, i) {
 		unsigned long paddr, npages, entry, out_entry = 0, slen;
 		iopte_t *base;
@@ -581,7 +511,8 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 		/* Allocate iommu entries for that segment */
 		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
 		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
-		entry = iommu_range_alloc(dev, iommu, npages, &handle);
+		entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, &handle,
					      __this_cpu_read(iommu_pool_hash));
 
 		/* Handle failure */
 		if (unlikely(entry == DMA_ERROR_CODE)) {
@@ -594,7 +525,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 		base = iommu->page_table + entry;
 
 		/* Convert entry to a dma_addr_t */
-		dma_addr = iommu->page_table_map_base +
+		dma_addr = iommu->tbl.page_table_map_base +
 			(entry << IO_PAGE_SHIFT);
 		dma_addr |= (s->offset & ~IO_PAGE_MASK);
 
@@ -654,15 +585,17 @@ iommu_map_failed:
 			vaddr = s->dma_address & IO_PAGE_MASK;
 			npages = iommu_num_pages(s->dma_address, s->dma_length,
 						 IO_PAGE_SIZE);
-			iommu_range_free(iommu, vaddr, npages);
 
-			entry = (vaddr - iommu->page_table_map_base)
+			entry = (vaddr - iommu->tbl.page_table_map_base)
 				>> IO_PAGE_SHIFT;
 			base = iommu->page_table + entry;
 
 			for (j = 0; j < npages; j++)
 				iopte_make_dummy(iommu, base + j);
 
+			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
+					     false, NULL);
+
 			s->dma_address = DMA_ERROR_CODE;
 			s->dma_length = 0;
 		}
@@ -677,17 +610,19 @@ iommu_map_failed:
 /* If contexts are being used, they are the same in all of the mappings
  * we make for a particular SG.
  */
-static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
+static unsigned long fetch_sg_ctx(struct iommu *iommu,
+				  struct scatterlist *sg)
 {
 	unsigned long ctx = 0;
 
 	if (iommu->iommu_ctxflush) {
 		iopte_t *base;
 		u32 bus_addr;
+		struct iommu_table *tbl = &iommu->tbl;
 
 		bus_addr = sg->dma_address & IO_PAGE_MASK;
 		base = iommu->page_table +
-			((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+		       ((bus_addr - tbl->page_table_map_base) >> IO_PAGE_SHIFT);
 
 		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
 	}
@@ -723,9 +658,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		if (!len)
 			break;
 		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
-		iommu_range_free(iommu, dma_handle, npages);
 
-		entry = ((dma_handle - iommu->page_table_map_base)
+		entry = ((dma_handle - iommu->tbl.page_table_map_base)
 			 >> IO_PAGE_SHIFT);
 		base = iommu->page_table + entry;
 
@@ -737,6 +671,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		for (i = 0; i < npages; i++)
 			iopte_make_dummy(iommu, base + i);
 
+		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, false,
+				     NULL);
 		sg = sg_next(sg);
 	}
 
@@ -770,9 +706,10 @@ static void dma_4u_sync_single_for_cpu(struct device *dev,
 	if (iommu->iommu_ctxflush &&
 	    strbuf->strbuf_ctxflush) {
 		iopte_t *iopte;
+		struct iommu_table *tbl = &iommu->tbl;
 
 		iopte = iommu->page_table +
-			((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT);
+			((bus_addr - tbl->page_table_map_base)>>IO_PAGE_SHIFT);
 		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
 	}
 
@@ -805,9 +742,10 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
 	if (iommu->iommu_ctxflush &&
 	    strbuf->strbuf_ctxflush) {
 		iopte_t *iopte;
+		struct iommu_table *tbl = &iommu->tbl;
 
-		iopte = iommu->page_table +
-			((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+		iopte = iommu->page_table + ((sglist[0].dma_address -
+			tbl->page_table_map_base) >> IO_PAGE_SHIFT);
 		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
 	}
 
+0 −8
@@ -48,12 +48,4 @@ static inline int is_span_boundary(unsigned long entry,
 	return iommu_is_span_boundary(entry, nr, shift, boundary_size);
 }
 
-unsigned long iommu_range_alloc(struct device *dev,
-				struct iommu *iommu,
-				unsigned long npages,
-				unsigned long *handle);
-void iommu_range_free(struct iommu *iommu,
-		      dma_addr_t dma_addr,
-		      unsigned long npages);
-
 #endif /* _IOMMU_COMMON_H */
+97 −88
@@ -15,6 +15,8 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/bitmap.h>
+#include <linux/hash.h>
+#include <linux/iommu-common.h>
 
 #include <asm/hypervisor.h>
 #include <asm/iommu.h>
@@ -27,6 +29,11 @@
 #define DRV_MODULE_VERSION	"1.1"
 #define DRV_MODULE_RELDATE	"July 22, 2008"
 
+#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
+#define COOKIE_PGSZ_CODE_SHIFT	60ULL
+
+static DEFINE_PER_CPU(unsigned int, ldc_pool_hash);
+
 static char version[] =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
 #define LDC_PACKET_SIZE		64
@@ -98,10 +105,10 @@ static const struct ldc_mode_ops stream_ops;
 int ldom_domaining_enabled;
 
 struct ldc_iommu {
-	/* Protects arena alloc/free.  */
+	/* Protects ldc_unmap.  */
 	spinlock_t			lock;
-	struct iommu_arena		arena;
 	struct ldc_mtable_entry		*page_table;
+	struct iommu_table		iommu_table;
 };
 
 struct ldc_channel {
@@ -998,31 +1005,85 @@ static void free_queue(unsigned long num_entries, struct ldc_packet *q)
 	free_pages((unsigned long)q, order);
 }
 
+static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
+{
+	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
+	/* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
+
+	cookie &= ~COOKIE_PGSZ_CODE;
+
+	return (cookie >> (13ULL + (szcode * 3ULL)));
+}
+
+struct ldc_demap_arg {
+	struct ldc_iommu *ldc_iommu;
+	u64 cookie;
+	unsigned long id;
+};
+
+static void ldc_demap(void *arg, unsigned long entry, unsigned long npages)
+{
+	struct ldc_demap_arg *ldc_demap_arg = arg;
+	struct ldc_iommu *iommu = ldc_demap_arg->ldc_iommu;
+	unsigned long id = ldc_demap_arg->id;
+	u64 cookie = ldc_demap_arg->cookie;
+	struct ldc_mtable_entry *base;
+	unsigned long i, shift;
+
+	shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
+	base = iommu->page_table + entry;
+	for (i = 0; i < npages; i++) {
+		if (base->cookie)
+			sun4v_ldc_revoke(id, cookie + (i << shift),
+					 base->cookie);
+		base->mte = 0;
+	}
+}
+
 /* XXX Make this configurable... XXX */
 #define LDC_IOTABLE_SIZE	(8 * 1024)
 
-static int ldc_iommu_init(struct ldc_channel *lp)
+struct iommu_tbl_ops ldc_iommu_ops = {
+	.cookie_to_index = ldc_cookie_to_index,
+	.demap = ldc_demap,
+};
+
+static void setup_ldc_pool_hash(void)
+{
+	unsigned int i;
+	static bool do_once;
+
+	if (do_once)
+		return;
+	do_once = true;
+	for_each_possible_cpu(i)
+		per_cpu(ldc_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+}
+
+
+static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
 {
 	unsigned long sz, num_tsb_entries, tsbsize, order;
-	struct ldc_iommu *iommu = &lp->iommu;
+	struct ldc_iommu *ldc_iommu = &lp->iommu;
+	struct iommu_table *iommu = &ldc_iommu->iommu_table;
 	struct ldc_mtable_entry *table;
 	unsigned long hv_err;
 	int err;
 
 	num_tsb_entries = LDC_IOTABLE_SIZE;
 	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
-
-	spin_lock_init(&iommu->lock);
+	setup_ldc_pool_hash();
+	spin_lock_init(&ldc_iommu->lock);
 
 	sz = num_tsb_entries / 8;
 	sz = (sz + 7UL) & ~7UL;
-	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
-	if (!iommu->arena.map) {
+	iommu->map = kzalloc(sz, GFP_KERNEL);
+	if (!iommu->map) {
 		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
 		return -ENOMEM;
 	}
-
-	iommu->arena.limit = num_tsb_entries;
+	iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
+			    &ldc_iommu_ops, false, 1);
 
 	order = get_order(tsbsize);
 
@@ -1037,7 +1098,7 @@ static int ldc_iommu_init(struct ldc_channel *lp)
 
 	memset(table, 0, PAGE_SIZE << order);
 
-	iommu->page_table = table;
+	ldc_iommu->page_table = table;
 
 	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
 					 num_tsb_entries);
@@ -1049,31 +1110,32 @@ static int ldc_iommu_init(struct ldc_channel *lp)
 
 out_free_table:
 	free_pages((unsigned long) table, order);
-	iommu->page_table = NULL;
+	ldc_iommu->page_table = NULL;
 
 out_free_map:
-	kfree(iommu->arena.map);
-	iommu->arena.map = NULL;
+	kfree(iommu->map);
+	iommu->map = NULL;
 
 	return err;
 }
 
 static void ldc_iommu_release(struct ldc_channel *lp)
 {
-	struct ldc_iommu *iommu = &lp->iommu;
+	struct ldc_iommu *ldc_iommu = &lp->iommu;
+	struct iommu_table *iommu = &ldc_iommu->iommu_table;
 	unsigned long num_tsb_entries, tsbsize, order;
 
 	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
 
-	num_tsb_entries = iommu->arena.limit;
+	num_tsb_entries = iommu->poolsize * iommu->nr_pools;
 	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
 	order = get_order(tsbsize);
 
-	free_pages((unsigned long) iommu->page_table, order);
-	iommu->page_table = NULL;
+	free_pages((unsigned long) ldc_iommu->page_table, order);
+	ldc_iommu->page_table = NULL;
 
-	kfree(iommu->arena.map);
-	iommu->arena.map = NULL;
+	kfree(iommu->map);
+	iommu->map = NULL;
 }
 
 struct ldc_channel *ldc_alloc(unsigned long id,
@@ -1140,7 +1202,7 @@ struct ldc_channel *ldc_alloc(unsigned long id,
 
 	lp->id = id;
 
-	err = ldc_iommu_init(lp);
+	err = ldc_iommu_init(name, lp);
 	if (err)
 		goto out_free_ldc;
 
@@ -1885,40 +1947,6 @@ int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
 }
 EXPORT_SYMBOL(ldc_read);
 
-static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
-{
-	struct iommu_arena *arena = &iommu->arena;
-	unsigned long n, start, end, limit;
-	int pass;
-
-	limit = arena->limit;
-	start = arena->hint;
-	pass = 0;
-
-again:
-	n = bitmap_find_next_zero_area(arena->map, limit, start, npages, 0);
-	end = n + npages;
-	if (unlikely(end >= limit)) {
-		if (likely(pass < 1)) {
-			limit = start;
-			start = 0;
-			pass++;
-			goto again;
-		} else {
-			/* Scanned the whole thing, give up. */
-			return -1;
-		}
-	}
-	bitmap_set(arena->map, n, npages);
-
-	arena->hint = end;
-
-	return n;
-}
-
-#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
-#define COOKIE_PGSZ_CODE_SHIFT	60ULL
-
 static u64 pagesize_code(void)
 {
 	switch (PAGE_SIZE) {
@@ -1945,23 +1973,14 @@ static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
 		page_offset);
 }
 
-static u64 cookie_to_index(u64 cookie, unsigned long *shift)
-{
-	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
-
-	cookie &= ~COOKIE_PGSZ_CODE;
-
-	*shift = szcode * 3;
-
-	return (cookie >> (13ULL + (szcode * 3ULL)));
-}
-
 static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
 					     unsigned long npages)
 {
 	long entry;
 
-	entry = arena_alloc(iommu, npages);
+	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_table, npages,
+				     NULL, __this_cpu_read(ldc_pool_hash));
 	if (unlikely(entry < 0))
 		return NULL;
 
@@ -2090,7 +2109,7 @@ int ldc_map_sg(struct ldc_channel *lp,
 	       struct ldc_trans_cookie *cookies, int ncookies,
 	       unsigned int map_perm)
 {
-	unsigned long i, npages, flags;
+	unsigned long i, npages;
 	struct ldc_mtable_entry *base;
 	struct cookie_state state;
 	struct ldc_iommu *iommu;
@@ -2109,9 +2128,7 @@ int ldc_map_sg(struct ldc_channel *lp,
 
 	iommu = &lp->iommu;
 
-	spin_lock_irqsave(&iommu->lock, flags);
 	base = alloc_npages(iommu, npages);
-	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	if (!base)
 		return -ENOMEM;
@@ -2136,7 +2153,7 @@ int ldc_map_single(struct ldc_channel *lp,
 		   struct ldc_trans_cookie *cookies, int ncookies,
 		   unsigned int map_perm)
 {
-	unsigned long npages, pa, flags;
+	unsigned long npages, pa;
 	struct ldc_mtable_entry *base;
 	struct cookie_state state;
 	struct ldc_iommu *iommu;
@@ -2152,9 +2169,7 @@ int ldc_map_single(struct ldc_channel *lp,
 
 	iommu = &lp->iommu;
 
-	spin_lock_irqsave(&iommu->lock, flags);
 	base = alloc_npages(iommu, npages);
-	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	if (!base)
 		return -ENOMEM;
@@ -2172,35 +2187,29 @@ int ldc_map_single(struct ldc_channel *lp,
 }
 EXPORT_SYMBOL(ldc_map_single);
 
+
 static void free_npages(unsigned long id, struct ldc_iommu *iommu,
 			u64 cookie, u64 size)
 {
-	struct iommu_arena *arena = &iommu->arena;
-	unsigned long i, shift, index, npages;
-	struct ldc_mtable_entry *base;
+	unsigned long npages;
+	struct ldc_demap_arg demap_arg;
 
-	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
-	index = cookie_to_index(cookie, &shift);
-	base = iommu->page_table + index;
+	demap_arg.ldc_iommu = iommu;
+	demap_arg.cookie = cookie;
+	demap_arg.id = id;
 
-	BUG_ON(index > arena->limit ||
-	       (index + npages) > arena->limit);
+	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
+	iommu_tbl_range_free(&iommu->iommu_table, cookie, npages, true,
+			     &demap_arg);
 
-	for (i = 0; i < npages; i++) {
-		if (base->cookie)
-			sun4v_ldc_revoke(id, cookie + (i << shift),
-					 base->cookie);
-		base->mte = 0;
-		__clear_bit(index + i, arena->map);
-	}
 }
 
 void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
 	       int ncookies)
 {
 	struct ldc_iommu *iommu = &lp->iommu;
-	unsigned long flags;
 	int i;
+	unsigned long flags;
 
 	spin_lock_irqsave(&iommu->lock, flags);
 	for (i = 0; i < ncookies; i++) {
+94 −99
Preview size limit exceeded, changes collapsed.