
Commit 83c50939 authored by Mitchel Humpherys

gpu: ion: optimize zero'ing of pages

Instead of using __GFP_ZERO to have the allocator zero each page
individually, vmap high-order pages and zero them all at once (or in
a few large chunks).

Test results show a ~25% speedup for allocations fulfilled from
lowmem, and a ~100% speedup for allocations fulfilled from highmem.

Change-Id: I57270361ff195780370e1c6f458985bdbb641efe
Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
parent 9d94828a
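In outline: the allocator's __GFP_ZERO path zeroes one 4K page at a
time through a temporary kernel mapping, while this patch maps a whole
run of pages into vmalloc space once and clears it with a single
memset. Below is a minimal sketch of that idea only — not code from
the commit — using a hypothetical helper and the generic
VM_MAP/PAGE_KERNEL flags rather than the commit's ARM-specific
VM_IOREMAP/pgprot_kernel:

#include <linux/mm.h>
#include <linux/string.h>
#include <linux/vmalloc.h>

/* Sketch only: zero a run of 4K pages with a single vmap + memset
 * rather than relying on per-page __GFP_ZERO. */
static int zero_pages_batched(struct page **pages, int num_pages)
{
	/* Write-combined mapping: no cache lines are allocated for
	 * the stores, so nothing needs flushing afterwards. */
	pgprot_t prot = pgprot_writecombine(PAGE_KERNEL);
	void *vaddr;

	/* One mapping covering the whole batch... */
	vaddr = vmap(pages, num_pages, VM_MAP, prot);
	if (!vaddr)
		return -ENOMEM;

	/* ...so one memset zeroes every page in the run. */
	memset(vaddr, 0, (size_t)num_pages * PAGE_SIZE);
	vunmap(vaddr);
	return 0;
}

ion_heap_pages_zero() in the diff below hardens this sketch in two
ways: it caps each vmap at a fraction of the vmalloc area, halving the
chunk size when vmap() fails, and it can invalidate the cached alias
of each page for buffers that will be mapped cached.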
+148 −0
@@ -24,6 +24,8 @@
#include <linux/sched.h>
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include "ion_priv.h"

void *ion_heap_map_kernel(struct ion_heap *heap,
@@ -98,7 +100,153 @@ int ion_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer,
	return 0;
}

#define MAX_VMAP_RETRIES 10

/**
 * An optimized page-zeroing function: vmaps arrays of pages in large
 * chunks to minimize the number of memsets and vmap/vunmap calls.
 *
 * Note that the `pages' array should be composed entirely of 4K pages.
 */
int ion_heap_pages_zero(struct page **pages, int num_pages,
				bool should_invalidate)
{
	int i, j, k, npages_to_vmap;
	void *ptr = NULL;
	/*
	 * It's cheaper just to use writecombine memory and skip the
	 * cache vs. using cached memory and trying to flush it afterwards.
	 */
	pgprot_t pgprot = pgprot_writecombine(pgprot_kernel);

	/*
	 * As an optimization, we manually zero out all of the pages
	 * in one fell swoop here. To safeguard against insufficient
	 * vmalloc space, we only vmap `npages_to_vmap' at a time,
	 * starting with a conservative estimate of 1/8 of the total
	 * number of vmalloc pages available.
	 */
	npages_to_vmap = ((VMALLOC_END - VMALLOC_START)/8)
			>> PAGE_SHIFT;
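	/*
	 * Illustration (assumed figures, not from the commit): with a
	 * 240 MB vmalloc region, common on 32-bit ARM, this initial
	 * chunk works out to (240 MB / 8) / 4 KB = 7680 pages, i.e.
	 * up to 30 MB zeroed per vmap/memset pass.
	 */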
	for (i = 0; i < num_pages; i += npages_to_vmap) {
		npages_to_vmap = min(npages_to_vmap, num_pages - i);
		for (j = 0; j < MAX_VMAP_RETRIES && npages_to_vmap;
			++j) {
			ptr = vmap(&pages[i], npages_to_vmap,
					VM_IOREMAP, pgprot);
			if (ptr)
				break;
			else
				npages_to_vmap >>= 1;
		}
		if (!ptr)
			return -ENOMEM;

		memset(ptr, 0, npages_to_vmap * PAGE_SIZE);
		if (should_invalidate) {
			/*
			 * invalidate the cache to pick up the zeroing
			 */
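			/*
			 * (The zeroes above were written through the
			 * write-combined vmap alias, so stale lines in
			 * the cached alias of these pages must be
			 * dropped before the buffer is handed out.)
			 */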
			for (k = 0; k < npages_to_vmap; k++) {
				void *p = kmap_atomic(pages[i + k]);
				phys_addr_t phys = page_to_phys(
							pages[i + k]);

				dmac_inv_range(p, p + PAGE_SIZE);
				outer_inv_range(phys, phys + PAGE_SIZE);
				kunmap_atomic(p);
			}
		}
		vunmap(ptr);
	}

	return 0;
}

static int ion_heap_alloc_pages_mem(int page_tbl_size,
				struct pages_mem *pages_mem)
{
	struct page **pages;
	pages_mem->free_fn = kfree;
	if (page_tbl_size > SZ_8K) {
		/*
		 * Try kmalloc first for speed, but fall back to
		 * vmalloc so a large page table can still be
		 * allocated when physically contiguous memory is
		 * scarce.
		 */
		pages = kmalloc(page_tbl_size,
				__GFP_COMP | __GFP_NORETRY |
				__GFP_NO_KSWAPD | __GFP_NOWARN);
		if (!pages) {
			pages = vmalloc(page_tbl_size);
			pages_mem->free_fn = vfree;
		}
	} else {
		pages = kmalloc(page_tbl_size, GFP_KERNEL);
	}

	if (!pages)
		return -ENOMEM;

	pages_mem->pages = pages;
	return 0;
}

static void ion_heap_free_pages_mem(struct pages_mem *pages_mem)
{
	pages_mem->free_fn(pages_mem->pages);
}

int ion_heap_high_order_page_zero(struct page *page,
				int order, bool should_invalidate)
{
	int i, ret;
	struct pages_mem pages_mem;
	int npages = 1 << order;
	int page_tbl_size = sizeof(struct page *) * npages;

	if (ion_heap_alloc_pages_mem(page_tbl_size, &pages_mem))
		return -ENOMEM;

	for (i = 0; i < npages; ++i)
		pages_mem.pages[i] = page + i;

	ret = ion_heap_pages_zero(pages_mem.pages, npages,
				should_invalidate);
	ion_heap_free_pages_mem(&pages_mem);
	return ret;
}

int ion_heap_buffer_zero(struct ion_buffer *buffer)
{
	struct sg_table *table = buffer->sg_table;
	struct scatterlist *sg;
	int i, j, ret = 0, npages = 0, page_tbl_size = 0;
	struct pages_mem pages_mem;

	for_each_sg(table->sgl, sg, table->nents, i) {
		unsigned long len = sg_dma_len(sg);
		int nrpages = len >> PAGE_SHIFT;
		page_tbl_size += sizeof(struct page *) * nrpages;
	}

	if (ion_heap_alloc_pages_mem(page_tbl_size, &pages_mem))
		return -ENOMEM;

	for_each_sg(table->sgl, sg, table->nents, i) {
		struct page *page = sg_page(sg);
		unsigned long len = sg_dma_len(sg);

		for (j = 0; j < len / PAGE_SIZE; j++)
			pages_mem.pages[npages++] = page + j;
	}

	ret = ion_heap_pages_zero(pages_mem.pages, npages,
				ion_buffer_cached(buffer));
	ion_heap_free_pages_mem(&pages_mem);
	return ret;
}

int ion_heap_buffer_zero_old(struct ion_buffer *buffer)
{
	struct sg_table *table = buffer->sg_table;
	pgprot_t pgprot;
+2 −2
@@ -537,7 +537,7 @@ struct ion_heap *ion_iommu_heap_create(struct ion_platform_heap *heap_data)
			gfp_flags = high_gfp_flags | __GFP_ZERO;
		else
			gfp_flags = low_gfp_flags | __GFP_ZERO;
-		pool = ion_page_pool_create(gfp_flags, orders[i]);
+		pool = ion_page_pool_create(gfp_flags, orders[i], true);
		if (!pool)
			goto err_create_cached_pool;
		iommu_heap->cached_pools[i] = pool;
@@ -551,7 +551,7 @@ struct ion_heap *ion_iommu_heap_create(struct ion_platform_heap *heap_data)
			gfp_flags = high_gfp_flags | __GFP_ZERO;
		else
			gfp_flags = low_gfp_flags | __GFP_ZERO;
-		pool = ion_page_pool_create(gfp_flags, orders[i]);
+		pool = ion_page_pool_create(gfp_flags, orders[i], false);
		if (!pool)
			goto err_create_uncached_pool;
		iommu_heap->uncached_pools[i] = pool;
+15 −2
@@ -21,6 +21,7 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "ion_priv.h"

struct ion_page_pool_item {
@@ -30,18 +31,28 @@ struct ion_page_pool_item {

static void *ion_page_pool_alloc_pages(struct ion_page_pool *pool)
{
-	struct page *page = alloc_pages(pool->gfp_mask, pool->order);
+	struct page *page;
	struct scatterlist sg;

	page = alloc_pages(pool->gfp_mask & ~__GFP_ZERO, pool->order);

	if (!page)
		return NULL;

	if (pool->gfp_mask & __GFP_ZERO)
		if (ion_heap_high_order_page_zero(
				page, pool->order, pool->should_invalidate))
			goto error_free_pages;

	sg_init_table(&sg, 1);
	sg_set_page(&sg, page, PAGE_SIZE << pool->order, 0);
	sg_dma_address(&sg) = sg_phys(&sg);
	dma_sync_sg_for_device(NULL, &sg, 1, DMA_BIDIRECTIONAL);

	return page;
error_free_pages:
	__free_pages(page, pool->order);
	return NULL;
}

static void ion_page_pool_free_pages(struct ion_page_pool *pool,
@@ -164,7 +175,8 @@ int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask,
	return nr_freed;
}

-struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order)
+struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order,
+	bool should_invalidate)
{
	struct ion_page_pool *pool = kmalloc(sizeof(struct ion_page_pool),
					     GFP_KERNEL);
@@ -176,6 +188,7 @@ struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order)
	INIT_LIST_HEAD(&pool->high_items);
	pool->gfp_mask = gfp_mask;
	pool->order = order;
	pool->should_invalidate = should_invalidate;
	mutex_init(&pool->mutex);
	plist_node_init(&pool->list, order);

+14 −1
@@ -221,6 +221,11 @@ void ion_device_destroy(struct ion_device *dev);
 */
void ion_device_add_heap(struct ion_device *dev, struct ion_heap *heap);

struct pages_mem {
	struct page **pages;
	void (*free_fn) (const void *);
};

/**
 * some helpers for common operations on buffers using the sg_table
 * and vaddr fields
@@ -229,7 +234,11 @@ void *ion_heap_map_kernel(struct ion_heap *, struct ion_buffer *);
void ion_heap_unmap_kernel(struct ion_heap *, struct ion_buffer *);
int ion_heap_map_user(struct ion_heap *, struct ion_buffer *,
			struct vm_area_struct *);
int ion_heap_pages_zero(struct page **pages, int num_pages,
			bool should_invalidate);
int ion_heap_buffer_zero(struct ion_buffer *buffer);
int ion_heap_high_order_page_zero(struct page *page,
				int order, bool should_invalidate);

/**
 * ion_heap_init_deferred_free -- initialize deferred free functionality
@@ -348,6 +357,8 @@ void ion_carveout_free(struct ion_heap *heap, ion_phys_addr_t addr,
 * @gfp_mask:		gfp_mask to use from alloc
 * @order:		order of pages in the pool
 * @list:		plist node for list of pools
 * @should_invalidate:	whether or not the cache needs to be invalidated at
 *			page allocation time.
 *
 * Allows you to keep a pool of pre allocated pages to use from your heap.
 * Keeping a pool of pages that is ready for dma, ie any cached mapping have
@@ -363,9 +374,11 @@ struct ion_page_pool {
	gfp_t gfp_mask;
	unsigned int order;
	struct plist_node list;
	bool should_invalidate;
};

-struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order);
+struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order,
+	bool should_invalidate);
void ion_page_pool_destroy(struct ion_page_pool *);
void *ion_page_pool_alloc(struct ion_page_pool *);
void ion_page_pool_free(struct ion_page_pool *, struct page *);
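For context on the new should_invalidate parameter, here is a hedged
sketch of how a heap might drive this API after the change (the demo_*
names are hypothetical, and the order and gfp flags are illustrative):

/* A pool of order-4 (64K) allocations whose pages are zeroed -- and,
 * because should_invalidate is true, cache-invalidated -- whenever
 * ion_page_pool_alloc_pages() falls back to alloc_pages(). */
static struct ion_page_pool *demo_pool;

static int demo_heap_init(void)
{
	demo_pool = ion_page_pool_create(GFP_KERNEL | __GFP_ZERO, 4, true);
	return demo_pool ? 0 : -ENOMEM;
}

static struct page *demo_alloc(void)
{
	/* On the fresh-allocation path, __GFP_ZERO is stripped before
	 * alloc_pages() and the zeroing is done in bulk via
	 * ion_heap_high_order_page_zero(). */
	return ion_page_pool_alloc(demo_pool);
}

static void demo_heap_exit(void)
{
	ion_page_pool_destroy(demo_pool);
}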
+9 −2
@@ -80,12 +80,19 @@ static struct page *alloc_buffer_page(struct ion_system_heap *heap,
		if (order > 4)
			gfp_flags = high_order_gfp_flags;
		trace_alloc_pages_sys_start(gfp_flags, order);
-		page = alloc_pages(gfp_flags, order);
+		page = alloc_pages(gfp_flags & ~__GFP_ZERO, order);
		trace_alloc_pages_sys_end(gfp_flags, order);
		if (!page) {
			trace_alloc_pages_sys_fail(gfp_flags, order);
			return 0;
		}
		if (gfp_flags & __GFP_ZERO) {
			if (ion_heap_high_order_page_zero(
					page, order, false)) {
				__free_pages(page, order);
				return NULL;
			}
		}
		sg_init_table(&sg, 1);
		sg_set_page(&sg, page, PAGE_SIZE << order, 0);
		sg_dma_address(&sg) = sg_phys(&sg);
@@ -351,7 +358,7 @@ struct ion_heap *ion_system_heap_create(struct ion_platform_heap *unused)

		if (orders[i] > 4)
			gfp_flags = high_order_gfp_flags;
-		pool = ion_page_pool_create(gfp_flags, orders[i]);
+		pool = ion_page_pool_create(gfp_flags, orders[i], false);
		if (!pool)
			goto err_create_pool;
		heap->pools[i] = pool;