Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 69e50ada authored by Linux Build Service Account's avatar Linux Build Service Account Committed by Gerrit - the friendly Code Review server
Browse files

Merge "msm: kgsl: Use a page pool to reduce allocation time"

parents 0fa11ec3 afc87641
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -10,7 +10,8 @@ msm_kgsl_core-y = \
	kgsl_pwrscale.o \
	kgsl_mmu.o \
	kgsl_snapshot.o \
	kgsl_events.o
	kgsl_events.o \
	kgsl_pool.o

msm_kgsl_core-$(CONFIG_MSM_KGSL_IOMMU) += kgsl_iommu.o
msm_kgsl_core-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o
+6 −0
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@
#include "kgsl_trace.h"
#include "kgsl_sync.h"
#include "kgsl_compat.h"
#include "kgsl_pool.h"

#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "kgsl."
@@ -3954,6 +3955,9 @@ int kgsl_device_platform_probe(struct kgsl_device *device)
	/* Initialize common sysfs entries */
	kgsl_pwrctrl_init_sysfs(device);

	/* Initialize the memory pools */
	kgsl_init_page_pools();

	return 0;

error_close_mmu:
@@ -3972,6 +3976,8 @@ void kgsl_device_platform_remove(struct kgsl_device *device)

	kgsl_device_snapshot_close(device);

	kgsl_exit_page_pools();

	kgsl_pwrctrl_uninit_sysfs(device);

	pm_qos_remove_request(&device->pwrctrl.pm_qos_req_dma);
+341 −0
Original line number Diff line number Diff line
/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/version.h>

#include "kgsl.h"
#include "kgsl_device.h"
#include "kgsl_pool.h"

/*
 * Maximum pool size in terms of pages
 * = (Number of pools * Max size per pool)
 */
#define KGSL_POOL_MAX_PAGES (2 * 4096)

/* Cap on the combined size of all pools, in 4K pages (8192 pages = 32MB) */
static unsigned int kgsl_pool_max_pages = KGSL_POOL_MAX_PAGES;

/* A pool of free pages of a single allocation order */
struct kgsl_page_pool {
	unsigned int pool_order;	/* page order managed by this pool */
	int page_count;			/* number of entries on page_list */
	spinlock_t list_lock;		/* protects page_list and page_count */
	struct list_head page_list;	/* free pages, linked via page->lru */
};

/*
 * Statically defined pools: always an order-0 (4K) pool, plus an
 * order-4 (64K) pool unless the build restricts allocations to 4K chunks.
 */
static struct kgsl_page_pool kgsl_pools[] = {
	{
		.pool_order = 0,
		.list_lock = __SPIN_LOCK_UNLOCKED(kgsl_pools[0].list_lock),
		.page_list = LIST_HEAD_INIT(kgsl_pools[0].page_list),
	},
#ifndef CONFIG_ALLOC_BUFFERS_IN_4K_CHUNKS
	{
		.pool_order = 4,
		.list_lock = __SPIN_LOCK_UNLOCKED(kgsl_pools[1].list_lock),
		.page_list = LIST_HEAD_INIT(kgsl_pools[1].page_list),
	},
#endif
};

#define KGSL_NUM_POOLS ARRAY_SIZE(kgsl_pools)

/* Returns KGSL pool corresponding to input page order*/
static struct kgsl_page_pool *
_kgsl_get_pool_from_order(unsigned int order)
{
	int i;

	for (i = 0; i < KGSL_NUM_POOLS; i++) {
		if (kgsl_pools[i].pool_order == order)
			return &kgsl_pools[i];
	}

	return NULL;
}

/* Insert page @p at the tail of @pool's free list */
static void
_kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p)
{
	spin_lock(&pool->list_lock);
	pool->page_count++;
	list_add_tail(&p->lru, &pool->page_list);
	spin_unlock(&pool->list_lock);
}

/* Detach and return one page from @pool, or NULL when the pool is empty */
static struct page *
_kgsl_pool_get_page(struct kgsl_page_pool *pool)
{
	struct page *page = NULL;

	spin_lock(&pool->list_lock);
	if (pool->page_count != 0) {
		page = list_first_entry(&pool->page_list, struct page, lru);
		list_del(&page->lru);
		pool->page_count--;
	}
	spin_unlock(&pool->list_lock);

	return page;
}

/*
 * Report how many 4K pages @kgsl_pool currently holds
 * (entry count scaled by the pool's page order).
 */
static int
kgsl_pool_size(struct kgsl_page_pool *kgsl_pool)
{
	int pages;

	spin_lock(&kgsl_pool->list_lock);
	pages = kgsl_pool->page_count << kgsl_pool->pool_order;
	spin_unlock(&kgsl_pool->list_lock);

	return pages;
}

/* Sum the per-pool page counts across every kgsl page pool */
static int kgsl_pool_size_total(void)
{
	int i = 0;
	int total = 0;

	while (i < KGSL_NUM_POOLS)
		total += kgsl_pool_size(&kgsl_pools[i++]);

	return total;
}

/*
 * Free up to @num_pages pages worth of memory from @pool, stopping
 * early if the pool runs dry.  Returns the number of 4K pages released.
 */
static unsigned int
_kgsl_pool_shrink(struct kgsl_page_pool *pool, int num_pages)
{
	unsigned int freed = 0;
	int remaining;

	if (pool == NULL || num_pages <= 0)
		return 0;

	/* Convert the request into a number of pool-sized entries */
	for (remaining = num_pages >> pool->pool_order; remaining > 0;
			remaining--) {
		struct page *page = _kgsl_pool_get_page(pool);

		/* Pool exhausted - nothing more to release */
		if (page == NULL)
			break;

		__free_pages(page, pool->pool_order);
		freed += 1 << pool->pool_order;
	}

	return freed;
}

/*
 * kgsl_pool_reduce() - shrink the combined pool size to target_pages
 * @target_pages: desired total number of 4K pages left across all pools
 *
 * If the pools already hold no more than @target_pages nothing is done;
 * otherwise pages are freed starting from the highest-order pool until
 * the total drops to the target.  Returns the number of pages freed.
 */
static int
kgsl_pool_reduce(unsigned int target_pages)
{
	int total_pages;
	int i;
	int nr_removed;
	struct kgsl_page_pool *pool;
	unsigned int pcount = 0;

	total_pages = kgsl_pool_size_total();

	for (i = (KGSL_NUM_POOLS - 1); i >= 0; i--) {
		pool = &kgsl_pools[i];

		/*
		 * Pages still to be removed beyond what earlier iterations
		 * already freed.  Compare against the running remainder
		 * (total_pages - pcount) rather than repeatedly subtracting
		 * the *cumulative* pcount from total_pages, which would
		 * double-count pages freed by earlier pools and over-shrink
		 * once there are three or more pools.
		 */
		nr_removed = (total_pages - (int)pcount) - (int)target_pages;
		if (nr_removed <= 0)
			return pcount;

		/* Round up to an integral number of this pool's entries */
		nr_removed = ALIGN(nr_removed, 1 << pool->pool_order);

		/* Remove nr_removed pages from this pool */
		pcount += _kgsl_pool_shrink(pool, nr_removed);
	}

	return pcount;
}

/**
 * kgsl_pool_free_sgt() - Free scatter-gather list
 * @sgt: pointer of the sg list
 *
 * Free the sg list by collapsing any physical adjacent pages.
 * Pages are added back to the pool, if pool has sufficient space
 * otherwise they are given back to system.
 */

void kgsl_pool_free_sgt(struct sg_table *sgt)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
		/*
		 * sg_alloc_table_from_pages() will collapse any physically
		 * adjacent pages into a single scatterlist entry. We cannot
		 * just call __free_pages() on the entire set since we cannot
		 * ensure that the size is a whole order. Instead, free each
		 * page or compound page group individually.
		 */
		struct page *p = sg_page(sg), *next;
		unsigned int count;
		unsigned int j = 0;

		while (j < (sg->length/PAGE_SIZE)) {
			/* Size of this (possibly compound) group in pages */
			count = 1 << compound_order(p);
			/* Advance past the group before it is freed */
			next = nth_page(p, count);
			/* Recycle into a pool, or back to the system */
			kgsl_pool_free_page(p);

			p = next;
			j += count;
		}
	}
}

/**
 * kgsl_pool_alloc_page() - Allocate a page of requested size
 * @page_size: Size in bytes of the allocation
 * @pages: array that receives the constituent 4K pages; must be big
 * enough to hold the requested allocation
 * @pages_len: Length of the @pages array
 *
 * Return total page count on success and negative value on failure
 */
int kgsl_pool_alloc_page(int page_size, struct page **pages,
					unsigned int pages_len)
{
	unsigned int order = get_order(page_size);
	unsigned int npages = page_size >> PAGE_SHIFT;
	struct kgsl_page_pool *pool;
	struct page *page = NULL;
	unsigned int i;

	if (pages == NULL || pages_len < npages)
		return -EINVAL;

	/* Try to recycle a page from the matching pool first */
	pool = _kgsl_get_pool_from_order(order);
	if (pool != NULL)
		page = _kgsl_pool_get_page(pool);

	/* Fall back to the system allocator on a pool miss */
	if (page == NULL) {
		page = alloc_pages(kgsl_gfp_mask(order), order);
		if (page == NULL)
			return -ENOMEM;
	}

	/* Hand back each constituent 4K page individually */
	for (i = 0; i < npages; i++)
		pages[i] = nth_page(page, i);

	return npages;
}

/*
 * Return @page to its matching pool when there is room under the global
 * budget, otherwise release it back to the system allocator.
 */
void kgsl_pool_free_page(struct page *page)
{
	int order;
	struct kgsl_page_pool *pool;

	if (page == NULL)
		return;

	order = compound_order(page);

	/* Only pool the page while the total stays under the cap */
	if (kgsl_pool_size_total() < kgsl_pool_max_pages) {
		pool = _kgsl_get_pool_from_order(order);
		if (pool) {
			_kgsl_pool_add_page(pool, page);
			return;
		}
	}

	/* No matching pool or pool is full - give back to the system */
	__free_pages(page, order);
}

/* Functions for the shrinker */

static unsigned long
kgsl_pool_shrink_scan_objects(struct shrinker *shrinker,
					struct shrink_control *sc)
{
	/* sc->nr_to_scan is the number of pages the VM wants released */
	int to_scan = sc->nr_to_scan;
	int pool_size = kgsl_pool_size_total();
	int target;

	/* New pool size once the requested pages have been dropped */
	target = (to_scan > pool_size) ? 0 : pool_size - to_scan;

	/* kgsl_pool_reduce() returns the number of pages actually freed */
	return kgsl_pool_reduce(target);
}

static unsigned long
kgsl_pool_shrink_count_objects(struct shrinker *shrinker,
					struct shrink_control *sc)
{
	/* Every pooled page is reclaimable, so report the full pool size */
	unsigned long reclaimable = kgsl_pool_size_total();

	return reclaimable;
}

/*
 * VM shrinker hooks for the page pools; registered in
 * kgsl_init_page_pools() and unregistered in kgsl_exit_page_pools().
 */
static struct shrinker kgsl_pool_shrinker = {
	.count_objects = kgsl_pool_shrink_count_objects,
	.scan_objects = kgsl_pool_shrink_scan_objects,
	.seeks = DEFAULT_SEEKS,
	.batch = 0,
};

void kgsl_init_page_pools(void)
{
	/* Hook the pools into the VM shrinker so memory pressure drains them */
	register_shrinker(&kgsl_pool_shrinker);
}

void kgsl_exit_page_pools(void)
{
	/* Drain every pool back to the system allocator */
	kgsl_pool_reduce(0);

	/* Detach from the VM shrinker */
	unregister_shrinker(&kgsl_pool_shrinker);
}
+43 −0
Original line number Diff line number Diff line
/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
#ifndef __KGSL_POOL_H
#define __KGSL_POOL_H

#include <linux/mm_types.h>
#include "kgsl_sharedmem.h"

/*
 * kgsl_gfp_mask() - Build the gfp flags for a KGSL page allocation
 * @page_order: allocation order (0 for a single 4K page)
 *
 * Higher-order allocations skip the aggressive reclaim paths
 * (__GFP_NORETRY | __GFP_NO_KSWAPD) so that a failure can fall back
 * to order-0 pages instead of stalling.  When the driver-wide
 * "noretry" flag is set, even order-0 allocations are best-effort.
 */
static inline unsigned int
kgsl_gfp_mask(unsigned int page_order)
{
	unsigned int gfp_mask = __GFP_HIGHMEM;

	if (page_order > 0)
		gfp_mask |= __GFP_COMP | __GFP_NORETRY |
			__GFP_NO_KSWAPD | __GFP_NOWARN;
	else
		gfp_mask |= GFP_KERNEL;

	/* Test the flag directly instead of the redundant "== true" */
	if (kgsl_sharedmem_get_noretry())
		gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;

	return gfp_mask;
}

void kgsl_pool_free_sgt(struct sg_table *sgt);
void kgsl_init_page_pools(void);
void kgsl_exit_page_pools(void);
int kgsl_pool_alloc_page(int page_size, struct page **pages,
						unsigned int pages_len);
void kgsl_pool_free_page(struct page *p);
#endif /* __KGSL_POOL_H */
+77 −94
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include "kgsl_device.h"
#include "kgsl_log.h"
#include "kgsl_mmu.h"
#include "kgsl_pool.h"

/*
 * The user can set this from debugfs to force failed memory allocations to
@@ -424,9 +425,6 @@ done:

static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc)
{
	unsigned int i = 0;
	struct scatterlist *sg;

	kgsl_page_alloc_unmap_kernel(memdesc);
	/* we certainly do not expect the hostptr to still be mapped */
	BUG_ON(memdesc->hostptr);
@@ -451,28 +449,15 @@ static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc)
		atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc);
	}

	for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) {
		/*
		 * sg_alloc_table_from_pages() will collapse any physically
		 * adjacent pages into a single scatterlist entry. We cannot
		 * just call __free_pages() on the entire set since we cannot
		 * ensure that the size is a whole order. Instead, free each
		 * page or compound page group individually.
		 */
		struct page *p = sg_page(sg), *next;
		unsigned int j = 0, count;
		while (j < (sg->length/PAGE_SIZE)) {
			if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED)
				ClearPagePrivate(p);

			count = 1 << compound_order(p);
			next = nth_page(p, count);
			__free_pages(p, compound_order(p));
			p = next;
			j += count;
	if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) {
		struct sg_page_iter sg_iter;

		for_each_sg_page(memdesc->sgt->sgl, &sg_iter,
					memdesc->sgt->nents, 0)
			ClearPagePrivate(sg_page_iter_page(&sg_iter));
	}
	}

	kgsl_pool_free_sgt(memdesc->sgt);
}

/*
@@ -681,19 +666,67 @@ static inline int get_page_size(size_t size, unsigned int align)
}
#endif

/*
 * kgsl_zero_pages() - Zero out and cache-flush an array of pages
 * @pages: array of pages to be zeroed
 * @pcount: number of entries in @pages
 *
 * Maps the pages into the kernel in 'step'-sized chunks via vmap(),
 * memsets them and flushes the dcache so userspace never sees stale
 * data; falls back to per-page kmap_atomic() when vmap() fails.
 */
static void kgsl_zero_pages(struct page **pages, unsigned int pcount)
{
	unsigned int j;
	unsigned int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;
	pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
	void *ptr;

	/*
	 * All memory that goes to the user has to be zeroed out before it gets
	 * exposed to userspace. This means that the memory has to be mapped in
	 * the kernel, zeroed (memset) and then unmapped.  This also means that
	 * the dcache has to be flushed to ensure coherency between the kernel
	 * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped
	 * zeroed and unmaped each individual page, and then we had to turn
	 * around and call flush_dcache_page() on that page to clear the caches.
	 * This was killing us for performance. Instead, we found it is much
	 * faster to allocate the pages without GFP_ZERO, map a chunk of the
	 * range ('step' pages), memset it, flush it and then unmap
	 * - this results in a factor of 4 improvement for speed for large
	 * buffers. There is a small decrease in speed for small buffers,
	 * but only on the order of a few microseconds at best. The 'step'
	 * size is based on a guess at the amount of free vmalloc space, but
	 * will scale down if there's not enough free space.
	 */
	for (j = 0; j < pcount; j += step) {
		/* Clamp the final chunk to the pages that remain */
		step = min(step, pcount - j);

		ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);

		if (ptr != NULL) {
			memset(ptr, 0, step * PAGE_SIZE);
			dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
			vunmap(ptr);
		} else {
			int k;
			/* Very, very, very slow path */

			for (k = j; k < j + step; k++) {
				ptr = kmap_atomic(pages[k]);
				memset(ptr, 0, PAGE_SIZE);
				dmac_flush_range(ptr, ptr + PAGE_SIZE);
				kunmap_atomic(ptr);
			}
			/* scale down the step size to avoid this path */
			if (step > 1)
				step >>= 1;
		}
	}
}

static int
kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
			struct kgsl_pagetable *pagetable,
			uint64_t size)
{
	int ret = 0;
	unsigned int j, pcount = 0, page_size, len_alloc;
	unsigned int j, page_size, len_alloc;
	unsigned int pcount = 0;
	size_t len;
	struct page **pages = NULL;
	pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
	void *ptr;
	unsigned int align;
	unsigned int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;

	size = PAGE_ALIGN(size);
	if (size == 0 || size > UINT_MAX)
@@ -740,30 +773,16 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
	len = size;

	while (len > 0) {
		struct page *page;
		gfp_t gfp_mask = __GFP_HIGHMEM;
		int j;
		int page_count;

		/* don't waste space at the end of the allocation*/
		if (len < page_size)
			page_size = PAGE_SIZE;

		/*
		 * Don't do some of the more aggressive memory recovery
		 * techniques for large order allocations
		 */
		if (page_size != PAGE_SIZE)
			gfp_mask |= __GFP_COMP | __GFP_NORETRY |
				__GFP_NO_KSWAPD | __GFP_NOWARN;
		else
			gfp_mask |= GFP_KERNEL;

		if (sharedmem_noretry_flag == true)
			gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
		page_count = kgsl_pool_alloc_page(page_size,
					pages + pcount, len_alloc - pcount);

		page = alloc_pages(gfp_mask, get_order(page_size));

		if (page == NULL) {
		if (page_count <= 0) {
			if (page_size != PAGE_SIZE) {
				page_size = PAGE_SIZE;
				continue;
@@ -785,9 +804,7 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
			goto done;
		}

		for (j = 0; j < page_size >> PAGE_SHIFT; j++)
			pages[pcount++] = nth_page(page, j);

		pcount += page_count;
		len -= page_size;
		memdesc->size += page_size;
	}
@@ -824,57 +841,23 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
		goto done;
	}

	/*
	 * All memory that goes to the user has to be zeroed out before it gets
	 * exposed to userspace. This means that the memory has to be mapped in
	 * the kernel, zeroed (memset) and then unmapped.  This also means that
	 * the dcache has to be flushed to ensure coherency between the kernel
	 * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped
	 * zeroed and unmaped each individual page, and then we had to turn
	 * around and call flush_dcache_page() on that page to clear the caches.
	 * This was killing us for performance. Instead, we found it is much
	 * faster to allocate the pages without GFP_ZERO, map a chunk of the
	 * range ('step' pages), memset it, flush it and then unmap
	 * - this results in a factor of 4 improvement for speed for large
	 * buffers. There is a small decrease in speed for small buffers,
	 * but only on the order of a few microseconds at best. The 'step'
	 * size is based on a guess at the amount of free vmalloc space, but
	 * will scale down if there's not enough free space.
	 */
	for (j = 0; j < pcount; j += step) {
		step = min(step, pcount - j);

		ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);

		if (ptr != NULL) {
			memset(ptr, 0, step * PAGE_SIZE);
			dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
			vunmap(ptr);
		} else {
			int k;
			/* Very, very, very slow path */

			for (k = j; k < j + step; k++) {
				ptr = kmap_atomic(pages[k]);
				memset(ptr, 0, PAGE_SIZE);
				dmac_flush_range(ptr, ptr + PAGE_SIZE);
				kunmap_atomic(ptr);
			}
			/* scale down the step size to avoid this path */
			if (step > 1)
				step >>= 1;
		}
	}

	KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.page_alloc,
		&kgsl_driver.stats.page_alloc_max);

	/*
	 * Zero out the pages.
	 */
	kgsl_zero_pages(pages, pcount);

done:
	if (ret) {
		if (pages) {
			unsigned int count = 1;

			for (j = 0; j < pcount; j += count) {
				count = 1 << compound_order(pages[j]);
			__free_pages(pages[j], compound_order(pages[j]));
				kgsl_pool_free_page(pages[j]);
			}
		}

		kfree(memdesc->sgt);