Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 69e50ada authored by Linux Build Service Account's avatar Linux Build Service Account Committed by Gerrit - the friendly Code Review server
Browse files

Merge "msm: kgsl: Use a page pool to reduce allocation time"

parents 0fa11ec3 afc87641
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -10,7 +10,8 @@ msm_kgsl_core-y = \
	kgsl_pwrscale.o \
	kgsl_mmu.o \
	kgsl_snapshot.o \
	kgsl_events.o
	kgsl_events.o \
	kgsl_pool.o

msm_kgsl_core-$(CONFIG_MSM_KGSL_IOMMU) += kgsl_iommu.o
msm_kgsl_core-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o
+6 −0
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@
#include "kgsl_trace.h"
#include "kgsl_sync.h"
#include "kgsl_compat.h"
#include "kgsl_pool.h"

#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "kgsl."
@@ -3954,6 +3955,9 @@ int kgsl_device_platform_probe(struct kgsl_device *device)
	/* Initialize common sysfs entries */
	kgsl_pwrctrl_init_sysfs(device);

	/* Initialize the memory pools */
	kgsl_init_page_pools();

	return 0;

error_close_mmu:
@@ -3972,6 +3976,8 @@ void kgsl_device_platform_remove(struct kgsl_device *device)

	kgsl_device_snapshot_close(device);

	kgsl_exit_page_pools();

	kgsl_pwrctrl_uninit_sysfs(device);

	pm_qos_remove_request(&device->pwrctrl.pm_qos_req_dma);
+341 −0
Original line number Diff line number Diff line
/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/version.h>

#include "kgsl.h"
#include "kgsl_device.h"
#include "kgsl_pool.h"

/*
 * Maximum pool size in terms of pages
 * = (Number of pools * Max size per pool)
 */
#define KGSL_POOL_MAX_PAGES (2 * 4096)

/* Cap on the combined size of all pools, in 4K pages (8192 pages = 32MB) */
static unsigned int kgsl_pool_max_pages = KGSL_POOL_MAX_PAGES;

/* A pool of free pages of a single allocation order */
struct kgsl_page_pool {
	unsigned int pool_order;	/* page order managed by this pool */
	int page_count;			/* number of entries on page_list */
	spinlock_t list_lock;		/* protects page_list and page_count */
	struct list_head page_list;	/* free pages, linked via page->lru */
};

/*
 * Statically defined pools: always an order-0 (4K) pool, plus an
 * order-4 (64K) pool unless the build restricts allocations to 4K chunks.
 */
static struct kgsl_page_pool kgsl_pools[] = {
	{
		.pool_order = 0,
		.list_lock = __SPIN_LOCK_UNLOCKED(kgsl_pools[0].list_lock),
		.page_list = LIST_HEAD_INIT(kgsl_pools[0].page_list),
	},
#ifndef CONFIG_ALLOC_BUFFERS_IN_4K_CHUNKS
	{
		.pool_order = 4,
		.list_lock = __SPIN_LOCK_UNLOCKED(kgsl_pools[1].list_lock),
		.page_list = LIST_HEAD_INIT(kgsl_pools[1].page_list),
	},
#endif
};

#define KGSL_NUM_POOLS ARRAY_SIZE(kgsl_pools)

/* Returns KGSL pool corresponding to input page order*/
static struct kgsl_page_pool *
_kgsl_get_pool_from_order(unsigned int order)
{
	int i;

	for (i = 0; i < KGSL_NUM_POOLS; i++) {
		if (kgsl_pools[i].pool_order == order)
			return &kgsl_pools[i];
	}

	return NULL;
}

/* Insert page @p at the tail of @pool's free list */
static void
_kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p)
{
	spin_lock(&pool->list_lock);
	pool->page_count++;
	list_add_tail(&p->lru, &pool->page_list);
	spin_unlock(&pool->list_lock);
}

/* Detach and return one page from @pool, or NULL when the pool is empty */
static struct page *
_kgsl_pool_get_page(struct kgsl_page_pool *pool)
{
	struct page *page = NULL;

	spin_lock(&pool->list_lock);
	if (pool->page_count != 0) {
		page = list_first_entry(&pool->page_list, struct page, lru);
		list_del(&page->lru);
		pool->page_count--;
	}
	spin_unlock(&pool->list_lock);

	return page;
}

/*
 * Report how many 4K pages @kgsl_pool currently holds
 * (entry count scaled by the pool's page order).
 */
static int
kgsl_pool_size(struct kgsl_page_pool *kgsl_pool)
{
	int pages;

	spin_lock(&kgsl_pool->list_lock);
	pages = kgsl_pool->page_count << kgsl_pool->pool_order;
	spin_unlock(&kgsl_pool->list_lock);

	return pages;
}

/* Sum the per-pool page counts across every kgsl page pool */
static int kgsl_pool_size_total(void)
{
	int i = 0;
	int total = 0;

	while (i < KGSL_NUM_POOLS)
		total += kgsl_pool_size(&kgsl_pools[i++]);

	return total;
}

/*
 * Free up to @num_pages pages worth of memory from @pool, stopping
 * early if the pool runs dry.  Returns the number of 4K pages released.
 */
static unsigned int
_kgsl_pool_shrink(struct kgsl_page_pool *pool, int num_pages)
{
	unsigned int freed = 0;
	int remaining;

	if (pool == NULL || num_pages <= 0)
		return 0;

	/* Convert the request into a number of pool-sized entries */
	for (remaining = num_pages >> pool->pool_order; remaining > 0;
			remaining--) {
		struct page *page = _kgsl_pool_get_page(pool);

		/* Pool exhausted - nothing more to release */
		if (page == NULL)
			break;

		__free_pages(page, pool->pool_order);
		freed += 1 << pool->pool_order;
	}

	return freed;
}

/*
 * kgsl_pool_reduce() - shrink the combined pool size to target_pages
 * @target_pages: desired total number of 4K pages left across all pools
 *
 * If the pools already hold no more than @target_pages nothing is done;
 * otherwise pages are freed starting from the highest-order pool until
 * the total drops to the target.  Returns the number of pages freed.
 */
static int
kgsl_pool_reduce(unsigned int target_pages)
{
	int total_pages;
	int i;
	int nr_removed;
	struct kgsl_page_pool *pool;
	unsigned int pcount = 0;

	total_pages = kgsl_pool_size_total();

	for (i = (KGSL_NUM_POOLS - 1); i >= 0; i--) {
		pool = &kgsl_pools[i];

		/*
		 * Pages still to be removed beyond what earlier iterations
		 * already freed.  Compare against the running remainder
		 * (total_pages - pcount) rather than repeatedly subtracting
		 * the *cumulative* pcount from total_pages, which would
		 * double-count pages freed by earlier pools and over-shrink
		 * once there are three or more pools.
		 */
		nr_removed = (total_pages - (int)pcount) - (int)target_pages;
		if (nr_removed <= 0)
			return pcount;

		/* Round up to an integral number of this pool's entries */
		nr_removed = ALIGN(nr_removed, 1 << pool->pool_order);

		/* Remove nr_removed pages from this pool */
		pcount += _kgsl_pool_shrink(pool, nr_removed);
	}

	return pcount;
}

/**
 * kgsl_pool_free_sgt() - Free scatter-gather list
 * @sgt: pointer of the sg list
 *
 * Free the sg list by collapsing any physical adjacent pages.
 * Pages are added back to the pool, if pool has sufficient space
 * otherwise they are given back to system.
 */

void kgsl_pool_free_sgt(struct sg_table *sgt)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
		/*
		 * sg_alloc_table_from_pages() will collapse any physically
		 * adjacent pages into a single scatterlist entry. We cannot
		 * just call __free_pages() on the entire set since we cannot
		 * ensure that the size is a whole order. Instead, free each
		 * page or compound page group individually.
		 */
		struct page *p = sg_page(sg), *next;
		unsigned int count;
		unsigned int j = 0;

		while (j < (sg->length/PAGE_SIZE)) {
			/* Size of this (possibly compound) group in pages */
			count = 1 << compound_order(p);
			/* Advance past the group before it is freed */
			next = nth_page(p, count);
			/* Recycle into a pool, or back to the system */
			kgsl_pool_free_page(p);

			p = next;
			j += count;
		}
	}
}

/**
 * kgsl_pool_alloc_page() - Allocate a page of requested size
 * @page_size: Size in bytes of the allocation
 * @pages: array that receives the constituent 4K pages; must be big
 * enough to hold the requested allocation
 * @pages_len: Length of the @pages array
 *
 * Return total page count on success and negative value on failure
 */
int kgsl_pool_alloc_page(int page_size, struct page **pages,
					unsigned int pages_len)
{
	unsigned int order = get_order(page_size);
	unsigned int npages = page_size >> PAGE_SHIFT;
	struct kgsl_page_pool *pool;
	struct page *page = NULL;
	unsigned int i;

	if (pages == NULL || pages_len < npages)
		return -EINVAL;

	/* Try to recycle a page from the matching pool first */
	pool = _kgsl_get_pool_from_order(order);
	if (pool != NULL)
		page = _kgsl_pool_get_page(pool);

	/* Fall back to the system allocator on a pool miss */
	if (page == NULL) {
		page = alloc_pages(kgsl_gfp_mask(order), order);
		if (page == NULL)
			return -ENOMEM;
	}

	/* Hand back each constituent 4K page individually */
	for (i = 0; i < npages; i++)
		pages[i] = nth_page(page, i);

	return npages;
}

/*
 * Return @page to its matching pool when there is room under the global
 * budget, otherwise release it back to the system allocator.
 */
void kgsl_pool_free_page(struct page *page)
{
	int order;
	struct kgsl_page_pool *pool;

	if (page == NULL)
		return;

	order = compound_order(page);

	/* Only pool the page while the total stays under the cap */
	if (kgsl_pool_size_total() < kgsl_pool_max_pages) {
		pool = _kgsl_get_pool_from_order(order);
		if (pool) {
			_kgsl_pool_add_page(pool, page);
			return;
		}
	}

	/* No matching pool or pool is full - give back to the system */
	__free_pages(page, order);
}

/* Functions for the shrinker */

static unsigned long
kgsl_pool_shrink_scan_objects(struct shrinker *shrinker,
					struct shrink_control *sc)
{
	/* sc->nr_to_scan is the number of pages the VM wants released */
	int to_scan = sc->nr_to_scan;
	int pool_size = kgsl_pool_size_total();
	int target;

	/* New pool size once the requested pages have been dropped */
	target = (to_scan > pool_size) ? 0 : pool_size - to_scan;

	/* kgsl_pool_reduce() returns the number of pages actually freed */
	return kgsl_pool_reduce(target);
}

static unsigned long
kgsl_pool_shrink_count_objects(struct shrinker *shrinker,
					struct shrink_control *sc)
{
	/* Every pooled page is reclaimable, so report the full pool size */
	unsigned long reclaimable = kgsl_pool_size_total();

	return reclaimable;
}

/*
 * VM shrinker hooks for the page pools; registered in
 * kgsl_init_page_pools() and unregistered in kgsl_exit_page_pools().
 */
static struct shrinker kgsl_pool_shrinker = {
	.count_objects = kgsl_pool_shrink_count_objects,
	.scan_objects = kgsl_pool_shrink_scan_objects,
	.seeks = DEFAULT_SEEKS,
	.batch = 0,
};

void kgsl_init_page_pools(void)
{
	/* Hook the pools into the VM shrinker so memory pressure drains them */
	register_shrinker(&kgsl_pool_shrinker);
}

void kgsl_exit_page_pools(void)
{
	/* Drain every pool back to the system allocator */
	kgsl_pool_reduce(0);

	/* Detach from the VM shrinker */
	unregister_shrinker(&kgsl_pool_shrinker);
}
+43 −0
Original line number Diff line number Diff line
/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
#ifndef __KGSL_POOL_H
#define __KGSL_POOL_H

#include <linux/mm_types.h>
#include "kgsl_sharedmem.h"

/*
 * kgsl_gfp_mask() - Build the gfp flags for a KGSL page allocation
 * @page_order: allocation order (0 for a single 4K page)
 *
 * Higher-order allocations skip the aggressive reclaim paths
 * (__GFP_NORETRY | __GFP_NO_KSWAPD) so that a failure can fall back
 * to order-0 pages instead of stalling.  When the driver-wide
 * "noretry" flag is set, even order-0 allocations are best-effort.
 */
static inline unsigned int
kgsl_gfp_mask(unsigned int page_order)
{
	unsigned int gfp_mask = __GFP_HIGHMEM;

	if (page_order > 0)
		gfp_mask |= __GFP_COMP | __GFP_NORETRY |
			__GFP_NO_KSWAPD | __GFP_NOWARN;
	else
		gfp_mask |= GFP_KERNEL;

	/* Test the flag directly instead of the redundant "== true" */
	if (kgsl_sharedmem_get_noretry())
		gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;

	return gfp_mask;
}

void kgsl_pool_free_sgt(struct sg_table *sgt);
void kgsl_init_page_pools(void);
void kgsl_exit_page_pools(void);
int kgsl_pool_alloc_page(int page_size, struct page **pages,
						unsigned int pages_len);
void kgsl_pool_free_page(struct page *p);
#endif /* __KGSL_POOL_H */
+77 −94
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include "kgsl_device.h"
#include "kgsl_log.h"
#include "kgsl_mmu.h"
#include "kgsl_pool.h"

/*
 * The user can set this from debugfs to force failed memory allocations to
@@ -424,9 +425,6 @@ done:

static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc)
{
	unsigned int i = 0;
	struct scatterlist *sg;

	kgsl_page_alloc_unmap_kernel(memdesc);
	/* we certainly do not expect the hostptr to still be mapped */
	BUG_ON(memdesc->hostptr);
@@ -451,28 +449,15 @@ static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc)
		atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc);
	}

	for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) {
		/*
		 * sg_alloc_table_from_pages() will collapse any physically
		 * adjacent pages into a single scatterlist entry. We cannot
		 * just call __free_pages() on the entire set since we cannot
		 * ensure that the size is a whole order. Instead, free each
		 * page or compound page group individually.
		 */
		struct page *p = sg_page(sg), *next;
		unsigned int j = 0, count;
		while (j < (sg->length/PAGE_SIZE)) {
			if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED)
				ClearPagePrivate(p);

			count = 1 << compound_order(p);
			next = nth_page(p, count);
			__free_pages(p, compound_order(p));
			p = next;
			j += count;
	if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) {
		struct sg_page_iter sg_iter;

		for_each_sg_page(memdesc->sgt->sgl, &sg_iter,
					memdesc->sgt->nents, 0)
			ClearPagePrivate(sg_page_iter_page(&sg_iter));
	}
	}

	kgsl_pool_free_sgt(memdesc->sgt);
}

/*
@@ -681,19 +666,67 @@ static inline int get_page_size(size_t size, unsigned int align)
}
#endif

/*
 * kgsl_zero_pages() - Zero out and cache-flush an array of pages
 * @pages: array of pages to be zeroed
 * @pcount: number of entries in @pages
 *
 * Maps the pages into the kernel in 'step'-sized chunks via vmap(),
 * memsets them and flushes the dcache so userspace never sees stale
 * data; falls back to per-page kmap_atomic() when vmap() fails.
 */
static void kgsl_zero_pages(struct page **pages, unsigned int pcount)
{
	unsigned int j;
	unsigned int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;
	pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
	void *ptr;

	/*
	 * All memory that goes to the user has to be zeroed out before it gets
	 * exposed to userspace. This means that the memory has to be mapped in
	 * the kernel, zeroed (memset) and then unmapped.  This also means that
	 * the dcache has to be flushed to ensure coherency between the kernel
	 * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped
	 * zeroed and unmaped each individual page, and then we had to turn
	 * around and call flush_dcache_page() on that page to clear the caches.
	 * This was killing us for performance. Instead, we found it is much
	 * faster to allocate the pages without GFP_ZERO, map a chunk of the
	 * range ('step' pages), memset it, flush it and then unmap
	 * - this results in a factor of 4 improvement for speed for large
	 * buffers. There is a small decrease in speed for small buffers,
	 * but only on the order of a few microseconds at best. The 'step'
	 * size is based on a guess at the amount of free vmalloc space, but
	 * will scale down if there's not enough free space.
	 */
	for (j = 0; j < pcount; j += step) {
		/* Clamp the final chunk to the pages that remain */
		step = min(step, pcount - j);

		ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);

		if (ptr != NULL) {
			memset(ptr, 0, step * PAGE_SIZE);
			dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
			vunmap(ptr);
		} else {
			int k;
			/* Very, very, very slow path */

			for (k = j; k < j + step; k++) {
				ptr = kmap_atomic(pages[k]);
				memset(ptr, 0, PAGE_SIZE);
				dmac_flush_range(ptr, ptr + PAGE_SIZE);
				kunmap_atomic(ptr);
			}
			/* scale down the step size to avoid this path */
			if (step > 1)
				step >>= 1;
		}
	}
}

static int
kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
			struct kgsl_pagetable *pagetable,
			uint64_t size)
{
	int ret = 0;
	unsigned int j, pcount = 0, page_size, len_alloc;
	unsigned int j, page_size, len_alloc;
	unsigned int pcount = 0;
	size_t len;
	struct page **pages = NULL;
	pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
	void *ptr;
	unsigned int align;
	unsigned int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;

	size = PAGE_ALIGN(size);
	if (size == 0 || size > UINT_MAX)
@@ -740,30 +773,16 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
	len = size;

	while (len > 0) {
		struct page *page;
		gfp_t gfp_mask = __GFP_HIGHMEM;
		int j;
		int page_count;

		/* don't waste space at the end of the allocation*/
		if (len < page_size)
			page_size = PAGE_SIZE;

		/*
		 * Don't do some of the more aggressive memory recovery
		 * techniques for large order allocations
		 */
		if (page_size != PAGE_SIZE)
			gfp_mask |= __GFP_COMP | __GFP_NORETRY |
				__GFP_NO_KSWAPD | __GFP_NOWARN;
		else
			gfp_mask |= GFP_KERNEL;

		if (sharedmem_noretry_flag == true)
			gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
		page_count = kgsl_pool_alloc_page(page_size,
					pages + pcount, len_alloc - pcount);

		page = alloc_pages(gfp_mask, get_order(page_size));

		if (page == NULL) {
		if (page_count <= 0) {
			if (page_size != PAGE_SIZE) {
				page_size = PAGE_SIZE;
				continue;
@@ -785,9 +804,7 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
			goto done;
		}

		for (j = 0; j < page_size >> PAGE_SHIFT; j++)
			pages[pcount++] = nth_page(page, j);

		pcount += page_count;
		len -= page_size;
		memdesc->size += page_size;
	}
@@ -824,57 +841,23 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
		goto done;
	}

	/*
	 * All memory that goes to the user has to be zeroed out before it gets
	 * exposed to userspace. This means that the memory has to be mapped in
	 * the kernel, zeroed (memset) and then unmapped.  This also means that
	 * the dcache has to be flushed to ensure coherency between the kernel
	 * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped
	 * zeroed and unmaped each individual page, and then we had to turn
	 * around and call flush_dcache_page() on that page to clear the caches.
	 * This was killing us for performance. Instead, we found it is much
	 * faster to allocate the pages without GFP_ZERO, map a chunk of the
	 * range ('step' pages), memset it, flush it and then unmap
	 * - this results in a factor of 4 improvement for speed for large
	 * buffers. There is a small decrease in speed for small buffers,
	 * but only on the order of a few microseconds at best. The 'step'
	 * size is based on a guess at the amount of free vmalloc space, but
	 * will scale down if there's not enough free space.
	 */
	for (j = 0; j < pcount; j += step) {
		step = min(step, pcount - j);

		ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);

		if (ptr != NULL) {
			memset(ptr, 0, step * PAGE_SIZE);
			dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
			vunmap(ptr);
		} else {
			int k;
			/* Very, very, very slow path */

			for (k = j; k < j + step; k++) {
				ptr = kmap_atomic(pages[k]);
				memset(ptr, 0, PAGE_SIZE);
				dmac_flush_range(ptr, ptr + PAGE_SIZE);
				kunmap_atomic(ptr);
			}
			/* scale down the step size to avoid this path */
			if (step > 1)
				step >>= 1;
		}
	}

	KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.page_alloc,
		&kgsl_driver.stats.page_alloc_max);

	/*
	 * Zero out the pages.
	 */
	kgsl_zero_pages(pages, pcount);

done:
	if (ret) {
		if (pages) {
			unsigned int count = 1;

			for (j = 0; j < pcount; j += count) {
				count = 1 << compound_order(pages[j]);
			__free_pages(pages[j], compound_order(pages[j]));
				kgsl_pool_free_page(pages[j]);
			}
		}

		kfree(memdesc->sgt);