Loading drivers/gpu/msm/adreno_ringbuffer.c +46 −12 Original line number Diff line number Diff line /* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved. /* Copyright (c) 2002,2007-2017,2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and Loading Loading @@ -259,6 +259,11 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev, PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc"); if (ret) return ret; /* allocate a chunk of memory to create user profiling IB1s */ kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->profile_desc, PAGE_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc"); return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc, KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "ringbuffer"); Loading @@ -272,7 +277,7 @@ int adreno_ringbuffer_probe(struct adreno_device *adreno_dev, bool nopreempt) if (!adreno_is_a3xx(adreno_dev)) { status = kgsl_allocate_global(device, &device->scratch, PAGE_SIZE, 0, 0, "scratch"); PAGE_SIZE, 0, KGSL_MEMDESC_RANDOM, "scratch"); if (status != 0) return status; } Loading Loading @@ -303,7 +308,7 @@ static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev, kgsl_free_global(device, &rb->pagetable_desc); kgsl_free_global(device, &rb->preemption_desc); kgsl_free_global(device, &rb->profile_desc); kgsl_free_global(device, &rb->buffer_desc); kgsl_del_event_group(&rb->events); memset(rb, 0, sizeof(struct adreno_ringbuffer)); Loading Loading @@ -737,6 +742,37 @@ static inline int _get_alwayson_counter(struct adreno_device *adreno_dev, return (unsigned int)(p - cmds); } /* This is the maximum possible size for 64 bit targets */ #define PROFILE_IB_DWORDS 4 #define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2)) static int set_user_profiling(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb, u32 *cmds, u64 gpuaddr) { 
int dwords, index = 0; u64 ib_gpuaddr; u32 *ib; if (!rb->profile_desc.hostptr) return 0; ib = ((u32 *) rb->profile_desc.hostptr) + (rb->profile_index * PROFILE_IB_DWORDS); ib_gpuaddr = rb->profile_desc.gpuaddr + (rb->profile_index * (PROFILE_IB_DWORDS << 2)); dwords = _get_alwayson_counter(adreno_dev, ib, gpuaddr); /* Make an indirect buffer for the request */ cmds[index++] = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); index += cp_gpuaddr(adreno_dev, &cmds[index], ib_gpuaddr); cmds[index++] = dwords; rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS; return index; } /* adreno_rindbuffer_submitcmd - submit userspace IBs to the GPU */ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, struct kgsl_drawobj_cmd *cmdobj, Loading Loading @@ -836,14 +872,12 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, if (drawobj->flags & KGSL_DRAWOBJ_PROFILING && !adreno_is_a3xx(adreno_dev) && profile_buffer) { user_profiling = true; dwords += 6; /* * REG_TO_MEM packet on A5xx and above needs another ordinal. * Add 2 more dwords since we do profiling before and after. 
* User side profiling uses two IB1s, one before and one after the * command obj, with 4 dwords per INDIRECT_BUFFER_PFE call */ if (!ADRENO_LEGACY_PM4(adreno_dev)) dwords += 2; dwords += 8; /* * we want to use an adreno_submit_time struct to get the Loading Loading @@ -886,11 +920,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, } /* * Add cmds to read the GPU ticks at the start of command obj and * Add IB1 to read the GPU ticks at the start of command obj and * write it into the appropriate command obj profiling buffer offset */ if (user_profiling) { cmds += _get_alwayson_counter(adreno_dev, cmds, cmds += set_user_profiling(adreno_dev, rb, cmds, cmdobj->profiling_buffer_gpuaddr + offsetof(struct kgsl_drawobj_profiling_buffer, gpu_ticks_submitted)); Loading Loading @@ -929,11 +963,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, } /* * Add cmds to read the GPU ticks at the end of command obj and * Add IB1 to read the GPU ticks at the end of command obj and * write it into the appropriate command obj profiling buffer offset */ if (user_profiling) { cmds += _get_alwayson_counter(adreno_dev, cmds, cmds += set_user_profiling(adreno_dev, rb, cmds, cmdobj->profiling_buffer_gpuaddr + offsetof(struct kgsl_drawobj_profiling_buffer, gpu_ticks_retired)); Loading drivers/gpu/msm/adreno_ringbuffer.h +13 −1 Original line number Diff line number Diff line /* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved. /* Copyright (c) 2002,2007-2016,2019, The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and Loading Loading @@ -127,6 +127,18 @@ struct adreno_ringbuffer { unsigned long sched_timer; enum adreno_dispatcher_starve_timer_states starve_timer_state; spinlock_t preempt_lock; /** * @profile_desc: global memory to construct IB1s to do user side * profiling */ struct kgsl_memdesc profile_desc; /** * @profile_index: Index of the next "slot" in profile_desc for a user * profiling IB1. This allows for PAGE_SIZE / 16 = 256 simultaneous * commands per ringbuffer with user profiling enabled, which should be * enough. */ u32 profile_index; }; /* Returns the current ringbuffer */ Loading drivers/gpu/msm/kgsl.h +3 −1 Original line number Diff line number Diff line /* Copyright (c) 2008-2016, 2018, The Linux Foundation. All rights reserved. /* Copyright (c) 2008-2016,2018-2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and Loading Loading @@ -189,6 +189,8 @@ struct kgsl_memdesc_ops { #define KGSL_MEMDESC_TZ_LOCKED BIT(7) /* The memdesc is allocated through contiguous memory */ #define KGSL_MEMDESC_CONTIG BIT(8) /* For global buffers, randomly assign an address from the region */ #define KGSL_MEMDESC_RANDOM BIT(9) /** * struct kgsl_memdesc - GPU memory object descriptor Loading drivers/gpu/msm/kgsl_iommu.c +45 −15 Original line number Diff line number Diff line /* Copyright (c) 2011-2018, The Linux Foundation. All rights reserved. /* Copyright (c) 2011-2019, The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and Loading @@ -20,6 +20,7 @@ #include <linux/msm_kgsl.h> #include <linux/ratelimit.h> #include <linux/of_platform.h> #include <linux/random.h> #include <soc/qcom/scm.h> #include <soc/qcom/secure_buffer.h> #include <stddef.h> Loading Loading @@ -84,15 +85,8 @@ static struct kmem_cache *addr_entry_cache; * * Here we define an array and a simple allocator to keep track of the currently * active global entries. Each entry is assigned a unique address inside of a * MMU implementation specific "global" region. The addresses are assigned * sequentially and never re-used to avoid having to go back and reprogram * existing pagetables. The entire list of active entries are mapped and * unmapped into every new pagetable as it is created and destroyed. * * Because there are relatively few entries and they are defined at boot time we * don't need to go over the top to define a dynamic allocation scheme. It will * be less wasteful to pick a static number with a little bit of growth * potential. * MMU implementation specific "global" region. We use a simple bitmap based * allocator for the region to allow for both fixed and dynamic addressing. 
*/ #define GLOBAL_PT_ENTRIES 32 Loading @@ -102,10 +96,12 @@ struct global_pt_entry { char name[32]; }; #define GLOBAL_MAP_PAGES (KGSL_IOMMU_GLOBAL_MEM_SIZE >> PAGE_SHIFT) static struct global_pt_entry global_pt_entries[GLOBAL_PT_ENTRIES]; static struct kgsl_memdesc *kgsl_global_secure_pt_entry; static DECLARE_BITMAP(global_map, GLOBAL_MAP_PAGES); static int global_pt_count; uint64_t global_pt_alloc; static struct kgsl_memdesc gpu_qdss_desc; static struct kgsl_memdesc gpu_qtimer_desc; Loading Loading @@ -186,6 +182,12 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu, for (i = 0; i < global_pt_count; i++) { if (global_pt_entries[i].memdesc == memdesc) { u64 offset = memdesc->gpuaddr - KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); bitmap_clear(global_map, offset >> PAGE_SHIFT, kgsl_memdesc_footprint(memdesc) >> PAGE_SHIFT); memdesc->gpuaddr = 0; memdesc->priv &= ~KGSL_MEMDESC_GLOBAL; global_pt_entries[i].memdesc = NULL; Loading @@ -197,15 +199,43 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu, static void kgsl_iommu_add_global(struct kgsl_mmu *mmu, struct kgsl_memdesc *memdesc, const char *name) { u32 bit, start = 0; u64 size = kgsl_memdesc_footprint(memdesc); if (memdesc->gpuaddr != 0) return; BUG_ON(global_pt_count >= GLOBAL_PT_ENTRIES); BUG_ON((global_pt_alloc + memdesc->size) >= KGSL_IOMMU_GLOBAL_MEM_SIZE); if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES)) return; if (WARN_ON(size > KGSL_IOMMU_GLOBAL_MEM_SIZE)) return; if (memdesc->priv & KGSL_MEMDESC_RANDOM) { u32 range = GLOBAL_MAP_PAGES - (size >> PAGE_SHIFT); start = get_random_int() % range; } while (start >= 0) { bit = bitmap_find_next_zero_area(global_map, GLOBAL_MAP_PAGES, start, size >> PAGE_SHIFT, 0); if (bit < GLOBAL_MAP_PAGES) break; start--; } if (WARN_ON(start < 0)) return; memdesc->gpuaddr = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + (bit << PAGE_SHIFT); bitmap_set(global_map, bit, size >> PAGE_SHIFT); memdesc->gpuaddr = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + global_pt_alloc; memdesc->priv |= 
KGSL_MEMDESC_GLOBAL; global_pt_alloc += memdesc->size; global_pt_entries[global_pt_count].memdesc = memdesc; strlcpy(global_pt_entries[global_pt_count].name, name, Loading Loading
drivers/gpu/msm/adreno_ringbuffer.c +46 −12 Original line number Diff line number Diff line /* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved. /* Copyright (c) 2002,2007-2017,2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and Loading Loading @@ -259,6 +259,11 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev, PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc"); if (ret) return ret; /* allocate a chunk of memory to create user profiling IB1s */ kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->profile_desc, PAGE_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc"); return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc, KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "ringbuffer"); Loading @@ -272,7 +277,7 @@ int adreno_ringbuffer_probe(struct adreno_device *adreno_dev, bool nopreempt) if (!adreno_is_a3xx(adreno_dev)) { status = kgsl_allocate_global(device, &device->scratch, PAGE_SIZE, 0, 0, "scratch"); PAGE_SIZE, 0, KGSL_MEMDESC_RANDOM, "scratch"); if (status != 0) return status; } Loading Loading @@ -303,7 +308,7 @@ static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev, kgsl_free_global(device, &rb->pagetable_desc); kgsl_free_global(device, &rb->preemption_desc); kgsl_free_global(device, &rb->profile_desc); kgsl_free_global(device, &rb->buffer_desc); kgsl_del_event_group(&rb->events); memset(rb, 0, sizeof(struct adreno_ringbuffer)); Loading Loading @@ -737,6 +742,37 @@ static inline int _get_alwayson_counter(struct adreno_device *adreno_dev, return (unsigned int)(p - cmds); } /* This is the maximum possible size for 64 bit targets */ #define PROFILE_IB_DWORDS 4 #define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2)) static int set_user_profiling(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb, u32 *cmds, u64 gpuaddr) { int 
dwords, index = 0; u64 ib_gpuaddr; u32 *ib; if (!rb->profile_desc.hostptr) return 0; ib = ((u32 *) rb->profile_desc.hostptr) + (rb->profile_index * PROFILE_IB_DWORDS); ib_gpuaddr = rb->profile_desc.gpuaddr + (rb->profile_index * (PROFILE_IB_DWORDS << 2)); dwords = _get_alwayson_counter(adreno_dev, ib, gpuaddr); /* Make an indirect buffer for the request */ cmds[index++] = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); index += cp_gpuaddr(adreno_dev, &cmds[index], ib_gpuaddr); cmds[index++] = dwords; rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS; return index; } /* adreno_rindbuffer_submitcmd - submit userspace IBs to the GPU */ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, struct kgsl_drawobj_cmd *cmdobj, Loading Loading @@ -836,14 +872,12 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, if (drawobj->flags & KGSL_DRAWOBJ_PROFILING && !adreno_is_a3xx(adreno_dev) && profile_buffer) { user_profiling = true; dwords += 6; /* * REG_TO_MEM packet on A5xx and above needs another ordinal. * Add 2 more dwords since we do profiling before and after. 
* User side profiling uses two IB1s, one before and one after the * command obj, with 4 dwords per INDIRECT_BUFFER_PFE call */ if (!ADRENO_LEGACY_PM4(adreno_dev)) dwords += 2; dwords += 8; /* * we want to use an adreno_submit_time struct to get the Loading Loading @@ -886,11 +920,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, } /* * Add cmds to read the GPU ticks at the start of command obj and * Add IB1 to read the GPU ticks at the start of command obj and * write it into the appropriate command obj profiling buffer offset */ if (user_profiling) { cmds += _get_alwayson_counter(adreno_dev, cmds, cmds += set_user_profiling(adreno_dev, rb, cmds, cmdobj->profiling_buffer_gpuaddr + offsetof(struct kgsl_drawobj_profiling_buffer, gpu_ticks_submitted)); Loading Loading @@ -929,11 +963,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, } /* * Add cmds to read the GPU ticks at the end of command obj and * Add IB1 to read the GPU ticks at the end of command obj and * write it into the appropriate command obj profiling buffer offset */ if (user_profiling) { cmds += _get_alwayson_counter(adreno_dev, cmds, cmds += set_user_profiling(adreno_dev, rb, cmds, cmdobj->profiling_buffer_gpuaddr + offsetof(struct kgsl_drawobj_profiling_buffer, gpu_ticks_retired)); Loading
drivers/gpu/msm/adreno_ringbuffer.h +13 −1 Original line number Diff line number Diff line /* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved. /* Copyright (c) 2002,2007-2016,2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and Loading Loading @@ -127,6 +127,18 @@ struct adreno_ringbuffer { unsigned long sched_timer; enum adreno_dispatcher_starve_timer_states starve_timer_state; spinlock_t preempt_lock; /** * @profile_desc: global memory to construct IB1s to do user side * profiling */ struct kgsl_memdesc profile_desc; /** * @profile_index: Index of the next "slot" in profile_desc for a user * profiling IB1. This allows for PAGE_SIZE / 16 = 256 simultaneous * commands per ringbuffer with user profiling enabled, which should be * enough. */ u32 profile_index; }; /* Returns the current ringbuffer */ Loading
drivers/gpu/msm/kgsl.h +3 −1 Original line number Diff line number Diff line /* Copyright (c) 2008-2016, 2018, The Linux Foundation. All rights reserved. /* Copyright (c) 2008-2016,2018-2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and Loading Loading @@ -189,6 +189,8 @@ struct kgsl_memdesc_ops { #define KGSL_MEMDESC_TZ_LOCKED BIT(7) /* The memdesc is allocated through contiguous memory */ #define KGSL_MEMDESC_CONTIG BIT(8) /* For global buffers, randomly assign an address from the region */ #define KGSL_MEMDESC_RANDOM BIT(9) /** * struct kgsl_memdesc - GPU memory object descriptor Loading
drivers/gpu/msm/kgsl_iommu.c +45 −15 Original line number Diff line number Diff line /* Copyright (c) 2011-2018, The Linux Foundation. All rights reserved. /* Copyright (c) 2011-2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and Loading @@ -20,6 +20,7 @@ #include <linux/msm_kgsl.h> #include <linux/ratelimit.h> #include <linux/of_platform.h> #include <linux/random.h> #include <soc/qcom/scm.h> #include <soc/qcom/secure_buffer.h> #include <stddef.h> Loading Loading @@ -84,15 +85,8 @@ static struct kmem_cache *addr_entry_cache; * * Here we define an array and a simple allocator to keep track of the currently * active global entries. Each entry is assigned a unique address inside of a * MMU implementation specific "global" region. The addresses are assigned * sequentially and never re-used to avoid having to go back and reprogram * existing pagetables. The entire list of active entries are mapped and * unmapped into every new pagetable as it is created and destroyed. * * Because there are relatively few entries and they are defined at boot time we * don't need to go over the top to define a dynamic allocation scheme. It will * be less wasteful to pick a static number with a little bit of growth * potential. * MMU implementation specific "global" region. We use a simple bitmap based * allocator for the region to allow for both fixed and dynamic addressing. 
*/ #define GLOBAL_PT_ENTRIES 32 Loading @@ -102,10 +96,12 @@ struct global_pt_entry { char name[32]; }; #define GLOBAL_MAP_PAGES (KGSL_IOMMU_GLOBAL_MEM_SIZE >> PAGE_SHIFT) static struct global_pt_entry global_pt_entries[GLOBAL_PT_ENTRIES]; static struct kgsl_memdesc *kgsl_global_secure_pt_entry; static DECLARE_BITMAP(global_map, GLOBAL_MAP_PAGES); static int global_pt_count; uint64_t global_pt_alloc; static struct kgsl_memdesc gpu_qdss_desc; static struct kgsl_memdesc gpu_qtimer_desc; Loading Loading @@ -186,6 +182,12 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu, for (i = 0; i < global_pt_count; i++) { if (global_pt_entries[i].memdesc == memdesc) { u64 offset = memdesc->gpuaddr - KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); bitmap_clear(global_map, offset >> PAGE_SHIFT, kgsl_memdesc_footprint(memdesc) >> PAGE_SHIFT); memdesc->gpuaddr = 0; memdesc->priv &= ~KGSL_MEMDESC_GLOBAL; global_pt_entries[i].memdesc = NULL; Loading @@ -197,15 +199,43 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu, static void kgsl_iommu_add_global(struct kgsl_mmu *mmu, struct kgsl_memdesc *memdesc, const char *name) { u32 bit, start = 0; u64 size = kgsl_memdesc_footprint(memdesc); if (memdesc->gpuaddr != 0) return; BUG_ON(global_pt_count >= GLOBAL_PT_ENTRIES); BUG_ON((global_pt_alloc + memdesc->size) >= KGSL_IOMMU_GLOBAL_MEM_SIZE); if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES)) return; if (WARN_ON(size > KGSL_IOMMU_GLOBAL_MEM_SIZE)) return; if (memdesc->priv & KGSL_MEMDESC_RANDOM) { u32 range = GLOBAL_MAP_PAGES - (size >> PAGE_SHIFT); start = get_random_int() % range; } while (start >= 0) { bit = bitmap_find_next_zero_area(global_map, GLOBAL_MAP_PAGES, start, size >> PAGE_SHIFT, 0); if (bit < GLOBAL_MAP_PAGES) break; start--; } if (WARN_ON(start < 0)) return; memdesc->gpuaddr = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + (bit << PAGE_SHIFT); bitmap_set(global_map, bit, size >> PAGE_SHIFT); memdesc->gpuaddr = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + global_pt_alloc; memdesc->priv |= 
KGSL_MEMDESC_GLOBAL; global_pt_alloc += memdesc->size; global_pt_entries[global_pt_count].memdesc = memdesc; strlcpy(global_pt_entries[global_pt_count].name, name, Loading