Loading drivers/gpu/msm/adreno_ringbuffer.c +44 −10 Original line number Diff line number Diff line Loading @@ -289,6 +289,11 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev, PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc"); if (ret) return ret; /* allocate a chunk of memory to create user profiling IB1s */ kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->profile_desc, PAGE_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc"); return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc, KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "ringbuffer"); Loading Loading @@ -343,7 +348,7 @@ static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); kgsl_free_global(device, &rb->pagetable_desc); kgsl_free_global(device, &rb->profile_desc); kgsl_free_global(device, &rb->buffer_desc); kgsl_del_event_group(&rb->events); memset(rb, 0, sizeof(struct adreno_ringbuffer)); Loading Loading @@ -814,6 +819,37 @@ static inline int _get_alwayson_counter(struct adreno_device *adreno_dev, return (unsigned int)(p - cmds); } /* This is the maximum possible size for 64 bit targets */ #define PROFILE_IB_DWORDS 4 #define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2)) static int set_user_profiling(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb, u32 *cmds, u64 gpuaddr) { int dwords, index = 0; u64 ib_gpuaddr; u32 *ib; if (!rb->profile_desc.hostptr) return 0; ib = ((u32 *) rb->profile_desc.hostptr) + (rb->profile_index * PROFILE_IB_DWORDS); ib_gpuaddr = rb->profile_desc.gpuaddr + (rb->profile_index * (PROFILE_IB_DWORDS << 2)); dwords = _get_alwayson_counter(adreno_dev, ib, gpuaddr); /* Make an indirect buffer for the request */ cmds[index++] = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); index += cp_gpuaddr(adreno_dev, &cmds[index], ib_gpuaddr); cmds[index++] = dwords; rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS; return index; } /* 
adreno_ringbuffer_submitcmd - submit userspace IBs to the GPU */ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, struct kgsl_drawobj_cmd *cmdobj, Loading Loading @@ -913,14 +949,12 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, !adreno_is_a3xx(adreno_dev) && (cmdobj->profiling_buf_entry != NULL)) { user_profiling = true; dwords += 6; /* * REG_TO_MEM packet on A5xx and above needs another ordinal. * Add 2 more dwords since we do profiling before and after. * User side profiling uses two IB1s, one before and one after, * with 4 dwords per INDIRECT_BUFFER_PFE call */ if (!ADRENO_LEGACY_PM4(adreno_dev)) dwords += 2; dwords += 8; /* * we want to use an adreno_submit_time struct to get the Loading Loading @@ -972,11 +1006,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, } /* * Add cmds to read the GPU ticks at the start of command obj and * Add IB1 to read the GPU ticks at the start of command obj and * write it into the appropriate command obj profiling buffer offset */ if (user_profiling) { cmds += _get_alwayson_counter(adreno_dev, cmds, cmds += set_user_profiling(adreno_dev, rb, cmds, cmdobj->profiling_buffer_gpuaddr + offsetof(struct kgsl_drawobj_profiling_buffer, gpu_ticks_submitted)); Loading Loading @@ -1023,11 +1057,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, } /* * Add cmds to read the GPU ticks at the end of command obj and * Add IB1 to read the GPU ticks at the end of command obj and * write it into the appropriate command obj profiling buffer offset */ if (user_profiling) { cmds += _get_alwayson_counter(adreno_dev, cmds, cmds += set_user_profiling(adreno_dev, rb, cmds, cmdobj->profiling_buffer_gpuaddr + offsetof(struct kgsl_drawobj_profiling_buffer, gpu_ticks_retired)); Loading drivers/gpu/msm/adreno_ringbuffer.h +12 −0 Original line number Diff line number Diff line Loading @@ -121,6 +121,18 @@ struct adreno_ringbuffer { int preempted_midway; spinlock_t preempt_lock; bool 
skip_inline_wptr; /** * @profile_desc: global memory to construct IB1s to do user side * profiling */ struct kgsl_memdesc profile_desc; /** * @profile_index: Index of the next "slot" in profile_desc for a user * profiling IB1. This allows for PAGE_SIZE / 16 = 256 simultaneous * commands per ringbuffer with user profiling enabled, which should be * enough. */ u32 profile_index; }; /* Returns the current ringbuffer */ Loading drivers/gpu/msm/kgsl_drawobj.c +20 −4 Original line number Diff line number Diff line Loading @@ -584,13 +584,29 @@ static void add_profiling_buffer(struct kgsl_device *device, return; } cmdobj->profiling_buf_entry = entry; if (id != 0) if (!id) { cmdobj->profiling_buffer_gpuaddr = gpuaddr; } else { u64 off = offset + sizeof(struct kgsl_drawobj_profiling_buffer); /* * Make sure there is enough room in the object to store the * entire profiling buffer object */ if (off < offset || off >= entry->memdesc.size) { dev_err(device->dev, "ignore invalid profile offset ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", drawobj->context->id, id, offset, gpuaddr, size); kgsl_mem_entry_put(entry); return; } cmdobj->profiling_buffer_gpuaddr = entry->memdesc.gpuaddr + offset; else cmdobj->profiling_buffer_gpuaddr = gpuaddr; } cmdobj->profiling_buf_entry = entry; } /** Loading Loading
drivers/gpu/msm/adreno_ringbuffer.c +44 −10 Original line number Diff line number Diff line Loading @@ -289,6 +289,11 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev, PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc"); if (ret) return ret; /* allocate a chunk of memory to create user profiling IB1s */ kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->profile_desc, PAGE_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc"); return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc, KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "ringbuffer"); Loading Loading @@ -343,7 +348,7 @@ static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev, struct kgsl_device *device = KGSL_DEVICE(adreno_dev); kgsl_free_global(device, &rb->pagetable_desc); kgsl_free_global(device, &rb->profile_desc); kgsl_free_global(device, &rb->buffer_desc); kgsl_del_event_group(&rb->events); memset(rb, 0, sizeof(struct adreno_ringbuffer)); Loading Loading @@ -814,6 +819,37 @@ static inline int _get_alwayson_counter(struct adreno_device *adreno_dev, return (unsigned int)(p - cmds); } /* This is the maximum possible size for 64 bit targets */ #define PROFILE_IB_DWORDS 4 #define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2)) static int set_user_profiling(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb, u32 *cmds, u64 gpuaddr) { int dwords, index = 0; u64 ib_gpuaddr; u32 *ib; if (!rb->profile_desc.hostptr) return 0; ib = ((u32 *) rb->profile_desc.hostptr) + (rb->profile_index * PROFILE_IB_DWORDS); ib_gpuaddr = rb->profile_desc.gpuaddr + (rb->profile_index * (PROFILE_IB_DWORDS << 2)); dwords = _get_alwayson_counter(adreno_dev, ib, gpuaddr); /* Make an indirect buffer for the request */ cmds[index++] = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); index += cp_gpuaddr(adreno_dev, &cmds[index], ib_gpuaddr); cmds[index++] = dwords; rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS; return index; } /* 
adreno_ringbuffer_submitcmd - submit userspace IBs to the GPU */ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, struct kgsl_drawobj_cmd *cmdobj, Loading Loading @@ -913,14 +949,12 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, !adreno_is_a3xx(adreno_dev) && (cmdobj->profiling_buf_entry != NULL)) { user_profiling = true; dwords += 6; /* * REG_TO_MEM packet on A5xx and above needs another ordinal. * Add 2 more dwords since we do profiling before and after. * User side profiling uses two IB1s, one before and one after, * with 4 dwords per INDIRECT_BUFFER_PFE call */ if (!ADRENO_LEGACY_PM4(adreno_dev)) dwords += 2; dwords += 8; /* * we want to use an adreno_submit_time struct to get the Loading Loading @@ -972,11 +1006,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, } /* * Add cmds to read the GPU ticks at the start of command obj and * Add IB1 to read the GPU ticks at the start of command obj and * write it into the appropriate command obj profiling buffer offset */ if (user_profiling) { cmds += _get_alwayson_counter(adreno_dev, cmds, cmds += set_user_profiling(adreno_dev, rb, cmds, cmdobj->profiling_buffer_gpuaddr + offsetof(struct kgsl_drawobj_profiling_buffer, gpu_ticks_submitted)); Loading Loading @@ -1023,11 +1057,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, } /* * Add cmds to read the GPU ticks at the end of command obj and * Add IB1 to read the GPU ticks at the end of command obj and * write it into the appropriate command obj profiling buffer offset */ if (user_profiling) { cmds += _get_alwayson_counter(adreno_dev, cmds, cmds += set_user_profiling(adreno_dev, rb, cmds, cmdobj->profiling_buffer_gpuaddr + offsetof(struct kgsl_drawobj_profiling_buffer, gpu_ticks_retired)); Loading
drivers/gpu/msm/adreno_ringbuffer.h +12 −0 Original line number Diff line number Diff line Loading @@ -121,6 +121,18 @@ struct adreno_ringbuffer { int preempted_midway; spinlock_t preempt_lock; bool skip_inline_wptr; /** * @profile_desc: global memory to construct IB1s to do user side * profiling */ struct kgsl_memdesc profile_desc; /** * @profile_index: Index of the next "slot" in profile_desc for a user * profiling IB1. This allows for PAGE_SIZE / 16 = 256 simultaneous * commands per ringbuffer with user profiling enabled, which should be * enough. */ u32 profile_index; }; /* Returns the current ringbuffer */ Loading
drivers/gpu/msm/kgsl_drawobj.c +20 −4 Original line number Diff line number Diff line Loading @@ -584,13 +584,29 @@ static void add_profiling_buffer(struct kgsl_device *device, return; } cmdobj->profiling_buf_entry = entry; if (id != 0) if (!id) { cmdobj->profiling_buffer_gpuaddr = gpuaddr; } else { u64 off = offset + sizeof(struct kgsl_drawobj_profiling_buffer); /* * Make sure there is enough room in the object to store the * entire profiling buffer object */ if (off < offset || off >= entry->memdesc.size) { dev_err(device->dev, "ignore invalid profile offset ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", drawobj->context->id, id, offset, gpuaddr, size); kgsl_mem_entry_put(entry); return; } cmdobj->profiling_buffer_gpuaddr = entry->memdesc.gpuaddr + offset; else cmdobj->profiling_buffer_gpuaddr = gpuaddr; } cmdobj->profiling_buf_entry = entry; } /** Loading