Loading drivers/gpu/drm/msm/adreno/a5xx_gpu.c +62 −0 Original line number Diff line number Diff line Loading @@ -133,10 +133,30 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_YIELD_ENABLE, 1); OUT_RING(ring, 0x02); /* Record the always on counter before command execution */ if (submit->profile_buf_iova) { uint64_t gpuaddr = submit->profile_buf_iova + offsetof(struct drm_msm_gem_submit_profile_buffer, ticks_submitted); /* * Set bit[30] to make this command a 64 bit write operation. * bits[18-29] is to specify number of consecutive registers * to copy, so set this space with 2, since we want to copy * data from REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and [HI]. */ OUT_PKT7(ring, CP_REG_TO_MEM, 3); OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO | (1 << 30) | (2 << 18)); OUT_RING(ring, lower_32_bits(gpuaddr)); OUT_RING(ring, upper_32_bits(gpuaddr)); } /* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: case MSM_SUBMIT_CMD_PROFILE_BUF: break; case MSM_SUBMIT_CMD_BUF: OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); Loading Loading @@ -164,6 +184,19 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_YIELD_ENABLE, 1); OUT_RING(ring, 0x01); /* Record the always on counter after command execution */ if (submit->profile_buf_iova) { uint64_t gpuaddr = submit->profile_buf_iova + offsetof(struct drm_msm_gem_submit_profile_buffer, ticks_retired); OUT_PKT7(ring, CP_REG_TO_MEM, 3); OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO | (1 << 30) | (2 << 18)); OUT_RING(ring, lower_32_bits(gpuaddr)); OUT_RING(ring, upper_32_bits(gpuaddr)); } /* Write the fence to the scratch register */ OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); OUT_RING(ring, submit->fence); Loading Loading @@ -193,6 +226,35 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) /* Set bit 0 to trigger an interrupt on preempt 
complete */ OUT_RING(ring, 0x01); if (submit->profile_buf_iova) { unsigned long flags; uint64_t ktime; struct drm_msm_gem_submit_profile_buffer *profile_buf = submit->profile_buf_vaddr; /* * With this profiling, we are trying to create closest * possible mapping between the CPU time domain(monotonic clock) * and the GPU time domain(ticks). In order to make this * happen, we need to briefly turn off interrupts to make sure * interrupts do not run between collecting these two samples. */ local_irq_save(flags); profile_buf->ticks_queued = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO, REG_A5XX_RBBM_ALWAYSON_COUNTER_HI); ktime = ktime_get_raw_ns(); local_irq_restore(flags); do_div(ktime, NSEC_PER_SEC); profile_buf->queue_time = ktime; profile_buf->submit_time = ktime; } a5xx_flush(gpu, ring); /* Check to see if we need to start preemption */ Loading drivers/gpu/drm/msm/adreno/adreno_gpu.c +1 −0 Original line number Diff line number Diff line Loading @@ -183,6 +183,7 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) case MSM_SUBMIT_CMD_IB_TARGET_BUF: /* ignore IB-targets */ break; case MSM_SUBMIT_CMD_PROFILE_BUF: case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: break; case MSM_SUBMIT_CMD_BUF: Loading drivers/gpu/drm/msm/msm_gem.h +2 −0 Original line number Diff line number Diff line Loading @@ -125,6 +125,8 @@ struct msm_gem_submit { uint32_t fence; int ring; bool valid; uint64_t profile_buf_iova; void *profile_buf_vaddr; unsigned int nr_cmds; unsigned int nr_bos; struct { Loading drivers/gpu/drm/msm/msm_gem_submit.c +10 −0 Original line number Diff line number Diff line Loading @@ -48,6 +48,9 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->nr_bos = 0; submit->nr_cmds = 0; submit->profile_buf_vaddr = NULL; submit->profile_buf_iova = 0; INIT_LIST_HEAD(&submit->bo_list); ww_acquire_init(&submit->ticket, &reservation_ww_class); } Loading Loading @@ -393,6 +396,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, 
case MSM_SUBMIT_CMD_BUF: case MSM_SUBMIT_CMD_IB_TARGET_BUF: case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: case MSM_SUBMIT_CMD_PROFILE_BUF: break; default: DRM_ERROR("invalid type: %08x\n", submit_cmd.type); Loading Loading @@ -425,6 +429,12 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->cmd[i].iova = iova + submit_cmd.submit_offset; submit->cmd[i].idx = submit_cmd.submit_idx; if (submit_cmd.type == MSM_SUBMIT_CMD_PROFILE_BUF) { submit->profile_buf_iova = submit->cmd[i].iova; submit->profile_buf_vaddr = msm_gem_vaddr_locked(&msm_obj->base); } if (submit->valid) continue; Loading include/uapi/drm/msm_drm.h +11 −0 Original line number Diff line number Diff line Loading @@ -152,10 +152,13 @@ struct drm_msm_gem_submit_reloc { * this buffer in the first-level ringbuffer * CTX_RESTORE_BUF - only executed if there has been a GPU context * switch since the last SUBMIT ioctl * PROFILE_BUF - A profiling buffer written to by both GPU and CPU. */ #define MSM_SUBMIT_CMD_BUF 0x0001 #define MSM_SUBMIT_CMD_IB_TARGET_BUF 0x0002 #define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003 #define MSM_SUBMIT_CMD_PROFILE_BUF 0x0004 struct drm_msm_gem_submit_cmd { __u32 type; /* in, one of MSM_SUBMIT_CMD_x */ __u32 submit_idx; /* in, index of submit_bo cmdstream buffer */ Loading Loading @@ -207,6 +210,14 @@ struct drm_msm_gem_submit { __u64 __user cmds; /* in, ptr to array of submit_cmd's */ }; struct drm_msm_gem_submit_profile_buffer { __s64 queue_time; /* out, Ringbuffer queue time (seconds) */ __s64 submit_time; /* out, Ringbuffer submission time (seconds) */ __u64 ticks_queued; /* out, GPU ticks at ringbuffer submission */ __u64 ticks_submitted; /* out, GPU ticks before cmdstream execution*/ __u64 ticks_retired; /* out, GPU ticks after cmdstream execution */ }; /* The normal way to synchronize with the GPU is just to CPU_PREP on * a buffer if you need to access it from the CPU (other cmdstream * submission from same or other contexts, PAGE_FLIP ioctl, etc, all Loading Loading
drivers/gpu/drm/msm/adreno/a5xx_gpu.c +62 −0 Original line number Diff line number Diff line Loading @@ -133,10 +133,30 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_YIELD_ENABLE, 1); OUT_RING(ring, 0x02); /* Record the always on counter before command execution */ if (submit->profile_buf_iova) { uint64_t gpuaddr = submit->profile_buf_iova + offsetof(struct drm_msm_gem_submit_profile_buffer, ticks_submitted); /* * Set bit[30] to make this command a 64 bit write operation. * bits[18-29] is to specify number of consecutive registers * to copy, so set this space with 2, since we want to copy * data from REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and [HI]. */ OUT_PKT7(ring, CP_REG_TO_MEM, 3); OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO | (1 << 30) | (2 << 18)); OUT_RING(ring, lower_32_bits(gpuaddr)); OUT_RING(ring, upper_32_bits(gpuaddr)); } /* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: case MSM_SUBMIT_CMD_PROFILE_BUF: break; case MSM_SUBMIT_CMD_BUF: OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); Loading Loading @@ -164,6 +184,19 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_YIELD_ENABLE, 1); OUT_RING(ring, 0x01); /* Record the always on counter after command execution */ if (submit->profile_buf_iova) { uint64_t gpuaddr = submit->profile_buf_iova + offsetof(struct drm_msm_gem_submit_profile_buffer, ticks_retired); OUT_PKT7(ring, CP_REG_TO_MEM, 3); OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO | (1 << 30) | (2 << 18)); OUT_RING(ring, lower_32_bits(gpuaddr)); OUT_RING(ring, upper_32_bits(gpuaddr)); } /* Write the fence to the scratch register */ OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); OUT_RING(ring, submit->fence); Loading Loading @@ -193,6 +226,35 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) /* Set bit 0 to trigger an interrupt on preempt complete */ 
OUT_RING(ring, 0x01); /* Optional profiling: this branch only runs when a profile buffer address was recorded on the submit (profile_buf_iova is non-zero). */ if (submit->profile_buf_iova) { unsigned long flags; uint64_t ktime; struct drm_msm_gem_submit_profile_buffer *profile_buf = submit->profile_buf_vaddr; /* * Pair one reading of the GPU always-on counter with one reading of * the CPU clock (ktime_get_raw_ns) so that the GPU time domain (ticks) * can be mapped onto the CPU time domain as closely as possible. * Local interrupts are disabled around the two reads so that no * interrupt runs between collecting the two samples. */ local_irq_save(flags); profile_buf->ticks_queued = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO, REG_A5XX_RBBM_ALWAYSON_COUNTER_HI); ktime = ktime_get_raw_ns(); local_irq_restore(flags); /* NOTE(review): the nanosecond sample is passed through do_div() with NSEC_PER_SEC before being stored, and queue_time and submit_time then receive the same value - presumably whole seconds, matching the uapi field comments; confirm this resolution is what userspace expects. */ do_div(ktime, NSEC_PER_SEC); profile_buf->queue_time = ktime; profile_buf->submit_time = ktime; } a5xx_flush(gpu, ring); /* Check to see if we need to start preemption */ Loading
drivers/gpu/drm/msm/adreno/adreno_gpu.c +1 −0 Original line number Diff line number Diff line Loading @@ -183,6 +183,7 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) case MSM_SUBMIT_CMD_IB_TARGET_BUF: /* ignore IB-targets */ break; case MSM_SUBMIT_CMD_PROFILE_BUF: case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: break; case MSM_SUBMIT_CMD_BUF: Loading
drivers/gpu/drm/msm/msm_gem.h +2 −0 Original line number Diff line number Diff line Loading @@ -125,6 +125,8 @@ struct msm_gem_submit { uint32_t fence; int ring; bool valid; uint64_t profile_buf_iova; void *profile_buf_vaddr; unsigned int nr_cmds; unsigned int nr_bos; struct { Loading
drivers/gpu/drm/msm/msm_gem_submit.c +10 −0 Original line number Diff line number Diff line Loading @@ -48,6 +48,9 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->nr_bos = 0; submit->nr_cmds = 0; /* A new submit starts without a profile buffer; both fields are filled in later if a MSM_SUBMIT_CMD_PROFILE_BUF command is processed. */ submit->profile_buf_vaddr = NULL; submit->profile_buf_iova = 0; INIT_LIST_HEAD(&submit->bo_list); ww_acquire_init(&submit->ticket, &reservation_ww_class); } Loading Loading @@ -393,6 +396,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, case MSM_SUBMIT_CMD_BUF: case MSM_SUBMIT_CMD_IB_TARGET_BUF: case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: case MSM_SUBMIT_CMD_PROFILE_BUF: break; default: DRM_ERROR("invalid type: %08x\n", submit_cmd.type); Loading Loading @@ -425,6 +429,12 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->cmd[i].iova = iova + submit_cmd.submit_offset; submit->cmd[i].idx = submit_cmd.submit_idx; /* Remember where the profile buffer lives: the GPU address for command-stream writes and a CPU pointer for the CPU-side writes. NOTE(review): profile_buf_iova includes submit_cmd.submit_offset, but profile_buf_vaddr as written here does not add that offset, so CPU and GPU writes would land at different places for a non-zero offset - confirm whether the offset must be applied here too. NOTE(review): the result of msm_gem_vaddr_locked() is stored unchecked and is dereferenced in a5xx_submit whenever profile_buf_iova is non-zero - confirm it cannot be NULL or an error pointer. */ if (submit_cmd.type == MSM_SUBMIT_CMD_PROFILE_BUF) { submit->profile_buf_iova = submit->cmd[i].iova; submit->profile_buf_vaddr = msm_gem_vaddr_locked(&msm_obj->base); } if (submit->valid) continue; Loading
include/uapi/drm/msm_drm.h +11 −0 Original line number Diff line number Diff line Loading @@ -152,10 +152,13 @@ struct drm_msm_gem_submit_reloc { * this buffer in the first-level ringbuffer * CTX_RESTORE_BUF - only executed if there has been a GPU context * switch since the last SUBMIT ioctl * PROFILE_BUF - A profiling buffer written to by both GPU and CPU. */ #define MSM_SUBMIT_CMD_BUF 0x0001 #define MSM_SUBMIT_CMD_IB_TARGET_BUF 0x0002 #define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003 #define MSM_SUBMIT_CMD_PROFILE_BUF 0x0004 struct drm_msm_gem_submit_cmd { __u32 type; /* in, one of MSM_SUBMIT_CMD_x */ __u32 submit_idx; /* in, index of submit_bo cmdstream buffer */ Loading Loading @@ -207,6 +210,14 @@ struct drm_msm_gem_submit { __u64 __user cmds; /* in, ptr to array of submit_cmd's */ }; /* Layout of the buffer referenced by a MSM_SUBMIT_CMD_PROFILE_BUF command. In the a5xx submit path the CPU fills queue_time, submit_time and ticks_queued, while the GPU command stream copies the always-on counter into ticks_submitted and ticks_retired. */ struct drm_msm_gem_submit_profile_buffer { __s64 queue_time; /* out, Ringbuffer queue time (seconds) */ __s64 submit_time; /* out, Ringbuffer submission time (seconds) */ __u64 ticks_queued; /* out, GPU ticks at ringbuffer submission (always-on counter read by the CPU) */ __u64 ticks_submitted; /* out, GPU ticks before cmdstream execution */ __u64 ticks_retired; /* out, GPU ticks after cmdstream execution */ }; /* The normal way to synchronize with the GPU is just to CPU_PREP on * a buffer if you need to access it from the CPU (other cmdstream * submission from same or other contexts, PAGE_FLIP ioctl, etc, all Loading