Loading drivers/gpu/drm/msm/Makefile +1 −0 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ msm-y := \ msm_gem_submit.o \ msm_gpu.o \ msm_iommu.o \ msm_perf.o \ msm_rd.o \ msm_ringbuffer.o Loading drivers/gpu/drm/msm/adreno/a3xx_gpu.c +15 −5 Original line number Diff line number Diff line Loading @@ -207,11 +207,11 @@ static int a3xx_hw_init(struct msm_gpu *gpu) /* Turn on performance counters: */ gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01); /* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS * we will use this to augment our hang detection: */ gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT, SP_FS_FULL_ALU_INSTRUCTIONS); /* Enable the perfcntrs that we use.. */ for (i = 0; i < gpu->num_perfcntrs; i++) { const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i]; gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val); } gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK); Loading Loading @@ -465,6 +465,13 @@ static const struct adreno_gpu_funcs funcs = { }, }; static const struct msm_gpu_perfcntr perfcntrs[] = { { REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO, SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" }, { REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO, SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" }, }; struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) { struct a3xx_gpu *a3xx_gpu = NULL; Loading Loading @@ -504,6 +511,9 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u", gpu->fast_rate, gpu->slow_rate, gpu->bus_freq); gpu->perfcntrs = perfcntrs; gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs); ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev); if (ret) goto fail; Loading drivers/gpu/drm/msm/msm_drv.c +7 −0 Original line number Diff line number Diff line Loading @@ -548,6 +548,12 @@ static int late_init_minor(struct drm_minor *minor) return ret; } ret = msm_perf_debugfs_init(minor); if (ret) { dev_err(minor->dev->dev, "could not 
install perf debugfs\n"); return ret; } return 0; } Loading Loading @@ -588,6 +594,7 @@ static void msm_debugfs_cleanup(struct drm_minor *minor) if (!minor->dev->dev_private) return; msm_rd_debugfs_cleanup(minor); msm_perf_debugfs_cleanup(minor); } #endif Loading drivers/gpu/drm/msm/msm_drv.h +4 −0 Original line number Diff line number Diff line Loading @@ -56,6 +56,7 @@ struct msm_kms; struct msm_gpu; struct msm_mmu; struct msm_rd_state; struct msm_perf_state; struct msm_gem_submit; #define NUM_DOMAINS 2 /* one for KMS, then one per gpu core (?) */ Loading Loading @@ -85,6 +86,7 @@ struct msm_drm_private { wait_queue_head_t fence_event; struct msm_rd_state *rd; struct msm_perf_state *perf; /* list of GEM objects: */ struct list_head inactive_list; Loading Loading @@ -212,6 +214,8 @@ int msm_debugfs_late_init(struct drm_device *dev); int msm_rd_debugfs_init(struct drm_minor *minor); void msm_rd_debugfs_cleanup(struct drm_minor *minor); void msm_rd_dump_submit(struct msm_gem_submit *submit); int msm_perf_debugfs_init(struct drm_minor *minor); void msm_perf_debugfs_cleanup(struct drm_minor *minor); #else static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; } static inline void msm_rd_dump_submit(struct msm_gem_submit *submit) {} Loading drivers/gpu/drm/msm/msm_gpu.c +103 −0 Original line number Diff line number Diff line Loading @@ -319,6 +319,101 @@ static void hangcheck_handler(unsigned long data) queue_work(priv->wq, &gpu->retire_work); } /* * Performance Counters: */ /* called under perf_lock */ static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs) { uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)]; int i, n = min(ncntrs, gpu->num_perfcntrs); /* read current values: */ for (i = 0; i < gpu->num_perfcntrs; i++) current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg); /* update cntrs: */ for (i = 0; i < n; i++) cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i]; /* save current values: */ for (i = 0; i < 
gpu->num_perfcntrs; i++) gpu->last_cntrs[i] = current_cntrs[i]; return n; } static void update_sw_cntrs(struct msm_gpu *gpu) { ktime_t time; uint32_t elapsed; unsigned long flags; spin_lock_irqsave(&gpu->perf_lock, flags); if (!gpu->perfcntr_active) goto out; time = ktime_get(); elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time)); gpu->totaltime += elapsed; if (gpu->last_sample.active) gpu->activetime += elapsed; gpu->last_sample.active = msm_gpu_active(gpu); gpu->last_sample.time = time; out: spin_unlock_irqrestore(&gpu->perf_lock, flags); } void msm_gpu_perfcntr_start(struct msm_gpu *gpu) { unsigned long flags; spin_lock_irqsave(&gpu->perf_lock, flags); /* we could dynamically enable/disable perfcntr registers too.. */ gpu->last_sample.active = msm_gpu_active(gpu); gpu->last_sample.time = ktime_get(); gpu->activetime = gpu->totaltime = 0; gpu->perfcntr_active = true; update_hw_cntrs(gpu, 0, NULL); spin_unlock_irqrestore(&gpu->perf_lock, flags); } void msm_gpu_perfcntr_stop(struct msm_gpu *gpu) { gpu->perfcntr_active = false; } /* returns -errno or # of cntrs sampled */ int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs) { unsigned long flags; int ret; spin_lock_irqsave(&gpu->perf_lock, flags); if (!gpu->perfcntr_active) { ret = -EINVAL; goto out; } *activetime = gpu->activetime; *totaltime = gpu->totaltime; gpu->activetime = gpu->totaltime = 0; ret = update_hw_cntrs(gpu, ncntrs, cntrs); out: spin_unlock_irqrestore(&gpu->perf_lock, flags); return ret; } /* * Cmdstream submission/retirement: */ Loading Loading @@ -361,6 +456,7 @@ void msm_gpu_retire(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; queue_work(priv->wq, &gpu->retire_work); update_sw_cntrs(gpu); } /* add bo's to gpu's ring, and kick gpu: */ Loading @@ -381,6 +477,8 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, gpu->submitted_fence = submit->fence; 
update_sw_cntrs(gpu); ret = gpu->funcs->submit(gpu, submit, ctx); priv->lastctx = ctx; Loading Loading @@ -433,6 +531,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct iommu_domain *iommu; int i, ret; if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); gpu->dev = drm; gpu->funcs = funcs; gpu->name = name; Loading @@ -448,6 +549,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu); spin_lock_init(&gpu->perf_lock); BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks)); /* Map registers: */ Loading Loading
drivers/gpu/drm/msm/Makefile +1 −0 Original line number Diff line number Diff line Loading @@ -34,6 +34,7 @@ msm-y := \ msm_gem_submit.o \ msm_gpu.o \ msm_iommu.o \ msm_perf.o \ msm_rd.o \ msm_ringbuffer.o Loading
drivers/gpu/drm/msm/adreno/a3xx_gpu.c +15 −5 Original line number Diff line number Diff line Loading @@ -207,11 +207,11 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS
	 * we will use this to augment our hang detection:
	 */
	/* NOTE(review): this flattened diff view has lost its +/- markers; the
	 * direct PERFCOUNTER7 write below and the table-driven loop after it
	 * are presumably the removed and added sides of the same hunk rather
	 * than code that coexists -- confirm against the actual commit.
	 */
	gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT, SP_FS_FULL_ALU_INSTRUCTIONS);

	/* Enable the perfcntrs that we use.. */
	/* For each entry in gpu->perfcntrs, write its select_val into its
	 * select_reg.
	 */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
Loading Loading @@ -465,6 +465,13 @@ static const struct adreno_gpu_funcs funcs = {
	},
};

/*
 * Performance counters used on a3xx; installed into gpu->perfcntrs by
 * a3xx_gpu_init().  The initializers are positional; presumably the order is
 * { select_reg, sample_reg, select_val, name } -- confirm against the
 * declaration of struct msm_gpu_perfcntr, which is not visible here.
 */
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
Loading Loading @@ -504,6 +511,9 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u", gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);

	/* Publish the counter table before adreno_gpu_init() is called. */
	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev);
	if (ret)
		goto fail;
Loading
drivers/gpu/drm/msm/msm_drv.c +7 −0 Original line number Diff line number Diff line Loading @@ -548,6 +548,12 @@ static int late_init_minor(struct drm_minor *minor) return ret; } ret = msm_perf_debugfs_init(minor); if (ret) { dev_err(minor->dev->dev, "could not install perf debugfs\n"); return ret; } return 0; } Loading Loading @@ -588,6 +594,7 @@ static void msm_debugfs_cleanup(struct drm_minor *minor) if (!minor->dev->dev_private) return; msm_rd_debugfs_cleanup(minor); msm_perf_debugfs_cleanup(minor); } #endif Loading
drivers/gpu/drm/msm/msm_drv.h +4 −0 Original line number Diff line number Diff line Loading @@ -56,6 +56,7 @@ struct msm_kms; struct msm_gpu; struct msm_mmu; struct msm_rd_state; struct msm_perf_state; struct msm_gem_submit; #define NUM_DOMAINS 2 /* one for KMS, then one per gpu core (?) */ Loading Loading @@ -85,6 +86,7 @@ struct msm_drm_private { wait_queue_head_t fence_event; struct msm_rd_state *rd; struct msm_perf_state *perf; /* list of GEM objects: */ struct list_head inactive_list; Loading Loading @@ -212,6 +214,8 @@ int msm_debugfs_late_init(struct drm_device *dev); int msm_rd_debugfs_init(struct drm_minor *minor); void msm_rd_debugfs_cleanup(struct drm_minor *minor); void msm_rd_dump_submit(struct msm_gem_submit *submit); int msm_perf_debugfs_init(struct drm_minor *minor); void msm_perf_debugfs_cleanup(struct drm_minor *minor); #else static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; } static inline void msm_rd_dump_submit(struct msm_gem_submit *submit) {} Loading
drivers/gpu/drm/msm/msm_gpu.c +103 −0 Original line number Diff line number Diff line Loading @@ -319,6 +319,101 @@ static void hangcheck_handler(unsigned long data)
	queue_work(priv->wq, &gpu->retire_work);
}

/*
 * Performance Counters:
 */

/*
 * update_hw_cntrs() - sample the hardware counters and report deltas.
 * @gpu:    GPU whose perfcntrs[] table is read.
 * @ncntrs: capacity of @cntrs, in entries.
 * @cntrs:  output array; receives (current - previous) for the first
 *          min(@ncntrs, gpu->num_perfcntrs) counters.  Not dereferenced when
 *          that minimum is 0, so NULL is acceptable together with @ncntrs == 0.
 *
 * Every counter's sample_reg is read and stored in gpu->last_cntrs[] regardless
 * of @ncntrs, so the next call reports deltas relative to this one.
 *
 * Returns the number of entries written to @cntrs.
 */
/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}

/*
 * update_sw_cntrs() - accumulate elapsed time into the software counters.
 *
 * Takes perf_lock itself.  Does nothing unless perfcntr_active is set.
 * Otherwise the time since last_sample.time (converted with ktime_to_us() and
 * stored in a uint32_t) is added to totaltime, and also to activetime when the
 * previous sample recorded the GPU as active.  last_sample is then refreshed
 * from msm_gpu_active() and the current time.
 */
static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	/* the interval is attributed to the state seen at the previous sample */
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

/*
 * msm_gpu_perfcntr_start() - begin a sampling session.
 *
 * Under perf_lock: seeds last_sample with the current active state and time,
 * zeroes activetime/totaltime, sets perfcntr_active, and calls
 * update_hw_cntrs() with no output buffer so last_cntrs[] holds a baseline
 * for the first real sample.
 */
void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too..
	 */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

/*
 * msm_gpu_perfcntr_stop() - end a sampling session by clearing
 * perfcntr_active.
 *
 * NOTE(review): unlike start/sample/update_sw_cntrs, this write is not made
 * under perf_lock -- confirm that is intentional.
 */
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
}

/*
 * msm_gpu_perfcntr_sample() - fetch and reset the accumulated counters.
 * @gpu:        GPU being sampled.
 * @activetime: out; accumulated activetime since the last sample/start.
 * @totaltime:  out; accumulated totaltime since the last sample/start.
 * @ncntrs:     capacity of @cntrs, in entries.
 * @cntrs:      out; hardware counter deltas, filled by update_hw_cntrs().
 *
 * Returns -EINVAL when sampling is not active, in which case none of the
 * output parameters are written.
 *
 * NOTE(review): update_sw_cntrs() is not called here, so the returned times
 * only cover intervals up to the most recent update_sw_cntrs() call -- confirm
 * this is the intended behaviour.
 */
/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	/* reading the accumulators also resets them */
	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}

/*
 * Cmdstream submission/retirement:
 */
Loading Loading @@ -361,6 +456,7 @@ void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	/* refresh the software counters on every retire */
	update_sw_cntrs(gpu);
}

/* add bo's to gpu's ring, and kick gpu: */
Loading @@ -381,6 +477,8 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
	gpu->submitted_fence = submit->fence;

	/* refresh the software counters before handing the submit to the backend */
	update_sw_cntrs(gpu);

	ret = gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;
Loading Loading @@ -433,6 +531,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
	struct iommu_domain *iommu;
	int i, ret;

	/* Clamp the counter count to the size of last_cntrs[]: update_hw_cntrs()
	 * indexes last_cntrs[] and an equally sized stack array by
	 * num_perfcntrs.
	 */
	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;
Loading @@ -448,6 +549,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
	setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu);

	spin_lock_init(&gpu->perf_lock);

	BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));

	/* Map registers: */
Loading