Merge "drm/msm: Enable per cmdstream profiling for the user" (94bc5fd5) · Commits · e / devices / android_kernel_sony_msm8998

drivers/gpu/drm/msm/adreno/a5xx_gpu.c

+62 −0

Original line number	Diff line number	Diff line
		@@ -133,10 +133,30 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
		OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
		OUT_RING(ring, 0x02);

		/* Record the always on counter before command execution */
		if (submit->profile_buf_iova) {
		uint64_t gpuaddr = submit->profile_buf_iova +
		offsetof(struct drm_msm_gem_submit_profile_buffer,
		ticks_submitted);

		/*
		* Set bit[30] to make this command a 64 bit write operation.
		* bits[18-29] is to specify number of consecutive registers
		* to copy, so set this space with 2, since we want to copy
		* data from REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and [HI].
		*/
		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
		(1 << 30) | (2 << 18));
		OUT_RING(ring, lower_32_bits(gpuaddr));
		OUT_RING(ring, upper_32_bits(gpuaddr));
		}

		/* Submit the commands */
		for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
		case MSM_SUBMIT_CMD_PROFILE_BUF:
		break;
		case MSM_SUBMIT_CMD_BUF:
		OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
		@@ -164,6 +184,19 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
		OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
		OUT_RING(ring, 0x01);

		/* Record the always on counter after command execution */
		if (submit->profile_buf_iova) {
		uint64_t gpuaddr = submit->profile_buf_iova +
		offsetof(struct drm_msm_gem_submit_profile_buffer,
		ticks_retired);

		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
		(1 << 30) | (2 << 18));
		OUT_RING(ring, lower_32_bits(gpuaddr));
		OUT_RING(ring, upper_32_bits(gpuaddr));
		}

		/* Write the fence to the scratch register */
		OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
		OUT_RING(ring, submit->fence);
		@@ -193,6 +226,35 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
		/* Set bit 0 to trigger an interrupt on preempt complete */
		OUT_RING(ring, 0x01);

		if (submit->profile_buf_iova) {
		unsigned long flags;
		uint64_t ktime;
		struct drm_msm_gem_submit_profile_buffer *profile_buf =
		submit->profile_buf_vaddr;

		/*
		* With this profiling, we are trying to create closest
		* possible mapping between the CPU time domain(monotonic clock)
		* and the GPU time domain(ticks). In order to make this
		* happen, we need to briefly turn off interrupts to make sure
		* interrupts do not run between collecting these two samples.
		*/
		local_irq_save(flags);

		profile_buf->ticks_queued = gpu_read64(gpu,
		REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);

		ktime = ktime_get_raw_ns();

		local_irq_restore(flags);

		do_div(ktime, NSEC_PER_SEC);

		profile_buf->queue_time = ktime;
		profile_buf->submit_time = ktime;
		}

		a5xx_flush(gpu, ring);

		/* Check to see if we need to start preemption */

drivers/gpu/drm/msm/adreno/adreno_gpu.c

+1 −0

Original line number	Diff line number	Diff line
		@@ -183,6 +183,7 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
		/* ignore IB-targets */
		break;
		case MSM_SUBMIT_CMD_PROFILE_BUF:
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
		break;
		case MSM_SUBMIT_CMD_BUF:

drivers/gpu/drm/msm/msm_gem.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -125,6 +125,8 @@ struct msm_gem_submit {
		uint32_t fence;
		int ring;
		bool valid;
		uint64_t profile_buf_iova;
		void *profile_buf_vaddr;
		unsigned int nr_cmds;
		unsigned int nr_bos;
		struct {

drivers/gpu/drm/msm/msm_gem_submit.c

+10 −0

Original line number	Diff line number	Diff line
		@@ -48,6 +48,9 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
		submit->nr_bos = 0;
		submit->nr_cmds = 0;

		submit->profile_buf_vaddr = NULL;
		submit->profile_buf_iova = 0;

		INIT_LIST_HEAD(&submit->bo_list);
		ww_acquire_init(&submit->ticket, &reservation_ww_class);
		}
		@@ -393,6 +396,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
		case MSM_SUBMIT_CMD_BUF:
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
		case MSM_SUBMIT_CMD_PROFILE_BUF:
		break;
		default:
		DRM_ERROR("invalid type: %08x\n", submit_cmd.type);
		@@ -425,6 +429,12 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
		submit->cmd[i].iova = iova + submit_cmd.submit_offset;
		submit->cmd[i].idx = submit_cmd.submit_idx;

		if (submit_cmd.type == MSM_SUBMIT_CMD_PROFILE_BUF) {
		submit->profile_buf_iova = submit->cmd[i].iova;
		submit->profile_buf_vaddr =
		msm_gem_vaddr_locked(&msm_obj->base);
		}

		if (submit->valid)
		continue;

include/uapi/drm/msm_drm.h

+11 −0

Original line number	Diff line number	Diff line
		@@ -152,10 +152,13 @@ struct drm_msm_gem_submit_reloc {
		* this buffer in the first-level ringbuffer
		* CTX_RESTORE_BUF - only executed if there has been a GPU context
		* switch since the last SUBMIT ioctl
		* PROFILE_BUF - A profiling buffer written to by both GPU and CPU.
		*/
		#define MSM_SUBMIT_CMD_BUF 0x0001
		#define MSM_SUBMIT_CMD_IB_TARGET_BUF 0x0002
		#define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003
		#define MSM_SUBMIT_CMD_PROFILE_BUF 0x0004

		struct drm_msm_gem_submit_cmd {
		__u32 type; /* in, one of MSM_SUBMIT_CMD_x */
		__u32 submit_idx; /* in, index of submit_bo cmdstream buffer */
		@@ -207,6 +210,14 @@ struct drm_msm_gem_submit {
		__u64 __user cmds; /* in, ptr to array of submit_cmd's */
		};

		/*
		 * Layout of the buffer supplied as a MSM_SUBMIT_CMD_PROFILE_BUF command.
		 * Every field is an output. The two time fields and ticks_queued are
		 * filled in by the CPU when the submission is queued; ticks_submitted
		 * and ticks_retired are written by the GPU from its always-on counter.
		 * NOTE(review): described from the a5xx submit path - confirm that
		 * other targets fill the buffer the same way.
		 */
		struct drm_msm_gem_submit_profile_buffer {
		__s64 queue_time; /* out, Ringbuffer queue time (raw monotonic clock, whole seconds) */
		__s64 submit_time; /* out, Ringbuffer submission time (seconds); a5xx stores the same sample as queue_time */
		__u64 ticks_queued; /* out, GPU ticks at ringbuffer submission, sampled by the CPU together with queue_time */
		__u64 ticks_submitted; /* out, GPU ticks before cmdstream execution */
		__u64 ticks_retired; /* out, GPU ticks after cmdstream execution */
		};

		/* The normal way to synchronize with the GPU is just to CPU_PREP on
		* a buffer if you need to access it from the CPU (other cmdstream
		* submission from same or other contexts, PAGE_FLIP ioctl, etc, all