Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f4dcba2b authored by Shrenuj Bansal
Browse files

msm: kgsl: Support command batch profiling



This feature allows us to profile how long a given cmdbatch takes to
complete, using the RBBM_ALWAYSON_COUNTER. We provide userspace with
the timer values sampled at three points: when the cmdbatch is
submitted to the ringbuffer, when the GPU begins executing it, and
when the GPU finishes executing it.

Change-Id: Iacdb6f991084a32a7bf6139eae39067bc1f9b765
Signed-off-by: Shrenuj Bansal <shrenujb@codeaurora.org>
parent 5ba909fc
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -445,6 +445,7 @@ enum adreno_regs {
	ADRENO_REG_UCHE_INVALIDATE0,
	ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
	ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
	ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
	ADRENO_REG_REGISTER_MAX,
};

+2 −0
Original line number Diff line number Diff line
@@ -844,6 +844,8 @@ static unsigned int a4xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
				A4XX_RBBM_PERFCTR_LOAD_VALUE_LO),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
				A4XX_RBBM_PERFCTR_LOAD_VALUE_HI),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
				A4XX_RBBM_ALWAYSON_COUNTER_LO),
};

const struct adreno_reg_offsets a4xx_reg_offsets = {
+57 −3
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include "adreno_ringbuffer.h"

#include "a3xx_reg.h"
#include "adreno_a4xx.h"

#define ADRENO_NUM_RINGBUFFERS 1
#define GSL_RB_NOP_SIZEDWORDS				2
@@ -1202,15 +1203,22 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
	struct kgsl_device *device = &adreno_dev->dev;
	struct kgsl_memobj_node *ib;
	unsigned int numibs = 0;
	unsigned int secured_ctxt = 0;
	unsigned int *link;
	unsigned int *cmds;
	struct kgsl_context *context;
	struct adreno_context *drawctxt;
	bool use_preamble = true;
	bool secured_ctxt = false;
	bool cmdbatch_profiling = false;
	int flags = KGSL_CMD_FLAGS_NONE;
	int ret;
	struct adreno_ringbuffer *rb;
	struct kgsl_cmdbatch_profiling_buffer *profile_buffer = NULL;

	struct kgsl_mem_entry *entry = cmdbatch->profiling_buf_entry;
	if (entry)
		profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc,
					cmdbatch->profiling_buffer_gpuaddr);

	context = cmdbatch->context;
	drawctxt = ADRENO_CONTEXT(context);
@@ -1277,11 +1285,15 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
	 * 2 - end of IB identifier
	 */
	if (context->flags & KGSL_CONTEXT_SECURE)
		secured_ctxt = 1;
		secured_ctxt = true;

	if (cmdbatch->flags & KGSL_CMDBATCH_PROFILING &&
		adreno_is_a4xx(adreno_dev) && profile_buffer)
		cmdbatch_profiling = true;

	cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 5 +
					(secured_ctxt ? 14 : 0)),
					(secured_ctxt ? 14 : 0) +
					(cmdbatch_profiling ? 6 : 0)),
				GFP_KERNEL);
	if (!link) {
		ret = -ENOMEM;
@@ -1301,6 +1313,20 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
		*cmds++ = 1;
	}

	/*
	 * Add cmds to read the GPU ticks at the start of the cmdbatch and
	 * write it into the appropriate cmdbatch profiling buffer offset
	 */
	if (cmdbatch_profiling) {
		*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmds++ = adreno_getreg(adreno_dev,
				ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO) |
				(1 << 30) | (2 << 18);
		*cmds++ = cmdbatch->profiling_buffer_gpuaddr +
				offsetof(struct kgsl_cmdbatch_profiling_buffer,
				gpu_ticks_submitted);
	}

	if (numibs) {
		list_for_each_entry(ib, &cmdbatch->cmdlist, node) {
			/* use the preamble? */
@@ -1322,6 +1348,20 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
		}
	}

	/*
	 * Add cmds to read the GPU ticks at the end of the cmdbatch and
	 * write it into the appropriate cmdbatch profiling buffer offset
	 */
	if (cmdbatch_profiling) {
		*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmds++ = adreno_getreg(adreno_dev,
				ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO) |
				(1 << 30) | (2 << 18);
		*cmds++ = cmdbatch->profiling_buffer_gpuaddr +
				offsetof(struct kgsl_cmdbatch_profiling_buffer,
				gpu_ticks_retired);
	}

	if (secured_ctxt) {
		*cmds++ = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
		*cmds++ = 0;
@@ -1369,10 +1409,24 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
	/* CFF stuff executed only if CFF is enabled */
	kgsl_cffdump_capture_ib_desc(device, context, cmdbatch);

	/* Put the wall clock and gpu timer values in the profiling buffer */
	if (cmdbatch_profiling) {
		struct timespec ts;
		do_posix_clock_monotonic_gettime(&ts);
		profile_buffer->wall_clock_s = ts.tv_sec;
		profile_buffer->wall_clock_ns = ts.tv_nsec;
		profile_buffer->gpu_ticks_queued =
			a4xx_alwayson_counter_read(adreno_dev);
	}

	ret = adreno_ringbuffer_addcmds(rb, drawctxt, flags,
					&link[0], (cmds - link),
					cmdbatch->timestamp);

	/* Corresponding unmap to the memdesc map of profile_buffer */
	if (entry)
		kgsl_memdesc_unmap(&entry->memdesc);

	kgsl_cffdump_regpoll(device,
		adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS) << 2,
		0x00000000, 0x80000000);
+35 −2
Original line number Diff line number Diff line
@@ -1671,6 +1671,13 @@ void kgsl_cmdbatch_destroy(struct kgsl_cmdbatch *cmdbatch)
		kgsl_cmdbatch_sync_event_put(event);
	}

	/*
	 * Release the refcount on the mem entry associated with the
	 * cmdbatch profiling buffer
	 */
	if (cmdbatch->flags & KGSL_CMDBATCH_PROFILING)
		kgsl_mem_entry_put(cmdbatch->profiling_buf_entry);

	/* Destroy the cmdlist we created */
	_free_memobj_list(&cmdbatch->cmdlist);

@@ -1946,12 +1953,34 @@ int kgsl_cmdbatch_add_memobj(struct kgsl_cmdbatch *cmdbatch,
	mem->priv = 0;

	/* sanitize the ibdesc ctrl flags */
	ibdesc->ctrl &= KGSL_IBDESC_MEMLIST;
	ibdesc->ctrl &= KGSL_IBDESC_MEMLIST | KGSL_IBDESC_PROFILING_BUFFER;

	if (cmdbatch->flags & KGSL_CMDBATCH_MEMLIST &&
			ibdesc->ctrl & KGSL_IBDESC_MEMLIST) {
		/* add to the memlist */
		list_add_tail(&mem->node, &cmdbatch->memlist);

		/*
		 * If the memlist contains a cmdbatch profiling buffer, store
		 * the mem_entry containing the buffer and the gpuaddr at
		 * which the buffer can be found
		 */
		if (cmdbatch->flags & KGSL_CMDBATCH_PROFILING &&
			ibdesc->ctrl & KGSL_IBDESC_PROFILING_BUFFER &&
			!cmdbatch->profiling_buf_entry) {
			cmdbatch->profiling_buf_entry =
				kgsl_sharedmem_find_region(
				cmdbatch->context->proc_priv, mem->gpuaddr,
				mem->sizedwords << 2);
			if (!cmdbatch->profiling_buf_entry) {
				WARN_ONCE(1,
				"No mem entry for profiling buf, gpuaddr=%lx\n",
				mem->gpuaddr);
				return 0;
			}

			cmdbatch->profiling_buffer_gpuaddr = mem->gpuaddr;
		}
	} else {
		/* set the preamble flag if directed to */
		if (cmdbatch->context->flags & KGSL_CONTEXT_PREAMBLE &&
@@ -2003,7 +2032,8 @@ static struct kgsl_cmdbatch *kgsl_cmdbatch_create(struct kgsl_device *device,
				| KGSL_CMDBATCH_END_OF_FRAME
				| KGSL_CMDBATCH_SYNC
				| KGSL_CMDBATCH_PWR_CONSTRAINT
				| KGSL_CMDBATCH_MEMLIST);
				| KGSL_CMDBATCH_MEMLIST
				| KGSL_CMDBATCH_PROFILING);

	/* Add a timer to help debug sync deadlocks */
	setup_timer(&cmdbatch->timer, _kgsl_cmdbatch_timer,
@@ -2136,6 +2166,9 @@ static struct kgsl_cmdbatch *_kgsl_cmdbatch_create(struct kgsl_device *device,

			uptr += sizeof(ibdesc);
		}

		if (cmdbatch->profiling_buf_entry == NULL)
			cmdbatch->flags &= ~KGSL_CMDBATCH_PROFILING;
	}

	if (synclist && numsyncs) {
+6 −1
Original line number Diff line number Diff line
@@ -235,8 +235,11 @@ struct kgsl_memobj_node {
 * @timer: a timer used to track possible sync timeouts for this cmdbatch
 * @marker_timestamp: For markers, the timestamp of the last "real" command that
 * was queued
 * @profiling_buf_entry: Mem entry containing the profiling buffer
 * @profiling_buffer_gpuaddr: GPU virt address of the profile buffer added here
 * for easy access
 *
 * This struture defines an atomic batch of command buffers issued from
 * This structure defines an atomic batch of command buffers issued from
 * userspace.
 */
struct kgsl_cmdbatch {
@@ -255,6 +258,8 @@ struct kgsl_cmdbatch {
	struct list_head synclist;
	struct timer_list timer;
	unsigned int marker_timestamp;
	struct kgsl_mem_entry *profiling_buf_entry;
	unsigned long profiling_buffer_gpuaddr;
};

/**
Loading