Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 02a0d82c authored by Pankaj Gupta, committed by Gerrit - the friendly Code Review server
Browse files

msm: kgsl: Zap performance counters across context switches



Performance counter values need not be retained across contexts unless
specifically requested for debug. Zap the counters by initialising
perfcounter SRAM with 0's using GPU_RBBM_PERFCTR_SRAM_INIT_CMD.

Add pm4 packets during context switches and add a KMD postamble packet to
clear the counters during preemption. Do not enable perfcounter save and
restore unless requested.

Change-Id: I371779ce659c07a1cc664327f5ecdcf0374201d8
Signed-off-by: Mohammed Mirza Mandayappurath Manzoor <quic_mmandaya@quicinc.com>
Signed-off-by: Sebanti Das <quic_sebadas@quicinc.com>
Signed-off-by: Pankaj Gupta <quic_gpankaj@quicinc.com>
Signed-off-by: Harshitha Sai Neelati <quic_hsaineel@quicinc.com>
parent df48d875
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
/* Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -395,6 +396,8 @@
#define A6XX_RBBM_PERFCTR_RBBM_SEL_2             0x509
#define A6XX_RBBM_PERFCTR_RBBM_SEL_3             0x50A
#define A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED        0x50B
/*
 * Perfcounter SRAM init registers: 0x1 is written to INIT_CMD to zero the
 * performance counter SRAM, and INIT_STATUS is polled with CP_WAIT_REG_MEM
 * to wait for the clear to finish.
 */
#define A6XX_RBBM_PERFCTR_SRAM_INIT_CMD          0x50E
#define A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS       0x50F

#define A6XX_RBBM_ISDB_CNT                       0x533

+8 −0
Original line number Diff line number Diff line
@@ -35,6 +35,9 @@
#define DEVICE_3D_NAME "kgsl-3d"
#define DEVICE_3D0_NAME "kgsl-3d0"

/*
 * Index to preemption scratch buffer to store KMD postamble. The index is
 * counted in u64-sized slots: PREEMPT_SCRATCH_ADDR() scales it by
 * sizeof(u64) to form the GPU address of the postamble packets.
 */
#define KMD_POSTAMBLE_IDX 100

/* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */
#define ADRENO_DEVICE(device) \
		container_of(device, struct adreno_device, dev)
@@ -252,6 +255,9 @@ struct adreno_gpudev;
/* Time to allow preemption to complete (in ms) */
#define ADRENO_PREEMPT_TIMEOUT 10000

/*
 * PREEMPT_SCRATCH_ADDR - GPU address of a slot in the preemption scratch buffer
 * @dev: Pointer to a device struct with a preempt.scratch.gpuaddr member
 * @id: Slot index, counted in u64-sized entries
 *
 * Both arguments are parenthesized so that a compound expression passed as
 * @id (for example "base + 1") is scaled by sizeof(u64) as a whole rather
 * than only its last operand.
 */
#define PREEMPT_SCRATCH_ADDR(dev, id) \
	((dev)->preempt.scratch.gpuaddr + ((id) * sizeof(u64)))

#define ADRENO_INT_BIT(a, _bit) (((a)->gpucore->gpudev->int_bits) ? \
		(adreno_get_int(a, _bit) < 0 ? 0 : \
		BIT(adreno_get_int(a, _bit))) : 0)
@@ -288,6 +294,7 @@ enum adreno_preempt_states {
 * skipsaverestore: To skip saverestore during L1 preemption (for 6XX)
 * usesgmem: enable GMEM save/restore across preemption (for 6XX)
 * count: Track the number of preemptions triggered
 * @postamble_len: Number of dwords in KMD postamble pm4 packet
 */
struct adreno_preemption {
	atomic_t state;
@@ -299,6 +306,7 @@ struct adreno_preemption {
	bool skipsaverestore;
	bool usesgmem;
	unsigned int count;
	u32 postamble_len;
};


+45 −4
Original line number Diff line number Diff line
/* Copyright (c) 2017-2018,2020, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -544,13 +545,27 @@ unsigned int a6xx_preemption_pre_ibsubmit(
	if (context) {
		struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
		struct adreno_ringbuffer *rb = drawctxt->rb;
		uint64_t dest = adreno_dev->preempt.scratch.gpuaddr +
			sizeof(u64) * rb->id;
		uint64_t dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id);

		*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2);
		cmds += cp_gpuaddr(adreno_dev, cmds, dest);
		*cmds++ = lower_32_bits(gpuaddr);
		*cmds++ = upper_32_bits(gpuaddr);

		/*
		 * Add a KMD postamble to clear the perf counters during
		 * preemption. This is skipped when adreno_dev->perfcounter is
		 * set, i.e. when counter values were requested to be kept.
		 */
		if (!adreno_dev->perfcounter) {
			u64 kmd_postamble_addr =
			PREEMPT_SCRATCH_ADDR(adreno_dev, KMD_POSTAMBLE_IDX);

			*cmds++ = cp_type7_packet(CP_SET_AMBLE, 3);
			*cmds++ = lower_32_bits(kmd_postamble_addr);
			*cmds++ = upper_32_bits(kmd_postamble_addr);
			/*
			 * Amble type occupies bits [22:20] and the postamble
			 * length in dwords occupies bits [19:0]. Each field
			 * must be masked with AND: OR-ing with GENMASK() would
			 * set every bit of both fields, so the dword would be
			 * 0x7FFFFF whatever the type and length are.
			 */
			*cmds++ = ((CP_KMD_AMBLE_TYPE << 20) & GENMASK(22, 20))
			| (adreno_dev->preempt.postamble_len & GENMASK(19, 0));
		}
	}

	return (unsigned int) (cmds - cmds_orig);
@@ -563,8 +578,7 @@ unsigned int a6xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;

	if (rb) {
		uint64_t dest = adreno_dev->preempt.scratch.gpuaddr +
			sizeof(u64) * rb->id;
		uint64_t dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id);

		*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2);
		cmds += cp_gpuaddr(adreno_dev, cmds, dest);
@@ -769,6 +783,33 @@ int a6xx_preemption_init(struct adreno_device *adreno_dev)
		addr += A6XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE;
	}

	/*
	 * The first 8 dwords of the preemption scratch buffer are used to
	 * store the address for CP to save/restore VPC data. Reserve 11 dwords
	 * in the preemption scratch buffer from index KMD_POSTAMBLE_IDX for
	 * the KMD postamble pm4 packets.
	 */
	if (!adreno_dev->perfcounter) {
		u32 *postamble = preempt->scratch.hostptr +
					(KMD_POSTAMBLE_IDX * sizeof(u64));
		u32 count = 0;

		postamble[count++] = cp_type7_packet(CP_REG_RMW, 3);
		postamble[count++] = A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
		postamble[count++] = 0x0;
		postamble[count++] = 0x1;

		postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
		postamble[count++] = 0x3;
		postamble[count++] = A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
		postamble[count++] = 0x0;
		postamble[count++] = 0x1;
		postamble[count++] = 0x1;
		postamble[count++] = 0x0;

		preempt->postamble_len = count;
	}

	ret = a6xx_preemption_iommu_init(adreno_dev);

err:
+19 −0
Original line number Diff line number Diff line
/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -13,6 +14,7 @@
#include "adreno.h"
#include "kgsl_sharedmem.h"
#include "a3xx_reg.h"
#include "a6xx_reg.h"
#include "adreno_pm4types.h"

#define A5XX_PFP_PER_PROCESS_UCODE_VER 0x5FF064
@@ -586,6 +588,12 @@ static unsigned int _adreno_iommu_set_pt_v2_a6xx(struct kgsl_device *device,
	cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
	cmds += cp_wait_for_me(adreno_dev, cmds);

	/* Clear performance counters during context switches */
	if (!adreno_dev->perfcounter) {
		*cmds++ = cp_type4_packet(A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
		*cmds++ = 0x1;
	}

	/* CP switches the pagetable and flushes the Caches */
	*cmds++ = cp_packet(adreno_dev, CP_SMMU_TABLE_UPDATE, 4);
	*cmds++ = lower_32_bits(ttbr0);
@@ -605,6 +613,17 @@ static unsigned int _adreno_iommu_set_pt_v2_a6xx(struct kgsl_device *device,

	cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);

	/* Wait for performance counter clear to finish */
	if (!adreno_dev->perfcounter) {
		*cmds++ = cp_type7_packet(CP_WAIT_REG_MEM, 6);
		*cmds++ = 0x3;
		*cmds++ = A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
		*cmds++ = 0x0;
		*cmds++ = 0x1;
		*cmds++ = 0x1;
		*cmds++ = 0x0;
	}

	return cmds - cmds_orig;
}

+5 −2
Original line number Diff line number Diff line
/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -143,7 +144,8 @@ void adreno_perfcounter_restore(struct adreno_device *adreno_dev)
	struct adreno_perfcount_group *group;
	unsigned int counter, groupid;

	if (counters == NULL)
	/* Do not save/restore if not requested */
	if (counters == NULL || !adreno_dev->perfcounter)
		return;

	for (groupid = 0; groupid < counters->group_count; groupid++) {
@@ -177,7 +179,8 @@ inline void adreno_perfcounter_save(struct adreno_device *adreno_dev)
	unsigned int counter, groupid;
	int ret = 0;

	if (counters == NULL)
	/* Do not save/restore if not requested */
	if (counters == NULL || !adreno_dev->perfcounter)
		return;

	if (gpudev->oob_set)
Loading