Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit de4efb79 authored by Jordan Crouse's avatar Jordan Crouse
Browse files

msm: kgsl: Enable perfcounter save/restore for A4XX



3XX targets have the ability to save the contents of most performance
counters on power down and to restore the values on power up so as
lessen the impact of sleep and nap on profiilng applications. Bring
this support to 4XX as well and support the larger field
of counters available on 4XX targets.

Change-Id: Ic0dedbad00adad16cadb75ed5d3a49761e045107
Signed-off-by: default avatarJordan Crouse <jcrouse@codeaurora.org>
parent 30a2b230
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -204,6 +204,12 @@ enum a4xx_rb_perfctr_rb_sel {
#define A4XX_RBBM_PERFCTR_CP_0_HI		0x9d
#define A4XX_RBBM_PERFCTR_CP_1_LO		0x9e
#define A4XX_RBBM_PERFCTR_CP_1_HI		0x9f
#define A4XX_RBBM_PERFCTR_CP_3_LO		0xa2
#define A4XX_RBBM_PERFCTR_CP_3_HI		0xa3
#define A4XX_RBBM_PERFCTR_CP_5_LO		0xa6
#define A4XX_RBBM_PERFCTR_CP_5_HI		0xa7
#define A4XX_RBBM_PERFCTR_CP_7_LO		0xaa
#define A4XX_RBBM_PERFCTR_CP_7_HI		0xab
#define A4XX_RBBM_PERFCTR_RBBM_0_LO		0xac
#define A4XX_RBBM_PERFCTR_RBBM_0_HI		0xad
#define A4XX_RBBM_PERFCTR_RBBM_1_LO		0xae
@@ -376,6 +382,8 @@ enum a4xx_rb_perfctr_rb_sel {
#define A4XX_RBBM_PERFCTR_LOAD_CMD0		0x171
#define A4XX_RBBM_PERFCTR_LOAD_CMD1		0x172
#define A4XX_RBBM_PERFCTR_LOAD_CMD2		0x173
#define A4XX_RBBM_PERFCTR_LOAD_VALUE_LO		0x174
#define A4XX_RBBM_PERFCTR_LOAD_VALUE_HI		0x175
#define A4XX_RBBM_PERFCTR_RBBM_SEL_0		0x176
#define A4XX_RBBM_PERFCTR_RBBM_SEL_1		0x177
#define A4XX_RBBM_PERFCTR_RBBM_SEL_2		0x178
@@ -454,6 +462,10 @@ enum a4xx_rb_perfctr_rb_sel {
#define A4XX_CP_PROTECT_STATUS		0x4da
#define A4XX_CP_PERFCTR_CP_SEL_0	0x500
#define A4XX_CP_PERFCTR_CP_SEL_1	0x501
#define A4XX_CP_PERFCTR_CP_SEL_2	0x502
#define A4XX_CP_PERFCTR_CP_SEL_3	0x503
#define A4XX_CP_PERFCTR_CP_SEL_4	0x504

#define A4XX_CP_SCRATCH_REG0		0x578

/* SP registers */
+2 −0
Original line number Diff line number Diff line
@@ -364,6 +364,8 @@ enum adreno_regs {
	ADRENO_REG_TP0_CHICKEN,
	ADRENO_REG_RBBM_RBBM_CTL,
	ADRENO_REG_UCHE_INVALIDATE0,
	ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
	ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
	ADRENO_REG_REGISTER_MAX,
};

+28 −13
Original line number Diff line number Diff line
@@ -1207,7 +1207,7 @@ static inline int active_countable(unsigned int countable)
 * Read all the physical performance counter's values and save them
 * before GPU power collapse.
 */
static void a3xx_perfcounter_save(struct adreno_device *adreno_dev)
void a3xx_perfcounter_save(struct adreno_device *adreno_dev)
{
	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
	struct adreno_perfcount_group *group;
@@ -1245,20 +1245,23 @@ static void a3xx_perfcounter_save(struct adreno_device *adreno_dev)
static void a3xx_perfcounter_write(struct adreno_device *adreno_dev,
				unsigned int group, unsigned int counter)
{
	struct kgsl_device *device = &(adreno_dev->dev);
	struct adreno_perfcount_register *reg;
	unsigned int val;

	reg = &(adreno_dev->gpudev->perfcounters->groups[group].regs[counter]);

	/* Clear the load cmd registers */
	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, 0);
	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, 0);
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, 0);
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, 0);
	if (adreno_is_a4xx(adreno_dev))
		adreno_writereg(adreno_dev,
			ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, 0);


	/* Write the saved value to PERFCTR_LOAD_VALUE* registers. */
	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_LO,
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
			(uint32_t)reg->value);
	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_HI,
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
			(uint32_t)(reg->value >> 32));

	/*
@@ -1268,10 +1271,16 @@ static void a3xx_perfcounter_write(struct adreno_device *adreno_dev,
	 */
	if (reg->load_bit < 32)	{
		val = 1 << reg->load_bit;
		kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, val);
	} else {
		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0,
			val);
	} else if (reg->load_bit < 64) {
		val  = 1 << (reg->load_bit - 32);
		kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, val);
		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1,
			val);
	} else if (reg->load_bit >= 64 && adreno_is_a4xx(adreno_dev)) {
		val = 1 << (reg->load_bit - 64);
		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2,
			val);
	}
}

@@ -1282,9 +1291,8 @@ static void a3xx_perfcounter_write(struct adreno_device *adreno_dev,
 * This function together with a3xx_perfcounter_save make sure that performance
 * counters are coherent across GPU power collapse.
 */
static void a3xx_perfcounter_restore(struct adreno_device *adreno_dev)
void a3xx_perfcounter_restore(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;
	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
	struct adreno_perfcount_group *group;
	unsigned int regid, groupid;
@@ -1305,8 +1313,11 @@ static void a3xx_perfcounter_restore(struct adreno_device *adreno_dev)
	}

	/* Clear the load cmd registers */
	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, 0);
	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, 0);
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, 0);
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, 0);
	if (adreno_is_a4xx(adreno_dev))
		adreno_writereg(adreno_dev,
			ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, 0);

}

@@ -2267,6 +2278,10 @@ static unsigned int a3xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A3XX_RBBM_SW_RESET_CMD),
	ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0,
			A3XX_UCHE_CACHE_INVALIDATE0_REG),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
				A3XX_RBBM_PERFCTR_LOAD_VALUE_LO),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
				A3XX_RBBM_PERFCTR_LOAD_VALUE_HI),
};

const struct adreno_reg_offsets a3xx_reg_offsets = {
+4 −1
Original line number Diff line number Diff line
/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -29,6 +29,9 @@ uint64_t a3xx_perfcounter_read(struct adreno_device *adreno_dev,
	unsigned int group, unsigned int counter);
void a3xx_perfcounter_disable(struct adreno_device *adreno_dev,
	unsigned int group, unsigned int counter);
void a3xx_perfcounter_save(struct adreno_device *adreno_dev);
void a3xx_perfcounter_restore(struct adreno_device *adreno_dev);

void a3xx_soft_reset(struct adreno_device *adreno_dev);
void a3xx_irq_setup(struct adreno_device *adreno_dev);
void a3xx_a4xx_err_callback(struct adreno_device *adreno_dev, int bit);
+17 −0
Original line number Diff line number Diff line
@@ -653,6 +653,10 @@ static unsigned int a4xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_RBBM_CTL, A4XX_RBBM_RBBM_CTL),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A4XX_RBBM_SW_RESET_CMD),
	ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0, A4XX_UCHE_INVALIDATE0),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
				A4XX_RBBM_PERFCTR_LOAD_VALUE_LO),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
				A4XX_RBBM_PERFCTR_LOAD_VALUE_HI),
};

const struct adreno_reg_offsets a4xx_reg_offsets = {
@@ -665,6 +669,17 @@ static struct adreno_perfcount_register a4xx_perfcounters_cp[] = {
		A4XX_RBBM_PERFCTR_CP_0_HI, 0, A4XX_CP_PERFCTR_CP_SEL_0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_1_LO,
		A4XX_RBBM_PERFCTR_CP_1_HI, 1, A4XX_CP_PERFCTR_CP_SEL_1 },
	/*
	 * The selector registers for 3, 5, and 7 are swizzled on the hardware.
	 * CP_4 and CP_6 are duped to SEL_2 and SEL_3 so we don't enable them
	 * here
	 */
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_3_LO,
		A4XX_RBBM_PERFCTR_CP_3_HI, 3, A4XX_CP_PERFCTR_CP_SEL_2 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_5_LO,
		A4XX_RBBM_PERFCTR_CP_5_HI, 5, A4XX_CP_PERFCTR_CP_SEL_3 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_7_LO,
		A4XX_RBBM_PERFCTR_CP_7_HI, 7, A4XX_CP_PERFCTR_CP_SEL_4 },
};

static struct adreno_perfcount_register a4xx_perfcounters_rbbm[] = {
@@ -1218,6 +1233,8 @@ struct adreno_gpudev adreno_a4xx_gpudev = {
	.start = a4xx_start,
	.perfcounter_enable = a3xx_perfcounter_enable,
	.perfcounter_read = a3xx_perfcounter_read,
	.perfcounter_save = a3xx_perfcounter_save,
	.perfcounter_restore = a3xx_perfcounter_restore,
	.fault_detect_start = a3xx_fault_detect_start,
	.fault_detect_stop = a3xx_fault_detect_stop,
	.invalid_countables = a4xx_perfctr_invalid_countables,