Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 061ddc3b authored by Jordan Crouse
Browse files

msm: kgsl: Remove the emulated PWR counter group



a3xx had a dedicated block of counters that counted (among other things)
the GPU busy ticks for frequency scaling. That block went away in a5xx
but we kept the counter group around anyway and emulated it so that
the rest of the code didn't need to change.

Unfortunately, this won't fly for a6xx since the register we want to
use is up high in the GMU CX region and making it available through
a counter group means that we need to make the register accessible
from the command stream and that breaks register address protection
on some a6xx targets.

It turns out that nobody outside of the kernel really uses the PWR group
anyway, so we can safely remove it and just directly set up the counters
internally and use them.  Remove the power group for a5xx and a6xx and
for a3xx remove PWR:1 from the available pool of counters.  For each
target make the appropriate counter fixed function and always enabled.

Change-Id: Ic0dedbad4a134340f2b4f8db428ba682ee3d4788
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
parent 013a2a0b
Loading
Loading
Loading
Loading
+17 −33
Original line number Diff line number Diff line
@@ -1559,7 +1559,6 @@ static int adreno_remove(struct platform_device *pdev)
	if (efuse_base != NULL)
		iounmap(efuse_base);

	adreno_perfcounter_close(adreno_dev);
	kgsl_device_platform_remove(device);

	gmu_core_remove(device);
@@ -1935,17 +1934,6 @@ static int _adreno_start(struct adreno_device *adreno_dev)
			adreno_support_64bit(adreno_dev))
		gpudev->enable_64bit(adreno_dev);

	if (adreno_dev->perfctr_pwr_lo == 0) {
		ret = adreno_perfcounter_get(adreno_dev,
			KGSL_PERFCOUNTER_GROUP_PWR, 1,
			&adreno_dev->perfctr_pwr_lo, NULL,
			PERFCOUNTER_FLAG_KERNEL);

		if (WARN_ONCE(ret, "Unable to get perfcounters for DCVS\n"))
			adreno_dev->perfctr_pwr_lo = 0;
	}


	if (device->pwrctrl.bus_control) {
		/* VBIF waiting for RAM */
		if (adreno_dev->starved_ram_lo == 0) {
@@ -3615,13 +3603,10 @@ static void adreno_power_stats(struct kgsl_device *device,
	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
	struct adreno_busy_data *busy = &adreno_dev->busy_data;
	int64_t adj = 0;
	u64 gpu_busy;

	memset(stats, 0, sizeof(*stats));

	/* Get the busy cycles counted since the counter was last reset */
	if (adreno_dev->perfctr_pwr_lo != 0) {
		uint64_t gpu_busy;

	gpu_busy = counter_delta(device, adreno_dev->perfctr_pwr_lo,
		&busy->gpu_busy);

@@ -3642,7 +3627,6 @@ static void adreno_power_stats(struct kgsl_device *device,
		stats->busy_time = adreno_ticks_to_us(gpu_busy,
			kgsl_pwrctrl_active_freq(pwr));
	}
	}

	if (device->pwrctrl.bus_control) {
		uint64_t ram_cycles = 0, starved_ram = 0;
+0 −3
Original line number Diff line number Diff line
@@ -891,7 +891,6 @@ struct adreno_gpudev {
	int (*rb_start)(struct adreno_device *adreno_dev);
	int (*microcode_read)(struct adreno_device *adreno_dev);
	void (*perfcounter_init)(struct adreno_device *adreno_dev);
	void (*perfcounter_close)(struct adreno_device *adreno_dev);
	void (*start)(struct adreno_device *adreno_dev);
	bool (*is_sptp_idle)(struct adreno_device *adreno_dev);
	int (*regulator_enable)(struct adreno_device *adreno_dev);
@@ -902,8 +901,6 @@ struct adreno_gpudev {
	int64_t (*read_throttling_counters)(struct adreno_device *adreno_dev);
	void (*count_throttles)(struct adreno_device *adreno_dev,
					uint64_t adj);
	int (*enable_pwr_counters)(struct adreno_device *adrneo_dev,
				unsigned int counter);
	unsigned int (*preemption_pre_ibsubmit)(
				struct adreno_device *adreno_dev,
				struct adreno_ringbuffer *rb,
+7 −16
Original line number Diff line number Diff line
@@ -611,6 +611,9 @@ static void a3xx_platform_setup(struct adreno_device *adreno_dev)

	gpudev->vbif_xin_halt_ctrl0_mask = A30X_VBIF_XIN_HALT_CTRL0_MASK;

	/* Set the GPU busy counter for frequency scaling */
	adreno_dev->perfctr_pwr_lo = A3XX_RBBM_PERFCTR_PWR_1_LO;

	/* Check efuse bits for various capabilities */
	a3xx_check_features(adreno_dev);
}
@@ -968,8 +971,10 @@ static struct adreno_perfcount_register a3xx_perfcounters_rb[] = {
static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO,
		A3XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_1_LO,
		A3XX_RBBM_PERFCTR_PWR_1_HI, -1, 0 },
	/*
	 * A3XX_RBBM_PERFCTR_PWR_1_LO is used for frequency scaling and removed
	 * from the pool of available counters
	 */
};

static struct adreno_perfcount_register a3xx_perfcounters_vbif[] = {
@@ -1063,19 +1068,6 @@ static void a3xx_perfcounter_init(struct adreno_device *adreno_dev)
		counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF_PWR].regs =
			a3xx_perfcounters_vbif2_pwr;
	}

	/*
	 * Enable the GPU busy count counter. This is a fixed counter on
	 * A3XX so we don't need to bother checking the return value
	 */
	adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
		NULL, NULL, PERFCOUNTER_FLAG_KERNEL);
}

/*
 * a3xx_perfcounter_close() - drop the kernel's hold on the PWR:1 counter
 * @adreno_dev: Adreno device whose counter is being put
 *
 * Calls adreno_perfcounter_put() for counter 1 of KGSL_PERFCOUNTER_GROUP_PWR
 * with PERFCOUNTER_FLAG_KERNEL, i.e. the same group/counter/flag combination
 * that a3xx_perfcounter_init() hands to adreno_perfcounter_get(). The return
 * value of the put is not checked.
 */
static void a3xx_perfcounter_close(struct adreno_device *adreno_dev)
{
	adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
		PERFCOUNTER_FLAG_KERNEL);
}

/**
@@ -1514,7 +1506,6 @@ struct adreno_gpudev adreno_a3xx_gpudev = {
	.init = a3xx_init,
	.microcode_read = a3xx_microcode_read,
	.perfcounter_init = a3xx_perfcounter_init,
	.perfcounter_close = a3xx_perfcounter_close,
	.start = a3xx_start,
	.snapshot = a3xx_snapshot,
	.coresight = {&a3xx_coresight},
+10 −41
Original line number Diff line number Diff line
@@ -146,6 +146,9 @@ static void a5xx_platform_setup(struct adreno_device *adreno_dev)
	adreno_dev->lm_leakage = A530_DEFAULT_LEAKAGE;
	adreno_dev->speed_bin = 0;

	/* Set the GPU busy counter to use for frequency scaling */
	adreno_dev->perfctr_pwr_lo = A5XX_RBBM_PERFCTR_RBBM_0_LO;

	/* Check efuse bits for various capabilities */
	a5xx_check_features(adreno_dev);
}
@@ -1255,24 +1258,6 @@ static void a5xx_count_throttles(struct adreno_device *adreno_dev,
		adreno_dev->lm_threshold_cross = adj;
}

/*
 * a5xx_enable_pwr_counters() - set up the emulated PWR counter on a5xx
 * @adreno_dev: Adreno device to program
 * @counter: index of the PWR counter being requested
 *
 * a5xx has no physical PWR counters, so PWR:1 is stood in for by
 * RBBM_PERFCTR_RBBM_0 programmed with countable 6. PWR:0 is intentionally
 * left unemulated: nobody uses it and the generic RBBM counters are scarce.
 *
 * Return: 0 once the select register has been written, -EINVAL when
 * counter 0 is requested.
 */
static int a5xx_enable_pwr_counters(struct adreno_device *adreno_dev,
		unsigned int counter)
{
	if (counter != 0) {
		/* Countable 6 on RBBM counter 0 substitutes for PWR:1 */
		kgsl_regwrite(KGSL_DEVICE(adreno_dev),
			A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

		return 0;
	}

	/* PWR:0 is not emulated */
	return -EINVAL;
}

/* FW driven idle 10% throttle */
#define IDLE_10PCT 0
/* number of cycles when clock is throttled by 50% (CRC) */
@@ -1444,6 +1429,9 @@ static void a5xx_start(struct adreno_device *adreno_dev)
	/* Make all blocks contribute to the GPU BUSY perf counter */
	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Program RBBM counter 0 to report GPU busy for frequency scaling */
	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/*
	 * Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure
@@ -2093,11 +2081,11 @@ static struct adreno_perfcount_register a5xx_perfcounters_cp[] = {
		A5XX_RBBM_PERFCTR_CP_7_HI, 7, A5XX_CP_PERFCTR_CP_SEL_7 },
};

static struct adreno_perfcount_register a5xx_perfcounters_rbbm[] = {
	/*
 * Note that PERFCTR_RBBM_0 is missing - it is used to emulate the PWR counters.
 * See below.
	 * A5XX_RBBM_PERFCTR_RBBM_0 is used for frequency scaling and omitted
	 * from the pool of available counters
	 */
static struct adreno_perfcount_register a5xx_perfcounters_rbbm[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_1_LO,
		A5XX_RBBM_PERFCTR_RBBM_1_HI, 9, A5XX_RBBM_PERFCTR_RBBM_SEL_1 },
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_2_LO,
@@ -2346,22 +2334,6 @@ static struct adreno_perfcount_register a5xx_perfcounters_alwayson[] = {
		A5XX_RBBM_ALWAYSON_COUNTER_HI, -1 },
};

/*
 * 5XX targets don't really have physical PERFCTR_PWR registers - we emulate
 * them using similar performance counters from the RBBM block. The difference
 * between using this group and the RBBM group is that the RBBM counters are
 * reloaded after a power collapse which is not how the PWR counters behaved on
 * legacy hardware. In order to limit the disruption on the rest of the system
 * we go out of our way to ensure backwards compatibility. Since RBBM counters
 * are in short supply, we don't emulate PWR:0 which nobody uses - mark it as
 * broken.
 */
static struct adreno_perfcount_register a5xx_perfcounters_pwr[] = {
	/* Entry 0 (PWR:0): not emulated, flagged broken with no registers */
	{ KGSL_PERFCOUNTER_BROKEN, 0, 0, 0, 0, -1, 0 },
	/* Entry 1 (PWR:1): backed by the RBBM_0 LO/HI counter registers */
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_0_LO,
		A5XX_RBBM_PERFCTR_RBBM_0_HI, -1, 0},
};

static struct adreno_perfcount_register a5xx_pwrcounters_sp[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_0_LO,
		A5XX_SP_POWER_COUNTER_0_HI, -1, A5XX_SP_POWERCTR_SP_SEL_0 },
@@ -2481,8 +2453,6 @@ static struct adreno_perfcount_group a5xx_perfcounter_groups
	A5XX_PERFCOUNTER_GROUP(SP, sp),
	A5XX_PERFCOUNTER_GROUP(RB, rb),
	A5XX_PERFCOUNTER_GROUP(VSC, vsc),
	A5XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr,
		ADRENO_PERFCOUNTER_GROUP_FIXED),
	A5XX_PERFCOUNTER_GROUP(VBIF, vbif),
	A5XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr,
		ADRENO_PERFCOUNTER_GROUP_FIXED),
@@ -3184,7 +3154,6 @@ struct adreno_gpudev adreno_a5xx_gpudev = {
	.pwrlevel_change_settings = a5xx_pwrlevel_change_settings,
	.read_throttling_counters = a5xx_read_throttling_counters,
	.count_throttles = a5xx_count_throttles,
	.enable_pwr_counters = a5xx_enable_pwr_counters,
	.preemption_pre_ibsubmit = a5xx_preemption_pre_ibsubmit,
	.preemption_yield_enable =
				a5xx_preemption_yield_enable,
+11 −37
Original line number Diff line number Diff line
@@ -631,6 +631,14 @@ static void a6xx_start(struct adreno_device *adreno_dev)
		kgsl_regwrite(device, A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE,
			0x1);

	/*
	 * Enable GMU power counter 0 to count GPU busy. This is applicable to
	 * all a6xx targets
	 */
	kgsl_regwrite(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
	kgsl_regrmw(device, A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, 0x20);
	kgsl_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0x1);

	a6xx_protect_init(adreno_dev);

	if (!patch_reglist && (adreno_dev->pwrup_reglist.gpuaddr != 0)) {
@@ -2211,13 +2219,6 @@ static struct adreno_perfcount_register a6xx_perfcounters_gbif_pwr[] = {
		A6XX_GBIF_PWR_CNT_HIGH2, -1, A6XX_GBIF_PERF_PWR_CNT_EN },
};

/*
 * Register table for the a6xx PWR counter group. Only the second entry is
 * usable; it points at the GMU power counter XOCLK_0 register pair.
 */
static struct adreno_perfcount_register a6xx_perfcounters_pwr[] = {
	/* Entry 0: flagged broken, no registers assigned */
	{ KGSL_PERFCOUNTER_BROKEN, 0, 0, 0, 0, -1, 0 },
	/* Entry 1: GMU power counter XOCLK_0 (_L / _H registers) */
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0,
		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
		A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H, -1, 0 },
};

static struct adreno_perfcount_register a6xx_perfcounters_alwayson[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A6XX_CP_ALWAYS_ON_COUNTER_LO,
		A6XX_CP_ALWAYS_ON_COUNTER_HI, -1 },
@@ -2293,8 +2294,6 @@ static struct adreno_perfcount_group a6xx_perfcounter_groups
	A6XX_PERFCOUNTER_GROUP_FLAGS(VBIF, vbif, 0),
	A6XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr,
		ADRENO_PERFCOUNTER_GROUP_FIXED),
	A6XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr,
		ADRENO_PERFCOUNTER_GROUP_FIXED),
	A6XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson,
		ADRENO_PERFCOUNTER_GROUP_FIXED),
	A6XX_POWER_COUNTER_GROUP(GPMU, gpmu),
@@ -2305,33 +2304,6 @@ static struct adreno_perfcounters a6xx_perfcounters = {
	ARRAY_SIZE(a6xx_perfcounter_groups),
};

/*
 * a6xx_enable_pwr_counters() - point GMU power counter 0 at GPU busy cycles
 * @adreno_dev: Adreno device to program
 * @counter: index of the PWR counter being requested
 *
 * Return: 0 after the counter has been programmed and enabled, -EINVAL when
 * counter 0 is requested, -ENODEV when the GMU is not enabled and the busy
 * mask register is not a valid offset for this device.
 */
static int a6xx_enable_pwr_counters(struct adreno_device *adreno_dev,
		unsigned int counter)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	/*
	 * Power counters are scarce and the total GPU cycle count is not
	 * used, so a request for counter 0 is rejected.
	 */
	if (counter == 0)
		return -EINVAL;

	/*
	 * Busy cycles can be counted either with the GMU enabled or, on a
	 * GPU running without GMU, when the busy mask register is reachable.
	 */
	if (!(gmu_core_isenabled(device) ||
			kgsl_is_register_offset(device,
				A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK)))
		return -ENODEV;

	/* Set the busy mask, select the countable, then enable the counter */
	kgsl_regwrite(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xFF000000);
	kgsl_regrmw(device, A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0,
			0xFF, 0x20);
	kgsl_regwrite(device, A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0x1);

	return 0;
}

static void a6xx_efuse_speed_bin(struct adreno_device *adreno_dev)
{
	unsigned int val;
@@ -2391,6 +2363,9 @@ static void a6xx_platform_setup(struct adreno_device *adreno_dev)
		gpudev->vbif_xin_halt_ctrl0_mask =
				A6XX_VBIF_XIN_HALT_CTRL0_MASK;

	/* Set the GPU busy counter for frequency scaling */
	adreno_dev->perfctr_pwr_lo = A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L;

	/* Check efuse bits for various capabilities */
	a6xx_check_features(adreno_dev);
}
@@ -2664,7 +2639,6 @@ struct adreno_gpudev adreno_a6xx_gpudev = {
	.regulator_enable = a6xx_sptprac_enable,
	.regulator_disable = a6xx_sptprac_disable,
	.perfcounters = &a6xx_perfcounters,
	.enable_pwr_counters = a6xx_enable_pwr_counters,
	.read_throttling_counters = a6xx_read_throttling_counters,
	.microcode_read = a6xx_microcode_read,
	.enable_64bit = a6xx_enable_64bit,
Loading