
Commit ece8d2dc authored by Hareesh Gundu, committed by Gerrit - the friendly Code Review server

msm: kgsl: Halt RGMU execution before dumping snapshot



On GPU and GMU soft fault detection, there is a possibility of GX being
put into the OFF state while the snapshot dump path is running. Ensure
that the RGMU is halted before dumping the snapshot to avoid capturing
stale data from the GX registers.

Also add some additional debug register dumps to help analyze
rgmu_wait_for_lowest_idle() failures.

Change-Id: I04223a1bc566c6bd960ae4141506fbddfbd59533
Signed-off-by: Hareesh Gundu <hareeshg@codeaurora.org>
parent 09dd55fa
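The ordering the commit message establishes (halt the RGMU first so it cannot collapse GX, put the AHB fence into ALLOW mode, then dump the snapshot) can be summarized with a minimal, self-contained sketch. The names below (halt_rgmu, open_ahb_fence, dump_snapshot) are placeholders for illustration, not the driver's real symbols:

#include <stdio.h>

/* Placeholder steps for illustration only; not the real kgsl functions. */
static void halt_rgmu(void)      { puts("1. halt RGMU so it cannot power GX off"); }
static void open_ahb_fence(void) { puts("2. put the AHB fence in ALLOW mode"); }
static void dump_snapshot(void)  { puts("3. dump the snapshot while GX stays on"); }

int main(void)
{
	/*
	 * Fault-path ordering established by this change: halt first,
	 * dump second, so the GX registers in the snapshot are not stale.
	 */
	halt_rgmu();
	open_ahb_fence();
	dump_snapshot();
	return 0;
}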
+75 −4
@@ -263,26 +263,68 @@ static bool a6xx_rgmu_gx_is_on(struct kgsl_device *device)
static int a6xx_rgmu_wait_for_lowest_idle(struct kgsl_device *device)
{
	struct rgmu_device *rgmu = KGSL_RGMU_DEVICE(device);
	unsigned int reg;
	unsigned int reg[10] = {0};
	unsigned long t;
	uint64_t ts1, ts2, ts3;

	if (!gmu_core_isenabled(device) ||
			rgmu->idle_level != GPU_HW_IFPC)
		return 0;

	ts1 = read_AO_counter(device);

	t = jiffies + msecs_to_jiffies(RGMU_IDLE_TIMEOUT);
	do {
		gmu_core_regread(device,
			A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &reg);
			A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &reg[0]);

		if (reg & GX_GDSC_POWER_OFF)
		if (reg[0] & GX_GDSC_POWER_OFF)
			return 0;

		/* Wait 10us to reduce unnecessary AHB bus traffic */
		usleep_range(10, 100);
	} while (!time_after(jiffies, t));

	dev_err(&rgmu->pdev->dev, "Timeout waiting for lowest idle:%x\n", reg);
	ts2 = read_AO_counter(device);

	/* Do one last read in case it succeeds */
	gmu_core_regread(device,
		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, &reg[0]);

	if (reg[0] & GX_GDSC_POWER_OFF)
		return 0;

	ts3 = read_AO_counter(device);

	/* Collect abort data to help with debugging */
	gmu_core_regread(device, A6XX_RGMU_CX_PCC_DEBUG, &reg[1]);
	gmu_core_regread(device, A6XX_RGMU_CX_PCC_STATUS, &reg[2]);
	gmu_core_regread(device, A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, &reg[3]);
	gmu_core_regread(device, A6XX_CP_STATUS_1, &reg[4]);
	gmu_core_regread(device, A6XX_GMU_RBBM_INT_UNMASKED_STATUS, &reg[5]);
	gmu_core_regread(device, A6XX_GMU_GMU_PWR_COL_KEEPALIVE, &reg[6]);
	gmu_core_regread(device, A6XX_CP_CP2GMU_STATUS, &reg[7]);
	gmu_core_regread(device, A6XX_CP_CONTEXT_SWITCH_CNTL, &reg[8]);
	gmu_core_regread(device, A6XX_GMU_AO_SPARE_CNTL, &reg[9]);

	dev_err(&rgmu->pdev->dev,
		"----------------------[ RGMU error ]----------------------\n");
	dev_err(&rgmu->pdev->dev, "Timeout waiting for lowest idle level\n");
	dev_err(&rgmu->pdev->dev,
			"Timestamps: %llx %llx %llx\n", ts1, ts2, ts3);
	dev_err(&rgmu->pdev->dev,
			"SPTPRAC_PWR_CLK_STATUS=%x PCC_DEBUG=%x PCC_STATUS=%x\n",
			reg[0], reg[1], reg[2]);
	dev_err(&rgmu->pdev->dev,
			"CX_BUSY_STATUS=%x CP_STATUS_1=%x\n", reg[3], reg[4]);
	dev_err(&rgmu->pdev->dev,
			"RBBM_INT_UNMASKED_STATUS=%x PWR_COL_KEEPALIVE=%x\n",
			reg[5], reg[6]);
	dev_err(&rgmu->pdev->dev,
			"CP2GMU_STATUS=%x CONTEXT_SWITCH_CNTL=%x AO_SPARE_CNTL=%x\n",
			reg[7], reg[8], reg[9]);

	WARN_ON(1);
	return -ETIMEDOUT;
}
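The timeout handling above follows a common driver pattern: poll the status against a deadline, then do one last read after the deadline in case the condition became true between the final poll and the timeout check, and only then collect debug state. A user-space analog of that pattern, assuming hypothetical read_status() and STATUS_OFF_BIT stand-ins for gmu_core_regread() and GX_GDSC_POWER_OFF, might look like this:

#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define POLL_TIMEOUT_MS 500	/* plays the role of RGMU_IDLE_TIMEOUT */
#define STATUS_OFF_BIT  0x1	/* plays the role of GX_GDSC_POWER_OFF */

/* Hypothetical status source standing in for gmu_core_regread(). */
static uint32_t read_status(void)
{
	static int calls;

	return (++calls > 10) ? STATUS_OFF_BIT : 0;
}

static uint64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

int main(void)
{
	uint64_t deadline = now_ms() + POLL_TIMEOUT_MS;
	uint32_t reg;

	do {
		reg = read_status();
		if (reg & STATUS_OFF_BIT)
			return 0;	/* reached lowest idle in time */
		usleep(10);		/* back off, like usleep_range(10, 100) */
	} while (now_ms() < deadline);

	/* One last read: the state may have changed right as the deadline hit. */
	reg = read_status();
	if (reg & STATUS_OFF_BIT)
		return 0;

	/* The driver would now dump debug registers and return -ETIMEDOUT. */
	fprintf(stderr, "timeout waiting for lowest idle, status=0x%x\n", reg);
	return 1;
}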

@@ -488,6 +530,34 @@ static int a6xx_rgmu_load_firmware(struct kgsl_device *device)
	return rgmu->fw_hostptr ? 0 : -ENOMEM;
}

/* Halt RGMU execution */
static void a6xx_rgmu_halt_execution(struct kgsl_device *device)
{
	struct rgmu_device *rgmu = KGSL_RGMU_DEVICE(device);
	unsigned int index, status, fence;

	gmu_core_regread(device, A6XX_RGMU_CX_PCC_DEBUG, &index);
	gmu_core_regread(device, A6XX_RGMU_CX_PCC_STATUS, &status);
	gmu_core_regread(device, A6XX_GMU_AO_AHB_FENCE_CTRL, &fence);

	dev_err(&rgmu->pdev->dev,
			"RGMU Fault PCC_DEBUG:0x%x PCC_STATUS:0x%x FENCE_CTRL:0x%x\n",
			index, status, fence);

	/*
	 * Write 0 to halt RGMU execution. We halt it on a GMU/GPU fault and
	 * restart PCC execution in the recovery path.
	 */
	gmu_core_regwrite(device, A6XX_RGMU_CX_PCC_CTRL, 0);

	/*
	 * Ensure that the fence is in ALLOW mode after halting the RGMU,
	 * since the snapshot is dumped right after this.
	 */
	gmu_core_regwrite(device, A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

}

/*
 * a6xx_rgmu_snapshot() - A6XX GMU snapshot function
 * @adreno_dev: Device being snapshotted
@@ -517,6 +587,7 @@ struct gmu_dev_ops adreno_a6xx_rgmudev = {
	.ifpc_store = a6xx_rgmu_ifpc_store,
	.ifpc_show = a6xx_rgmu_ifpc_show,
	.snapshot = a6xx_rgmu_snapshot,
	.halt_execution = a6xx_rgmu_halt_execution,
	.gmu2host_intr_mask = RGMU_OOB_IRQ_MASK,
	.gmu_ao_intr_mask = RGMU_AO_IRQ_MASK,
};
+1 −0
@@ -153,6 +153,7 @@ struct gmu_dev_ops {
	unsigned int (*ifpc_show)(struct kgsl_device *device);
	void (*snapshot)(struct kgsl_device *device,
		struct kgsl_snapshot *shapshot);
	void (*halt_execution)(struct kgsl_device *device);
	int (*wait_for_active_transition)(struct kgsl_device *device);
	const unsigned int gmu2host_intr_mask;
	const unsigned int gmu_ao_intr_mask;
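The new halt_execution member is another entry in the gmu_dev_ops function-pointer table, so each GMU flavor can supply its own implementation and core code can dispatch through the table without knowing the target. A self-contained sketch of that dispatch pattern, using hypothetical, simplified types and names rather than the real kgsl structures, looks like this:

#include <stdio.h>

/* Hypothetical, simplified analogs of the kgsl device and ops structures. */
struct device_ctx {
	const char *name;
};

struct dev_ops {
	void (*snapshot)(struct device_ctx *dev);
	void (*halt_execution)(struct device_ctx *dev);	/* new, optional op */
};

static void rgmu_halt(struct device_ctx *dev)
{
	printf("%s: halt RGMU execution, open AHB fence\n", dev->name);
}

static void rgmu_snap(struct device_ctx *dev)
{
	printf("%s: dump snapshot\n", dev->name);
}

/* Per-target ops table, analogous in spirit to adreno_a6xx_rgmudev. */
static const struct dev_ops rgmu_ops = {
	.snapshot       = rgmu_snap,
	.halt_execution = rgmu_halt,
};

/* Core code dispatches through the table; ops a target does not provide stay NULL. */
static void take_snapshot(struct device_ctx *dev, const struct dev_ops *ops)
{
	if (ops->halt_execution)
		ops->halt_execution(dev);	/* keep GX up for the dump */
	ops->snapshot(dev);
}

int main(void)
{
	struct device_ctx dev = { .name = "a6xx-rgmu" };

	take_snapshot(&dev, &rgmu_ops);
	return 0;
}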
+6 −0
@@ -235,6 +235,12 @@ static void rgmu_snapshot(struct kgsl_device *device)
	/* Make sure the interrupt is masked */
	wmb();

	/*
	 * Halt RGMU execution so that GX will not
	 * be collapsed while dumping the snapshot.
	 */
	gmu_dev_ops->halt_execution(device);

	kgsl_device_snapshot(device, NULL, true);

	adreno_write_gmureg(adreno_dev,