Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c381d24a authored by Harshdeep Dhatt
Browse files

msm: kgsl: Trigger recovery upon GMU assert in hwscheduling



A GMU assert puts GMU in NMI so it will no longer be responsive
and hence triggering recovery is important. Also, trigger recovery
on WDOG or CM3 fault interrupt from GMU. Make sure snapshot and
reset are only triggered if the device was active. Also, clear the
CM3 fault on every SLUMBER exit.

Change-Id: I9d6a063470bcd082a9cbecc5b8dcf0a412713bb1
Signed-off-by: Harshdeep Dhatt <hdhatt@codeaurora.org>
parent c047705b
Loading
Loading
Loading
Loading
+1 −3
Original line number Diff line number Diff line
@@ -1523,9 +1523,7 @@ static void adreno_unbind(struct device *dev)

	kgsl_pwrscale_close(device);

	if (test_bit(GMU_DISPATCH, &device->gmu_core.flags))
		adreno_hwsched_dispatcher_close(adreno_dev);
	else {
	if (!test_bit(GMU_DISPATCH, &device->gmu_core.flags)) {
		adreno_dispatcher_close(adreno_dev);

		adreno_ringbuffer_close(adreno_dev);
+8 −0
Original line number Diff line number Diff line
@@ -1314,6 +1314,9 @@ void a6xx_gmu_register_config(struct adreno_device *adreno_dev)
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	u32 gmu_log_info, chipid = 0;

	/* Clear any previously set cm3 fault */
	atomic_set(&gmu->cm3_fault, 0);

	/* Vote veto for FAL10 feature if supported*/
	if (a6xx_core->veto_fal10) {
		gmu_core_regwrite(device, A6XX_GPU_GMU_CX_GMU_CX_FAL_INTF, 0x1);
@@ -2063,6 +2066,11 @@ static irqreturn_t a6xx_gmu_irq_handler(int irq, void *data)

		dev_err_ratelimited(&gmu->pdev->dev,
				"GMU watchdog expired interrupt received\n");

		if (test_bit(GMU_DISPATCH, &device->gmu_core.flags)) {
			adreno_get_gpu_halt(adreno_dev);
			adreno_hwsched_set_fault(adreno_dev);
		}
	}
	if (status & GMU_INT_HOST_AHB_BUS_ERR)
		dev_err_ratelimited(&gmu->pdev->dev,
+8 −6
Original line number Diff line number Diff line
@@ -511,6 +511,8 @@ static int a6xx_hwsched_boot(struct adreno_device *adreno_dev)

	trace_kgsl_pwr_request_state(device, KGSL_STATE_ACTIVE);

	adreno_hwsched_start(adreno_dev);

	ret = a6xx_hwsched_gmu_boot(adreno_dev);
	if (ret)
		return ret;
@@ -519,8 +521,6 @@ static int a6xx_hwsched_boot(struct adreno_device *adreno_dev)
	if (ret)
		return ret;

	adreno_hwsched_start(adreno_dev);

	mod_timer(&device->idle_timer, jiffies +
			device->pwrctrl.interval_timeout);

@@ -543,6 +543,8 @@ static int a6xx_hwsched_first_boot(struct adreno_device *adreno_dev)
	if (test_bit(GMU_PRIV_FIRST_BOOT_DONE, &gmu->flags))
		return a6xx_hwsched_boot(adreno_dev);

	adreno_hwsched_start(adreno_dev);

	ret = a6xx_microcode_read(adreno_dev);
	if (ret)
		return ret;
@@ -565,10 +567,6 @@ static int a6xx_hwsched_first_boot(struct adreno_device *adreno_dev)
	if (ret)
		return ret;

	adreno_hwsched_init(adreno_dev);

	adreno_hwsched_start(adreno_dev);

	adreno_get_bus_counters(adreno_dev);

	adreno_dev->cooperative_reset = ADRENO_FEATURE(adreno_dev,
@@ -980,6 +978,8 @@ int a6xx_hwsched_probe(struct platform_device *pdev,

	adreno_dev->irq_mask = A6XX_HWSCHED_INT_MASK;

	adreno_hwsched_init(adreno_dev);

	return 0;
}

@@ -1017,6 +1017,8 @@ static void a6xx_hwsched_unbind(struct device *dev, struct device *master,
	struct kgsl_device *device = dev_get_drvdata(master);

	a6xx_gmu_remove(device);

	adreno_hwsched_dispatcher_close(ADRENO_DEVICE(device));
}

static const struct component_ops a6xx_hwsched_component_ops = {
+13 −1
Original line number Diff line number Diff line
@@ -247,11 +247,14 @@ static void process_dbgq_irq(struct adreno_device *adreno_dev)
{
	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
	u32 rcvd[MAX_RCVD_SIZE];
	bool recovery = false;

	while (a6xx_hfi_queue_read(gmu, HFI_DBG_ID, rcvd, sizeof(rcvd)) > 0) {

		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_ERR)
		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_ERR) {
			adreno_a6xx_receive_err_req(gmu, rcvd);
			recovery = true;
		}

		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_DEBUG)
			adreno_a6xx_receive_debug_req(gmu, rcvd);
@@ -259,6 +262,12 @@ static void process_dbgq_irq(struct adreno_device *adreno_dev)
		if (MSG_HDR_GET_ID(rcvd[0]) == F2H_MSG_LOG_BLOCK)
			adreno_a6xx_add_log_block(adreno_dev, rcvd);
	}

	if (!recovery)
		return;

	adreno_get_gpu_halt(adreno_dev);
	adreno_hwsched_set_fault(adreno_dev);
}

/* HFI interrupt handler */
@@ -293,6 +302,9 @@ static irqreturn_t a6xx_hwsched_hfi_handler(int irq, void *data)

		dev_err_ratelimited(&gmu->pdev->dev,
				"GMU CM3 fault interrupt received\n");

		adreno_get_gpu_halt(adreno_dev);
		adreno_hwsched_set_fault(adreno_dev);
	}

	/* Ignore OOB bits */
+3 −0
Original line number Diff line number Diff line
@@ -1221,6 +1221,9 @@ static void reset_and_snapshot(struct adreno_device *adreno_dev)
	struct cmd_list_obj *obj = get_fault_cmdobj(adreno_dev);
	struct adreno_hwsched *hwsched = to_hwsched(adreno_dev);

	if (device->state != KGSL_STATE_ACTIVE)
		return;

	if (!obj) {
		kgsl_device_snapshot(device, NULL, false);
		goto done;