Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit baa3fc8a authored by Puranam V G Tejaswi
Browse files

msm: kgsl: Abort poll for HFI response if GMU fails any assertion



If GMU firmware fails any assertion, NMI is triggered and GMU would be
in an unresponsive state. Hence abort polling for any HFI response
if GMU assertion fails. Also avoid sending NMI in that case.

Change-Id: I8073594c417ec2716d3a94b1fcfeeb4bde9031b0
Signed-off-by: Puranam V G Tejaswi <pvgtejas@codeaurora.org>
parent 55572f84
Loading
Loading
Loading
Loading
+35 −52
Original line number Diff line number Diff line
@@ -1874,27 +1874,45 @@ static unsigned int a6xx_gmu_ifpc_show(struct kgsl_device *device)
}

/* Send an NMI to the GMU */
static void a6xx_gmu_send_nmi(struct adreno_device *adreno_dev)
static void a6xx_gmu_send_nmi(struct adreno_device *adreno_dev, bool force)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
	u32 val;

	if (!a6xx_gmu_gx_is_on(device))
		goto done;

	/*
	 * Do not send NMI if the SMMU is stalled because GMU will not be able
	 * to save cm3 state to DDR.
	 */
	if (a6xx_is_smmu_stalled(device)) {
		struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);

	if (a6xx_gmu_gx_is_on(device) && a6xx_is_smmu_stalled(device)) {
		dev_err(&gmu->pdev->dev,
			"Skipping NMI because SMMU is stalled\n");
		return;
	}

done:
	if (force)
		goto nmi;

	/*
	 * We should not send NMI if there was a CM3 fault reported because we
	 * don't want to overwrite the critical CM3 state captured by gmu before
	 * it sent the CM3 fault interrupt. Also don't send NMI if GMU reset is
	 * already active. We could have hit a GMU assert and NMI might have
	 * already been triggered.
	 */

	/* make sure we're reading the latest cm3_fault */
	smp_rmb();

	if (atomic_read(&gmu->cm3_fault))
		return;

	gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &val);

	if (val & 0xE00)
		return;

nmi:
	/* Mask so there's no interrupt caused by NMI */
	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_MASK, 0xFFFFFFFF);

@@ -1911,6 +1929,9 @@ static void a6xx_gmu_send_nmi(struct adreno_device *adreno_dev)

	/* Make sure the NMI is invoked before we proceed*/
	wmb();

	/* Wait for the NMI to be handled */
	udelay(200);
}

static void a6xx_gmu_cooperative_reset(struct kgsl_device *device)
@@ -1937,8 +1958,8 @@ static void a6xx_gmu_cooperative_reset(struct kgsl_device *device)
	 * If we dont get a snapshot ready from GMU, trigger NMI
	 * and if we still timeout then we just continue with reset.
	 */
	a6xx_gmu_send_nmi(adreno_dev);
	udelay(200);
	a6xx_gmu_send_nmi(adreno_dev, true);

	gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &result);
	if ((result & 0x800) != 0x800)
		dev_err(&gmu->pdev->dev,
@@ -2000,22 +2021,7 @@ static irqreturn_t a6xx_gmu_irq_handler(int irq, void *data)
		gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK,
				(mask | GMU_INT_WDOG_BITE));

		/* make sure we're reading the latest cm3_fault */
		smp_rmb();

		/*
		 * We should not send NMI if there was a CM3 fault reported
		 * because we don't want to overwrite the critical CM3 state
		 * captured by gmu before it sent the CM3 fault interrupt.
		 */
		if (!atomic_read(&gmu->cm3_fault))
			a6xx_gmu_send_nmi(adreno_dev);

		/*
		 * There is sufficient delay for the GMU to have finished
		 * handling the NMI before snapshot is taken, as the fault
		 * worker is scheduled below.
		 */
		a6xx_gmu_send_nmi(adreno_dev, false);

		dev_err_ratelimited(&gmu->pdev->dev,
				"GMU watchdog expired interrupt received\n");
@@ -2045,37 +2051,14 @@ static irqreturn_t a6xx_gmu_irq_handler(int irq, void *data)
	return IRQ_HANDLED;
}

static void a6xx_gmu_nmi(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);

	/* No need to nmi if it was a gpu fault */
	if (!device->gmu_fault)
		return;

	/* make sure we're reading the latest cm3_fault */
	smp_rmb();

	/*
	 * We should not send NMI if there was a CM3 fault reported because we
	 * don't want to overwrite the critical CM3 state captured by gmu before
	 * it sent the CM3 fault interrupt.
	 */
	if (!atomic_read(&gmu->cm3_fault)) {
		a6xx_gmu_send_nmi(adreno_dev);

		/* Wait for the NMI to be handled */
		udelay(100);
	}
}

void a6xx_gmu_snapshot(struct adreno_device *adreno_dev,
	struct kgsl_snapshot *snapshot)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	a6xx_gmu_nmi(adreno_dev);
	/* No need to nmi if it was a gpu fault */
	if (device->gmu_fault)
		a6xx_gmu_send_nmi(adreno_dev, false);

	a6xx_gmu_device_snapshot(device, snapshot);

+14 −1
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
 * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved.
 */

#include <linux/delay.h>
@@ -268,6 +268,19 @@ static int poll_gmu_reg(struct adreno_device *adreno_dev,
		gmu_core_regread(device, offsetdwords, &val);
		if ((val & mask) == expected_val)
			return 0;

		/*
		 * If GMU firmware fails any assertion, error message is sent
		 * to KMD and NMI is triggered. So check if GMU is in NMI and
		 * timeout early. Bits [11:9] of A6XX_GMU_CM3_FW_INIT_RESULT
		 * contain GMU reset status. Non zero value here indicates that
		 * GMU reset is active, NMI handler would eventually complete
		 * and GMU would wait for recovery.
		 */
		gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &val);
		if (val & 0xE00)
			return -ETIMEDOUT;

		usleep_range(10, 100);
	}