Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 75e814e0 authored by qctecmdr, committed by Gerrit - the friendly Code Review server
Browse files

Merge "msm: kgsl: Abort poll for HFI response if GMU fails any assertion"

parents 317d25c7 baa3fc8a
Loading
Loading
Loading
Loading
+35 −52
Original line number Diff line number Diff line
@@ -1871,27 +1871,45 @@ static unsigned int a6xx_gmu_ifpc_show(struct kgsl_device *device)
}

/* Send an NMI to the GMU */
static void a6xx_gmu_send_nmi(struct adreno_device *adreno_dev)
static void a6xx_gmu_send_nmi(struct adreno_device *adreno_dev, bool force)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
	u32 val;

	if (!a6xx_gmu_gx_is_on(device))
		goto done;

	/*
	 * Do not send NMI if the SMMU is stalled because GMU will not be able
	 * to save cm3 state to DDR.
	 */
	if (a6xx_is_smmu_stalled(device)) {
		struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);

	if (a6xx_gmu_gx_is_on(device) && a6xx_is_smmu_stalled(device)) {
		dev_err(&gmu->pdev->dev,
			"Skipping NMI because SMMU is stalled\n");
		return;
	}

done:
	if (force)
		goto nmi;

	/*
	 * We should not send NMI if there was a CM3 fault reported because we
	 * don't want to overwrite the critical CM3 state captured by gmu before
	 * it sent the CM3 fault interrupt. Also don't send NMI if GMU reset is
	 * already active. We could have hit a GMU assert and NMI might have
	 * already been triggered.
	 */

	/* make sure we're reading the latest cm3_fault */
	smp_rmb();

	if (atomic_read(&gmu->cm3_fault))
		return;

	gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &val);

	if (val & 0xE00)
		return;

nmi:
	/* Mask so there's no interrupt caused by NMI */
	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_MASK, 0xFFFFFFFF);

@@ -1908,6 +1926,9 @@ static void a6xx_gmu_send_nmi(struct adreno_device *adreno_dev)

	/* Make sure the NMI is invoked before we proceed */
	wmb();

	/* Wait for the NMI to be handled */
	udelay(200);
}

static void a6xx_gmu_cooperative_reset(struct kgsl_device *device)
@@ -1934,8 +1955,8 @@ static void a6xx_gmu_cooperative_reset(struct kgsl_device *device)
	 * If we don't get a snapshot ready from GMU, trigger NMI
	 * and if we still timeout then we just continue with reset.
	 */
	a6xx_gmu_send_nmi(adreno_dev);
	udelay(200);
	a6xx_gmu_send_nmi(adreno_dev, true);

	gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &result);
	if ((result & 0x800) != 0x800)
		dev_err(&gmu->pdev->dev,
@@ -1997,22 +2018,7 @@ static irqreturn_t a6xx_gmu_irq_handler(int irq, void *data)
		gmu_core_regwrite(device, A6XX_GMU_AO_HOST_INTERRUPT_MASK,
				(mask | GMU_INT_WDOG_BITE));

		/* make sure we're reading the latest cm3_fault */
		smp_rmb();

		/*
		 * We should not send NMI if there was a CM3 fault reported
		 * because we don't want to overwrite the critical CM3 state
		 * captured by gmu before it sent the CM3 fault interrupt.
		 */
		if (!atomic_read(&gmu->cm3_fault))
			a6xx_gmu_send_nmi(adreno_dev);

		/*
		 * There is sufficient delay for the GMU to have finished
		 * handling the NMI before snapshot is taken, as the fault
		 * worker is scheduled below.
		 */
		a6xx_gmu_send_nmi(adreno_dev, false);

		dev_err_ratelimited(&gmu->pdev->dev,
				"GMU watchdog expired interrupt received\n");
@@ -2042,37 +2048,14 @@ static irqreturn_t a6xx_gmu_irq_handler(int irq, void *data)
	return IRQ_HANDLED;
}

/*
 * a6xx_gmu_nmi() - Conditionally send an NMI to the GMU
 * @adreno_dev: Adreno device whose GMU should receive the NMI
 *
 * Does nothing unless device->gmu_fault is set. Otherwise the NMI is sent
 * through a6xx_gmu_send_nmi() only when gmu->cm3_fault reads as zero, and
 * the function then delays with udelay(100) before returning.
 */
static void a6xx_gmu_nmi(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);

	/* No need to nmi if it was a gpu fault */
	if (!device->gmu_fault)
		return;

	/* make sure we're reading the latest cm3_fault */
	smp_rmb();

	/*
	 * We should not send NMI if there was a CM3 fault reported because we
	 * don't want to overwrite the critical CM3 state captured by gmu before
	 * it sent the CM3 fault interrupt.
	 */
	if (!atomic_read(&gmu->cm3_fault)) {
		a6xx_gmu_send_nmi(adreno_dev);

		/* Wait for the NMI to be handled */
		udelay(100);
	}
}

void a6xx_gmu_snapshot(struct adreno_device *adreno_dev,
	struct kgsl_snapshot *snapshot)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	a6xx_gmu_nmi(adreno_dev);
	/* No need to nmi if it was a gpu fault */
	if (device->gmu_fault)
		a6xx_gmu_send_nmi(adreno_dev, false);

	a6xx_gmu_device_snapshot(device, snapshot);

+14 −1
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
 * Copyright (c) 2018-2021, The Linux Foundation. All rights reserved.
 */

#include <linux/delay.h>
@@ -268,6 +268,19 @@ static int poll_gmu_reg(struct adreno_device *adreno_dev,
		gmu_core_regread(device, offsetdwords, &val);
		if ((val & mask) == expected_val)
			return 0;

		/*
		 * If GMU firmware fails any assertion, an error message is sent
		 * to KMD and an NMI is triggered. So check if GMU is in NMI and
		 * time out early. Bits [11:9] of A6XX_GMU_CM3_FW_INIT_RESULT
		 * contain the GMU reset status. A non-zero value here indicates
		 * that GMU reset is active; the NMI handler would eventually
		 * complete and GMU would wait for recovery.
		 */
		gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &val);
		if (val & 0xE00)
			return -ETIMEDOUT;

		usleep_range(10, 100);
	}