Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 042220c6 authored by Sushmita Susheelendra
Browse files

msm: kgsl: Fix pagefault handler and stall on fault



On some platforms, certain IOMMU hardware state
might be cleared before reaching client fault handlers,
making it difficult for client fault handlers to do much
useful processing. Instead, use the pagefault information
passed in by the IOMMU fault handler. Eliminate the race
condition where the kworker thread could initiate recovery/GPU
reset before the pagefault handler has had a chance to run.
Fix the stall on fault path.

Change-Id: I2414a02ca55357a7eab863a973638b37846b82a1
Signed-off-by: Sushmita Susheelendra <ssusheel@codeaurora.org>
parent e62ecb7f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -486,6 +486,7 @@
#define A5XX_RBBM_ALWAYSON_COUNTER_LO            0x4D2
#define A5XX_RBBM_ALWAYSON_COUNTER_HI            0x4D3
#define A5XX_RBBM_STATUS                         0x4F5
#define A5XX_RBBM_STATUS3                        0x530
#define A5XX_RBBM_INT_0_STATUS                   0x4E1
#define A5XX_RBBM_AHB_ME_SPLIT_STATUS            0x4F0
#define A5XX_RBBM_AHB_PFP_SPLIT_STATUS           0x4F1
+27 −10
Original line number Diff line number Diff line
@@ -1465,6 +1465,9 @@ static int _adreno_start(struct adreno_device *adreno_dev)

	adreno_perfcounter_start(adreno_dev);

	/* Clear FSR here in case it is set from a previous pagefault */
	kgsl_mmu_clear_fsr(&device->mmu);

	status = adreno_ringbuffer_cold_start(adreno_dev);

	if (status)
@@ -1606,29 +1609,41 @@ static int adreno_stop(struct kgsl_device *device)
	return 0;
}

static inline bool adreno_try_soft_reset(struct kgsl_device *device, int fault)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);

	/*
	 * Decide whether a low-latency soft reset is safe for this fault.
	 * An MMU pagefault needs the full hard-reset path instead.
	 */
	if (fault & ADRENO_IOMMU_PAGE_FAULT)
		return false;

	/* A304 cannot reprogram the SMMU after a soft reset; force hard reset */
	if (adreno_is_a304(adreno_dev))
		return false;

	/* Soft reset is only viable if the VBIF pipe drains cleanly */
	return adreno_vbif_clear_pending_transactions(device) == 0;
}

/**
 * adreno_reset() - Helper function to reset the GPU
 * @device: Pointer to the KGSL device structure for the GPU
 * @fault: Type of fault. Needed to skip soft reset for MMU fault
 *
 * Try to reset the GPU to recover from a fault.  First, try to do a low latency
 * soft reset.  If the soft reset fails for some reason, then bring out the big
 * guns and toggle the footswitch.
 */
int adreno_reset(struct kgsl_device *device)
int adreno_reset(struct kgsl_device *device, int fault)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	int ret = -EINVAL;
	struct kgsl_mmu *mmu = &device->mmu;
	int i = 0;

	/*
	 * Try soft reset first, for non mmu fault case only and if VBIF
	 * pipe clears cleanly.
	 * Skip soft reset and use hard reset for A304 GPU, As
	 * A304 is not able to do SMMU programming after soft reset.
	 */
	if (!atomic_read(&mmu->fault) && !adreno_is_a304(adreno_dev)
		&& !adreno_vbif_clear_pending_transactions(device)) {
	/* Try soft reset first */
	if (adreno_try_soft_reset(device, fault)) {
		ret = adreno_soft_reset(device);
		if (ret)
			KGSL_DEV_ERR_ONCE(device, "Device soft reset failed\n");
@@ -1636,6 +1651,8 @@ int adreno_reset(struct kgsl_device *device)
	if (ret) {
		/* If soft reset failed/skipped, then pull the power */
		kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT);
		/* since device is officially off now clear start bit */
		clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);

		/* Keep trying to start the device until it works */
		for (i = 0; i < NUM_TIMES_RESET_RETRY; i++) {
+2 −1
Original line number Diff line number Diff line
@@ -431,6 +431,7 @@ enum adreno_regs {
	ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
	ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
	ADRENO_REG_RBBM_STATUS,
	ADRENO_REG_RBBM_STATUS3,
	ADRENO_REG_RBBM_PERFCTR_CTL,
	ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0,
	ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1,
@@ -786,7 +787,7 @@ void adreno_snapshot(struct kgsl_device *device,
		struct kgsl_snapshot *snapshot,
		struct kgsl_context *context);

int adreno_reset(struct kgsl_device *device);
int adreno_reset(struct kgsl_device *device, int fault);

void adreno_fault_skipcmd_detached(struct kgsl_device *device,
					 struct adreno_context *drawctxt,
+1 −0
Original line number Diff line number Diff line
@@ -2063,6 +2063,7 @@ static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
				A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A5XX_RBBM_STATUS),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A5XX_RBBM_STATUS3),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A5XX_RBBM_PERFCTR_CNTL),
	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0,
					A5XX_RBBM_PERFCTR_LOAD_CMD0),
+22 −4
Original line number Diff line number Diff line
@@ -2088,6 +2088,7 @@ static int dispatcher_do_fault(struct kgsl_device *device)
	int ret, i;
	int fault;
	int halt;
	unsigned int reg_rbbm_status3;

	fault = atomic_xchg(&dispatcher->fault, 0);
	if (fault == 0)
@@ -2113,6 +2114,22 @@ static int dispatcher_do_fault(struct kgsl_device *device)
		return ret;
	}

	/*
	 * On A5xx, read RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) to
	 * tell if this function was entered after a pagefault. If so, only
	 * proceed if the fault handler has already run in the IRQ thread,
	 * else return early to give the fault handler a chance to run.
	 */
	if (adreno_is_a5xx(adreno_dev)) {
		mutex_lock(&device->mutex);
		adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3,
				&reg_rbbm_status3);
		mutex_unlock(&device->mutex);
		if (reg_rbbm_status3 & BIT(24))
			if (!(fault & ADRENO_IOMMU_PAGE_FAULT))
				return 0;
	}

	/* Turn off all the timers */
	del_timer_sync(&dispatcher->timer);
	del_timer_sync(&dispatcher->fault_timer);
@@ -2168,9 +2185,6 @@ static int dispatcher_do_fault(struct kgsl_device *device)
		trace_adreno_cmdbatch_fault(cmdbatch, fault);
	}

	/* Set pagefault if it occurred */
	kgsl_mmu_set_pagefault(&device->mmu);

	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
		ADRENO_REG_CP_IB1_BASE_HI, &base);

@@ -2185,6 +2199,10 @@ static int dispatcher_do_fault(struct kgsl_device *device)
		kgsl_device_snapshot(device, cmdbatch->context);
	}

	/* Terminate the stalled transaction and resume the IOMMU */
	if (fault & ADRENO_IOMMU_PAGE_FAULT)
		kgsl_mmu_pagefault_resume(&device->mmu);

	/* Reset the dispatcher queue */
	dispatcher->inflight = 0;
	atomic_set(&dispatcher->preemption_state,
@@ -2193,7 +2211,7 @@ static int dispatcher_do_fault(struct kgsl_device *device)
	/* Reset the GPU and make sure halt is not set during recovery */
	halt = adreno_gpu_halt(adreno_dev);
	adreno_clear_gpu_halt(adreno_dev);
	ret = adreno_reset(device);
	ret = adreno_reset(device, fault);
	mutex_unlock(&device->mutex);
	/* if any other fault got in until reset then ignore */
	atomic_set(&dispatcher->fault, 0);
Loading