Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f461e52b authored by Carter Cooper
Browse files

msm: kgsl: Avoid an interrupt storm from the GPU



CP_CACHE_FLUSH interrupts can storm on very rare occasions.
Check for this interrupt storm and do nothing when it occurs
rather than thrashing the CPU which can occasionally bring the
system down.

Change-Id: I0528ad4fec43abfaeeba1499d0b0e51e14b09f0d
Signed-off-by: Carter Cooper <ccooper@codeaurora.org>
parent 6f19e644
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved.
/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -749,12 +749,14 @@ static irqreturn_t adreno_irq_handler(struct kgsl_device *device)
		i = fls(tmp) - 1;

		if (irq_params->funcs[i].func != NULL) {
			if (irq_params->mask & BIT(i))
				irq_params->funcs[i].func(adreno_dev, i);
			ret = IRQ_HANDLED;
		} else
			KGSL_DRV_CRIT(device,
					"Unhandled interrupt bit %x\n", i);

		ret = IRQ_HANDLED;

		tmp &= ~BIT(i);
	}

+6 −1
Original line number Diff line number Diff line
/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved.
/* Copyright (c) 2008-2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -317,6 +317,7 @@ struct adreno_gpu_core {
 * @speed_bin: Indicate which power level set to use
 * @csdev: Pointer to a coresight device (if applicable)
 * @gpmu_throttle_counters - counters for number of throttled clocks
 * @irq_storm_work: Worker to handle possible interrupt storms
 */
struct adreno_device {
	struct kgsl_device dev;    /* Must be first field in this struct */
@@ -377,6 +378,7 @@ struct adreno_device {

	struct coresight_device *csdev;
	uint32_t gpmu_throttle_counters[ADRENO_GPMU_THROTTLE_COUNTERS];
	struct work_struct irq_storm_work;
};

/**
@@ -398,6 +400,8 @@ struct adreno_device {
 * @ADRENO_DEVICE_GPMU_INITIALIZED - Set if GPMU firmware initialization
 * succeeded
 * @ADRENO_DEVICE_ISDB_ENABLED - Set if the Integrated Shader DeBugger is
 * attached and enabled
 * @ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED - Set if a CACHE_FLUSH_TS irq storm
 * is in progress
 */
enum adreno_device_flags {
	ADRENO_DEVICE_PWRON = 0,
@@ -413,6 +417,7 @@ enum adreno_device_flags {
	ADRENO_DEVICE_SOFT_FAULT_DETECT = 10,
	ADRENO_DEVICE_GPMU_INITIALIZED = 11,
	ADRENO_DEVICE_ISDB_ENABLED = 12,
	ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED = 13,
};

/**
+87 −2
Original line number Diff line number Diff line
/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -56,6 +56,8 @@ static const struct adreno_vbif_platform a5xx_vbif_platforms[] = {

#define PREEMPT_SMMU_RECORD(_field) \
		offsetof(struct a5xx_cp_smmu_info, _field)

static void a5xx_irq_storm_worker(struct work_struct *work);
static int _read_fw2_block_header(uint32_t *header, uint32_t id,
	uint32_t major, uint32_t minor);
static void a5xx_gpmu_reset(struct work_struct *work);
@@ -414,6 +416,7 @@ static void a5xx_init(struct adreno_device *adreno_dev)
	if (ADRENO_FEATURE(adreno_dev, ADRENO_GPMU))
		INIT_WORK(&adreno_dev->gpmu_work, a5xx_gpmu_reset);

	INIT_WORK(&adreno_dev->irq_storm_work, a5xx_irq_storm_worker);
	a5xx_crashdump_init(adreno_dev);
}

@@ -3042,6 +3045,88 @@ static void a5xx_err_callback(struct adreno_device *adreno_dev, int bit)
	}
}

/*
 * a5xx_irq_storm_worker() - Recover from a CP_CACHE_FLUSH_TS interrupt storm
 * @work: The irq_storm_work member embedded in a struct adreno_device
 *
 * Queued by a5xx_cp_callback() after it has removed CP_CACHE_FLUSH_TS from
 * the interrupt mask and set ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED.
 *
 * With the device mutex held, repeatedly clear the interrupt and re-read the
 * status register until the CP_CACHE_FLUSH_TS bit reads back as zero, then
 * restore the bit in the interrupt mask and clear the SUSPENDED flag.
 * After dropping the mutex, kick the event worker and the dispatcher.
 *
 * NOTE(review): the clear/poll loop has no timeout or retry limit; it spins
 * with device->mutex held for as long as the status bit stays set.
 */
static void a5xx_irq_storm_worker(struct work_struct *work)
{
	struct adreno_device *adreno_dev = container_of(work,
			struct adreno_device, irq_storm_work);
	struct kgsl_device *device = &adreno_dev->dev;
	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
	unsigned int status;

	mutex_lock(&device->mutex);

	/* Wait for the storm to clear up: clear the bit, then check status */
	do {
		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_CLEAR_CMD,
				BIT(A5XX_INT_CP_CACHE_FLUSH_TS));
		adreno_readreg(adreno_dev, ADRENO_REG_RBBM_INT_0_STATUS,
				&status);
	} while (status & BIT(A5XX_INT_CP_CACHE_FLUSH_TS));

	/* Re-enable the interrupt bit in the mask */
	gpudev->irq->mask |= BIT(A5XX_INT_CP_CACHE_FLUSH_TS);
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK,
			gpudev->irq->mask);

	/* Let a5xx_cp_callback() process CP_CACHE_FLUSH_TS interrupts again */
	clear_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv);

	/* Terminate the message so it is not merged with the next log line */
	KGSL_DRV_WARN(device, "Re-enabled A5XX_INT_CP_CACHE_FLUSH_TS\n");
	mutex_unlock(&device->mutex);

	/* Reschedule just to make sure everything retires */
	kgsl_schedule_work(&device->event_work);
	adreno_dispatcher_schedule(device);
}

/*
 * a5xx_cp_callback() - CP_CACHE_FLUSH_TS interrupt callback with storm check
 * @adreno_dev: Adreno device that raised the interrupt
 * @bit: Interrupt bit number passed by the IRQ handler (not used here)
 *
 * Reads the ref_wait_ts word from the global memstore and compares it with
 * the value seen on the previous call. The second consecutive time the value
 * is unchanged, the interrupt is treated as a storm: it is removed from the
 * interrupt mask, ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED is set and
 * irq_storm_work is queued. Otherwise the event worker and the dispatcher
 * are scheduled as usual.
 *
 * The comparison state lives in function-static variables, so it is shared
 * by every invocation of this callback.
 */
static void a5xx_cp_callback(struct adreno_device *adreno_dev, int bit)
{
	static unsigned int last_seen;
	static unsigned int repeats;
	struct kgsl_device *device = &adreno_dev->dev;
	struct adreno_gpudev *gpudev;
	unsigned int now;

	/* A storm has already been flagged; ignore until the flag is cleared */
	if (test_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv))
		return;

	kgsl_sharedmem_readl(&device->memstore, &now,
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				ref_wait_ts));

	/*
	 * The memstore word is expected to change between interrupts.
	 * A fresh value means normal operation: restart the repeat count
	 * and remember the value for the next comparison.
	 */
	if (now != last_seen) {
		repeats = 0;
		last_seen = now;
		goto schedule;
	}

	/* Same value again: only the second repeat is treated as a storm */
	if (++repeats != 2)
		goto schedule;

	gpudev = ADRENO_GPU_DEVICE(adreno_dev);

	/*
	 * Flag the storm first, then drop the interrupt from the mask.
	 * The queued irq_storm_work is responsible for putting it back.
	 */
	set_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv);
	gpudev->irq->mask &= ~BIT(A5XX_INT_CP_CACHE_FLUSH_TS);
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK,
			gpudev->irq->mask);

	kgsl_schedule_work(&adreno_dev->irq_storm_work);
	return;

schedule:
	kgsl_schedule_work(&device->event_work);
	adreno_dispatcher_schedule(device);
}

static const char *gpmu_int_msg[32] = {
	[FW_INTR_INFO] = "FW_INTR_INFO",
	[LLM_ACK_ERR_INTR] = "LLM_ACK_ERR_INTR",
@@ -3163,7 +3248,7 @@ static struct adreno_irq_funcs a5xx_irq_funcs[32] = {
	ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */
	ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_WT_DONE_TS */
	ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNKNOWN_1 */
	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
	ADRENO_IRQ_CALLBACK(a5xx_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
	/* 21 - UNUSED_2 */
	ADRENO_IRQ_CALLBACK(NULL),
	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */
+16 −1
Original line number Diff line number Diff line
/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved.
/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
@@ -515,6 +515,7 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
	struct kgsl_context *context = NULL;
	bool secured_ctxt = false;
	uint64_t cond_addr;
	static unsigned int _seq_cnt;

	if (drawctxt != NULL && kgsl_context_detached(&drawctxt->base) &&
		!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
@@ -565,6 +566,9 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,

	total_sizedwords += (secured_ctxt) ? 26 : 0;

	/* _seq mem write for each submission */
	total_sizedwords += 4;

	/* context rollover */
	if (adreno_is_a3xx(adreno_dev))
		total_sizedwords += 3;
@@ -713,6 +717,17 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
				&adreno_dev->ft_pf_policy))
		*ringcmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 0);

	/*
	 * Do a unique memory write from the GPU. This can be used in
	 * early detection of timestamp interrupt storms to stave
	 * off system collapse.
	 */
	*ringcmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
	ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr +
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				ref_wait_ts));
	*ringcmds++ = ++_seq_cnt;

	/*
	 * end-of-pipeline timestamp.  If per context timestamps is not
	 * enabled, then drawctxt will be NULL or internal command flag will be