Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 58ea104d authored by Shubhraprakash Das
Browse files

msm: kgsl: Add hang interrupt and change HW fault generation



Add a sysfs property to dynamically control the HW hang interrupt and
enable it for A4XX by default. Also, a HW fault now needs to be
generated for fatal interrupts only.

Change-Id: Ib50c57476e54cccfef0ae18a0cbb42fce29457c1
Signed-off-by: Shubhraprakash Das <sadas@codeaurora.org>
parent d3394510
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -14,6 +14,18 @@
#ifndef _A4XX_REG_H
#define _A4XX_REG_H

/* A4XX interrupt bits that are different from A3XX, in bit order */
#define A4XX_INT_RBBM_ETS_MS_TIMEOUT		5
#define A4XX_INT_RBBM_ASYNC_OVERFLOW		6
#define A4XX_INT_RBBM_GPC_ERR			7
#define A4XX_INT_CP_SW				8
#define A4XX_INT_CP_OPCODE_ERR			9
#define A4XX_INT_RBBM_ATB_BUS_OVERFLOW		22
#define A4XX_INT_RBBM_DPM_CALC_ERR		28
#define A4XX_INT_RBBM_DPM_EPOCH_ERR		29
#define A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR	30
#define A4XX_INT_RBBM_DPM_THERMAL_RED_ERR	31

/* RB registers */
#define A4XX_RB_GMEM_BASE_ADDR		0xcc0

+93 −1
Original line number Diff line number Diff line
@@ -1807,6 +1807,13 @@ static int adreno_init(struct kgsl_device *device)
	for (i = 6; i < FT_DETECT_REGS_COUNT; i++)
		ft_detect_regs[i] = 0;

	/* turn on hang interrupt for A4XX by default */
	if (adreno_is_a4xx(adreno_dev))
		set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);

	if (adreno_dev->gpudev->irq_setup)
		adreno_dev->gpudev->irq_setup(adreno_dev);

	ret = adreno_perfcounter_init(device);

	/* Power down the device */
@@ -2015,7 +2022,7 @@ int adreno_reset(struct kgsl_device *device)
 * _ft_sysfs_store() -  Common routine to write to FT sysfs files
 * @buf: value to write
 * @count: size of the value to write
 * @sysfs_cfg: KGSL FT sysfs config to write
 * @ptr: pointer to config to write
 *
 * This is a common routine to write to FT sysfs files.
 */
@@ -2242,6 +2249,89 @@ static ssize_t _ft_long_ib_detect_show(struct device *dev,
}


/**
 * _ft_hang_intr_status_store() - Enable or disable the h/w hang interrupt
 * @dev: device ptr
 * @attr: Device attribute
 * @buf: value to write
 * @count: size of the value to write
 *
 * Any non-zero value written enables the interrupt, zero disables it. The
 * whole operation, including parsing, runs with device->mutex held.
 *
 * Return: @count on success (including when the setting is unchanged), the
 * value from _ft_sysfs_store() when that differs from @count, -EACCES when
 * the device is in a state not handled below, or 0 when no kgsl device is
 * associated with @dev.
 */
static ssize_t _ft_hang_intr_status_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t count)
{
	struct kgsl_device *device = kgsl_device_from_dev(dev);
	struct adreno_device *adreno_dev;
	unsigned int requested, enabled;
	int ret;

	if (device == NULL)
		return 0;
	adreno_dev = ADRENO_DEVICE(device);

	mutex_lock(&device->mutex);

	ret = _ft_sysfs_store(buf, count, &requested);
	if (ret != count)
		goto done;

	/* Reduce both sides to 0/1 so they can be compared directly */
	requested = requested ? 1 : 0;
	enabled = test_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv) ? 1 : 0;
	if (requested == enabled)
		goto done;

	/* Update the flag first; irq_control() is called after this point */
	if (requested)
		set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);
	else
		clear_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);

	/* Set the new setting based on device state */
	switch (device->state) {
	case KGSL_STATE_NAP:
	case KGSL_STATE_SLEEP:
		/*
		 * NOTE(review): clocks are switched on here and not switched
		 * off again in this function - confirm the power state
		 * machine takes care of that.
		 */
		kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, device->state);
		/* fall through */
	case KGSL_STATE_ACTIVE:
		adreno_dev->gpudev->irq_control(adreno_dev, 1);
		/* fall through */
	/*
	 * For following states setting will be picked up on device
	 * start. Still need them in switch statement to differentiate
	 * from default
	 */
	case KGSL_STATE_SLUMBER:
	case KGSL_STATE_SUSPEND:
		break;
	default:
		/* reset back to old setting on error */
		if (enabled)
			set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);
		else
			clear_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);
		ret = -EACCES;
		break;
	}

done:
	mutex_unlock(&device->mutex);
	return ret;
}

/**
 * _ft_hang_intr_status_show() - Report whether the hardware hang interrupt
 * is enabled
 * @dev: device ptr
 * @attr: Device attribute
 * @buf: value read
 *
 * Return: number of characters written to @buf ("1\n" or "0\n"), or 0 when
 * no adreno device is associated with @dev.
 */
static ssize_t _ft_hang_intr_status_show(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	struct adreno_device *adreno_dev = _get_adreno_dev(dev);
	int enabled;

	if (adreno_dev == NULL)
		return 0;

	enabled = test_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv) ? 1 : 0;

	return snprintf(buf, PAGE_SIZE, "%d\n", enabled);
}

/*
 * Declare the 0644 device attribute dev_attr_<name>, backed by the handlers
 * _<name>_show and _<name>_store. The macro deliberately has no trailing
 * semicolon: the user supplies it, so no stray empty declaration is left
 * behind at file scope.
 */
#define FT_DEVICE_ATTR(name) \
	DEVICE_ATTR(name, 0644,	_ ## name ## _show, _ ## name ## _store)

@@ -2249,6 +2339,7 @@ FT_DEVICE_ATTR(ft_policy);
FT_DEVICE_ATTR(ft_pagefault_policy);
FT_DEVICE_ATTR(ft_fast_hang_detect);
FT_DEVICE_ATTR(ft_long_ib_detect);
FT_DEVICE_ATTR(ft_hang_intr_status);


const struct device_attribute *ft_attr_list[] = {
@@ -2256,6 +2347,7 @@ const struct device_attribute *ft_attr_list[] = {
	&dev_attr_ft_pagefault_policy,
	&dev_attr_ft_fast_hang_detect,
	&dev_attr_ft_long_ib_detect,
	&dev_attr_ft_hang_intr_status,
	NULL,
};

+14 −0
Original line number Diff line number Diff line
@@ -197,6 +197,7 @@ enum adreno_device_flags {
	ADRENO_DEVICE_PWRON_FIXUP = 1,
	ADRENO_DEVICE_INITIALIZED = 2,
	ADRENO_DEVICE_CORESIGHT = 3,
	ADRENO_DEVICE_HANG_INTR = 4,
};

#define PERFCOUNTER_FLAG_NONE 0x0
@@ -412,6 +413,17 @@ struct adreno_coresight {
};


/**
 * struct adreno_irq_funcs - One entry in a GPU interrupt callback table
 * @func: Handler invoked with the adreno device and the interrupt bit
 * number; NULL when no handler is installed for that bit
 */
struct adreno_irq_funcs {
	void (*func)(struct adreno_device *, int);
};
/* Initializer for a single struct adreno_irq_funcs table entry */
#define ADRENO_IRQ_CALLBACK(_c) { .func = _c }

/**
 * struct adreno_irq - Per-GPU interrupt configuration
 * @mask: Interrupt bits that a3xx_irq_control() ORs into the value it
 * writes to ADRENO_REG_RBBM_INT_0_MASK when interrupts are enabled
 * @funcs: Callback table, indexed by interrupt bit number
 * @funcs_count: Number of entries in @funcs
 */
struct adreno_irq {
	unsigned int mask;
	struct adreno_irq_funcs *funcs;
	int funcs_count;
};

struct adreno_gpudev {
	/*
	 * These registers are in a different location on different devices,
@@ -427,11 +439,13 @@ struct adreno_gpudev {

	struct adreno_coresight *coresight;

	struct adreno_irq *irq;
	/* GPU specific function hooks */
	int (*ctxt_create)(struct adreno_device *, struct adreno_context *);
	irqreturn_t (*irq_handler)(struct adreno_device *);
	void (*irq_control)(struct adreno_device *, int);
	unsigned int (*irq_pending)(struct adreno_device *);
	void (*irq_setup)(struct adreno_device *);
	void * (*snapshot)(struct adreno_device *, void *, int *, int);
	int (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *);
	int (*perfcounter_init)(struct adreno_device *);
+128 −52
Original line number Diff line number Diff line
@@ -3134,7 +3134,13 @@ int a3xx_rb_init(struct adreno_device *adreno_dev,
	return 0;
}

static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
/*
 * a3xx_a4xx_err_callback() - Common interrupts shared between A4XX
 * and A3XX
 * @adreno_dev: Pointer to device
 * @bit: Interrupt bit
 */
void a3xx_a4xx_err_callback(struct adreno_device *adreno_dev, int bit)
{
	struct kgsl_device *device = &adreno_dev->dev;
	const char *err = "";
@@ -3175,17 +3181,8 @@ static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
	case A3XX_INT_RBBM_PFP_MS_TIMEOUT:
		err = "RBBM: PFP master split timeout";
		break;
	case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
		err = "RBBM: ATB bus oveflow";
		break;
	case A3XX_INT_VFD_ERROR:
		err = "VFD: Out of bounds access";
		break;
	case A3XX_INT_CP_T0_PACKET_IN_IB:
		err = "ringbuffer TO packet in IB interrupt";
		break;
	case A3XX_INT_CP_OPCODE_ERROR:
		err = "ringbuffer opcode error interrupt";
	case A3XX_INT_UCHE_OOB_ACCESS:
		err = "UCHE:  Out of bounds access";
		break;
	case A3XX_INT_CP_RESERVED_BIT_ERROR:
		err = "ringbuffer reserved bit error interrupt";
@@ -3209,21 +3206,64 @@ static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
	case A3XX_INT_MISC_HANG_DETECT:
		err = "MISC: GPU hang detected";
		break;
	case A3XX_INT_UCHE_OOB_ACCESS:
		err = "UCHE:  Out of bounds access";
	}
done:
	KGSL_DRV_CRIT(device, "%s\n", err);
}

/*
 * a3xx_err_callback() - Print the error for a non-fatal A3XX interrupt
 * @adreno_dev: Pointer to device
 * @bit: Interrupt bit
 *
 * Logs the interrupts whose message lives only in this routine. Every other
 * bit is handed to a3xx_a4xx_err_callback() so that it is still reported:
 * a3xx_irq_setup() installs this routine for all formerly fatal bits on
 * a330v2, and those must not be dropped silently.
 */
static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
{
	struct kgsl_device *device = &adreno_dev->dev;
	const char *err;

	switch (bit) {
	case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
		err = "RBBM: ATB bus overflow";
		break;
	case A3XX_INT_VFD_ERROR:
		err = "VFD: Out of bounds access";
		break;
	case A3XX_INT_CP_OPCODE_ERROR:
		err = "ringbuffer opcode error interrupt";
		break;
	case A3XX_INT_CP_T0_PACKET_IN_IB:
		err = "ringbuffer T0 packet in IB interrupt";
		break;
	default:
		/* Not one of ours - let the shared routine print it */
		a3xx_a4xx_err_callback(adreno_dev, bit);
		return;
	}

	KGSL_DRV_CRIT(device, "%s\n", err);
}

/*
 * a3xx_fatal_err_callback() - Isr for fatal interrupts that hang GPU
 * @adreno_dev: Pointer to device
 * @bit: Interrupt bit
 *
 * Called for both A4XX and A3XX
 */
void a3xx_fatal_err_callback(struct adreno_device *adreno_dev, int bit)
{
	struct kgsl_device *device = &adreno_dev->dev;

	/* Call the other error routines to get the error print */
	switch (bit) {
	case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
	case A3XX_INT_VFD_ERROR:
	case A3XX_INT_CP_T0_PACKET_IN_IB:
		a3xx_err_callback(adreno_dev, bit);
		break;
	default:
		a3xx_a4xx_err_callback(adreno_dev, bit);
	}

	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);

done:
	/* Trigger a fault in the dispatcher - this will effect a restart */
	adreno_dispatcher_irq_fault(device);
}

static void a3xx_gpu_idle_callback(struct adreno_device *adreno_dev,
void a3xx_gpu_idle_callback(struct adreno_device *adreno_dev,
					int irq)
{
	struct kgsl_device *device = &adreno_dev->dev;
@@ -3245,7 +3285,7 @@ static void a3xx_gpu_idle_callback(struct adreno_device *adreno_dev,
 * Handle the cp interrupt generated by GPU, common function between a3xx and
 * a4xx devices
 */
static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
{
	struct kgsl_device *device = &adreno_dev->dev;

@@ -3699,8 +3739,6 @@ static void a3xx_perfcounter_restore(struct adreno_device *adreno_dev)

}

#define A3XX_IRQ_CALLBACK(_c) { .func = _c }

#define A3XX_INT_MASK \
	((1 << A3XX_INT_RBBM_GPU_IDLE) |         \
	 (1 << A3XX_INT_RBBM_AHB_ERROR) |        \
@@ -3716,38 +3754,70 @@ static void a3xx_perfcounter_restore(struct adreno_device *adreno_dev)
	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
	 (1 << A3XX_INT_UCHE_OOB_ACCESS))

static struct {
	void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
	A3XX_IRQ_CALLBACK(a3xx_gpu_idle_callback), /* 0 - RBBM_GPU_IDLE */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 1 - RBBM_AHB_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 2 - RBBM_REG_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 3 - RBBM_ME_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 4 - RBBM_PFP_MS_TIMEOUT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 5 - RBBM_ATB_BUS_OVERFLOW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 6 - RBBM_VFD_ERROR */
	A3XX_IRQ_CALLBACK(NULL),	       /* 7 - CP_SW */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 8 - CP_T0_PACKET_IN_IB */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 9 - CP_OPCODE_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 10 - CP_RESERVED_BIT_ERROR */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 11 - CP_HW_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 12 - CP_DMA */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 13 - CP_IB2_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 14 - CP_IB1_INT */
	A3XX_IRQ_CALLBACK(a3xx_cp_callback),   /* 15 - CP_RB_INT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 16 - CP_REG_PROTECT_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 17 - CP_RB_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 18 - CP_VS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 19 - CP_PS_DONE_TS */
	A3XX_IRQ_CALLBACK(NULL),	       /* 20 - CP_CACHE_FLUSH_TS */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 21 - CP_AHB_ERROR_FAULT */
	A3XX_IRQ_CALLBACK(NULL),	       /* 22 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	       /* 23 - Unused */
	A3XX_IRQ_CALLBACK(NULL),	       /* 24 - MISC_HANG_DETECT */
	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 25 - UCHE_OOB_ACCESS */
/*
 * A3XX interrupt callback table. The position of an entry is the interrupt
 * bit it serves; a NULL callback means no handler is installed for that bit.
 * a3xx_irq_setup() may replace entries in this table during initialization.
 */
static struct adreno_irq_funcs a3xx_irq_funcs[] = {
	ADRENO_IRQ_CALLBACK(a3xx_gpu_idle_callback), /* 0 - RBBM_GPU_IDLE */
	ADRENO_IRQ_CALLBACK(a3xx_a4xx_err_callback),  /* 1 - RBBM_AHB_ERROR */
	ADRENO_IRQ_CALLBACK(a3xx_a4xx_err_callback),  /* 2 - RBBM_REG_TIMEOUT */
	/* 3 - RBBM_ME_MS_TIMEOUT */
	ADRENO_IRQ_CALLBACK(a3xx_a4xx_err_callback),
	/* 4 - RBBM_PFP_MS_TIMEOUT */
	ADRENO_IRQ_CALLBACK(a3xx_a4xx_err_callback),
	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 5 - RBBM_ATB_BUS_OVERFLOW */
	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 6 - VFD_ERROR */
	ADRENO_IRQ_CALLBACK(NULL),	       /* 7 - CP_SW */
	/* 8 - CP_T0_PACKET_IN_IB */
	ADRENO_IRQ_CALLBACK(a3xx_fatal_err_callback),
	ADRENO_IRQ_CALLBACK(a3xx_fatal_err_callback),  /* 9 - CP_OPCODE_ERROR */
	/* 10 - CP_RESERVED_BIT_ERROR */
	ADRENO_IRQ_CALLBACK(a3xx_fatal_err_callback),
	ADRENO_IRQ_CALLBACK(a3xx_fatal_err_callback),  /* 11 - CP_HW_FAULT */
	ADRENO_IRQ_CALLBACK(NULL),	       /* 12 - CP_DMA */
	ADRENO_IRQ_CALLBACK(a3xx_cp_callback),   /* 13 - CP_IB2_INT */
	ADRENO_IRQ_CALLBACK(a3xx_cp_callback),   /* 14 - CP_IB1_INT */
	ADRENO_IRQ_CALLBACK(a3xx_cp_callback),   /* 15 - CP_RB_INT */
	/* 16 - CP_REG_PROTECT_FAULT */
	ADRENO_IRQ_CALLBACK(a3xx_fatal_err_callback),
	ADRENO_IRQ_CALLBACK(NULL),	       /* 17 - CP_RB_DONE_TS */
	ADRENO_IRQ_CALLBACK(NULL),	       /* 18 - CP_VS_DONE_TS */
	ADRENO_IRQ_CALLBACK(NULL),	       /* 19 - CP_PS_DONE_TS */
	ADRENO_IRQ_CALLBACK(NULL),	       /* 20 - CP_CACHE_FLUSH_TS */
	/* 21 - CP_AHB_ERROR_HALT */
	ADRENO_IRQ_CALLBACK(a3xx_fatal_err_callback),
	ADRENO_IRQ_CALLBACK(NULL),	       /* 22 - Unused */
	ADRENO_IRQ_CALLBACK(NULL),	       /* 23 - Unused */
	ADRENO_IRQ_CALLBACK(a3xx_fatal_err_callback),/* 24 - MISC_HANG_DETECT */
	ADRENO_IRQ_CALLBACK(a3xx_a4xx_err_callback),  /* 25 - UCHE_OOB_ACCESS */
	/* 26 to 31 - Unused */
};

/* A3XX interrupt configuration: default mask plus the callback table */
static struct adreno_irq a3xx_irq = {
	.mask = A3XX_INT_MASK,
	.funcs = a3xx_irq_funcs,
	.funcs_count = ARRAY_SIZE(a3xx_irq_funcs),
};

/*
 * a3xx_irq_func_setup() - Sets up callback functions and mask for interrupts
 * @adreno_dev: Adreno device pointer
 *
 * Called during initialization
 */
void a3xx_irq_setup(struct adreno_device *adreno_dev)
{
	struct adreno_irq *irq_params = adreno_dev->gpudev->irq;
	int i;
	/* On a330v2 only the hang interrupt should be fatal */
	if (adreno_is_a330v2(adreno_dev)) {
		for (i = 0; i < irq_params->funcs_count; i++) {
			if ((irq_params->funcs[i].func ==
				a3xx_fatal_err_callback) &&
				A3XX_INT_MISC_HANG_DETECT != i)
				irq_params->funcs[i].func =
						a3xx_err_callback;
		}
	}
}

/*
 * a3xx_irq_handler() - Interrupt handler function
 * @adreno_dev: Pointer to adreno device
@@ -3758,16 +3828,18 @@ static struct {
irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
{
	struct kgsl_device *device = &adreno_dev->dev;
	struct adreno_irq *irq_params = adreno_dev->gpudev->irq;
	irqreturn_t ret = IRQ_NONE;
	unsigned int status, tmp;
	int i;

	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_INT_0_STATUS, &status);

	for (tmp = status, i = 0; tmp && i < ARRAY_SIZE(a3xx_irq_funcs); i++) {
	for (tmp = status, i = 0; tmp &&
			i < irq_params->funcs_count; i++) {
		if (tmp & 1) {
			if (a3xx_irq_funcs[i].func != NULL) {
				a3xx_irq_funcs[i].func(adreno_dev, i);
			if (irq_params->funcs[i].func != NULL) {
				irq_params->funcs[i].func(adreno_dev, i);
				ret = IRQ_HANDLED;
			} else {
				KGSL_DRV_CRIT(device,
@@ -3797,7 +3869,9 @@ void a3xx_irq_control(struct adreno_device *adreno_dev, int state)
{
	if (state)
		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK,
				A3XX_INT_MASK);
			adreno_dev->gpudev->irq->mask |
			(test_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv) ?
				(1 << A3XX_INT_MISC_HANG_DETECT) : 0));
	else
		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK, 0);
}
@@ -4637,6 +4711,7 @@ const struct adreno_reg_offsets a3xx_reg_offsets = {
struct adreno_gpudev adreno_a3xx_gpudev = {
	.reg_offsets = &a3xx_reg_offsets,
	.perfcounters = &a3xx_perfcounters,
	.irq = &a3xx_irq,

	.ctxt_create = a3xx_drawctxt_create,
	.rb_init = a3xx_rb_init,
@@ -4647,6 +4722,7 @@ struct adreno_gpudev adreno_a3xx_gpudev = {
	.irq_control = a3xx_irq_control,
	.irq_handler = a3xx_irq_handler,
	.irq_pending = a3xx_irq_pending,
	.irq_setup = a3xx_irq_setup,
	.busy_cycles = a3xx_busy_cycles,
	.start = a3xx_start,
	.snapshot = a3xx_snapshot,
+5 −1
Original line number Diff line number Diff line
@@ -13,7 +13,6 @@
#ifndef __A3XX_H
#define __A3XX_H

void a3xx_err_callback(struct adreno_device *adreno_dev, int bit);
irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev);
void a3xx_irq_control(struct adreno_device *adreno_dev, int state);
unsigned int a3xx_irq_pending(struct adreno_device *adreno_dev);
@@ -28,4 +27,9 @@ uint64_t a3xx_perfcounter_read(struct adreno_device *adreno_dev,
void a3xx_perfcounter_disable(struct adreno_device *adreno_dev,
	unsigned int group, unsigned int counter);
void a3xx_soft_reset(struct adreno_device *adreno_dev);
void a3xx_irq_setup(struct adreno_device *adreno_dev);
void a3xx_a4xx_err_callback(struct adreno_device *adreno_dev, int bit);
void a3xx_fatal_err_callback(struct adreno_device *adreno_dev, int bit);
void a3xx_gpu_idle_callback(struct adreno_device *adreno_dev, int irq);
void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq);
#endif /*__A3XX_H */
Loading