Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 44ed382d authored by Harshdeep Dhatt's avatar Harshdeep Dhatt
Browse files

msm: kgsl: Add hwsched based reset and recovery



There can be various gmu faults or a context_bad hfi alongside
an actively rendering gpu, for example, dcvs timeout, context
register or unregister timeout or a perfcounter oob timeout.
It is important to request the dispatcher to perform reset and
recovery so that unfinished inflight submissions can be
re-submitted post recovery.

Change-Id: I2e66c5b24c45f1af7514044caa74abe1bf84dd0c
Signed-off-by: default avatarHarshdeep Dhatt <hdhatt@codeaurora.org>
parent 0fc2ce7e
Loading
Loading
Loading
Loading
+11 −5
Original line number Diff line number Diff line
@@ -1658,13 +1658,9 @@ static inline int adreno_perfcntr_active_oob_get(

	if (!ret) {
		ret = gmu_core_dev_oob_set(device, oob_perfcntr);
		if (ret) {
			adreno_set_gpu_fault(adreno_dev,
				ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
			adreno_dispatcher_schedule(device);
		if (ret)
			adreno_active_count_put(adreno_dev);
	}
	}

	return ret;
}
@@ -1935,4 +1931,14 @@ int adreno_suspend_context(struct kgsl_device *device);
 * submission.
 */
void adreno_profile_submit_time(struct adreno_submit_time *time);

/**
 * adreno_mark_guilty_context - Mark the given context as guilty
 * (failed recovery)
 * @device: Pointer to a KGSL device structure
 * @id: Context ID of the guilty context (or 0 to mark all as guilty)
 *
 * Mark the given (or all) context(s) as guilty (failed recovery)
 */
void adreno_mark_guilty_context(struct kgsl_device *device, unsigned int id);
#endif /*__ADRENO_H */
+25 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@

#include "adreno.h"
#include "adreno_a6xx.h"
#include "adreno_hwsched.h"
#include "kgsl_bus.h"
#include "kgsl_device.h"
#include "kgsl_trace.h"
@@ -728,6 +729,29 @@ static const char *oob_to_str(enum oob_request req)
	return "unknown";
}

/*
 * trigger_reset_recovery - Request dispatcher-based reset and recovery
 * @adreno_dev: Pointer to the adreno device
 * @req: The OOB request that timed out
 *
 * Only a perfcounter OOB request can occur while the GPU is actively
 * rendering, so recovery is triggered for that request type alone. The
 * fault is routed to the hwsched dispatcher when it is in use, otherwise
 * to the legacy dispatcher.
 */
static void trigger_reset_recovery(struct adreno_device *adreno_dev,
	enum oob_request req)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	/* Recovery applies to the perfcounter OOB request only */
	if (req != oob_perfcntr)
		return;

	if (!test_bit(GMU_DISPATCH, &device->gmu_core.flags)) {
		/* Legacy dispatcher: flag the fault and schedule recovery */
		adreno_set_gpu_fault(adreno_dev,
			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
		adreno_dispatcher_schedule(device);
	} else {
		/* hwsched path: halt new submissions, then raise the fault */
		adreno_get_gpu_halt(adreno_dev);

		adreno_hwsched_set_fault(adreno_dev);
	}
}

int a6xx_gmu_oob_set(struct kgsl_device *device,
		enum oob_request req)
{
@@ -762,6 +786,7 @@ int a6xx_gmu_oob_set(struct kgsl_device *device,
		gmu_fault_snapshot(device);
		ret = -ETIMEDOUT;
		WARN(1, "OOB request %s timed out\n", oob_to_str(req));
		trigger_reset_recovery(adreno_dev, req);
	}

	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, check);
+5 −4
Original line number Diff line number Diff line
@@ -510,10 +510,11 @@ struct hfi_context_rule_cmd {

/* F2H */
/* F2H */
/*
 * hfi_context_bad_cmd - F2H message reporting a bad (faulted) context.
 * Field meanings are inferred from names; confirm against the HFI spec:
 * @hdr: HFI message header
 * @ctxt_id: ID of the offending context
 * @policy: presumably the fault policy in effect — TODO confirm
 * @ts: presumably the timestamp of the faulting submission — TODO confirm
 * @error: error code describing the fault
 */
struct hfi_context_bad_cmd {
	u32 hdr;
	u32 ctxt_id;
	u32 policy;
	u32 ts;
	u32 error;
} __packed;

/* H2F */
+64 −1
Original line number Diff line number Diff line
@@ -632,11 +632,24 @@ static int a6xx_hwsched_dcvs_set(struct adreno_device *adreno_dev,

	ret = a6xx_hfi_send_cmd_async(adreno_dev, &req);

	if (ret)
	if (ret) {
		dev_err_ratelimited(&gmu->pdev->dev,
			"Failed to set GPU perf idx %d, bw idx %d\n",
			req.freq, req.bw);

		/*
		 * If this was a dcvs request along side an active gpu, request
		 * dispatcher based reset and recovery.
		 */
		if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) {

			adreno_get_gpu_halt(adreno_dev);

			adreno_hwsched_set_fault(adreno_dev);
		}

	}

	return ret;
}

@@ -753,6 +766,56 @@ static void a6xx_hwsched_pm_resume(struct adreno_device *adreno_dev)
	clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
}

/*
 * a6xx_hwsched_drain_ctxt_unregister - Complete pending context
 * unregister waiters
 * @adreno_dev: Pointer to the adreno device
 *
 * Walk the list of in-flight HFI messages under the read side of the
 * message lock and complete every waiter that sent a context unregister
 * packet, so those threads do not block until their ack timeout.
 */
static void a6xx_hwsched_drain_ctxt_unregister(struct adreno_device *adreno_dev)
{
	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
	struct pending_cmd *pending = NULL;

	read_lock(&hfi->msglock);

	list_for_each_entry(pending, &hfi->msglist, node) {
		if (MSG_HDR_GET_ID(pending->sent_hdr) !=
			H2F_MSG_UNREGISTER_CONTEXT)
			continue;

		complete(&pending->complete);
	}

	read_unlock(&hfi->msglock);
}

/*
 * a6xx_hwsched_restart - Restart the GMU and GPU after a fault
 * @adreno_dev: Pointer to the adreno device
 *
 * Drain stale unregister-context waiters, mark all contexts as
 * unregistered, then (if the GPU had been started) tear down HFI,
 * interrupts and the GMU before booting it again.
 */
void a6xx_hwsched_restart(struct adreno_device *adreno_dev)
{
	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int ret;

	/*
	 * Any pending context unregister packets will be lost
	 * since we hard reset the GMU. This means any threads waiting
	 * for context unregister hfi ack will timeout. Wake them
	 * to avoid false positive ack timeout messages later.
	 */
	a6xx_hwsched_drain_ctxt_unregister(adreno_dev);

	/* Mark every context unregistered so they re-register after boot */
	read_lock(&device->context_lock);
	idr_for_each(&device->context_idr, unregister_context_hwsched, NULL);
	read_unlock(&device->context_lock);


	/* Nothing more to do if the GPU was never brought up */
	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
		return;

	/* Teardown order matters: stop HFI traffic first */
	a6xx_hwsched_hfi_stop(adreno_dev);

	a6xx_disable_gpu_irq(adreno_dev);

	a6xx_gmu_suspend(adreno_dev);

	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);

	ret = a6xx_hwsched_boot(adreno_dev);

	/* NOTE(review): boot failure here is treated as fatal (panics) */
	BUG_ON(ret);
}

const struct adreno_power_ops a6xx_hwsched_power_ops = {
	.first_open = a6xx_hwsched_first_open,
	.last_close = a6xx_hwsched_power_off,
+6 −0
Original line number Diff line number Diff line
@@ -33,4 +33,10 @@ struct a6xx_hwsched_device {
 */
int a6xx_hwsched_probe(struct platform_device *pdev,
	u32 chipid, const struct adreno_gpu_core *gpucore);

/**
 * a6xx_hwsched_restart - Restart the gmu and gpu
 * @adreno_dev: Pointer to the adreno device
 */
void a6xx_hwsched_restart(struct adreno_device *adreno_dev);
#endif
Loading