Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 44ed382d authored by Harshdeep Dhatt's avatar Harshdeep Dhatt
Browse files

msm: kgsl: Add hwsched based reset and recovery



There can be various gmu faults or a context_bad hfi alongside
an actively rendering gpu, for example, dcvs timeout, context
register or unregister timeout or a perfcounter oob timeout.
It is important to request the dispatcher to perform reset and
recovery so that unfinished inflight submissions can be
re-submitted post recovery.

Change-Id: I2e66c5b24c45f1af7514044caa74abe1bf84dd0c
Signed-off-by: default avatarHarshdeep Dhatt <hdhatt@codeaurora.org>
parent 0fc2ce7e
Loading
Loading
Loading
Loading
+11 −5
Original line number Diff line number Diff line
@@ -1658,13 +1658,9 @@ static inline int adreno_perfcntr_active_oob_get(

	if (!ret) {
		ret = gmu_core_dev_oob_set(device, oob_perfcntr);
		if (ret) {
			adreno_set_gpu_fault(adreno_dev,
				ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
			adreno_dispatcher_schedule(device);
		if (ret)
			adreno_active_count_put(adreno_dev);
	}
	}

	return ret;
}
@@ -1935,4 +1931,14 @@ int adreno_suspend_context(struct kgsl_device *device);
 * submission.
 */
void adreno_profile_submit_time(struct adreno_submit_time *time);

/**
 * adreno_mark_guilty_context - Mark the given context as guilty
 * (failed recovery)
 * @device: Pointer to a KGSL device structure
 * @id: Context ID of the guilty context (or 0 to mark all as guilty)
 *
 * Mark the given (or all) context(s) as guilty (failed recovery)
 */
void adreno_mark_guilty_context(struct kgsl_device *device, unsigned int id);
#endif /*__ADRENO_H */
+25 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@

#include "adreno.h"
#include "adreno_a6xx.h"
#include "adreno_hwsched.h"
#include "kgsl_bus.h"
#include "kgsl_device.h"
#include "kgsl_trace.h"
@@ -728,6 +729,29 @@ static const char *oob_to_str(enum oob_request req)
	return "unknown";
}

/*
 * trigger_reset_recovery - Request dispatcher-based reset and recovery
 * @adreno_dev: Pointer to the adreno device
 * @req: The OOB request that timed out
 *
 * Only a perfcounter OOB request can occur while the GPU is actively
 * rendering, so recovery is triggered for that request type alone. The
 * fault is routed to the hwsched dispatcher when it is in use, otherwise
 * to the legacy dispatcher.
 */
static void trigger_reset_recovery(struct adreno_device *adreno_dev,
	enum oob_request req)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	/* Recovery applies to the perfcounter OOB request only */
	if (req != oob_perfcntr)
		return;

	if (!test_bit(GMU_DISPATCH, &device->gmu_core.flags)) {
		/* Legacy dispatcher: flag the fault and schedule recovery */
		adreno_set_gpu_fault(adreno_dev,
			ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
		adreno_dispatcher_schedule(device);
	} else {
		/* hwsched path: halt new submissions, then raise the fault */
		adreno_get_gpu_halt(adreno_dev);

		adreno_hwsched_set_fault(adreno_dev);
	}
}

int a6xx_gmu_oob_set(struct kgsl_device *device,
		enum oob_request req)
{
@@ -762,6 +786,7 @@ int a6xx_gmu_oob_set(struct kgsl_device *device,
		gmu_fault_snapshot(device);
		ret = -ETIMEDOUT;
		WARN(1, "OOB request %s timed out\n", oob_to_str(req));
		trigger_reset_recovery(adreno_dev, req);
	}

	gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, check);
+5 −4
Original line number Diff line number Diff line
@@ -510,10 +510,11 @@ struct hfi_context_rule_cmd {

/* F2H */
/* F2H */
/*
 * hfi_context_bad_cmd - F2H message reporting a bad (faulted) context.
 * Field meanings are inferred from names; confirm against the HFI spec:
 * @hdr: HFI message header
 * @ctxt_id: ID of the offending context
 * @policy: presumably the fault policy in effect — TODO confirm
 * @ts: presumably the timestamp of the faulting submission — TODO confirm
 * @error: error code describing the fault
 */
struct hfi_context_bad_cmd {
	u32 hdr;
	u32 ctxt_id;
	u32 policy;
	u32 ts;
	u32 error;
} __packed;

/* H2F */
+64 −1
Original line number Diff line number Diff line
@@ -632,11 +632,24 @@ static int a6xx_hwsched_dcvs_set(struct adreno_device *adreno_dev,

	ret = a6xx_hfi_send_cmd_async(adreno_dev, &req);

	if (ret)
	if (ret) {
		dev_err_ratelimited(&gmu->pdev->dev,
			"Failed to set GPU perf idx %d, bw idx %d\n",
			req.freq, req.bw);

		/*
		 * If this was a dcvs request along side an active gpu, request
		 * dispatcher based reset and recovery.
		 */
		if (test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags)) {

			adreno_get_gpu_halt(adreno_dev);

			adreno_hwsched_set_fault(adreno_dev);
		}

	}

	return ret;
}

@@ -753,6 +766,56 @@ static void a6xx_hwsched_pm_resume(struct adreno_device *adreno_dev)
	clear_bit(GMU_PRIV_PM_SUSPEND, &gmu->flags);
}

/*
 * a6xx_hwsched_drain_ctxt_unregister - Complete pending context
 * unregister waiters
 * @adreno_dev: Pointer to the adreno device
 *
 * Walk the list of in-flight HFI messages under the read side of the
 * message lock and complete every waiter that sent a context unregister
 * packet, so those threads do not block until their ack timeout.
 */
static void a6xx_hwsched_drain_ctxt_unregister(struct adreno_device *adreno_dev)
{
	struct a6xx_hwsched_hfi *hfi = to_a6xx_hwsched_hfi(adreno_dev);
	struct pending_cmd *pending = NULL;

	read_lock(&hfi->msglock);

	list_for_each_entry(pending, &hfi->msglist, node) {
		if (MSG_HDR_GET_ID(pending->sent_hdr) !=
			H2F_MSG_UNREGISTER_CONTEXT)
			continue;

		complete(&pending->complete);
	}

	read_unlock(&hfi->msglock);
}

/*
 * a6xx_hwsched_restart - Restart the GMU and GPU after a fault
 * @adreno_dev: Pointer to the adreno device
 *
 * Drain stale unregister-context waiters, mark all contexts as
 * unregistered, then (if the GPU had been started) tear down HFI,
 * interrupts and the GMU before booting it again.
 */
void a6xx_hwsched_restart(struct adreno_device *adreno_dev)
{
	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	int ret;

	/*
	 * Any pending context unregister packets will be lost
	 * since we hard reset the GMU. This means any threads waiting
	 * for context unregister hfi ack will timeout. Wake them
	 * to avoid false positive ack timeout messages later.
	 */
	a6xx_hwsched_drain_ctxt_unregister(adreno_dev);

	/* Mark every context unregistered so they re-register after boot */
	read_lock(&device->context_lock);
	idr_for_each(&device->context_idr, unregister_context_hwsched, NULL);
	read_unlock(&device->context_lock);


	/* Nothing more to do if the GPU was never brought up */
	if (!test_bit(GMU_PRIV_GPU_STARTED, &gmu->flags))
		return;

	/* Teardown order matters: stop HFI traffic first */
	a6xx_hwsched_hfi_stop(adreno_dev);

	a6xx_disable_gpu_irq(adreno_dev);

	a6xx_gmu_suspend(adreno_dev);

	clear_bit(GMU_PRIV_GPU_STARTED, &gmu->flags);

	ret = a6xx_hwsched_boot(adreno_dev);

	/* NOTE(review): boot failure here is treated as fatal (panics) */
	BUG_ON(ret);
}

const struct adreno_power_ops a6xx_hwsched_power_ops = {
	.first_open = a6xx_hwsched_first_open,
	.last_close = a6xx_hwsched_power_off,
+6 −0
Original line number Diff line number Diff line
@@ -33,4 +33,10 @@ struct a6xx_hwsched_device {
 */
int a6xx_hwsched_probe(struct platform_device *pdev,
	u32 chipid, const struct adreno_gpu_core *gpucore);

/**
 * a6xx_hwsched_restart - Restart the gmu and gpu
 * @adreno_dev: Pointer to the adreno device
 */
void a6xx_hwsched_restart(struct adreno_device *adreno_dev);
#endif
Loading