Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a8a51a70 authored by Monk Liu's avatar Monk Liu Committed by Alex Deucher
Browse files

drm/amdgpu:cleanup job reset routine(v2)



merge the setting of the guilty flag on the context into this function
to avoid implementing an extra routine.

v2:
go through the entity list and compare the fence_ctx
before operating on the entity; otherwise the entity
may be just a wild pointer

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Chunming Zhou <David1.Zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 7716ea56
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -2869,7 +2869,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
			amd_sched_job_kickout(&job->base);

		/* only do job_reset on the hang ring if @job not NULL */
		amd_sched_hw_job_reset(&ring->sched);
		amd_sched_hw_job_reset(&ring->sched, NULL);

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
@@ -2990,7 +2990,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
		if (!ring || !ring->sched.thread)
			continue;
		kthread_park(ring->sched.thread);
		amd_sched_hw_job_reset(&ring->sched);
		amd_sched_hw_job_reset(&ring->sched, NULL);
		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}
+30 −1
Original line number Diff line number Diff line
@@ -443,9 +443,18 @@ static void amd_sched_job_timedout(struct work_struct *work)
	job->sched->ops->timedout_job(job);
}

void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
/*
 * Bump @s_job's karma (its hang/reset count) and, once it exceeds the
 * scheduler's hang_limit, flag the owning entity as guilty — provided the
 * entity actually carries a guilty marker to set.
 */
static void amd_sched_set_guilty(struct amd_sched_job *s_job)
{
	int karma = atomic_inc_return(&s_job->karma);

	/* Short-circuit keeps the same order as the original nested ifs:
	 * the guilty pointer is only inspected after the limit is crossed. */
	if (karma > s_job->sched->hang_limit && s_job->s_entity->guilty)
		atomic_set(s_job->s_entity->guilty, 1);
}

void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *bad)
{
	struct amd_sched_job *s_job;
	struct amd_sched_entity *entity, *tmp;
	int i;;

	spin_lock(&sched->job_list_lock);
	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
@@ -458,6 +467,26 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
		}
	}
	spin_unlock(&sched->job_list_lock);

	if (bad) {
		bool found = false;

		for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_MAX; i++ ) {
			struct amd_sched_rq *rq = &sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
				if (bad->s_fence->scheduled.context == entity->fence_context) {
					found = true;
					amd_sched_set_guilty(bad);
					break;
				}
			}
			spin_unlock(&rq->lock);
			if (found)
				break;
		}
	}
}

void amd_sched_job_kickout(struct amd_sched_job *s_job)
+1 −1
Original line number Diff line number Diff line
@@ -174,7 +174,7 @@ int amd_sched_job_init(struct amd_sched_job *job,
		       struct amd_gpu_scheduler *sched,
		       struct amd_sched_entity *entity,
		       void *owner);
void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *job);
void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
bool amd_sched_dependency_optimized(struct dma_fence* fence,
				    struct amd_sched_entity *entity);