Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8fdf074f authored by Monk Liu's avatar Monk Liu Committed by Alex Deucher
Browse files

drm/amdgpu:fix world switch hang



for SR-IOV, we must keep the pipeline-sync in the protection
of COND_EXEC, otherwise the command consumed by CPG is not
consistent when a world switch is triggered, e.g.:

world switch hit and the IB frame is skipped so the fence
won't signal, thus CP will jump to the next DMA frame's pipeline-sync
command, and it will make CP hang forever.

after the pipeline-sync is moved into COND_EXEC the consistency can be
guaranteed

Signed-off-by: default avatarMonk Liu <Monk.Liu@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent bdb8cd10
Loading
Loading
Loading
Loading
+3 −2
Original line number Original line Diff line number Diff line
@@ -130,6 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,


	unsigned i;
	unsigned i;
	int r = 0;
	int r = 0;
	bool need_pipe_sync = false;


	if (num_ibs == 0)
	if (num_ibs == 0)
		return -EINVAL;
		return -EINVAL;
@@ -165,7 +166,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
	if (ring->funcs->emit_pipeline_sync && job &&
	if (ring->funcs->emit_pipeline_sync && job &&
	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
	     amdgpu_vm_need_pipeline_sync(ring, job))) {
	     amdgpu_vm_need_pipeline_sync(ring, job))) {
		amdgpu_ring_emit_pipeline_sync(ring);
		need_pipe_sync = true;
		dma_fence_put(tmp);
		dma_fence_put(tmp);
	}
	}


@@ -173,7 +174,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		ring->funcs->insert_start(ring);
		ring->funcs->insert_start(ring);


	if (job) {
	if (job) {
		r = amdgpu_vm_flush(ring, job);
		r = amdgpu_vm_flush(ring, job, need_pipe_sync);
		if (r) {
		if (r) {
			amdgpu_ring_undo(ring);
			amdgpu_ring_undo(ring);
			return r;
			return r;
+5 −2
Original line number Original line Diff line number Diff line
@@ -743,7 +743,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
 *
 *
 * Emit a VM flush when it is necessary.
 * Emit a VM flush when it is necessary.
 */
 */
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
{
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	unsigned vmhub = ring->funcs->vmhub;
@@ -765,12 +765,15 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
		vm_flush_needed = true;
		vm_flush_needed = true;
	}
	}


	if (!vm_flush_needed && !gds_switch_needed)
	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
		return 0;
		return 0;


	if (ring->funcs->init_cond_exec)
	if (ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);
		patch_offset = amdgpu_ring_init_cond_exec(ring);


	if (need_pipe_sync)
		amdgpu_ring_emit_pipeline_sync(ring);

	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
		struct dma_fence *fence;
		struct dma_fence *fence;


+1 −1
Original line number Original line Diff line number Diff line
@@ -222,7 +222,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
		      struct amdgpu_sync *sync, struct dma_fence *fence,
		      struct amdgpu_sync *sync, struct dma_fence *fence,
		      struct amdgpu_job *job);
		      struct amdgpu_job *job);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
			unsigned vmid);
			unsigned vmid);
void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);