Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c7ae72c0 authored by Chunming Zhou, committed by Alex Deucher
Browse files

drm/amdgpu: use IB for copy buffer of eviction



This aids handling buffers moves with the scheduler.

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
parent 113cd9da
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -247,7 +247,7 @@ struct amdgpu_buffer_funcs {
	unsigned	copy_num_dw;

	/* used for buffer migration */
	void (*emit_copy_buffer)(struct amdgpu_ring *ring,
	void (*emit_copy_buffer)(struct amdgpu_ib *ib,
				 /* src addr in bytes */
				 uint64_t src_offset,
				 /* dst addr in bytes */
@@ -518,7 +518,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
		       uint64_t dst_offset,
		       uint32_t byte_count,
		       struct reservation_object *resv,
		       struct amdgpu_fence **fence);
		       struct fence **fence);
int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);

struct amdgpu_bo_list_entry {
@@ -2247,7 +2247,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
#define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
#define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
#define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b))
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b))
#define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b))
#define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev))
#define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
@@ -2379,7 +2379,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
		       uint64_t addr);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
		      struct amdgpu_bo_va *bo_va);

int amdgpu_vm_free_job(struct amdgpu_job *job);
/*
 * functions used by amdgpu_encoder.c
 */
+4 −4
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
{
	unsigned long start_jiffies;
	unsigned long end_jiffies;
	struct amdgpu_fence *fence = NULL;
	struct fence *fence = NULL;
	int i, r;

	start_jiffies = jiffies;
@@ -42,17 +42,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
		r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence);
		if (r)
			goto exit_do_move;
		r = fence_wait(&fence->base, false);
		r = fence_wait(fence, false);
		if (r)
			goto exit_do_move;
		amdgpu_fence_unref(&fence);
		fence_put(fence);
	}
	end_jiffies = jiffies;
	r = jiffies_to_msecs(end_jiffies - start_jiffies);

exit_do_move:
	if (fence)
		amdgpu_fence_unref(&fence);
		fence_put(fence);
	return r;
}

+6 −6
Original line number Diff line number Diff line
@@ -77,7 +77,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
		void *gtt_map, *vram_map;
		void **gtt_start, **gtt_end;
		void **vram_start, **vram_end;
		struct amdgpu_fence *fence = NULL;
		struct fence *fence = NULL;

		r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i);
@@ -116,13 +116,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
			goto out_lclean_unpin;
		}

		r = fence_wait(&fence->base, false);
		r = fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
			goto out_lclean_unpin;
		}

		amdgpu_fence_unref(&fence);
		fence_put(fence);

		r = amdgpu_bo_kmap(vram_obj, &vram_map);
		if (r) {
@@ -161,13 +161,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
			goto out_lclean_unpin;
		}

		r = fence_wait(&fence->base, false);
		r = fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
			goto out_lclean_unpin;
		}

		amdgpu_fence_unref(&fence);
		fence_put(fence);

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
@@ -214,7 +214,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
			amdgpu_bo_unref(&gtt_obj[i]);
		}
		if (fence)
			amdgpu_fence_unref(&fence);
			fence_put(fence);
		break;
	}

+40 −37
Original line number Diff line number Diff line
@@ -228,7 +228,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
	struct amdgpu_device *adev;
	struct amdgpu_ring *ring;
	uint64_t old_start, new_start;
	struct amdgpu_fence *fence;
	struct fence *fence;
	int r;

	adev = amdgpu_get_adev(bo->bdev);
@@ -269,9 +269,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
			       new_mem->num_pages * PAGE_SIZE, /* bytes */
			       bo->resv, &fence);
	/* FIXME: handle copy error */
	r = ttm_bo_move_accel_cleanup(bo, &fence->base,
	r = ttm_bo_move_accel_cleanup(bo, fence,
				      evict, no_wait_gpu, new_mem);
	amdgpu_fence_unref(&fence);
	fence_put(fence);
	return r;
}

@@ -987,52 +987,48 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
		       uint64_t dst_offset,
		       uint32_t byte_count,
		       struct reservation_object *resv,
		       struct amdgpu_fence **fence)
		       struct fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_sync sync;
	uint32_t max_bytes;
	unsigned num_loops, num_dw;
	struct amdgpu_ib *ib;
	unsigned i;
	int r;

	/* sync other rings */
	amdgpu_sync_create(&sync);
	if (resv) {
		r = amdgpu_sync_resv(adev, &sync, resv, false);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			amdgpu_sync_free(adev, &sync, NULL);
			return r;
		}
	}

	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;

	/* for fence and sync */
	num_dw += 64 + AMDGPU_NUM_SYNCS * 8;
	/* for IB padding */
	while (num_dw & 0x7)
		num_dw++;

	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!ib)
		return -ENOMEM;

	r = amdgpu_sync_wait(&sync);
	r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);
	if (r) {
		DRM_ERROR("sync wait failed (%d).\n", r);
		amdgpu_sync_free(adev, &sync, NULL);
		kfree(ib);
		return r;
	}

	r = amdgpu_ring_lock(ring, num_dw);
	ib->length_dw = 0;

	if (resv) {
		r = amdgpu_sync_resv(adev, &ib->sync, resv,
				     AMDGPU_FENCE_OWNER_UNDEFINED);
		if (r) {
		DRM_ERROR("ring lock failed (%d).\n", r);
		amdgpu_sync_free(adev, &sync, NULL);
		return r;
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}
	amdgpu_sync_rings(&sync, ring);

	for (i = 0; i < num_loops; i++) {
		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

		amdgpu_emit_copy_buffer(adev, ring, src_offset, dst_offset,
		amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset,
					cur_size_in_bytes);

		src_offset += cur_size_in_bytes;
@@ -1040,17 +1036,24 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
		byte_count -= cur_size_in_bytes;
	}

	r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_MOVE, fence);
	if (r) {
		amdgpu_ring_unlock_undo(ring);
		amdgpu_sync_free(adev, &sync, NULL);
		return r;
	}

	amdgpu_ring_unlock_commit(ring);
	amdgpu_sync_free(adev, &sync, &(*fence)->base);
	amdgpu_vm_pad_ib(adev, ib);
	WARN_ON(ib->length_dw > num_dw);
	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
						 &amdgpu_vm_free_job,
						 AMDGPU_FENCE_OWNER_MOVE,
						 fence);
	if (r)
		goto error_free;

	if (!amdgpu_enable_scheduler) {
		amdgpu_ib_free(adev, ib);
		kfree(ib);
	}
	return 0;
error_free:
	amdgpu_ib_free(adev, ib);
	kfree(ib);
	return r;
}

#if defined(CONFIG_DEBUG_FS)
+1 −2
Original line number Diff line number Diff line
@@ -316,8 +316,7 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
	}
}

static int amdgpu_vm_free_job(
	struct amdgpu_job *sched_job)
int amdgpu_vm_free_job(struct amdgpu_job *sched_job)
{
	int i;
	for (i = 0; i < sched_job->num_ibs; i++)
Loading