Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d240cd9e authored by Marek Olšák's avatar Marek Olšák Committed by Alex Deucher
Browse files

drm/amdgpu: optionally do a writeback but don't invalidate TC for IB fences



There is a new IB flag that enables this new behavior.
Full invalidation is unnecessary for RELEASE_MEM and doesn't make sense
when draw calls from two adjacent gfx IBs run in parallel. This will be
the new default for Mesa.

v2: bump the version

Signed-off-by: default avatarMarek Olšák <marek.olsak@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 3f188453
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -75,9 +75,10 @@
 * - 3.23.0 - Add query for VRAM lost counter
 * - 3.24.0 - Add high priority compute support for gfx9
 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
 * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
 */
#define KMS_DRIVER_MAJOR	3
#define KMS_DRIVER_MINOR	25
#define KMS_DRIVER_MINOR	26
#define KMS_DRIVER_PATCHLEVEL	0

int amdgpu_vram_limit = 0;
+3 −2
Original line number Diff line number Diff line
@@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
		      unsigned flags)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_fence *fence;
@@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
		       adev->fence_context + ring->idx,
		       seq);
	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
			       seq, AMDGPU_FENCE_FLAG_INT);
			       seq, flags | AMDGPU_FENCE_FLAG_INT);

	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
	/* This function can't be called concurrently anyway, otherwise
+6 −2
Original line number Diff line number Diff line
@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
	struct amdgpu_vm *vm;
	uint64_t fence_ctx;
	uint32_t status = 0, alloc_size;
	unsigned fence_flags = 0;

	unsigned i;
	int r = 0;
@@ -227,7 +228,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
#endif
		amdgpu_asic_invalidate_hdp(adev, ring);

	r = amdgpu_fence_emit(ring, f);
	if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
		fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;

	r = amdgpu_fence_emit(ring, f, fence_flags);
	if (r) {
		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
		if (job && job->vmid)
@@ -242,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
	/* wrap the last IB with fence */
	if (job && job->uf_addr) {
		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
				       AMDGPU_FENCE_FLAG_64BIT);
				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
	}

	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
+3 −1
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@

#define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
#define AMDGPU_FENCE_FLAG_INT           (1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2)

enum amdgpu_ring_type {
	AMDGPU_RING_TYPE_GFX,
@@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
				   unsigned irq_type);
void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
		      unsigned flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
void amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
+1 −1
Original line number Diff line number Diff line
@@ -633,7 +633,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);

	if (vm_flush_needed || pasid_mapping_needed) {
		r = amdgpu_fence_emit(ring, &fence);
		r = amdgpu_fence_emit(ring, &fence, 0);
		if (r)
			return r;
	}
Loading