Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 753ad49c authored Aug 26, 2016 by Monk Liu Committed by Alex Deucher Sep 14, 2016

drm/amdgpu:implement CONTEXT_CONTROL (v5)



v1:
for gfx8, use the CONTEXT_CONTROL packet to dynamically
skip the preamble CEIB and other load_xxx commands in the sequence.

v2:
support GFX7 as well.
remove cntxcntl in compute ring funcs because CPC doesn't
support this packet.

v3: fix redundant judgement in cntxcntl.
v4: some cleanups, don't change cs_submit()
v5: keep old MESA supported & bump up KMS version.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Ack-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

parent 1db422de

drivers/gpu/drm/amd/amdgpu/amdgpu.h

+8 −0

Original line number	Diff line number	Diff line
		@@ -320,6 +320,7 @@ struct amdgpu_ring_funcs {
		void (*begin_use)(struct amdgpu_ring *ring);
		void (*end_use)(struct amdgpu_ring *ring);
		void (*emit_switch_buffer) (struct amdgpu_ring *ring);
		void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
		};

		/*
		@@ -966,6 +967,7 @@ struct amdgpu_ctx {
		spinlock_t ring_lock;
		struct fence **fences;
		struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
		bool preamble_presented;
		};

		struct amdgpu_ctx_mgr {
		@@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser {
		struct amdgpu_bo_list_entry uf_entry;
		};

		/*
		 * Bit flags describing a job's preamble/context state. They are
		 * accumulated in amdgpu_job.preamble_status during command submission
		 * and passed as the "flags" argument to the ring's emit_cntxcntl callback.
		 */
		#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* set when the command submission contains a preamble IB */
		#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* set when this is the first preamble IB presented in its context */
		#define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* set when a context switch occurred */

		struct amdgpu_job {
		struct amd_sched_job base;
		struct amdgpu_device *adev;
		@@ -1239,6 +1245,7 @@ struct amdgpu_job {
		struct amdgpu_sync sync;
		struct amdgpu_ib *ibs;
		struct fence *fence; /* the hw fence */
		uint32_t preamble_status;
		uint32_t num_ibs;
		void *owner;
		uint64_t fence_ctx; /* the fence_context this job uses */
		@@ -2276,6 +2283,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
		#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
		#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
		#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
		#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
		#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
		#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
		#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

+8 −0

Original line number	Diff line number	Diff line
		@@ -850,6 +850,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
		if (r)
		return r;

		if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
		parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
		if (!parser->ctx->preamble_presented) {
		parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
		parser->ctx->preamble_presented = true;
		}
		}

		if (parser->job->ring && parser->job->ring != ring)
		return -EINVAL;

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

+2 −1

Original line number	Diff line number	Diff line
		@@ -55,9 +55,10 @@
		* - 3.3.0 - Add VM support for UVD on supported hardware.
		* - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
		* - 3.5.0 - Add support for new UVD_NO_OP register.
		* - 3.6.0 - kmd uses CONTEXT_CONTROL in ring buffer.
		*/
		#define KMS_DRIVER_MAJOR 3
		#define KMS_DRIVER_MINOR 5
		#define KMS_DRIVER_MINOR 6
		#define KMS_DRIVER_PATCHLEVEL 0

		int amdgpu_vram_limit = 0;

drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

+11 −1

Original line number	Diff line number	Diff line
		@@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		unsigned patch_offset = ~0;
		struct amdgpu_vm *vm;
		uint64_t fence_ctx;
		uint32_t status = 0;

		unsigned i;
		int r = 0;
		@@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,

		skip_preamble = ring->current_ctx == fence_ctx;
		need_ctx_switch = ring->current_ctx != fence_ctx;
		if (job && ring->funcs->emit_cntxcntl) {
		if (need_ctx_switch)
		status |= AMDGPU_HAVE_CTX_SWITCH;
		status |= job->preamble_status;
		amdgpu_ring_emit_cntxcntl(ring, status);
		}

		for (i = 0; i < num_ibs; ++i) {
		ib = &ibs[i];

		/* drop preamble IBs if we don't have a context switch */
		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
		skip_preamble &&
		!(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST))
		continue;

		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,

drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c

+20 −0

Original line number	Diff line number	Diff line
		@@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
		amdgpu_ring_write(ring, control);
		}

		/**
		 * gfx_v7_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet on the ring
		 *
		 * @ring: ring the packet is written to
		 * @flags: AMDGPU_PREAMBLE_IB_* / AMDGPU_HAVE_CTX_SWITCH status bits
		 *
		 * Writes a three-dword CONTEXT_CONTROL packet: the PACKET3 header, the
		 * load-control dword and a zero dword.  The load-control dword always has
		 * load_enable set; the individual load_* bits are only added when @flags
		 * reports a context switch.
		 */
		static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
		{
			/* set load_enable otherwise this package is just NOPs */
			uint32_t dw2 = 0x80000000;

			if (flags & AMDGPU_HAVE_CTX_SWITCH) {
				/* set load_global_config & load_global_uconfig */
				dw2 |= 0x8001;
				/* set load_cs_sh_regs */
				dw2 |= 0x01000000;
				/* set load_per_context_state & load_gfx_sh_regs */
				dw2 |= 0x10002;
			}

			amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
			amdgpu_ring_write(ring, dw2);
			amdgpu_ring_write(ring, 0);
		}

		/**
		* gfx_v7_0_ring_test_ib - basic ring IB test
		*
		@@ -4938,6 +4957,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
		.test_ib = gfx_v7_0_ring_test_ib,
		.insert_nop = amdgpu_ring_insert_nop,
		.pad_ib = amdgpu_ring_generic_pad_ib,
		.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
		};

		static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {