Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 753ad49c authored by Monk Liu's avatar Monk Liu Committed by Alex Deucher
Browse files

drm/amdgpu:implement CONTEXT_CONTROL (v5)



v1:
for gfx8, use CONTEXT_CONTROL package to dynamically
skip preamble CEIB and other load_xxx command in sequence.

v2:
support GFX7 as well.
remove cntxcntl in compute ring funcs because CPC doesn't
support this packet.

v3: fix reduntant judgement in cntxcntl.
v4: some cleanups, don't change cs_submit()
v5: keep old MESA supported & bump up KMS version.

Signed-off-by: default avatarMonk Liu <Monk.Liu@amd.com>
Ack-by: default avatarChunming Zhou <David1.Zhou@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 1db422de
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -320,6 +320,7 @@ struct amdgpu_ring_funcs {
	void (*begin_use)(struct amdgpu_ring *ring);
	void (*end_use)(struct amdgpu_ring *ring);
	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
};

/*
@@ -966,6 +967,7 @@ struct amdgpu_ctx {
	spinlock_t		ring_lock;
	struct fence            **fences;
	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
	bool preamble_presented;
};

struct amdgpu_ctx_mgr {
@@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser {
	struct amdgpu_bo_list_entry	uf_entry;
};

#define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in belonging context */
#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occured */

struct amdgpu_job {
	struct amd_sched_job    base;
	struct amdgpu_device	*adev;
@@ -1239,6 +1245,7 @@ struct amdgpu_job {
	struct amdgpu_sync	sync;
	struct amdgpu_ib	*ibs;
	struct fence		*fence; /* the hw fence */
	uint32_t		preamble_status;
	uint32_t		num_ibs;
	void			*owner;
	uint64_t		fence_ctx; /* the fence_context this job uses */
@@ -2276,6 +2283,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+8 −0
Original line number Diff line number Diff line
@@ -850,6 +850,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
		if (r)
			return r;

		if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
			if (!parser->ctx->preamble_presented) {
				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
				parser->ctx->preamble_presented = true;
			}
		}

		if (parser->job->ring && parser->job->ring != ring)
			return -EINVAL;

+2 −1
Original line number Diff line number Diff line
@@ -55,9 +55,10 @@
 * - 3.3.0 - Add VM support for UVD on supported hardware.
 * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
 * - 3.5.0 - Add support for new UVD_NO_OP register.
 * - 3.6.0 - kmd involves use CONTEXT_CONTROL in ring buffer.
 */
#define KMS_DRIVER_MAJOR	3
#define KMS_DRIVER_MINOR	5
#define KMS_DRIVER_MINOR	6
#define KMS_DRIVER_PATCHLEVEL	0

int amdgpu_vram_limit = 0;
+11 −1
Original line number Diff line number Diff line
@@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
	unsigned patch_offset = ~0;
	struct amdgpu_vm *vm;
	uint64_t fence_ctx;
	uint32_t status = 0;

	unsigned i;
	int r = 0;
@@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,

	skip_preamble = ring->current_ctx == fence_ctx;
	need_ctx_switch = ring->current_ctx != fence_ctx;
	if (job && ring->funcs->emit_cntxcntl) {
		if (need_ctx_switch)
			status |= AMDGPU_HAVE_CTX_SWITCH;
		status |= job->preamble_status;
		amdgpu_ring_emit_cntxcntl(ring, status);
	}

	for (i = 0; i < num_ibs; ++i) {
		ib = &ibs[i];

		/* drop preamble IBs if we don't have a context switch */
		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
			skip_preamble &&
			!(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST))
			continue;

		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
+20 −0
Original line number Diff line number Diff line
@@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
	amdgpu_ring_write(ring, control);
}

static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs */
		dw2 |= 0x10002;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

/**
 * gfx_v7_0_ring_test_ib - basic ring IB test
 *
@@ -4938,6 +4957,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
	.test_ib = gfx_v7_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
};

static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
Loading