Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5bbc553a authored by Leo Liu, committed by Alex Deucher
Browse files

drm/amdgpu: implement VCE two instances support



VCE 3.0 has two identical instances in the engine. They share
the same register names in different memory blocks, distinguished
by grbm_gfx_index. After init we select the master instance, which
dispatches tasks to the slave instance. The two instances
share the same firmware, but have their own stacks and heaps.

v2: add mutex for using grbm_gfx_index

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
parent e9822622
Loading
Loading
Loading
Loading
+87 −54
Original line number Diff line number Diff line
@@ -34,12 +34,16 @@
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"

#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

static void vce_v3_0_mc_resume(struct amdgpu_device *adev);
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);

@@ -104,28 +108,25 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int i, j, r;
	int idx, i, j, r;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if(idx == 0)
			WREG32_P(mmGRBM_GFX_INDEX, 0,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
		else
			WREG32_P(mmGRBM_GFX_INDEX,
				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);

	vce_v3_0_mc_resume(adev);
		vce_v3_0_mc_resume(adev, idx);

		/* set BUSY flag */
		WREG32_P(mmVCE_STATUS, 1, ~1);

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, ring->wptr);
	WREG32(mmVCE_RB_WPTR, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, ring->wptr);
	WREG32(mmVCE_RB_WPTR2, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK, ~VCE_VCPU_CNTL__CLK_EN_MASK);
		WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
			~VCE_VCPU_CNTL__CLK_EN_MASK);

		WREG32_P(mmVCE_SOFT_RESET,
			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
@@ -133,7 +134,8 @@ static int vce_v3_0_start(struct amdgpu_device *adev)

		mdelay(100);

	WREG32_P(mmVCE_SOFT_RESET, 0, ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		WREG32_P(mmVCE_SOFT_RESET, 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

		for (i = 0; i < 10; ++i) {
			uint32_t status;
@@ -148,10 +150,12 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
				break;

			DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_P(mmVCE_SOFT_RESET, VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			WREG32_P(mmVCE_SOFT_RESET,
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
			mdelay(10);
		WREG32_P(mmVCE_SOFT_RESET, 0, ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
			WREG32_P(mmVCE_SOFT_RESET, 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
			mdelay(10);
			r = -1;
		}
@@ -161,8 +165,27 @@ static int vce_v3_0_start(struct amdgpu_device *adev)

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
	mutex_unlock(&adev->grbm_idx_mutex);

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, ring->wptr);
	WREG32(mmVCE_RB_WPTR, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, ring->wptr);
	WREG32(mmVCE_RB_WPTR2, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	return 0;
}
@@ -292,7 +315,7 @@ static int vce_v3_0_resume(struct amdgpu_device *adev)
	return r;
}

static void vce_v3_0_mc_resume(struct amdgpu_device *adev)
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

@@ -313,15 +336,25 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev)
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);

		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);