Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2bb795f5 authored by James Zhu, committed by Alex Deucher
Browse files

drm/amdgpu/vg20:Restruct uvd to support multiple uvds



Vega20 has two UVD instances. Restructure amdgpu_device::uvd to support
multiple UVD instances. There are no logical changes here.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 602ed6c6
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -376,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
	struct amdgpu_device *adev = ring->adev;
	uint64_t index;

	if (ring != &adev->uvd.ring) {
	if (ring != &adev->uvd.inst->ring) {
		ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
		ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
	} else {
		/* put fence directly behind firmware */
		index = ALIGN(adev->uvd.fw->size, 8);
		ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
		ring->fence_drv.cpu_addr = adev->uvd.inst->cpu_addr + index;
		ring->fence_drv.gpu_addr = adev->uvd.inst->gpu_addr + index;
	}
	amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
	amdgpu_irq_get(adev, irq_src, irq_type);
+2 −2
Original line number Diff line number Diff line
@@ -348,7 +348,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
			break;
		case AMDGPU_HW_IP_UVD:
			type = AMD_IP_BLOCK_TYPE_UVD;
			ring_mask = adev->uvd.ring.ready ? 1 : 0;
			ring_mask = adev->uvd.inst->ring.ready ? 1 : 0;
			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
			ib_size_alignment = 16;
			break;
@@ -362,7 +362,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
		case AMDGPU_HW_IP_UVD_ENC:
			type = AMD_IP_BLOCK_TYPE_UVD;
			for (i = 0; i < adev->uvd.num_enc_rings; i++)
				ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i);
				ring_mask |= ((adev->uvd.inst->ring_enc[i].ready ? 1 : 0) << i);
			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
			ib_size_alignment = 1;
			break;
+2 −2
Original line number Diff line number Diff line
@@ -77,13 +77,13 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
		*out_ring = &adev->sdma.instance[ring].ring;
		break;
	case AMDGPU_HW_IP_UVD:
		*out_ring = &adev->uvd.ring;
		*out_ring = &adev->uvd.inst->ring;
		break;
	case AMDGPU_HW_IP_VCE:
		*out_ring = &adev->vce.ring[ring];
		break;
	case AMDGPU_HW_IP_UVD_ENC:
		*out_ring = &adev->uvd.ring_enc[ring];
		*out_ring = &adev->uvd.inst->ring_enc[ring];
		break;
	case AMDGPU_HW_IP_VCN_DEC:
		*out_ring = &adev->vcn.ring_dec;
+51 −51
Original line number Diff line number Diff line
@@ -129,7 +129,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
	unsigned version_major, version_minor, family_id;
	int i, r;

	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
	INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler);

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
@@ -237,16 +237,16 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);

	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
				    &adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst->vcpu_bo,
				    &adev->uvd.inst->gpu_addr, &adev->uvd.inst->cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
		return r;
	}

	ring = &adev->uvd.ring;
	ring = &adev->uvd.inst->ring;
	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
	r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity,
	r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity,
				  rq, NULL);
	if (r != 0) {
		DRM_ERROR("Failed setting up UVD run queue.\n");
@@ -254,8 +254,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
	}

	for (i = 0; i < adev->uvd.max_handles; ++i) {
		atomic_set(&adev->uvd.handles[i], 0);
		adev->uvd.filp[i] = NULL;
		atomic_set(&adev->uvd.inst->handles[i], 0);
		adev->uvd.inst->filp[i] = NULL;
	}

	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
@@ -285,18 +285,18 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
	int i;
	kfree(adev->uvd.saved_bo);
	kfree(adev->uvd.inst->saved_bo);

	drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
	drm_sched_entity_fini(&adev->uvd.inst->ring.sched, &adev->uvd.inst->entity);

	amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
			      &adev->uvd.gpu_addr,
			      (void **)&adev->uvd.cpu_addr);
	amdgpu_bo_free_kernel(&adev->uvd.inst->vcpu_bo,
			      &adev->uvd.inst->gpu_addr,
			      (void **)&adev->uvd.inst->cpu_addr);

	amdgpu_ring_fini(&adev->uvd.ring);
	amdgpu_ring_fini(&adev->uvd.inst->ring);

	for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
		amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
		amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);

	release_firmware(adev->uvd.fw);

@@ -309,29 +309,29 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
	void *ptr;
	int i;

	if (adev->uvd.vcpu_bo == NULL)
	if (adev->uvd.inst->vcpu_bo == NULL)
		return 0;

	cancel_delayed_work_sync(&adev->uvd.idle_work);
	cancel_delayed_work_sync(&adev->uvd.inst->idle_work);

	/* only valid for physical mode */
	if (adev->asic_type < CHIP_POLARIS10) {
		for (i = 0; i < adev->uvd.max_handles; ++i)
			if (atomic_read(&adev->uvd.handles[i]))
			if (atomic_read(&adev->uvd.inst->handles[i]))
				break;

		if (i == adev->uvd.max_handles)
			return 0;
	}

	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
	ptr = adev->uvd.cpu_addr;
	size = amdgpu_bo_size(adev->uvd.inst->vcpu_bo);
	ptr = adev->uvd.inst->cpu_addr;

	adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
	if (!adev->uvd.saved_bo)
	adev->uvd.inst->saved_bo = kmalloc(size, GFP_KERNEL);
	if (!adev->uvd.inst->saved_bo)
		return -ENOMEM;

	memcpy_fromio(adev->uvd.saved_bo, ptr, size);
	memcpy_fromio(adev->uvd.inst->saved_bo, ptr, size);

	return 0;
}
@@ -341,16 +341,16 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
	unsigned size;
	void *ptr;

	if (adev->uvd.vcpu_bo == NULL)
	if (adev->uvd.inst->vcpu_bo == NULL)
		return -EINVAL;

	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
	ptr = adev->uvd.cpu_addr;
	size = amdgpu_bo_size(adev->uvd.inst->vcpu_bo);
	ptr = adev->uvd.inst->cpu_addr;

	if (adev->uvd.saved_bo != NULL) {
		memcpy_toio(ptr, adev->uvd.saved_bo, size);
		kfree(adev->uvd.saved_bo);
		adev->uvd.saved_bo = NULL;
	if (adev->uvd.inst->saved_bo != NULL) {
		memcpy_toio(ptr, adev->uvd.inst->saved_bo, size);
		kfree(adev->uvd.inst->saved_bo);
		adev->uvd.inst->saved_bo = NULL;
	} else {
		const struct common_firmware_header *hdr;
		unsigned offset;
@@ -358,14 +358,14 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
		hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
			offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
			memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
			memcpy_toio(adev->uvd.inst->cpu_addr, adev->uvd.fw->data + offset,
				    le32_to_cpu(hdr->ucode_size_bytes));
			size -= le32_to_cpu(hdr->ucode_size_bytes);
			ptr += le32_to_cpu(hdr->ucode_size_bytes);
		}
		memset_io(ptr, 0, size);
		/* to restore uvd fence seq */
		amdgpu_fence_driver_force_completion(&adev->uvd.ring);
		amdgpu_fence_driver_force_completion(&adev->uvd.inst->ring);
	}

	return 0;
@@ -373,12 +373,12 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)

void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->uvd.ring;
	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
	int i, r;

	for (i = 0; i < adev->uvd.max_handles; ++i) {
		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
		if (handle != 0 && adev->uvd.filp[i] == filp) {
		uint32_t handle = atomic_read(&adev->uvd.inst->handles[i]);
		if (handle != 0 && adev->uvd.inst->filp[i] == filp) {
			struct dma_fence *fence;

			r = amdgpu_uvd_get_destroy_msg(ring, handle,
@@ -391,8 +391,8 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
			dma_fence_wait(fence, false);
			dma_fence_put(fence);

			adev->uvd.filp[i] = NULL;
			atomic_set(&adev->uvd.handles[i], 0);
			adev->uvd.inst->filp[i] = NULL;
			atomic_set(&adev->uvd.inst->handles[i], 0);
		}
	}
}
@@ -696,13 +696,13 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,

		/* try to alloc a new handle */
		for (i = 0; i < adev->uvd.max_handles; ++i) {
			if (atomic_read(&adev->uvd.handles[i]) == handle) {
			if (atomic_read(&adev->uvd.inst->handles[i]) == handle) {
				DRM_ERROR("Handle 0x%x already in use!\n", handle);
				return -EINVAL;
			}

			if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
				adev->uvd.filp[i] = ctx->parser->filp;
			if (!atomic_cmpxchg(&adev->uvd.inst->handles[i], 0, handle)) {
				adev->uvd.inst->filp[i] = ctx->parser->filp;
				return 0;
			}
		}
@@ -719,8 +719,8 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,

		/* validate the handle */
		for (i = 0; i < adev->uvd.max_handles; ++i) {
			if (atomic_read(&adev->uvd.handles[i]) == handle) {
				if (adev->uvd.filp[i] != ctx->parser->filp) {
			if (atomic_read(&adev->uvd.inst->handles[i]) == handle) {
				if (adev->uvd.inst->filp[i] != ctx->parser->filp) {
					DRM_ERROR("UVD handle collision detected!\n");
					return -EINVAL;
				}
@@ -734,7 +734,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
	case 2:
		/* it's a destroy msg, free the handle */
		for (i = 0; i < adev->uvd.max_handles; ++i)
			atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
			atomic_cmpxchg(&adev->uvd.inst->handles[i], handle, 0);
		amdgpu_bo_kunmap(bo);
		return 0;

@@ -810,7 +810,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
		}

		if ((cmd == 0 || cmd == 0x3) &&
		    (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) {
		    (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
			DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
				  start, end);
			return -EINVAL;
@@ -1043,7 +1043,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
		if (r)
			goto err_free;

		r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
		r = amdgpu_job_submit(job, ring, &adev->uvd.inst->entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
		if (r)
			goto err_free;
@@ -1131,8 +1131,8 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, uvd.idle_work.work);
	unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
		container_of(work, struct amdgpu_device, uvd.inst->idle_work.work);
	unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.inst->ring);

	if (fences == 0) {
		if (adev->pm.dpm_enabled) {
@@ -1146,7 +1146,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
							       AMD_CG_STATE_GATE);
		}
	} else {
		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
		schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
	}
}

@@ -1158,7 +1158,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
	if (amdgpu_sriov_vf(adev))
		return;

	set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
	set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work);
	if (set_clocks) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_uvd(adev, true);
@@ -1175,7 +1175,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev))
		schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
		schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
}

/**
@@ -1237,7 +1237,7 @@ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
		 * necessarily linear. So we need to count
		 * all non-zero handles.
		 */
		if (atomic_read(&adev->uvd.handles[i]))
		if (atomic_read(&adev->uvd.inst->handles[i]))
			used_handles++;
	}

+13 −6
Original line number Diff line number Diff line
@@ -31,30 +31,37 @@
#define AMDGPU_UVD_SESSION_SIZE		(50*1024)
#define AMDGPU_UVD_FIRMWARE_OFFSET	256

#define AMDGPU_MAX_UVD_INSTANCES			2

#define AMDGPU_UVD_FIRMWARE_SIZE(adev)    \
	(AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \
			       8) - AMDGPU_UVD_FIRMWARE_OFFSET)

struct amdgpu_uvd {
struct amdgpu_uvd_inst {
	struct amdgpu_bo	*vcpu_bo;
	void			*cpu_addr;
	uint64_t		gpu_addr;
	unsigned		fw_version;
	void			*saved_bo;
	unsigned		max_handles;
	atomic_t		handles[AMDGPU_MAX_UVD_HANDLES];
	struct drm_file		*filp[AMDGPU_MAX_UVD_HANDLES];
	struct delayed_work	idle_work;
	const struct firmware	*fw;	/* UVD firmware */
	struct amdgpu_ring	ring;
	struct amdgpu_ring	ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
	struct amdgpu_irq_src	irq;
	bool			address_64_bit;
	bool			use_ctx_buf;
	struct drm_sched_entity entity;
	struct drm_sched_entity entity_enc;
	uint32_t                srbm_soft_reset;
};

struct amdgpu_uvd {
	const struct firmware	*fw;	/* UVD firmware */
	unsigned		fw_version;
	unsigned		max_handles;
	unsigned		num_enc_rings;
	uint8_t		num_uvd_inst;
	bool			address_64_bit;
	bool			use_ctx_buf;
	struct amdgpu_uvd_inst		inst[AMDGPU_MAX_UVD_INSTANCES];
};

int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
Loading