drm/amdgpu: add initial vcn support and decode tests (95d0906f) · Commits · e / devices / android_kernel_xiaomi_nabu

drivers/gpu/drm/amd/amdgpu/Makefile

+4 −0

Original line number	Diff line number	Diff line
		@@ -93,6 +93,10 @@ amdgpu-y += \
		vce_v3_0.o \
		vce_v4_0.o

		# add VCN block
		amdgpu-y += \
		amdgpu_vcn.o

		# add amdkfd interfaces
		amdgpu-y += \
		amdgpu_amdkfd.o \

drivers/gpu/drm/amd/amdgpu/amdgpu.h

+36 −4

Original line number	Diff line number	Diff line
		@@ -1180,6 +1180,31 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb);

		void amdgpu_get_pcie_info(struct amdgpu_device *adev);

		/*
		* VCN
		*/
		/* Bytes added to the VCPU buffer object size for the stack region. */
		#define AMDGPU_VCN_STACK_SIZE (200*1024)
		/* Bytes added to the VCPU buffer object size for the heap region. */
		#define AMDGPU_VCN_HEAP_SIZE (256*1024)
		/* Bytes per session; the VCPU buffer object reserves 40 of these. */
		#define AMDGPU_VCN_SESSION_SIZE (50*1024)
		/* NOTE(review): presumably a byte offset into the firmware image - confirm against users. */
		#define AMDGPU_VCN_FIRMWARE_OFFSET 256
		/* Number of entries in amdgpu_vcn.ring_enc[]. */
		#define AMDGPU_VCN_MAX_ENC_RINGS 3

		struct amdgpu_vcn {
		struct amdgpu_bo *vcpu_bo;
		void *cpu_addr;
		uint64_t gpu_addr;
		unsigned fw_version;
		void *saved_bo;
		struct delayed_work idle_work;
		const struct firmware fw; / VCN firmware */
		struct amdgpu_ring ring_dec;
		struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
		struct amdgpu_irq_src irq;
		struct amd_sched_entity entity_dec;
		struct amd_sched_entity entity_enc;
		uint32_t srbm_soft_reset;
		};

		/*
		* SDMA
		*/
		@@ -1572,11 +1597,18 @@ struct amdgpu_device {
		/* sdma */
		struct amdgpu_sdma sdma;

		union {
		struct {
		/* uvd */
		struct amdgpu_uvd uvd;

		/* vce */
		struct amdgpu_vce vce;
		};

		/* vcn */
		struct amdgpu_vcn vcn;
		};

		/* firmwares */
		struct amdgpu_firmware firmware;

drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c

0 → 100644

+425 −0

Original line number	Diff line number	Diff line
		/*
		* Copyright 2016 Advanced Micro Devices, Inc.
		* All Rights Reserved.
		*
		* Permission is hereby granted, free of charge, to any person obtaining a
		* copy of this software and associated documentation files (the
		* "Software"), to deal in the Software without restriction, including
		* without limitation the rights to use, copy, modify, merge, publish,
		* distribute, sub license, and/or sell copies of the Software, and to
		* permit persons to whom the Software is furnished to do so, subject to
		* the following conditions:
		*
		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
		* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
		* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
		* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
		* USE OR OTHER DEALINGS IN THE SOFTWARE.
		*
		* The above copyright notice and this permission notice (including the
		* next paragraph) shall be included in all copies or substantial portions
		* of the Software.
		*
		*/

		#include <linux/firmware.h>
		#include <linux/module.h>
		#include <drm/drmP.h>
		#include <drm/drm.h>

		#include "amdgpu.h"
		#include "amdgpu_pm.h"
		#include "amdgpu_vcn.h"
		#include "soc15d.h"
		#include "soc15_common.h"

		#include "vega10/soc15ip.h"
		#include "raven1/VCN/vcn_1_0_offset.h"

		/* Delay before the idle work runs after the ring was last used: 1 second */
		#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)

		/* Firmware Names */
		#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"

		MODULE_FIRMWARE(FIRMWARE_RAVEN);

		/* Forward declaration: registered in amdgpu_vcn_sw_init(), defined further down. */
		static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

		/**
		 * amdgpu_vcn_sw_init - software-side setup of the VCN block
		 * @adev: amdgpu device
		 *
		 * Registers the idle work, loads and validates the VCN firmware for the
		 * ASIC, allocates the VCPU buffer object in VRAM (firmware image plus
		 * stack, heap and 40 session slots) and initialises the decode scheduler
		 * entity.
		 *
		 * Resources acquired by an earlier step are released again when a later
		 * step fails, so a failed call leaves no firmware or buffer object behind.
		 *
		 * Returns 0 on success, -EINVAL for an unsupported ASIC, or the error
		 * code of the failing step.
		 */
		int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
		{
			struct amdgpu_ring *ring;
			struct amd_sched_rq *rq;
			unsigned long bo_size;
			const char *fw_name;
			const struct common_firmware_header *hdr;
			unsigned version_major, version_minor, family_id;
			int r;

			INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);

			switch (adev->asic_type) {
			case CHIP_RAVEN:
				fw_name = FIRMWARE_RAVEN;
				break;
			default:
				return -EINVAL;
			}

			r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
			if (r) {
				dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
					fw_name);
				return r;
			}

			r = amdgpu_ucode_validate(adev->vcn.fw);
			if (r) {
				dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
					fw_name);
				goto err_release_fw;
			}

			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
			family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
			version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
			version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
			DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
				version_major, version_minor, family_id);

			bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
				+ AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
				+ AMDGPU_VCN_SESSION_SIZE * 40;
			r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
						    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
			if (r) {
				dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
				goto err_release_fw;
			}

			ring = &adev->vcn.ring_dec;
			rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
			r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
						  rq, amdgpu_sched_jobs);
			if (r != 0) {
				DRM_ERROR("Failed setting up VCN dec run queue.\n");
				goto err_free_bo;
			}

			return 0;

		err_free_bo:
			/* same release call amdgpu_vcn_sw_fini() uses for the VCPU buffer */
			amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
					      &adev->vcn.gpu_addr,
					      (void **)&adev->vcn.cpu_addr);
		err_release_fw:
			release_firmware(adev->vcn.fw);
			/* do not leave a dangling firmware pointer behind */
			adev->vcn.fw = NULL;
			return r;
		}

		int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
		{
		kfree(adev->vcn.saved_bo);

		amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);

		amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
		&adev->vcn.gpu_addr,
		(void **)&adev->vcn.cpu_addr);

		amdgpu_ring_fini(&adev->vcn.ring_dec);

		release_firmware(adev->vcn.fw);

		return 0;
		}

		int amdgpu_vcn_suspend(struct amdgpu_device *adev)
		{
		unsigned size;
		void *ptr;

		if (adev->vcn.vcpu_bo == NULL)
		return 0;

		cancel_delayed_work_sync(&adev->vcn.idle_work);

		size = amdgpu_bo_size(adev->vcn.vcpu_bo);
		ptr = adev->vcn.cpu_addr;

		adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
		if (!adev->vcn.saved_bo)
		return -ENOMEM;

		memcpy_fromio(adev->vcn.saved_bo, ptr, size);

		return 0;
		}

		int amdgpu_vcn_resume(struct amdgpu_device *adev)
		{
		unsigned size;
		void *ptr;

		if (adev->vcn.vcpu_bo == NULL)
		return -EINVAL;

		size = amdgpu_bo_size(adev->vcn.vcpu_bo);
		ptr = adev->vcn.cpu_addr;

		if (adev->vcn.saved_bo != NULL) {
		memcpy_toio(ptr, adev->vcn.saved_bo, size);
		kfree(adev->vcn.saved_bo);
		adev->vcn.saved_bo = NULL;
		} else {
		const struct common_firmware_header *hdr;
		unsigned offset;

		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
		memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
		le32_to_cpu(hdr->ucode_size_bytes));
		size -= le32_to_cpu(hdr->ucode_size_bytes);
		ptr += le32_to_cpu(hdr->ucode_size_bytes);
		memset_io(ptr, 0, size);
		}

		return 0;
		}

		/**
		 * amdgpu_vcn_dec_send_msg - submit a decode message buffer to the ring
		 * @ring: decode ring to submit on
		 * @bo: buffer object holding the message
		 * @direct: true to schedule the IB directly, false to go through the
		 *          decode scheduler entity
		 * @fence: optional; on success receives a reference to the submission fence
		 *
		 * Reserves and validates @bo, builds a 16-dword IB that hands the GPU
		 * address of @bo to the VCPU via the GPCOM data/command registers (padded
		 * with NO_OP writes) and submits it.
		 *
		 * The caller's reference on @bo is consumed: it is dropped on success and
		 * on every error path, so callers must not touch @bo afterwards.
		 *
		 * Returns 0 on success or the error code of the failing step.
		 */
		static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
						   bool direct, struct dma_fence **fence)
		{
			struct ttm_validate_buffer tv;
			struct ww_acquire_ctx ticket;
			struct list_head head;
			struct amdgpu_job *job;
			struct amdgpu_ib *ib;
			struct dma_fence *f = NULL;
			struct amdgpu_device *adev = ring->adev;
			uint64_t addr;
			int i, r;

			memset(&tv, 0, sizeof(tv));
			tv.bo = &bo->tbo;

			INIT_LIST_HEAD(&head);
			list_add(&tv.head, &head);

			r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
			if (r)
				goto err_unref;

			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
			if (r)
				goto err;

			r = amdgpu_job_alloc_with_ib(adev, 64, &job);
			if (r)
				goto err;

			ib = &job->ibs[0];
			addr = amdgpu_bo_gpu_offset(bo);
			/* low and high half of the message address, then the command */
			ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
			ib->ptr[1] = addr;
			ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
			ib->ptr[3] = addr >> 32;
			ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
			ib->ptr[5] = 0;
			/* pad the IB up to 16 dwords with NO_OP register writes */
			for (i = 6; i < 16; i += 2) {
				ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
				ib->ptr[i+1] = 0;
			}
			ib->length_dw = 16;

			if (direct) {
				r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
				job->fence = dma_fence_get(f);
				if (r)
					goto err_free;

				amdgpu_job_free(job);
			} else {
				r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
						      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
				if (r)
					goto err_free;
			}

			ttm_eu_fence_buffer_objects(&ticket, &head, f);

			if (fence)
				*fence = dma_fence_get(f);
			amdgpu_bo_unref(&bo);
			dma_fence_put(f);

			return 0;

		err_free:
			amdgpu_job_free(job);

		err:
			ttm_eu_backoff_reservation(&ticket, &head);

		err_unref:
			/* callers hand over their reference; drop it on failure as well */
			amdgpu_bo_unref(&bo);
			return r;
		}

		/**
		 * amdgpu_vcn_dec_get_create_msg - build and send a decoder "create" message
		 * @ring: decode ring to submit on
		 * @handle: session handle written into the message
		 * @fence: optional; passed through to amdgpu_vcn_dec_send_msg()
		 *
		 * Allocates a CPU-accessible, contiguous VRAM buffer object, fills it
		 * with a fixed create message for @handle and submits it directly
		 * (not through the scheduler entity).
		 *
		 * Returns 0 on success or the error code of the failing step.
		 */
		static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
							 struct dma_fence **fence)
		{
			struct amdgpu_device *adev = ring->adev;
			struct amdgpu_bo *bo;
			uint32_t *msg;
			int r, i;

			/*
			 * NOTE(review): the BO is requested with size 1024 while 1024 dwords
			 * are written below; this presumably relies on the allocation being
			 * rounded up to at least 4096 bytes - confirm.
			 */
			r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL, &bo);
			if (r)
				return r;

			r = amdgpu_bo_reserve(bo, false);
			if (r) {
				amdgpu_bo_unref(&bo);
				return r;
			}

			r = amdgpu_bo_kmap(bo, (void **)&msg);
			if (r) {
				amdgpu_bo_unreserve(bo);
				amdgpu_bo_unref(&bo);
				return r;
			}

			/* stitch together an vcn create msg */
			msg[0] = cpu_to_le32(0x00000de4);
			msg[1] = cpu_to_le32(0x00000000);
			msg[2] = cpu_to_le32(handle);
			msg[3] = cpu_to_le32(0x00000000);
			msg[4] = cpu_to_le32(0x00000000);
			msg[5] = cpu_to_le32(0x00000000);
			msg[6] = cpu_to_le32(0x00000000);
			msg[7] = cpu_to_le32(0x00000780);
			msg[8] = cpu_to_le32(0x00000440);
			msg[9] = cpu_to_le32(0x00000000);
			msg[10] = cpu_to_le32(0x01b37000);
			/* remaining dwords of the message are zero */
			for (i = 11; i < 1024; ++i)
				msg[i] = cpu_to_le32(0x0);

			amdgpu_bo_kunmap(bo);
			amdgpu_bo_unreserve(bo);

			return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
		}

		/**
		 * amdgpu_vcn_dec_get_destroy_msg - build and send a decoder "destroy" message
		 * @ring: decode ring to submit on
		 * @handle: session handle written into the message
		 * @direct: passed through to amdgpu_vcn_dec_send_msg()
		 * @fence: optional; passed through to amdgpu_vcn_dec_send_msg()
		 *
		 * Allocates a CPU-accessible, contiguous VRAM buffer object, fills it
		 * with a fixed destroy message for @handle and submits it.
		 *
		 * Returns 0 on success or the error code of the failing step.
		 */
		static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
							  bool direct, struct dma_fence **fence)
		{
			struct amdgpu_device *adev = ring->adev;
			struct amdgpu_bo *bo;
			uint32_t *msg;
			int r, i;

			/*
			 * NOTE(review): the BO is requested with size 1024 while 1024 dwords
			 * are written below; this presumably relies on the allocation being
			 * rounded up to at least 4096 bytes - confirm.
			 */
			r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL, &bo);
			if (r)
				return r;

			r = amdgpu_bo_reserve(bo, false);
			if (r) {
				amdgpu_bo_unref(&bo);
				return r;
			}

			r = amdgpu_bo_kmap(bo, (void **)&msg);
			if (r) {
				amdgpu_bo_unreserve(bo);
				amdgpu_bo_unref(&bo);
				return r;
			}

			/* stitch together an vcn destroy msg */
			msg[0] = cpu_to_le32(0x00000de4);
			msg[1] = cpu_to_le32(0x00000002);
			msg[2] = cpu_to_le32(handle);
			msg[3] = cpu_to_le32(0x00000000);
			/* remaining dwords of the message are zero */
			for (i = 4; i < 1024; ++i)
				msg[i] = cpu_to_le32(0x0);

			amdgpu_bo_kunmap(bo);
			amdgpu_bo_unreserve(bo);

			return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
		}

		/*
		 * Delayed-work callback: once the decode ring has no emitted fences
		 * left, switch the block off (through DPM when enabled, otherwise by
		 * setting the clocks to 0); while fences are still outstanding, re-arm
		 * the work for another VCN_IDLE_TIMEOUT.
		 */
		static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
		{
			struct amdgpu_device *adev =
				container_of(work, struct amdgpu_device, vcn.idle_work.work);
			unsigned pending = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);

			if (pending != 0) {
				/* still busy, check again later */
				schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
				return;
			}

			if (adev->pm.dpm_enabled) {
				amdgpu_dpm_enable_uvd(adev, false);
			} else {
				amdgpu_asic_set_uvd_clocks(adev, 0, 0);
			}
		}

		/**
		 * amdgpu_vcn_ring_begin_use - prepare the block before ring use
		 * @ring: ring about to be used
		 *
		 * Cancels the idle work. Clocks are only (re)enabled when that cancel
		 * reports that no work was cancelled; they are enabled through DPM when
		 * it is active, otherwise set directly.
		 */
		void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
		{
			struct amdgpu_device *adev = ring->adev;

			/* an idle work was cancelled, so the clocks are left untouched */
			if (cancel_delayed_work_sync(&adev->vcn.idle_work))
				return;

			if (adev->pm.dpm_enabled) {
				amdgpu_dpm_enable_uvd(adev, true);
			} else {
				amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
			}
		}

		void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
		{
		schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
		}

		/**
		 * amdgpu_vcn_dec_ring_test_ib - IB test for the decode ring
		 * @ring: decode ring to test
		 * @timeout: fence wait timeout, passed to dma_fence_wait_timeout()
		 *
		 * Sends a create message followed by a destroy message for session
		 * handle 1 and waits for the fence of the destroy message.
		 *
		 * Returns 0 on success, -ETIMEDOUT when the wait times out, or the error
		 * code of the failing step.
		 */
		int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
		{
			struct dma_fence *fence;
			long r;

			r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
			if (r) {
				DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
				return r;
			}

			r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
			if (r) {
				DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
				return r;
			}

			r = dma_fence_wait_timeout(fence, false, timeout);
			if (r > 0) {
				DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
				r = 0;
			} else if (r == 0) {
				DRM_ERROR("amdgpu: IB test timed out.\n");
				r = -ETIMEDOUT;
			} else {
				DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
			}

			dma_fence_put(fence);

			return r;
		}

drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h

0 → 100644

+35 −0

Original line number	Diff line number	Diff line
		/*
		* Copyright 2016 Advanced Micro Devices, Inc.
		*
		* Permission is hereby granted, free of charge, to any person obtaining a
		* copy of this software and associated documentation files (the "Software"),
		* to deal in the Software without restriction, including without limitation
		* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		* and/or sell copies of the Software, and to permit persons to whom the
		* Software is furnished to do so, subject to the following conditions:
		*
		* The above copyright notice and this permission notice shall be included in
		* all copies or substantial portions of the Software.
		*
		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
		* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
		* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
		* OTHER DEALINGS IN THE SOFTWARE.
		*
		*/

		#ifndef __AMDGPU_VCN_H__
		#define __AMDGPU_VCN_H__

		/* Load the firmware, allocate the VCPU buffer and set up the decode entity. */
		int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
		/* Release what amdgpu_vcn_sw_init() set up; always returns 0. */
		int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
		/* Save a copy of the VCPU buffer; returns -ENOMEM if that copy cannot be allocated. */
		int amdgpu_vcn_suspend(struct amdgpu_device *adev);
		/* Restore the saved VCPU buffer, or reload the firmware image into it. */
		int amdgpu_vcn_resume(struct amdgpu_device *adev);
		/* Cancel the idle work and enable clocks if needed before the ring is used. */
		void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
		/* Schedule the idle work after the ring has been used. */
		void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
		/* Decode ring IB test: send create + destroy messages and wait for the fence. */
		int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);

		#endif