Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6fa7324a authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux into drm-next

Major points for this pull request:
- Add dGPU support for amdkfd initialization code and queue handling. It's
  not complete support since the GPUVM part is missing (the under debate stuff).
- Enable PCIe atomics for dGPU if present
- Various adjustments to the amdgpu<-->amdkfd interface for dGPUs
- Refactor IOMMUv2 code to allow loading amdkfd without IOMMUv2 in the system
- Add HSA process eviction code in case of system memory pressure
- Various fixes and small changes

* tag 'drm-amdkfd-next-2018-03-11' of git://people.freedesktop.org/~gabbayo/linux: (24 commits)
  uapi: Fix type used in ioctl parameter structures
  drm/amdkfd: Implement KFD process eviction/restore
  drm/amdkfd: Add GPUVM virtual address space to PDD
  drm/amdkfd: Remove unaligned memory access
  drm/amdkfd: Centralize IOMMUv2 code and make it conditional
  drm/amdgpu: Add submit IB function for KFD
  drm/amdgpu: Add GPUVM memory management functions for KFD
  drm/amdgpu: add amdgpu_sync_clone
  drm/amdgpu: Update kgd2kfd_shared_resources for dGPU support
  drm/amdgpu: Add KFD eviction fence
  drm/amdgpu: Remove unused kfd2kgd interface
  drm/amdgpu: Fix wrong mask in get_atc_vmid_pasid_mapping_pasid
  drm/amdgpu: Fix header file dependencies
  drm/amdgpu: Replace kgd_mem with amdgpu_bo for kernel pinned gtt mem
  drm/amdgpu: remove useless BUG_ONs
  drm/amdgpu: Enable KFD initialization on dGPUs
  drm/amdkfd: Add dGPU device IDs and device info
  drm/amdkfd: Add dGPU support to kernel_queue_init
  drm/amdkfd: Add dGPU support to the MQD manager
  drm/amdkfd: Add dGPU support to the device queue manager
  ...
parents 0b8eeac5 a1102445
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -766,6 +766,8 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
F:	drivers/gpu/drm/amd/amdkfd/
F:	drivers/gpu/drm/amd/include/cik_structs.h
F:	drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+1 −0
Original line number Diff line number Diff line
@@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref)

	trace_dma_fence_destroy(fence);

	/* Failed to signal before release, could be a refcounting issue */
	WARN_ON(!list_empty(&fence->cb_list));

	if (fence->ops->release)
+2 −0
Original line number Diff line number Diff line
@@ -129,6 +129,8 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += \
	 amdgpu_amdkfd.o \
	 amdgpu_amdkfd_fence.o \
	 amdgpu_amdkfd_gpuvm.o \
	 amdgpu_amdkfd_gfx_v8.o

# add cgs
+101 −31
Original line number Diff line number Diff line
@@ -30,6 +30,8 @@
const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);

static const unsigned int compute_vmid_bitmap = 0xFF00;

int amdgpu_amdkfd_init(void)
{
	int ret;
@@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void)
#else
	ret = -ENOENT;
#endif
	amdgpu_amdkfd_gpuvm_init_mem_limits();

	return ret;
}
@@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
		break;
#endif
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
		break;
	default:
@@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
	int last_valid_bit;
	if (adev->kfd) {
		struct kgd2kfd_shared_resources gpu_resources = {
			.compute_vmid_bitmap = 0xFF00,
			.compute_vmid_bitmap = compute_vmid_bitmap,
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_VA_HOLE_START),
			.drm_render_minor = adev->ddev->render->index
		};

		/* this is going to have a few of the MSBs set that we need to
@@ -204,19 +216,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
			void **cpu_ptr)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
	struct amdgpu_bo *bo = NULL;
	int r;

	BUG_ON(kgd == NULL);
	BUG_ON(gpu_addr == NULL);
	BUG_ON(cpu_ptr == NULL);

	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if ((*mem) == NULL)
		return -ENOMEM;
	uint64_t gpu_addr_tmp = 0;
	void *cpu_ptr_tmp = NULL;

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
			AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
@@ -224,54 +230,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
	}

	/* map the buffer */
	r = amdgpu_bo_reserve((*mem)->bo, true);
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
				&(*mem)->gpu_addr);
	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
				&gpu_addr_tmp);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}
	*gpu_addr = (*mem)->gpu_addr;

	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}
	*cpu_ptr = (*mem)->cpu_ptr;

	amdgpu_bo_unreserve((*mem)->bo);
	*mem_obj = bo;
	*gpu_addr = gpu_addr_tmp;
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin((*mem)->bo);
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve((*mem)->bo);
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&(*mem)->bo);
	amdgpu_bo_unref(&bo);

	return r;
}

void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
	struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;

	BUG_ON(mem == NULL);

	amdgpu_bo_reserve(mem->bo, true);
	amdgpu_bo_kunmap(mem->bo);
	amdgpu_bo_unpin(mem->bo);
	amdgpu_bo_unreserve(mem->bo);
	amdgpu_bo_unref(&(mem->bo));
	kfree(mem);
	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&(bo));
}

void get_local_mem_info(struct kgd_dev *kgd,
@@ -361,3 +366,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)

	return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
}

int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	ret = dma_fence_wait(f, false);

err_ib_sched:
	dma_fence_put(f);
	amdgpu_job_free(job);
err:
	return ret;
}

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (adev->kfd) {
		if ((1 << vmid) & compute_vmid_bitmap)
			return true;
	}

	return false;
}
+110 −2
Original line number Diff line number Diff line
@@ -28,13 +28,89 @@
#include <linux/types.h>
#include <linux/mmu_context.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"

extern const struct kgd2kfd_calls *kgd2kfd;

struct amdgpu_device;

struct kfd_bo_va_list {
	struct list_head bo_list;
	struct amdgpu_bo_va *bo_va;
	void *kgd_dev;
	bool is_mapped;
	uint64_t va;
	uint64_t pte_flags;
};

struct kgd_mem {
	struct mutex lock;
	struct amdgpu_bo *bo;
	uint64_t gpu_addr;
	void *cpu_ptr;
	struct list_head bo_va_list;
	/* protected by amdkfd_process_info.lock */
	struct ttm_validate_buffer validate_list;
	struct ttm_validate_buffer resv_list;
	uint32_t domain;
	unsigned int mapped_to_gpu_memory;
	uint64_t va;

	uint32_t mapping_flags;

	struct amdkfd_process_info *process_info;

	struct amdgpu_sync sync;

	bool aql_queue;
};

/* KFD Memory Eviction */
struct amdgpu_amdkfd_fence {
	struct dma_fence base;
	struct mm_struct *mm;
	spinlock_t lock;
	char timeline_name[TASK_COMM_LEN];
};

struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
						       struct mm_struct *mm);
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);

struct amdkfd_process_info {
	/* List head of all VMs that belong to a KFD process */
	struct list_head vm_list_head;
	/* List head for all KFD BOs that belong to a KFD process. */
	struct list_head kfd_bo_list;
	/* Lock to protect kfd_bo_list */
	struct mutex lock;

	/* Number of VMs */
	unsigned int n_vms;
	/* Eviction Fence */
	struct amdgpu_amdkfd_fence *eviction_fence;
};

/* struct amdkfd_vm -
 * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
 * belonging to a KFD process. All the VMs belonging to the same process point
 * to the same amdkfd_process_info.
 */
struct amdkfd_vm {
	/* Keep base as the first parameter for pointer compatibility between
	 * amdkfd_vm and amdgpu_vm.
	 */
	struct amdgpu_vm base;

	/* List node in amdkfd_process_info.vm_list_head*/
	struct list_head vm_list_node;

	struct amdgpu_device *adev;
	/* Points to the KFD process VM info*/
	struct amdkfd_process_info *process_info;

	uint64_t pd_phys_addr;
};

int amdgpu_amdkfd_init(void);
@@ -48,9 +124,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);

int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len);

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);

/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
			void **mem_obj, uint64_t *gpu_addr,
@@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
		valid;							\
	})

/* GPUVM API */
int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
					  void **process_info,
					  struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		struct kgd_dev *kgd, uint64_t va, uint64_t size,
		void *vm, struct kgd_mem **mem,
		uint64_t *offset, uint32_t flags);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
		struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
int amdgpu_amdkfd_gpuvm_sync_memory(
		struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
		struct kgd_mem *mem, void **kptr, uint64_t *size);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
					    struct dma_fence **ef);

void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);

#endif /* AMDGPU_AMDKFD_H_INCLUDED */
Loading