Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4f4d89af authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'drm-amdkfd-next-2015-01-09' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Add support for SDMA usermode queues
- Replace logic of sub-allocating from GART buffer in amdkfd. Instead
  of using radeon_sa module, use a new module that is more suited for
  this purpose
- Add the number of watch points to amdkfd topology
- Split a function that did two things into two separate functions.

* tag 'drm-amdkfd-next-2015-01-09' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amd: Remove old radeon_sa funcs from kfd-->kgd interface
  drm/radeon: Remove old radeon_sa usage from kfd-->kgd interface
  drm/amdkfd: Using new gtt sa in amdkfd
  drm/amdkfd: Allocate gart memory using new interface
  drm/amdkfd: Fixed calculation of gart buffer size
  drm/amdkfd: Add kfd gtt sub-allocator functions
  drm/amdkfd: Add gtt sa related data to kfd_dev struct
  drm/radeon: Impl. new gtt allocate/free functions
  drm/amd: Add new kfd-->kgd interface for gart usage
  drm/radeon: Enable sdma preemption
  drm/amdkfd: Pass queue type to pqm_create_queue()
  drm/amdkfd: Identify SDMA queue in create queue ioctl
  drm/amdkfd: Add SDMA user-mode queues support to QCM
  drm/amdkfd: Add SDMA mqd support
  drm/radeon: Implement SDMA interface functions
  drm/amd: Add SDMA functions to kfd-->kgd interface
  drm/amdkfd: Process-device data creation and lookup split
  drm/amdkfd: Add number of watch points to topology
parents 426959c9 6bbcde98
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -192,6 +192,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else
		return -ENOTSUPP;

@@ -258,8 +260,8 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0,
				KFD_QUEUE_TYPE_COMPUTE, &queue_id);
	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties,
				0, q_properties.type, &queue_id);
	if (err != 0)
		goto err_create_queue;

+220 −5
Original line number Diff line number Diff line
@@ -26,12 +26,14 @@
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers.h"

#define MQD_SIZE_ALIGNED 768

/*
 * Static device properties for Kaveri. The Kaveri entries of
 * supported_devices[] point at this structure.
 */
static const struct kfd_device_info kaveri_device_info = {
	.max_pasid_bits = 16,
	/* entry size expressed as four 32-bit words */
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED
};

@@ -66,6 +68,10 @@ static const struct kfd_deviceid supported_devices[] = {
	{ 0x131D, &kaveri_device_info },	/* Kaveri */
};

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static const struct kfd_device_info *lookup_device_info(unsigned short did)
{
	size_t i;
@@ -173,16 +179,39 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
		max_num_of_queues_per_process *
		kfd->device_info->mqd_size_aligned;

	/* add another 512KB for all other allocations on gart */
	/*
	 * calculate max size of runlist packet.
	 * There can be only 2 packets at once
	 */
	size += (max_num_of_processes * sizeof(struct pm4_map_process) +
		max_num_of_processes * max_num_of_queues_per_process *
		sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;

	/* Add size of HIQ & DIQ */
	size += KFD_KERNEL_QUEUE_SIZE * 2;

	/* add another 512KB for all other allocations on gart (HPD, fences) */
	size += 512 * 1024;

	if (kfd2kgd->init_sa_manager(kfd->kgd, size)) {
	if (kfd2kgd->init_gtt_mem_allocation(kfd->kgd, size, &kfd->gtt_mem,
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)) {
		dev_err(kfd_device,
			"Error initializing sa manager for device (%x:%x)\n",
			kfd->pdev->vendor, kfd->pdev->device);
			"Could not allocate %d bytes for device (%x:%x)\n",
			size, kfd->pdev->vendor, kfd->pdev->device);
		goto out;
	}

	dev_info(kfd_device,
		"Allocated %d bytes on gart for device(%x:%x)\n",
		size, kfd->pdev->vendor, kfd->pdev->device);

	/* Initialize GTT sa with 512 byte chunk size */
	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
		dev_err(kfd_device,
			"Error initializing gtt sub-allocator\n");
		goto kfd_gtt_sa_init_error;
	}

	kfd_doorbell_init(kfd);

	if (kfd_topology_add_device(kfd) != 0) {
@@ -241,7 +270,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_interrupt_error:
	kfd_topology_remove_device(kfd);
kfd_topology_add_device_error:
	kfd2kgd->fini_sa_manager(kfd->kgd);
	kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
	kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
	dev_err(kfd_device,
		"device (%x:%x) NOT added due to errors\n",
		kfd->pdev->vendor, kfd->pdev->device);
@@ -256,6 +287,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
		amd_iommu_free_device(kfd->pdev);
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
		kfd_gtt_sa_fini(kfd);
		kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
	}

	kfree(kfd);
@@ -306,3 +339,185 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
		spin_unlock(&kfd->interrupt_lock);
	}
}

/*
 * kfd_gtt_sa_init - set up the GTT sub-allocator bookkeeping
 * @kfd:	device whose GTT buffer is being sub-allocated
 * @buf_size:	size of the GTT buffer in bytes
 * @chunk_size:	allocation granularity in bytes
 *
 * Splits the buffer into buf_size / chunk_size chunks and allocates a
 * zeroed bitmap with one bit per chunk (bit set == chunk in use).
 *
 * Returns 0 on success or -ENOMEM if the bitmap cannot be allocated.
 */
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size)
{
	const unsigned int bits_per_long =
				sizeof(unsigned long) * BITS_PER_BYTE;
	unsigned int num_of_longs;

	BUG_ON(!kfd);
	BUG_ON(!kfd->gtt_mem);
	BUG_ON(buf_size < chunk_size);
	BUG_ON(buf_size == 0);
	BUG_ON(chunk_size == 0);

	kfd->gtt_sa_chunk_size = chunk_size;
	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

	/*
	 * The bitmap helpers used by the allocator (find_next_zero_bit,
	 * set_bit, clear_bit) access whole unsigned longs, so the storage
	 * must be rounded UP to a whole number of longs. Truncating to
	 * bytes would leave the last chunks without backing memory.
	 */
	num_of_longs = kfd->gtt_sa_num_of_chunks / bits_per_long;
	if (kfd->gtt_sa_num_of_chunks % bits_per_long)
		num_of_longs++;
	BUG_ON(num_of_longs == 0);

	kfd->gtt_sa_bitmap = kzalloc(num_of_longs * sizeof(unsigned long),
					GFP_KERNEL);

	if (!kfd->gtt_sa_bitmap)
		return -ENOMEM;

	pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

	mutex_init(&kfd->gtt_sa_lock);

	return 0;
}

/*
 * kfd_gtt_sa_fini - tear down the GTT sub-allocator bookkeeping
 * @kfd:	device whose sub-allocator is being released
 *
 * Releases the chunk bitmap and destroys the lock protecting it.
 */
static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
	/* The two releases are independent of each other */
	kfree(kfd->gtt_sa_bitmap);
	mutex_destroy(&kfd->gtt_sa_lock);
}

/*
 * kfd_gtt_sa_calc_gpu_addr - GPU address of a chunk
 * @start_addr:	GPU address of the first chunk
 * @bit_num:	index of the chunk (its bit number in the bitmap)
 * @chunk_size:	size of one chunk in bytes
 *
 * Returns start_addr advanced by bit_num chunks. The offset is computed
 * in 64 bits so it cannot wrap at 4GB before being added to the base.
 */
static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return start_addr + (uint64_t) bit_num * chunk_size;
}

/*
 * kfd_gtt_sa_calc_cpu_addr - CPU address of a chunk
 * @start_addr:	CPU pointer to the first chunk
 * @bit_num:	index of the chunk (its bit number in the bitmap)
 * @chunk_size:	size of one chunk in bytes
 *
 * Returns start_addr advanced by bit_num chunks. Byte-pointer arithmetic
 * is used instead of a round-trip through uint64_t, which is the wrong
 * width for a pointer on 32-bit builds; the offset is widened before the
 * multiply so it cannot wrap in 32 bits.
 */
static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return (uint32_t *) ((char *) start_addr +
				(uint64_t) bit_num * chunk_size);
}

/*
 * kfd_gtt_sa_allocate - sub-allocate memory from the device GTT buffer
 * @kfd:	device to allocate from
 * @size:	requested size in bytes
 * @mem_obj:	out parameter; on success points to a newly allocated
 *		kfd_mem_obj describing the range, on search failure it is
 *		set to NULL
 *
 * Performs a first-fit search of the chunk bitmap for enough contiguous
 * free chunks to hold @size bytes and marks them as used.
 *
 * Returns 0 on success, -EINVAL if @size is 0, or -ENOMEM if the request
 * is larger than the buffer, the descriptor cannot be allocated, or no
 * suitable free range exists.
 */
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj)
{
	unsigned int found, start_search, cur_size;

	BUG_ON(!kfd);

	if (size == 0)
		return -EINVAL;

	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
		return -ENOMEM;

	*mem_obj = kmalloc(sizeof(**mem_obj), GFP_KERNEL);
	if ((*mem_obj) == NULL)
		return -ENOMEM;

	pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size);

	start_search = 0;

	mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
	/* Find the first chunk that is free */
	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks,
					start_search);

	pr_debug("kfd: found = %d\n", found);

	/* If there wasn't any free chunk, bail out */
	if (found == kfd->gtt_sa_num_of_chunks)
		goto kfd_gtt_no_free_chunk;

	/* Update fields of mem_obj */
	(*mem_obj)->range_start = found;
	(*mem_obj)->range_end = found;
	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
					kfd->gtt_start_gpu_addr,
					found,
					kfd->gtt_sa_chunk_size);
	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
					kfd->gtt_start_cpu_ptr,
					found,
					kfd->gtt_sa_chunk_size);

	pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n",
			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

	/* If we need only one chunk, mark it as allocated and get out */
	if (size <= kfd->gtt_sa_chunk_size) {
		pr_debug("kfd: single bit\n");
		set_bit(found, kfd->gtt_sa_bitmap);
		goto kfd_gtt_out;
	}

	/* Otherwise, try to see if we have enough contiguous chunks */
	cur_size = size - kfd->gtt_sa_chunk_size;
	do {
		(*mem_obj)->range_end =
			find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks, ++found);
		/*
		 * If the next free chunk is not contiguous then we need to
		 * restart our search from the chunk that broke the run
		 * (the one that wasn't contiguous to the previous ones)
		 */
		if ((*mem_obj)->range_end != found) {
			start_search = found;
			goto kfd_gtt_restart_search;
		}

		/*
		 * If we reached end of buffer, bail out with error
		 */
		if (found == kfd->gtt_sa_num_of_chunks)
			goto kfd_gtt_no_free_chunk;

		/* Check if we don't need another chunk */
		if (cur_size <= kfd->gtt_sa_chunk_size)
			cur_size = 0;
		else
			cur_size -= kfd->gtt_sa_chunk_size;

	} while (cur_size > 0);

	pr_debug("kfd: range_start = %d, range_end = %d\n",
		(*mem_obj)->range_start, (*mem_obj)->range_end);

	/* Mark the chunks as allocated */
	for (found = (*mem_obj)->range_start;
		found <= (*mem_obj)->range_end;
		found++)
		set_bit(found, kfd->gtt_sa_bitmap);

kfd_gtt_out:
	mutex_unlock(&kfd->gtt_sa_lock);
	return 0;

kfd_gtt_no_free_chunk:
	pr_debug("kfd: allocation failed with mem_obj = %p\n", *mem_obj);
	mutex_unlock(&kfd->gtt_sa_lock);
	/*
	 * Free the descriptor we allocated above, not the caller's
	 * pointer variable, and do not leave a dangling pointer behind.
	 */
	kfree(*mem_obj);
	*mem_obj = NULL;
	return -ENOMEM;
}

/*
 * kfd_gtt_sa_free - return a sub-allocation to the device GTT buffer
 * @kfd:	device the memory was allocated from
 * @mem_obj:	descriptor returned by kfd_gtt_sa_allocate(); may be NULL
 *
 * Clears the bitmap bits of every chunk in the object's range and frees
 * the descriptor. A NULL @mem_obj is accepted and ignored, mirroring
 * kfree(), so teardown paths can call this unconditionally.
 *
 * Always returns 0.
 */
int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
	unsigned int bit;

	BUG_ON(!kfd);

	/* Nothing to release; do not take the kernel down for it */
	if (!mem_obj)
		return 0;

	pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
			mem_obj, mem_obj->range_start, mem_obj->range_end);

	mutex_lock(&kfd->gtt_sa_lock);

	/* Mark the chunks as free */
	for (bit = mem_obj->range_start;
		bit <= mem_obj->range_end;
		bit++)
		clear_bit(bit, kfd->gtt_sa_bitmap);

	mutex_unlock(&kfd->gtt_sa_lock);

	kfree(mem_obj);
	return 0;
}
+155 −28
Original line number Diff line number Diff line
@@ -46,9 +46,24 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

/*
 * Map a queue type to the MQD type used to manage it: SDMA queues use the
 * CIK SDMA MQD, every other queue type uses the CIK CP MQD.
 */
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	return (type == KFD_QUEUE_TYPE_SDMA) ? KFD_MQD_TYPE_CIK_SDMA
					     : KFD_MQD_TYPE_CIK_CP;
}

static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
{
@@ -75,7 +90,6 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
	nybble = (pdd->lds_base >> 60) & 0x0E;

	return nybble;

}

static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
@@ -190,7 +204,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
@@ -203,7 +220,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,

	list_add(&q->list, &qpd->queues_list);
	dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	mutex_unlock(&dqm->lock);
	return 0;
}
@@ -280,8 +298,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	struct mqd_manager *mqd, *mqd_sdma;
	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;
@@ -295,6 +312,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
		goto out;
	}

	mqd_sdma = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA);
	if (mqd_sdma == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
@@ -303,7 +326,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
	if (retval != 0)
		goto out;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

@@ -324,7 +352,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	mqd = dqm->get_mqd_manager(dqm, q->properties.type);
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
@@ -491,11 +519,8 @@ static int init_pipelines(struct device_queue_manager *dqm,
	 * because it contains no data when there are no active queues.
	 */

	err = kfd2kgd->allocate_mem(dqm->dev->kgd,
				CIK_HPD_EOP_BYTES * pipes_num,
				PAGE_SIZE,
				KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
				(struct kgd_mem **) &dqm->pipeline_mem);
	err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num,
					&dqm->pipeline_mem);

	if (err) {
		pr_err("kfd: error allocate vidmem num pipes: %d\n",
@@ -510,8 +535,7 @@ static int init_pipelines(struct device_queue_manager *dqm,

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		kfd2kgd->free_mem(dqm->dev->kgd,
				(struct kgd_mem *) dqm->pipeline_mem);
		kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
		return -ENOMEM;
	}

@@ -527,7 +551,6 @@ static int init_pipelines(struct device_queue_manager *dqm,
	return 0;
}


static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval;
@@ -557,6 +580,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;
	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
@@ -568,6 +592,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	init_scheduler(dqm);
	return 0;
@@ -585,8 +610,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm)
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd2kgd->free_mem(dqm->dev->kgd,
			(struct kgd_mem *) dqm->pipeline_mem);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
@@ -599,6 +623,77 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
	return 0;
}

/*
 * allocate_sdma_queue - reserve the lowest free SDMA queue slot
 * @dqm:		device queue manager owning the SDMA bitmap
 * @sdma_queue_id:	out parameter; receives the reserved slot index
 *
 * A set bit in dqm->sdma_bitmap marks a free slot. Only the low
 * CIK_SDMA_QUEUES bits are considered.
 *
 * sdma_bitmap is an unsigned int, so it is manipulated with plain integer
 * operations rather than being cast to unsigned long * for the bitops
 * helpers (which would access more bytes than the field holds on 64-bit).
 * NOTE(review): the callers visible in this file hold dqm->lock, which is
 * what makes the non-atomic update safe - confirm for any new caller.
 *
 * Returns 0 on success or -ENOMEM if no slot is free.
 */
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	unsigned int bit;

	for (bit = 0; bit < CIK_SDMA_QUEUES; bit++) {
		if (dqm->sdma_bitmap & (1U << bit)) {
			dqm->sdma_bitmap &= ~(1U << bit);
			*sdma_queue_id = bit;
			return 0;
		}
	}

	return -ENOMEM;
}

/*
 * deallocate_sdma_queue - release an SDMA queue slot
 * @dqm:		device queue manager owning the SDMA bitmap
 * @sdma_queue_id:	slot index to mark as free again
 *
 * Out-of-range ids are silently ignored. The id is unsigned, so only the
 * upper bound needs checking. sdma_bitmap is an unsigned int and is
 * updated with a plain OR instead of casting it to unsigned long *.
 * NOTE(review): the callers visible in this file hold dqm->lock - confirm
 * for any new caller.
 */
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	dqm->sdma_bitmap |= (1U << sdma_queue_id);
}

/*
 * init_sdma_vm - compute the SDMA VM address setting for a queue
 *
 * Stores into q->properties.sdma_vm_addr the SDMA_ATC flag combined with
 * either the 32-bit pointer flag and 32-bit SH_MEM bases (32-bit user
 * mode processes) or the shared-base field built from the 64-bit nybble.
 */
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
				struct qcm_process_device *qpd)
{
	uint32_t vm_addr;

	if (q->process->is_32bit_user_mode)
		vm_addr = SDMA_ATC | SDMA_VA_PTR32 |
				get_sh_mem_bases_32(qpd_to_pdd(qpd));
	else
		vm_addr = SDMA_ATC |
			SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
							qpd_to_pdd(qpd)));

	q->properties.sdma_vm_addr = vm_addr;
}

/*
 * create_sdma_queue_nocpsch - create an SDMA queue without the CP scheduler
 * @dqm:	device queue manager
 * @q:		queue being created; sdma_id and SDMA properties are filled in
 * @qpd:	per-process device data used to derive the SDMA VM address
 *
 * Reserves an SDMA slot, derives the (engine, queue) pair from it,
 * initializes the MQD and computes the SDMA VM address. The slot is
 * released again if MQD initialization fails.
 *
 * Returns 0 on success, -ENOMEM if no SDMA MQD manager or no free slot is
 * available, or the error returned by init_mqd.
 */
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval != 0)
		return retval;

	/*
	 * Split the flat slot index into queue-within-engine and engine.
	 * Both parts must use the same radix (queues per engine); dividing
	 * by the number of engines only works while the two constants
	 * happen to be equal.
	 */
	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;

	pr_debug("kfd: sdma id is:    %d\n", q->sdma_id);
	pr_debug("     sdma queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("     sdma engine id: %d\n", q->properties.sdma_engine_id);

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		return retval;
	}

	/*
	 * NOTE(review): sdma_vm_addr is written into q->properties only
	 * after init_mqd() has consumed the properties - confirm init_mqd
	 * does not need it.
	 */
	init_sdma_vm(dqm, q, qpd);
	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */
@@ -640,6 +735,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
	if (retval != 0)
@@ -672,18 +768,14 @@ static int start_cpsch(struct device_queue_manager *dqm)
	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd2kgd->allocate_mem(dqm->dev->kgd,
					sizeof(*dqm->fence_addr),
					32,
					KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
					(struct kgd_mem **) &dqm->fence_mem);
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
@@ -712,8 +804,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd2kgd->free_mem(dqm->dev->kgd,
			(struct kgd_mem *) dqm->fence_mem);
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
@@ -754,6 +845,14 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
	mutex_unlock(&dqm->lock);
}

/*
 * select_sdma_engine_id - assign the next SDMA id to a queue
 *
 * Hands out ids 0 and 1 alternately, using a function-local static
 * counter that persists across calls.
 */
static void select_sdma_engine_id(struct queue *q)
{
	static int next_sdma_id;

	q->sdma_id = next_sdma_id++;
	next_sdma_id %= 2;
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
@@ -769,7 +868,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,

	mutex_lock(&dqm->lock);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
@@ -786,6 +890,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
		retval = execute_queues_cpsch(dqm, false);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			dqm->sdma_queue_count++;

out:
	mutex_unlock(&dqm->lock);
	return retval;
@@ -809,6 +916,14 @@ static int fence_wait_timeout(unsigned int *fence_addr,
	return 0;
}

/*
 * destroy_sdma_queues - send an unmap-queue packet for one SDMA engine
 * @dqm:		device queue manager whose packet manager is used
 * @sdma_engine:	SDMA engine index passed through to the packet
 *
 * Returns the result of pm_send_unmap_queue().
 */
static int destroy_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	int retval;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
			sdma_engine);

	return retval;
}

static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;
@@ -821,6 +936,15 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
		mutex_lock(&dqm->lock);
	if (dqm->active_runlist == false)
		goto out;

	pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		destroy_sdma_queues(dqm, 0);
		destroy_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
	if (retval != 0)
@@ -892,13 +1016,16 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	mqd = dqm->get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	dqm->queue_count--;

+5 −0
Original line number Diff line number Diff line
@@ -36,6 +36,9 @@
#define KFD_VMID_START_OFFSET			(8)
#define VMID_PER_DEVICE				CIK_VMID_NUM
#define KFD_DQM_FIRST_PIPE			(0)
#define CIK_SDMA_QUEUES				(4)
#define CIK_SDMA_QUEUES_PER_ENGINE		(2)
#define CIK_SDMA_ENGINE_NUM			(2)

struct device_process_node {
	struct qcm_process_device *qpd;
@@ -130,8 +133,10 @@ struct device_queue_manager {
	struct list_head	queues;
	unsigned int		processes_count;
	unsigned int		queue_count;
	unsigned int		sdma_queue_count;
	unsigned int		next_pipe_to_allocate;
	unsigned int		*allocated_queues;
	unsigned int		sdma_bitmap;
	unsigned int		vmid_bitmap;
	uint64_t		pipelines_addr;
	struct kfd_mem_obj	*pipeline_mem;
+0 −4
Original line number Diff line number Diff line
@@ -137,10 +137,6 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
	if (dev == NULL)
		return -EINVAL;

	/* Find if pdd exists for combination of process and gpu id */
	if (!kfd_get_process_device_data(dev, process, 0))
		return -EINVAL;

	/* Calculate physical address of doorbell */
	address = kfd_get_process_doorbells(dev, process);

Loading