Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b8cbab04 authored by Oded Gabbay's avatar Oded Gabbay
Browse files

drm/amdkfd: Allow user to limit only queues per device



This patch replaces the two current amdkfd module parameters with a new one.

The current parameters that are being replaced are:

- Maximum number of HSA processes
- Maximum number of queues per process

The new parameter that replaces them is called "Maximum queues per device"

This replacement achieves two goals:

- Allows the user to have as many HSA processes as it wants (until
  a maximum of 512 HSA processes in Kaveri).

- Removes the limitation the user had on maximum number of queues per HSA
  process. E.g. the user can now have processes which only have one queue and
  other processes which have hundreds of queues, while before the user
  couldn't have more than 128 queues per process (as default).

The default value of the new parameter is 4096 (32 * 128, which were the
defaults of the old parameters). There is almost no additional GART memory
required for the default case. As a reminder, this amount of queues requires a
little bit below 4MB of GART memory.

v2:
In addition, This patch defines a new counter for queues accounting in the DQM
structure. This is done because the current counter only counts active queues
which allows the user to create more queues than the
max_num_of_queues_per_device module parameter allows.

However, we need the current counter for the runlist packet build process, so
the solution is to have a dedicated counter for this accounting.

Signed-off-by: default avatarOded Gabbay <oded.gabbay@amd.com>
Reviewed-by: default avatarBen Goz <ben.goz@amd.com>
parent f046bfdf
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers.h"

#define MQD_SIZE_ALIGNED 768

@@ -169,8 +170,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
	kfd->shared_resources = *gpu_resources;

	/* calculate max size of mqds needed for queues */
	size = max_num_of_processes *
		max_num_of_queues_per_process *
	size = max_num_of_queues_per_device *
			kfd->device_info->mqd_size_aligned;

	/* add another 512KB for all other allocations on gart */
+70 −0
Original line number Diff line number Diff line
@@ -183,6 +183,13 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
@@ -207,6 +214,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
	list_add(&q->list, &qpd->queues_list);
	dqm->queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return 0;
}
@@ -326,6 +341,15 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
@@ -752,6 +776,21 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
@@ -775,6 +814,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

@@ -793,6 +839,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
@@ -810,6 +863,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
		retval = execute_queues_cpsch(dqm, false);
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
@@ -930,6 +992,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;
+1 −0
Original line number Diff line number Diff line
@@ -130,6 +130,7 @@ struct device_queue_manager {
	struct list_head	queues;
	unsigned int		processes_count;
	unsigned int		queue_count;
	unsigned int		total_queue_count;
	unsigned int		next_pipe_to_allocate;
	unsigned int		*allocated_queues;
	unsigned int		vmid_bitmap;
+8 −19
Original line number Diff line number Diff line
@@ -50,15 +50,10 @@ module_param(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
	"Kernel cmdline parameter that defines the amdkfd scheduling policy");

int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT;
module_param(max_num_of_processes, int, 0444);
MODULE_PARM_DESC(max_num_of_processes,
	"Kernel cmdline parameter that defines the amdkfd maximum number of supported processes");

int max_num_of_queues_per_process = KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT;
module_param(max_num_of_queues_per_process, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_process,
	"Kernel cmdline parameter that defines the amdkfd maximum number of supported queues per process");
int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
	"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");

bool kgd2kfd_init(unsigned interface_version,
		  const struct kfd2kgd_calls *f2g,
@@ -100,16 +95,10 @@ static int __init kfd_module_init(void)
	}

	/* Verify module parameters */
	if ((max_num_of_processes < 0) ||
		(max_num_of_processes > KFD_MAX_NUM_OF_PROCESSES)) {
		pr_err("kfd: max_num_of_processes must be between 0 to KFD_MAX_NUM_OF_PROCESSES\n");
		return -1;
	}

	if ((max_num_of_queues_per_process < 0) ||
		(max_num_of_queues_per_process >
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)) {
		pr_err("kfd: max_num_of_queues_per_process must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_PROCESS\n");
	if ((max_num_of_queues_per_device < 0) ||
		(max_num_of_queues_per_device >
			KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
		pr_err("kfd: max_num_of_queues_per_device must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
		return -1;
	}

+1 −1
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ static DEFINE_MUTEX(pasid_mutex);

int kfd_pasid_init(void)
{
	pasid_limit = max_num_of_processes;
	pasid_limit = KFD_MAX_NUM_OF_PROCESSES;

	pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
	if (!pasid_bitmap)
Loading