Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 39e7f331 authored by Felix Kuehling's avatar Felix Kuehling Committed by Oded Gabbay
Browse files

drm/amdkfd: Add CU-masking ioctl to KFD



CU-masking allows a KFD client to control the set of CUs used by a
user mode queue for executing compute dispatches. This can be used
for optimizing the partitioning of the GPU and minimize conflicts
between concurrent tasks.

Signed-off-by: default avatarFlora Cui <flora.cui@amd.com>
Signed-off-by: default avatarKent Russell <kent.russell@amd.com>
Signed-off-by: default avatarEric Huang <JinHuiEric.Huang@amd.com>
Signed-off-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Acked-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent a7fe68a1
Loading
Loading
Loading
Loading
+58 −0
Original line number Original line Diff line number Diff line
@@ -392,6 +392,61 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
	return retval;
	return retval;
}
}


static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct queue_properties properties;
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	properties.cu_mask_count = args->num_cu_mask;
	if (properties.cu_mask_count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits.  We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
	 */
	if (properties.cu_mask_count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		properties.cu_mask_count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
	if (!properties.cu_mask)
		return -ENOMEM;

	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
	if (retval) {
		pr_debug("Could not copy CU mask from userspace");
		kfree(properties.cu_mask);
		return -EFAULT;
	}

	mutex_lock(&p->mutex);

	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	if (retval)
		kfree(properties.cu_mask);

	return retval;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
					struct kfd_process *p, void *data)
{
{
@@ -1557,6 +1612,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),
			kfd_ioctl_unmap_memory_from_gpu, 0),


	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			kfd_ioctl_set_cu_mask, 0),

};
};


#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
+1 −0
Original line number Original line Diff line number Diff line
@@ -123,6 +123,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
	prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
	prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
	prop.eop_ring_buffer_address = kq->eop_gpu_addr;
	prop.eop_ring_buffer_address = kq->eop_gpu_addr;
	prop.eop_ring_buffer_size = PAGE_SIZE;
	prop.eop_ring_buffer_size = PAGE_SIZE;
	prop.cu_mask = NULL;


	if (init_queue(&kq->queue, &prop) != 0)
	if (init_queue(&kq->queue, &prop) != 0)
		goto err_init_queue;
		goto err_init_queue;
+40 −1
Original line number Original line Diff line number Diff line
@@ -21,7 +21,7 @@
 *
 *
 */
 */


#include "kfd_priv.h"
#include "kfd_mqd_manager.h"


struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
					struct kfd_dev *dev)
					struct kfd_dev *dev)
@@ -48,3 +48,42 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,


	return NULL;
	return NULL;
}
}

void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
		const uint32_t *cu_mask, uint32_t cu_mask_count,
		uint32_t *se_mask)
{
	struct kfd_cu_info cu_info;
	uint32_t cu_per_sh[4] = {0};
	int i, se, cu = 0;

	mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info);

	if (cu_mask_count > cu_info.cu_active_number)
		cu_mask_count = cu_info.cu_active_number;

	for (se = 0; se < cu_info.num_shader_engines; se++)
		for (i = 0; i < 4; i++)
			cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]);

	/* Symmetrically map cu_mask to all SEs:
	 * cu_mask[0] bit0 -> se_mask[0] bit0;
	 * cu_mask[0] bit1 -> se_mask[1] bit0;
	 * ... (if # SE is 4)
	 * cu_mask[0] bit4 -> se_mask[0] bit1;
	 * ...
	 */
	se = 0;
	for (i = 0; i < cu_mask_count; i++) {
		if (cu_mask[i / 32] & (1 << (i % 32)))
			se_mask[se] |= 1 << cu;

		do {
			se++;
			if (se == cu_info.num_shader_engines) {
				se = 0;
				cu++;
			}
		} while (cu >= cu_per_sh[se] && cu < 32);
	}
}
+4 −0
Original line number Original line Diff line number Diff line
@@ -93,4 +93,8 @@ struct mqd_manager {
	struct kfd_dev	*dev;
	struct kfd_dev	*dev;
};
};


void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
		const uint32_t *cu_mask, uint32_t cu_mask_count,
		uint32_t *se_mask);

#endif /* KFD_MQD_MANAGER_H_ */
#endif /* KFD_MQD_MANAGER_H_ */
+27 −0
Original line number Original line Diff line number Diff line
@@ -41,6 +41,31 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
	return (struct cik_sdma_rlc_registers *)mqd;
	return (struct cik_sdma_rlc_registers *)mqd;
}
}


static void update_cu_mask(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
	struct cik_mqd *m;
	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */

	if (q->cu_mask_count == 0)
		return;

	mqd_symmetrically_map_cu_mask(mm,
		q->cu_mask, q->cu_mask_count, se_mask);

	m = get_mqd(mqd);
	m->compute_static_thread_mgmt_se0 = se_mask[0];
	m->compute_static_thread_mgmt_se1 = se_mask[1];
	m->compute_static_thread_mgmt_se2 = se_mask[2];
	m->compute_static_thread_mgmt_se3 = se_mask[3];

	pr_debug("Update cu mask to %#x %#x %#x %#x\n",
		m->compute_static_thread_mgmt_se0,
		m->compute_static_thread_mgmt_se1,
		m->compute_static_thread_mgmt_se2,
		m->compute_static_thread_mgmt_se3);
}

static int init_mqd(struct mqd_manager *mm, void **mqd,
static int init_mqd(struct mqd_manager *mm, void **mqd,
		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
		struct queue_properties *q)
		struct queue_properties *q)
@@ -196,6 +221,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
	if (q->format == KFD_QUEUE_FORMAT_AQL)
	if (q->format == KFD_QUEUE_FORMAT_AQL)
		m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
		m->cp_hqd_pq_control |= NO_UPDATE_RPTR;


	update_cu_mask(mm, mqd, q);

	q->is_active = (q->queue_size > 0 &&
	q->is_active = (q->queue_size > 0 &&
			q->queue_address != 0 &&
			q->queue_address != 0 &&
			q->queue_percent > 0 &&
			q->queue_percent > 0 &&
Loading