Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 373d7080 authored by Felix Kuehling's avatar Felix Kuehling Committed by Oded Gabbay
Browse files

drm/amdkfd: Add CWSR support



This hardware feature allows the GPU to preempt shader execution in
the middle of a compute wave, save the state and restore it later
to resume execution.

Memory for saving the state is allocated per queue in user mode and
the address and size passed to the create_queue ioctl. The size
depends on the number of waves that can be in flight simultaneously
on a given ASIC.

Signed-off-by: default avatarShaoyun.liu <shaoyun.liu@amd.com>
Signed-off-by: default avatarYong Zhao <yong.zhao@amd.com>
Signed-off-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent 449fea61
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -117,7 +117,7 @@ static int kfd_open(struct inode *inode, struct file *filep)
		return -EPERM;
	}

	process = kfd_create_process(current);
	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

@@ -206,6 +206,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -1088,6 +1089,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
			KFD_MMAP_EVENTS_MASK) {
		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
		return kfd_event_mmap(process, vma);
	} else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
			KFD_MMAP_RESERVED_MEM_MASK) {
		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
		return kfd_reserved_mem_mmap(process, vma);
	}

	return -EFAULT;
+18 −2
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "cwsr_trap_handler_gfx8.asm"

#define MQD_SIZE_ALIGNED 768

@@ -38,7 +39,8 @@ static const struct kfd_device_info kaveri_device_info = {
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
};

static const struct kfd_device_info carrizo_device_info = {
@@ -49,7 +51,8 @@ static const struct kfd_device_info carrizo_device_info = {
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
};

struct kfd_deviceid {
@@ -212,6 +215,17 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
	return AMD_IOMMU_INV_PRI_RSP_INVALID;
}

static void kfd_cwsr_init(struct kfd_dev *kfd)
{
	if (cwsr_enable && kfd->device_info->supports_cwsr) {
		BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);

		kfd->cwsr_isa = cwsr_trap_gfx8_hex;
		kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
		kfd->cwsr_enabled = true;
	}
}

bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
@@ -286,6 +300,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
		goto device_iommu_pasid_error;
	}

	kfd_cwsr_init(kfd);

	if (kfd_resume(kfd))
		goto kfd_resume_error;

+6 −0
Original line number Diff line number Diff line
@@ -173,6 +173,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
@@ -846,6 +849,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
	}

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
+4 −0
Original line number Diff line number Diff line
@@ -50,6 +50,10 @@ module_param(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
	"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");

int cwsr_enable = 1;
module_param(cwsr_enable, int, 0444);
MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");

int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
+27 −0
Original line number Diff line number Diff line
@@ -89,6 +89,28 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
	if (q->format == KFD_QUEUE_FORMAT_AQL)
		m->cp_hqd_iq_rptr = 1;

	if (q->tba_addr) {
		m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
		m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
		m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
		m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
		m->compute_pgm_rsrc2 |=
			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
	}

	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
		m->cp_hqd_persistent_state |=
			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
		m->cp_hqd_ctx_save_base_addr_lo =
			lower_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_base_addr_hi =
			upper_32_bits(q->ctx_save_restore_area_address);
		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
	}

	*mqd = m;
	if (gart_addr)
		*gart_addr = addr;
@@ -167,6 +189,11 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
	}

	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
		m->cp_hqd_ctx_save_control =
			atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
			mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;

	q->is_active = (q->queue_size > 0 &&
			q->queue_address != 0 &&
			q->queue_percent > 0);
Loading