Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ada2b29c authored by Felix Kuehling, committed by Oded Gabbay
Browse files

drm/amdkfd: Make doorbell size ASIC-dependent



This prepares for GFXv9 (Vega10), which has 64-bit doorbells.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 642a0e80
Loading
Loading
Loading
Loading
+10 −0
Original line number Original line Diff line number Diff line
@@ -41,6 +41,7 @@ static const struct kfd_device_info kaveri_device_info = {
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	/* max num of queues for KV.TODO should be a dynamic value */
	/* max num of queues for KV.TODO should be a dynamic value */
	.max_no_of_hqd	= 24,
	.max_no_of_hqd	= 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.num_of_watch_points = 4,
@@ -55,6 +56,7 @@ static const struct kfd_device_info carrizo_device_info = {
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	/* max num of queues for CZ.TODO should be a dynamic value */
	/* max num of queues for CZ.TODO should be a dynamic value */
	.max_no_of_hqd	= 24,
	.max_no_of_hqd	= 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.num_of_watch_points = 4,
@@ -70,6 +72,7 @@ static const struct kfd_device_info hawaii_device_info = {
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	/* max num of queues for KV.TODO should be a dynamic value */
	/* max num of queues for KV.TODO should be a dynamic value */
	.max_no_of_hqd	= 24,
	.max_no_of_hqd	= 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.num_of_watch_points = 4,
@@ -83,6 +86,7 @@ static const struct kfd_device_info tonga_device_info = {
	.asic_family = CHIP_TONGA,
	.asic_family = CHIP_TONGA,
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.num_of_watch_points = 4,
@@ -96,6 +100,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
	.asic_family = CHIP_TONGA,
	.asic_family = CHIP_TONGA,
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.num_of_watch_points = 4,
@@ -109,6 +114,7 @@ static const struct kfd_device_info fiji_device_info = {
	.asic_family = CHIP_FIJI,
	.asic_family = CHIP_FIJI,
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.num_of_watch_points = 4,
@@ -122,6 +128,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
	.asic_family = CHIP_FIJI,
	.asic_family = CHIP_FIJI,
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.num_of_watch_points = 4,
@@ -136,6 +143,7 @@ static const struct kfd_device_info polaris10_device_info = {
	.asic_family = CHIP_POLARIS10,
	.asic_family = CHIP_POLARIS10,
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.num_of_watch_points = 4,
@@ -149,6 +157,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
	.asic_family = CHIP_POLARIS10,
	.asic_family = CHIP_POLARIS10,
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.num_of_watch_points = 4,
@@ -162,6 +171,7 @@ static const struct kfd_device_info polaris11_device_info = {
	.asic_family = CHIP_POLARIS11,
	.asic_family = CHIP_POLARIS11,
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.num_of_watch_points = 4,
+25 −23
Original line number Original line Diff line number Diff line
@@ -33,7 +33,6 @@


static DEFINE_IDA(doorbell_ida);
static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices;
static unsigned int max_doorbell_slices;
#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4


/*
/*
 * Each device exposes a doorbell aperture, a PCI MMIO aperture that
 * Each device exposes a doorbell aperture, a PCI MMIO aperture that
@@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices;
 */
 */


/* # of doorbell bytes allocated for each process. */
/* # of doorbell bytes allocated for each process. */
static inline size_t doorbell_process_allocation(void)
static size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
{
	return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES *
	return roundup(kfd->device_info->doorbell_size *
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
			PAGE_SIZE);
			PAGE_SIZE);
}
}
@@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd)


	doorbell_start_offset =
	doorbell_start_offset =
			roundup(kfd->shared_resources.doorbell_start_offset,
			roundup(kfd->shared_resources.doorbell_start_offset,
					doorbell_process_allocation());
					kfd_doorbell_process_slice(kfd));


	doorbell_aperture_size =
	doorbell_aperture_size =
			rounddown(kfd->shared_resources.doorbell_aperture_size,
			rounddown(kfd->shared_resources.doorbell_aperture_size,
					doorbell_process_allocation());
					kfd_doorbell_process_slice(kfd));


	if (doorbell_aperture_size > doorbell_start_offset)
	if (doorbell_aperture_size > doorbell_start_offset)
		doorbell_process_limit =
		doorbell_process_limit =
			(doorbell_aperture_size - doorbell_start_offset) /
			(doorbell_aperture_size - doorbell_start_offset) /
						doorbell_process_allocation();
						kfd_doorbell_process_slice(kfd);
	else
	else
		return -ENOSPC;
		return -ENOSPC;


@@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
	kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
	kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);


	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
						doorbell_process_allocation());
					   kfd_doorbell_process_slice(kfd));


	if (!kfd->doorbell_kernel_ptr)
	if (!kfd->doorbell_kernel_ptr)
		return -ENOMEM;
		return -ENOMEM;
@@ -132,16 +131,16 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
	phys_addr_t address;
	phys_addr_t address;
	struct kfd_dev *dev;
	struct kfd_dev *dev;


	/* Find kfd device according to gpu id */
	dev = kfd_device_by_id(vma->vm_pgoff);
	if (!dev)
		return -EINVAL;

	/*
	/*
	 * For simplicitly we only allow mapping of the entire doorbell
	 * For simplicitly we only allow mapping of the entire doorbell
	 * allocation of a single device & process.
	 * allocation of a single device & process.
	 */
	 */
	if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
	if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
		return -EINVAL;

	/* Find kfd device according to gpu id */
	dev = kfd_device_by_id(vma->vm_pgoff);
	if (!dev)
		return -EINVAL;
		return -EINVAL;


	/* Calculate physical address of doorbell */
	/* Calculate physical address of doorbell */
@@ -158,19 +157,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
		 "     vm_flags            == 0x%04lX\n"
		 "     vm_flags            == 0x%04lX\n"
		 "     size                == 0x%04lX\n",
		 "     size                == 0x%04lX\n",
		 (unsigned long long) vma->vm_start, address, vma->vm_flags,
		 (unsigned long long) vma->vm_start, address, vma->vm_flags,
		 doorbell_process_allocation());
		 kfd_doorbell_process_slice(dev));




	return io_remap_pfn_range(vma,
	return io_remap_pfn_range(vma,
				vma->vm_start,
				vma->vm_start,
				address >> PAGE_SHIFT,
				address >> PAGE_SHIFT,
				doorbell_process_allocation(),
				kfd_doorbell_process_slice(dev),
				vma->vm_page_prot);
				vma->vm_page_prot);
}
}




/* get kernel iomem pointer for a doorbell */
/* get kernel iomem pointer for a doorbell */
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
					unsigned int *doorbell_off)
					unsigned int *doorbell_off)
{
{
	u32 inx;
	u32 inx;
@@ -185,6 +184,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return NULL;
		return NULL;


	inx *= kfd->device_info->doorbell_size / sizeof(u32);

	/*
	/*
	 * Calculating the kernel doorbell offset using the first
	 * Calculating the kernel doorbell offset using the first
	 * doorbell page.
	 * doorbell page.
@@ -210,7 +211,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
	mutex_unlock(&kfd->doorbell_mutex);
	mutex_unlock(&kfd->doorbell_mutex);
}
}


inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
void write_kernel_doorbell(void __iomem *db, u32 value)
{
{
	if (db) {
	if (db) {
		writel(value, db);
		writel(value, db);
@@ -228,20 +229,21 @@ unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
{
{
	/*
	/*
	 * doorbell_id_offset accounts for doorbells taken by KGD.
	 * doorbell_id_offset accounts for doorbells taken by KGD.
	 * index * doorbell_process_allocation/sizeof(u32) adjusts to
	 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
	 * the process's doorbells.
	 * the process's doorbells. The offset returned is in dword
	 * units regardless of the ASIC-dependent doorbell size.
	 */
	 */
	return kfd->doorbell_id_offset +
	return kfd->doorbell_id_offset +
		process->doorbell_index
		process->doorbell_index
		* doorbell_process_allocation() / sizeof(u32) +
		* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
		queue_id;
		queue_id * kfd->device_info->doorbell_size / sizeof(u32);
}
}


uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
{
{
	uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
	uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
				kfd->shared_resources.doorbell_start_offset) /
				kfd->shared_resources.doorbell_start_offset) /
					doorbell_process_allocation() + 1;
					kfd_doorbell_process_slice(kfd) + 1;


	return num_of_elems;
	return num_of_elems;


@@ -251,7 +253,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
					struct kfd_process *process)
					struct kfd_process *process)
{
{
	return dev->doorbell_base +
	return dev->doorbell_base +
		process->doorbell_index * doorbell_process_allocation();
		process->doorbell_index * kfd_doorbell_process_slice(dev);
}
}


int kfd_alloc_process_doorbells(struct kfd_process *process)
int kfd_alloc_process_doorbells(struct kfd_process *process)
+4 −3
Original line number Original line Diff line number Diff line
@@ -160,6 +160,7 @@ struct kfd_device_info {
	const struct kfd_event_interrupt_class *event_interrupt_class;
	const struct kfd_event_interrupt_class *event_interrupt_class;
	unsigned int max_pasid_bits;
	unsigned int max_pasid_bits;
	unsigned int max_no_of_hqd;
	unsigned int max_no_of_hqd;
	unsigned int doorbell_size;
	size_t ih_ring_entry_size;
	size_t ih_ring_entry_size;
	uint8_t num_of_watch_points;
	uint8_t num_of_watch_points;
	uint16_t mqd_size_aligned;
	uint16_t mqd_size_aligned;
@@ -364,7 +365,7 @@ struct queue_properties {
	uint32_t queue_percent;
	uint32_t queue_percent;
	uint32_t *read_ptr;
	uint32_t *read_ptr;
	uint32_t *write_ptr;
	uint32_t *write_ptr;
	uint32_t __iomem *doorbell_ptr;
	void __iomem *doorbell_ptr;
	uint32_t doorbell_off;
	uint32_t doorbell_off;
	bool is_interop;
	bool is_interop;
	bool is_evicted;
	bool is_evicted;
@@ -728,11 +729,11 @@ void kfd_pasid_free(unsigned int pasid);
int kfd_doorbell_init(struct kfd_dev *kfd);
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
					unsigned int *doorbell_off);
					unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(u32 __iomem *db, u32 value);
void write_kernel_doorbell(void __iomem *db, u32 value);
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
					struct kfd_process *process,
					struct kfd_process *process,
					unsigned int queue_id);
					unsigned int queue_id);