Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 19f6d2a6 authored by Oded Gabbay
Browse files

amdkfd: Add basic modules to amdkfd



This patch adds the process module and three helper modules:

- kfd_process, which handles processes that open /dev/kfd

- kfd_doorbell, which provides helper functions for doorbell allocation,
  release and mapping to userspace

- kfd_pasid, which provides helper functions for pasid allocation and release

- kfd_aperture, which provides helper functions for managing the LDS, Local GPU
  memory and Scratch memory apertures of the process

This patch only contains the basic kfd_process module, which doesn't contain
the reference to the queue scheduler. This was done to allow easier code review.

Also, this patch doesn't contain the calls to the IOMMU driver for binding the
pasid to the device. Again, this was done to allow easier code review

The kfd_process object is created when a process opens /dev/kfd and is closed
when the mm_struct of that process is torn down.

v3:

Removed kfd_vidmem.c file
Replaced direct mmput call to mmu_notifier release
Removed typedefs
Moved bool field to end of the structure
Added new kernel params for gart usage limitation
Added initialization of sa manager
Fixed debug messages
Remove support for LDS in 32 bit
Changed code to support mmap of doorbell pages from userspace
Added documentation for apertures

v4: Replaced RCU by SRCU for kfd_process list management

v5:

Move amdkfd from drm/radeon/ to drm/amd/
Rename kfd_aperture.c to kfd_flat_memory.c
Protect against multiple init calls
MQD size is H/W dependent so moved it to device info structure
Rename kfd_mem_obj structure's members
Use delayed function for process tear-down

Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
parent 5b5c4e40
Loading
Loading
Loading
Loading
+3 −1
Original line number Original line Diff line number Diff line
@@ -4,6 +4,8 @@


ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/
ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/


amdkfd-y	:= kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o
amdkfd-y	:= kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
		kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
		kfd_process.o


obj-$(CONFIG_HSA_AMD)	+= amdkfd.o
obj-$(CONFIG_HSA_AMD)	+= amdkfd.o
+29 −2
Original line number Original line Diff line number Diff line
@@ -38,6 +38,7 @@


static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);


static const char kfd_dev_name[] = "kfd";
static const char kfd_dev_name[] = "kfd";


@@ -46,6 +47,7 @@ static const struct file_operations kfd_fops = {
	.unlocked_ioctl = kfd_ioctl,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.open = kfd_open,
	.mmap = kfd_mmap,
};
};


static int kfd_char_dev_major = -1;
static int kfd_char_dev_major = -1;
@@ -98,9 +100,22 @@ struct device *kfd_chardev(void)


static int kfd_open(struct inode *inode, struct file *filep)
static int kfd_open(struct inode *inode, struct file *filep)
{
{
	struct kfd_process *process;

	if (iminor(inode) != 0)
	if (iminor(inode) != 0)
		return -ENODEV;
		return -ENODEV;


	process = kfd_create_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	process->is_32bit_user_mode = is_compat_task();

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	kfd_init_apertures(process);

	return 0;
	return 0;
}
}


@@ -156,8 +171,9 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
		"ioctl cmd 0x%x (#%d), arg 0x%lx\n",
		"ioctl cmd 0x%x (#%d), arg 0x%lx\n",
		cmd, _IOC_NR(cmd), arg);
		cmd, _IOC_NR(cmd), arg);


	/* TODO: add function that retrieves process */
	process = kfd_get_process(current);
	process = NULL;
	if (IS_ERR(process))
		return PTR_ERR(process);


	switch (cmd) {
	switch (cmd) {
	case KFD_IOC_GET_VERSION:
	case KFD_IOC_GET_VERSION:
@@ -208,3 +224,14 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)


	return err;
	return err;
}
}

/*
 * mmap handler for the kfd character device.
 *
 * Looks up the kfd_process belonging to the calling task and hands the
 * request to kfd_doorbell_mmap(). If the lookup yields an error pointer,
 * the error code it encodes is returned instead.
 */
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *caller = kfd_get_process(current);

	if (IS_ERR(caller))
		return PTR_ERR(caller);

	return kfd_doorbell_mmap(caller, vma);
}
+40 −6
Original line number Original line Diff line number Diff line
@@ -26,8 +26,11 @@
#include <linux/slab.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_priv.h"


#define MQD_SIZE_ALIGNED 768

static const struct kfd_device_info kaveri_device_info = {
static const struct kfd_device_info kaveri_device_info = {
	.max_pasid_bits = 16,
	.max_pasid_bits = 16,
	.mqd_size_aligned = MQD_SIZE_ALIGNED
};
};


struct kfd_deviceid {
struct kfd_deviceid {
@@ -92,6 +95,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
	kfd->kgd = kgd;
	kfd->kgd = kgd;
	kfd->device_info = device_info;
	kfd->device_info = device_info;
	kfd->pdev = pdev;
	kfd->pdev = pdev;
	kfd->init_complete = false;


	return kfd;
	return kfd;
}
}
@@ -99,23 +103,53 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
bool kgd2kfd_device_init(struct kfd_dev *kfd,
bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 const struct kgd2kfd_shared_resources *gpu_resources)
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
{
	unsigned int size;

	kfd->shared_resources = *gpu_resources;
	kfd->shared_resources = *gpu_resources;


	if (kfd_topology_add_device(kfd) != 0)
	/* calculate max size of mqds needed for queues */
		return false;
	size = max_num_of_processes *
		max_num_of_queues_per_process *
		kfd->device_info->mqd_size_aligned;

	/* add another 512KB for all other allocations on gart */
	size += 512 * 1024;

	if (kfd2kgd->init_sa_manager(kfd->kgd, size)) {
		dev_err(kfd_device,
			"Error initializing sa manager for device (%x:%x)\n",
			kfd->pdev->vendor, kfd->pdev->device);
		goto out;
	}

	kfd_doorbell_init(kfd);

	if (kfd_topology_add_device(kfd) != 0) {
		dev_err(kfd_device,
			"Error adding device (%x:%x) to topology\n",
			kfd->pdev->vendor, kfd->pdev->device);
		goto kfd_topology_add_device_error;
	}



	kfd->init_complete = true;
	kfd->init_complete = true;
	dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
	dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
		 kfd->pdev->device);
		 kfd->pdev->device);


	return true;
	goto out;

kfd_topology_add_device_error:
	kfd2kgd->fini_sa_manager(kfd->kgd);
	dev_err(kfd_device,
		"device (%x:%x) NOT added due to errors\n",
		kfd->pdev->vendor, kfd->pdev->device);
out:
	return kfd->init_complete;
}
}


void kgd2kfd_device_exit(struct kfd_dev *kfd)
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
{
	int err = kfd_topology_remove_device(kfd);
	kfd_topology_remove_device(kfd);

	BUG_ON(err != 0);


	kfree(kfd);
	kfree(kfd);
}
}
+255 −0
Original line number Original line Diff line number Diff line
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "kfd_priv.h"
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/slab.h>

/*
 * This extension supports kernel-level doorbell management for
 * the kernel queues.
 * Basically, the last doorbell page is devoted to kernel queues,
 * which ensures that no user process can get access to the
 * kernel doorbell page.
 */
/* Held around every read-modify-write of doorbell_available_index. */
static DEFINE_MUTEX(doorbell_mutex);
/*
 * Allocation bitmap for kernel doorbells: one bit per slot, set while the
 * slot is handed out by kfd_get_kernel_doorbell() and cleared again by
 * kfd_release_kernel_doorbell().
 * NOTE(review): this is file-scope state, so it is shared by every kfd_dev
 * rather than being per device - confirm that is intended.
 */
static unsigned long doorbell_available_index[
	DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)] = { 0 };

/* "Faked" pasid used when computing doorbell offsets for kernel queues. */
#define KERNEL_DOORBELL_PASID 1
/* Size of a single doorbell; doorbells receive 32-bit writes. */
#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4

/*
 * Each device exposes a doorbell aperture, a PCI MMIO aperture that
 * receives 32-bit writes that are passed to queues as wptr values.
 * The doorbells are intended to be written by applications as part
 * of queueing work on user-mode queues.
 * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
 * We map the doorbell address space into user-mode when a process creates
 * its first queue on each device.
 * Although the mapping is done by KFD, it is equivalent to an mmap of
 * the /dev/kfd with the particular device encoded in the mmap offset.
 * There will be other uses for mmap of /dev/kfd, so only a range of
 * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
 */

/*
 * Number of doorbell bytes set aside for each process: one doorbell of
 * KFD_SIZE_OF_DOORBELL_IN_BYTES for every possible queue of the process,
 * rounded up to a multiple of PAGE_SIZE.
 */
static inline size_t doorbell_process_allocation(void)
{
	const size_t doorbell_bytes = KFD_SIZE_OF_DOORBELL_IN_BYTES *
					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS;

	return roundup(doorbell_bytes, PAGE_SIZE);
}

/*
 * kfd_doorbell_init - doorbell calculations for device init
 * @kfd: device whose shared_resources doorbell fields are already filled in
 *
 * Aligns the doorbell start offset (up) and the aperture size (down) to the
 * per-process allocation size, then records on @kfd:
 *   - doorbell_base:          physical address of the first usable chunk
 *   - doorbell_id_offset:     the start offset expressed in 32-bit doorbells
 *   - doorbell_process_limit: number of chunks that fit, minus one
 *   - doorbell_kernel_ptr:    ioremap() of one chunk at doorbell_base
 *
 * The function returns void, so a failed ioremap() cannot be reported to
 * the caller and triggers BUG_ON().
 */
void kfd_doorbell_init(struct kfd_dev *kfd)
{
	const size_t chunk_size = doorbell_process_allocation();
	size_t doorbell_start_offset;
	size_t doorbell_aperture_size;
	size_t doorbell_process_limit;

	/*
	 * We start with calculations in bytes because the input data might
	 * only be byte-aligned.
	 * Only after we have done the rounding can we assume any alignment.
	 */

	doorbell_start_offset =
			roundup(kfd->shared_resources.doorbell_start_offset,
					chunk_size);

	doorbell_aperture_size =
			rounddown(kfd->shared_resources.doorbell_aperture_size,
					chunk_size);

	if (doorbell_aperture_size > doorbell_start_offset)
		doorbell_process_limit =
			(doorbell_aperture_size - doorbell_start_offset) /
						chunk_size;
	else
		doorbell_process_limit = 0;

	kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
				doorbell_start_offset;

	kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);

	/*
	 * The stored limit is one less than the number of chunks that fit
	 * (presumably one chunk is kept for kernel doorbells - confirm).
	 * Guard the subtraction: when no chunk fits, an unconditional "- 1"
	 * on an unsigned value would wrap around to a huge limit.
	 */
	kfd->doorbell_process_limit = doorbell_process_limit ?
					doorbell_process_limit - 1 : 0;

	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, chunk_size);

	BUG_ON(!kfd->doorbell_kernel_ptr);

	pr_debug("kfd: doorbell initialization:\n");
	pr_debug("kfd: doorbell base           == 0x%08lX\n",
			(uintptr_t)kfd->doorbell_base);

	pr_debug("kfd: doorbell_id_offset      == 0x%08lX\n",
			kfd->doorbell_id_offset);

	/* size_t must be printed with the 'z' length modifier. */
	pr_debug("kfd: doorbell_process_limit  == 0x%08zX\n",
			doorbell_process_limit);

	pr_debug("kfd: doorbell_kernel_offset  == 0x%08lX\n",
			(uintptr_t)kfd->doorbell_base);

	pr_debug("kfd: doorbell aperture size  == 0x%08lX\n",
			kfd->shared_resources.doorbell_aperture_size);

	pr_debug("kfd: doorbell kernel address == 0x%08lX\n",
			(uintptr_t)kfd->doorbell_kernel_ptr);
}

/*
 * kfd_doorbell_mmap - map a process's doorbell chunk into user space
 * @process: kfd process requesting the mapping
 * @vma:     user mapping; vm_pgoff carries the gpu id of the target device
 *
 * Returns -EINVAL when the requested length is not exactly one per-process
 * doorbell allocation, when no device matches the gpu id, or when no
 * process-device data exists for the (device, process) pair. Otherwise
 * returns the result of io_remap_pfn_range().
 */
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
{
	const size_t map_size = doorbell_process_allocation();
	phys_addr_t address;
	struct kfd_dev *dev;

	/*
	 * For simplicity we only allow mapping of the entire doorbell
	 * allocation of a single device & process.
	 */
	if (vma->vm_end - vma->vm_start != map_size)
		return -EINVAL;

	/* Find kfd device according to gpu id */
	dev = kfd_device_by_id(vma->vm_pgoff);
	if (dev == NULL)
		return -EINVAL;

	/* Find if pdd exists for combination of process and gpu id */
	if (!kfd_get_process_device_data(dev, process, 0))
		return -EINVAL;

	/* Calculate physical address of doorbell */
	address = kfd_get_process_doorbells(dev, process);

	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
				VM_DONTDUMP | VM_PFNMAP;

	/* Doorbells are device registers, so the mapping must be uncached. */
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	/*
	 * phys_addr_t may be 32 or 64 bits wide depending on the kernel
	 * configuration, so widen it explicitly for %llX; size_t needs %zX.
	 */
	pr_debug("kfd: mapping doorbell page in kfd_doorbell_mmap\n"
		 "     target user address == 0x%08llX\n"
		 "     physical address    == 0x%08llX\n"
		 "     vm_flags            == 0x%04lX\n"
		 "     size                == 0x%04zX\n",
		 (unsigned long long) vma->vm_start,
		 (unsigned long long) address, vma->vm_flags,
		 map_size);


	return io_remap_pfn_range(vma,
				vma->vm_start,
				address >> PAGE_SHIFT,
				map_size,
				vma->vm_page_prot);
}


/*
 * kfd_get_kernel_doorbell - allocate a doorbell for a kernel queue
 * @kfd:          device whose kernel doorbell mapping is used
 * @doorbell_off: out parameter, receives the doorbell offset
 *
 * Claims the first free slot in doorbell_available_index under
 * doorbell_mutex. Returns the kernel iomem pointer of that doorbell, or
 * NULL (leaving *doorbell_off untouched) when every slot is already taken.
 */
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
					unsigned int *doorbell_off)
{
	u32 inx;

	BUG_ON(!kfd || !doorbell_off);

	mutex_lock(&doorbell_mutex);
	inx = find_first_zero_bit(doorbell_available_index,
					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	/*
	 * find_first_zero_bit() returns the bitmap size when no bit is
	 * free. Only mark the slot when it is a real one; setting bit
	 * "size" would write past the end of the bitmap.
	 */
	if (inx < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		__set_bit(inx, doorbell_available_index);
	mutex_unlock(&doorbell_mutex);

	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return NULL;

	/*
	 * Calculating the kernel doorbell offset using "faked" kernel
	 * pasid that allocated for kernel queues only
	 */
	*doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation() /
							sizeof(u32)) + inx;

	/* The offset is printed behind a "0x" prefix, so format it as hex. */
	pr_debug("kfd: get kernel queue doorbell\n"
			 "     doorbell offset   == 0x%08X\n"
			 "     kernel address    == 0x%08lX\n",
		*doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));

	return kfd->doorbell_kernel_ptr + inx;
}

/*
 * kfd_release_kernel_doorbell - give a kernel doorbell slot back
 * @kfd:     device the doorbell was obtained from
 * @db_addr: doorbell pointer to release
 *
 * The slot number is the distance (in 32-bit doorbells) of @db_addr from
 * the start of the device's kernel doorbell mapping; its bit in
 * doorbell_available_index is cleared under doorbell_mutex.
 */
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
{
	unsigned int slot;

	BUG_ON(!kfd || !db_addr);

	slot = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);

	mutex_lock(&doorbell_mutex);
	__clear_bit(slot, doorbell_available_index);
	mutex_unlock(&doorbell_mutex);
}

/*
 * write_kernel_doorbell - ring a kernel doorbell
 * @db:    doorbell to write; a NULL pointer makes the call a no-op
 * @value: value written to the doorbell
 */
inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
{
	if (!db)
		return;

	writel(value, db);
	pr_debug("writing %d to doorbell address 0x%p\n", value, db);
}

/*
 * kfd_queue_id_to_doorbell - translate a queue id into a doorbell index
 * @kfd:      device the queue lives on
 * @process:  process owning the queue
 * @queue_id: queue id, in the range [0,MAX_PROCESS_QUEUES)
 *
 * Queue ids map 1:1 onto the doorbells inside the process's doorbell page.
 */
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
					struct kfd_process *process,
					unsigned int queue_id)
{
	/* Number of 32-bit doorbells contained in one process's allocation. */
	const size_t doorbells_per_process =
			doorbell_process_allocation() / sizeof(u32);

	/*
	 * Start after the doorbells taken by KGD (doorbell_id_offset), skip
	 * ahead to this process's doorbells using its pasid, then pick the
	 * doorbell that belongs to the queue.
	 */
	return kfd->doorbell_id_offset +
		process->pasid * doorbells_per_process +
		queue_id;
}

/*
 * kfd_get_number_elems - number of per-process doorbell chunks, plus one
 * @kfd: device whose shared_resources doorbell fields are consulted
 *
 * Divides the span between the doorbell start offset and the aperture size
 * by the per-process allocation size and adds one.
 * NOTE(review): unlike kfd_doorbell_init(), the raw (unrounded) start
 * offset and aperture size are used here - confirm this is intended.
 */
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
{
	return (kfd->shared_resources.doorbell_aperture_size -
		kfd->shared_resources.doorbell_start_offset) /
			doorbell_process_allocation() + 1;
}

/*
 * kfd_get_process_doorbells - physical address of a process's doorbells
 * @dev:     device providing the doorbell aperture
 * @process: process whose doorbell chunk is wanted
 *
 * Chunks are laid out back to back from the device's doorbell base and
 * indexed by the process's pasid.
 */
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
					struct kfd_process *process)
{
	const size_t chunk_offset =
			process->pasid * doorbell_process_allocation();

	return dev->doorbell_base + chunk_offset;
}
+355 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading