Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 19f470b2 authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'drm-amdkfd-next-2017-10-18' of git://people.freedesktop.org/~gabbayo/linux into drm-next

This is the amdkfd pull request for 4.15 merge window.
The patches here are relevant only for Kaveri and Carrizo. Still no dGPU patches.

The main goal is to continue alignment with the internal AMD development tree.

The following is a summary of the changes:
- Improvements and fixes to suspend/resume code
- Improvements and fixes to process termination code
- Cleanups in the queue unmapping functionality
- Reuse code from amdgpu

* tag 'drm-amdkfd-next-2017-10-18' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: Improve multiple SDMA queues support per process
  drm/amdkfd: Limit queue number per process and device to 127
  drm/amdkfd: Clean up process queue management
  drm/amdkfd: Compress unnecessary function parameters
  drm/amdkfd: Improve process termination handling
  drm/amdkfd: Avoid submitting an unnecessary packet to HWS
  drm/amdkfd: Fix MQD updates
  drm/amdkfd: Pass filter params to unmap_queues_cpsch
  drm/amdkfd: move locking outside of unmap_queues_cpsch
  drm/amdkfd: Avoid name confusion involved in queue unmapping
  drm/amdkfd: Drop _nocpsch suffix from shared functions
  drm/amdkfd: Reuse CHIP_* from amdgpu v2
  drm/amdkfd: Use VMID bitmap from KGD v2
  drm/amdkfd: Adjust dequeue latencies and timeouts
  drm/amdkfd: Rectify the jiffies calculation error with milliseconds v2
  drm/amdkfd: Fix suspend/resume issue on Carrizo v2
  drm/amdkfd: Reorganize kfd resume code
parents 282dc832 e139cd2a
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -282,8 +282,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties,
				0, q_properties.type, &queue_id);
	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

+5 −9
Original line number Diff line number Diff line
@@ -184,9 +184,10 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
	struct kernel_queue *kq = NULL;
	int status;

	properties.type = KFD_QUEUE_TYPE_DIQ;

	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
				&properties, 0, KFD_QUEUE_TYPE_DIQ,
				&qid);
				&properties, &qid);

	if (status) {
		pr_err("Failed to create DIQ\n");
@@ -769,13 +770,8 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	struct dbg_wave_control_info wac_info;
	int temp;
	int first_vmid_to_scan = 8;
	int last_vmid_to_scan = 15;

	first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
	temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
	last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;

	reg_sq_cmd.u32All = 0;
	status = 0;
+61 −45
Original line number Diff line number Diff line
@@ -92,6 +92,8 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume(struct kfd_dev *kfd);

static const struct kfd_device_info *lookup_device_info(unsigned short did)
{
	size_t i;
@@ -169,15 +171,8 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd)
			(unsigned int)(1 << kfd->device_info->max_pasid_bits),
			iommu_info.max_pasids);

	err = amd_iommu_init_device(kfd->pdev, pasid_limit);
	if (err < 0) {
		dev_err(kfd_device, "error initializing iommu device\n");
		return false;
	}

	if (!kfd_set_pasid_limit(pasid_limit)) {
		dev_err(kfd_device, "error setting pasid limit\n");
		amd_iommu_free_device(kfd->pdev);
		return false;
	}

@@ -189,7 +184,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
	struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);

	if (dev)
		kfd_unbind_process_from_device(dev, pasid);
		kfd_process_iommu_unbind_callback(dev, pasid);
}

/*
@@ -224,6 +219,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,

	kfd->shared_resources = *gpu_resources;

	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
			- kfd->vm_info.first_vmid_kfd + 1;

	/* calculate max size of mqds needed for queues */
	size = max_num_of_queues_per_device *
			kfd->device_info->mqd_size_aligned;
@@ -273,29 +273,22 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
		goto kfd_interrupt_error;
	}

	if (!device_iommu_pasid_init(kfd)) {
		dev_err(kfd_device,
			"Error initializing iommuv2 for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
		goto device_iommu_pasid_error;
	}
	amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
						iommu_pasid_shutdown_callback);
	amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb);

	kfd->dqm = device_queue_manager_init(kfd);
	if (!kfd->dqm) {
		dev_err(kfd_device, "Error initializing queue manager\n");
		goto device_queue_manager_error;
	}

	if (kfd->dqm->ops.start(kfd->dqm)) {
	if (!device_iommu_pasid_init(kfd)) {
		dev_err(kfd_device,
			"Error starting queue manager for device %x:%x\n",
			"Error initializing iommuv2 for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
		goto dqm_start_error;
		goto device_iommu_pasid_error;
	}

	if (kfd_resume(kfd))
		goto kfd_resume_error;

	kfd->dbgmgr = NULL;

	kfd->init_complete = true;
@@ -307,11 +300,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,

	goto out;

dqm_start_error:
kfd_resume_error:
device_iommu_pasid_error:
	device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
	amd_iommu_free_device(kfd->pdev);
device_iommu_pasid_error:
	kfd_interrupt_exit(kfd);
kfd_interrupt_error:
	kfd_topology_remove_device(kfd);
@@ -331,8 +323,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		kgd2kfd_suspend(kfd);
		device_queue_manager_uninit(kfd->dqm);
		amd_iommu_free_device(kfd->pdev);
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
		kfd_doorbell_fini(kfd);
@@ -345,35 +337,59 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)

void kgd2kfd_suspend(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
	if (!kfd->init_complete)
		return;

	kfd->dqm->ops.stop(kfd->dqm);

	kfd_unbind_processes_from_device(kfd);

	amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
	amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
	amd_iommu_free_device(kfd->pdev);
}
}

int kgd2kfd_resume(struct kfd_dev *kfd)
{
	unsigned int pasid_limit;
	int err;
	if (!kfd->init_complete)
		return 0;

	pasid_limit = kfd_get_pasid_limit();
	return kfd_resume(kfd);

	if (kfd->init_complete) {
		err = amd_iommu_init_device(kfd->pdev, pasid_limit);
		if (err < 0) {
			dev_err(kfd_device, "failed to initialize iommu\n");
			return -ENXIO;
}

static int kfd_resume(struct kfd_dev *kfd)
{
	int err = 0;
	unsigned int pasid_limit = kfd_get_pasid_limit();

	err = amd_iommu_init_device(kfd->pdev, pasid_limit);
	if (err)
		return -ENXIO;
	amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
					iommu_pasid_shutdown_callback);
		amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb);
		kfd->dqm->ops.start(kfd->dqm);
	amd_iommu_set_invalid_ppr_cb(kfd->pdev,
				     iommu_invalid_ppr_cb);

	err = kfd_bind_processes_to_device(kfd);
	if (err)
		goto processes_bind_error;

	err = kfd->dqm->ops.start(kfd->dqm);
	if (err) {
		dev_err(kfd_device,
			"Error starting queue manager for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
		goto dqm_start_error;
	}

	return 0;
	return err;

dqm_start_error:
processes_bind_error:
	amd_iommu_free_device(kfd->pdev);

	return err;
}

/* This is called directly from KGD at ISR. */
+288 −152

File changed.

Preview size limit exceeded, changes collapsed.

+8 −5
Original line number Diff line number Diff line
@@ -29,11 +29,9 @@
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"

#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS	(500)
#define CIK_VMID_NUM				(8)
#define KFD_VMID_START_OFFSET			(8)
#define VMID_PER_DEVICE				CIK_VMID_NUM
#define KFD_DQM_FIRST_PIPE			(0)
#define KFD_UNMAP_LATENCY_MS			(4000)
#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000)

#define CIK_SDMA_QUEUES				(4)
#define CIK_SDMA_QUEUES_PER_ENGINE		(2)
#define CIK_SDMA_ENGINE_NUM			(2)
@@ -79,6 +77,8 @@ struct device_process_node {
 * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
 * memory apertures.
 *
 * @process_termination: Clears all process queues belongs to that device.
 *
 */

struct device_queue_manager_ops {
@@ -122,6 +122,9 @@ struct device_queue_manager_ops {
					   enum cache_policy alternate_policy,
					   void __user *alternate_aperture_base,
					   uint64_t alternate_aperture_size);

	int (*process_termination)(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd);
};

struct device_queue_manager_asic_ops {
Loading