Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9fd3f1bf authored by Felix Kuehling's avatar Felix Kuehling Committed by Oded Gabbay
Browse files

drm/amdkfd: Improve process termination handling



Separate device queue termination from process queue manager
termination. Unmap all queues at once instead of one at a time.
Unmap device queues before the PASID is unbound, in the
kfd_process_iommu_unbind_callback.

When resetting wavefronts in non-HWS mode, do it before the VMID is
released.

Signed-off-by: default avatarBen Goz <ben.goz@amd.com>
Signed-off-by: default avatarshaoyun liu <shaoyun.liu@amd.com>
Signed-off-by: default avatarAmber Lin <Amber.Lin@amd.com>
Signed-off-by: default avatarYong Zhao <Yong.Zhao@amd.com>
Signed-off-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent c4744e24
Loading
Loading
Loading
Loading
+147 −38
Original line number Diff line number Diff line
@@ -296,65 +296,73 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid asynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
		return -EINVAL;
	}
	dqm->total_queue_count--;

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);

	if (retval)
		goto out;
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	return retval;
}

out:
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	mutex_lock(&dqm->lock);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	mutex_unlock(&dqm->lock);

	return retval;
}

@@ -921,10 +929,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;
	struct kfd_process_device *pdd;

	retval = 0;
	int retval = 0;

	if (!dqm->active_runlist)
		return retval;
@@ -948,12 +953,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval) {
		pdd = kfd_get_process_device_data(dqm->dev,
				kfd_get_process(current));
		pdd->reset_wavefronts = true;
	if (retval)
		return retval;
	}

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

@@ -1015,7 +1017,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

@@ -1105,6 +1110,108 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
	return retval;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	mutex_lock(&dqm->lock);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	mutex_unlock(&dqm->lock);
	return retval;
}


static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	mutex_lock(&dqm->lock);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			dqm->sdma_queue_count--;

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if (retval || qpd->reset_wavefronts) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;
@@ -1133,6 +1240,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_cpsch;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
@@ -1147,6 +1255,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_nocpsch;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", sched_policy);
+5 −0
Original line number Diff line number Diff line
@@ -77,6 +77,8 @@ struct device_process_node {
 * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
 * memory apertures.
 *
 * @process_termination: Clears all process queues belongs to that device.
 *
 */

struct device_queue_manager_ops {
@@ -120,6 +122,9 @@ struct device_queue_manager_ops {
					   enum cache_policy alternate_policy,
					   void __user *alternate_aperture_base,
					   uint64_t alternate_aperture_size);

	int (*process_termination)(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd);
};

struct device_queue_manager_asic_ops {
+15 −3
Original line number Diff line number Diff line
@@ -421,6 +421,12 @@ struct qcm_process_device {
	unsigned int queue_count;
	unsigned int vmid;
	bool is_debug;

	/* This flag tells if we should reset all wavefronts on
	 * process termination
	 */
	bool reset_wavefronts;

	/*
	 * All the memory management data should be here too
	 */
@@ -454,6 +460,8 @@ struct kfd_process_device {
	/* The device that owns this data. */
	struct kfd_dev *dev;

	/* The process that owns this kfd_process_device. */
	struct kfd_process *process;

	/* per-process-per device QCM data structure */
	struct qcm_process_device qpd;
@@ -469,10 +477,12 @@ struct kfd_process_device {
	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
	enum kfd_pdd_bound bound;

	/* This flag tells if we should reset all
	 * wavefronts on process termination
	/* Flag used to tell the pdd has dequeued from the dqm.
	 * This is used to prevent dev->dqm->ops.process_termination() from
	 * being called twice when it is already called in IOMMU callback
	 * function.
	 */
	bool reset_wavefronts;
	bool already_dequeued;
};

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -659,6 +669,8 @@ struct process_queue_node {
	struct list_head process_queue_list;
};

void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
+10 −26
Original line number Diff line number Diff line
@@ -171,9 +171,6 @@ static void kfd_process_wq_release(struct work_struct *work)
		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
				pdd->dev->id, p->pasid);

		if (pdd->reset_wavefronts)
			dbgdev_wave_reset_wavefronts(pdd->dev, p);

		if (pdd->bound == PDD_BOUND)
			amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);

@@ -237,24 +234,17 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,

	mutex_lock(&p->mutex);

	/* In case our notifier is called before IOMMU notifier */
	kfd_process_dequeue_from_all_devices(p);
	pqm_uninit(&p->pqm);

	/* Iterate over all process device data structure and check
	 * if we should delete debug managers and reset all wavefronts
	 * if we should delete debug managers
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if ((pdd->dev->dbgmgr) &&
				(pdd->dev->dbgmgr->pasid == p->pasid))
			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);

		if (pdd->reset_wavefronts) {
			pr_warn("Resetting all wave fronts\n");
			dbgdev_wave_reset_wavefronts(pdd->dev, p);
			pdd->reset_wavefronts = false;
		}
	}

	mutex_unlock(&p->mutex);

	/*
@@ -368,8 +358,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
		INIT_LIST_HEAD(&pdd->qpd.queues_list);
		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
		pdd->qpd.dqm = dev->dqm;
		pdd->reset_wavefronts = false;
		pdd->process = p;
		pdd->bound = PDD_UNBOUND;
		pdd->already_dequeued = false;
		list_add(&pdd->per_device_list, &p->per_device_data);
	}

@@ -498,19 +489,12 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
		kfd_dbgmgr_destroy(dev->dbgmgr);

	pqm_uninit(&p->pqm);

	pdd = kfd_get_process_device_data(dev, p);

	if (!pdd) {
		mutex_unlock(&p->mutex);
		return;
	}

	if (pdd->reset_wavefronts) {
		dbgdev_wave_reset_wavefronts(pdd->dev, p);
		pdd->reset_wavefronts = false;
	}
	if (pdd)
		/* For GPU relying on IOMMU, we need to dequeue here
		 * when PASID is still bound.
		 */
		kfd_process_dequeue_from_device(pdd);

	mutex_unlock(&p->mutex);
}
+23 −14
Original line number Diff line number Diff line
@@ -63,6 +63,25 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
	return 0;
}

void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
{
	struct kfd_dev *dev = pdd->dev;

	if (pdd->already_dequeued)
		return;

	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
	pdd->already_dequeued = true;
}

void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		kfd_process_dequeue_from_device(pdd);
}

int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
	INIT_LIST_HEAD(&pqm->queues);
@@ -78,21 +97,14 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)

void pqm_uninit(struct process_queue_manager *pqm)
{
	int retval;
	struct process_queue_node *pqn, *next;

	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
		retval = pqm_destroy_queue(
				pqm,
				(pqn->q != NULL) ?
					pqn->q->properties.queue_id :
					pqn->kq->queue->properties.queue_id);

		if (retval != 0) {
			pr_err("failed to destroy queue\n");
			return;
		}
		uninit_queue(pqn->q);
		list_del(&pqn->process_queue_list);
		kfree(pqn);
	}

	kfree(pqm->queue_slot_bitmap);
	pqm->queue_slot_bitmap = NULL;
}
@@ -290,9 +302,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
	if (pqn->q) {
		dqm = pqn->q->device->dqm;
		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
		if (retval != 0)
			return retval;

		uninit_queue(pqn->q);
	}