Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 552764b6 authored by Felix Kuehling, committed by Oded Gabbay
Browse files

drm/amdkfd: Add TC flush on VMID deallocation for Hawaii



On GFX7 the CP does not perform a TC flush when queues are unmapped.
To avoid TC eviction from accessing an invalid VMID, flush it
explicitly before releasing a VMID.

v2: Fix unnecessary list_for_each_entry_safe
v3: Moved allocation to kfd_process_device_init_vm

Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent f35751b8
Loading
Loading
Loading
Loading
+21 −1
Original line number Diff line number Diff line
@@ -142,12 +142,31 @@ static int allocate_vmid(struct device_queue_manager *dqm,
	return 0;
}

/* flush_texture_cache_nocpsch - Build a RELEASE_MEM packet in the
 *	per-process IB buffer and submit it as an IB on MEC1 using the
 *	process' VMID.
 *	@kdev - device whose kfd2kgd interface is used to submit the IB
 *	@qpd - per-process device data supplying ib_base, ib_kaddr and vmid
 *	Return - -ENOMEM if no IB buffer is available (ib_kaddr is NULL),
 *	otherwise whatever submit_ib returns
 */
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	uint32_t *ib_cpu_ptr = (uint32_t *)qpd->ib_kaddr;
	uint32_t ib_len;

	/* Without a CPU mapping of the IB there is nowhere to build the packet */
	if (ib_cpu_ptr == NULL)
		return -ENOMEM;

	/* Fill the IB with a single RELEASE_MEM packet; keep its length */
	ib_len = pm_create_release_mem(qpd->ib_base, ib_cpu_ptr);

	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, ib_cpu_ptr, ib_len);
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
@@ -792,11 +811,12 @@ static void uninitialize(struct device_queue_manager *dqm)
/* start_nocpsch - Start the queue manager when running without the CP
 *	scheduler.
 *	@dqm - device queue manager being started
 *	Sets up interrupts and then initializes the packet manager embedded
 *	in @dqm; stop_nocpsch() releases it again with pm_uninit().
 *	Return - result of pm_init()
 */
static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);

	/*
	 * A leftover unconditional "return 0;" used to precede this call,
	 * making pm_init() unreachable even though stop_nocpsch() calls
	 * pm_uninit() on the same packet manager.
	 */
	return pm_init(&dqm->packets, dqm);
}

/* stop_nocpsch - Stop the queue manager when running without the CP
 *	scheduler.
 *	@dqm - device queue manager being stopped
 *	Releases the packet manager embedded in @dqm.
 *	Return - always 0
 */
static int stop_nocpsch(struct device_queue_manager *dqm)
{
	struct packet_manager *pm = &dqm->packets;

	pm_uninit(pm);

	return 0;
}

+37 −0
Original line number Diff line number Diff line
@@ -356,6 +356,43 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
	return retval;
}

/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
 *     of this packet
 *     @gpu_addr - GPU address of the packet. It's a virtual address.
 *     @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
 *     Return - length of the packet in units of unsigned int, or 0 if
 *     @buffer is NULL (a warning is emitted and nothing is written)
 */
uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
{
	struct pm4_mec_release_mem *packet;

	/*
	 * Warn and bail out instead of warning and then dereferencing a
	 * NULL pointer in the memset() below.
	 */
	if (WARN_ON(!buffer))
		return 0;

	packet = (struct pm4_mec_release_mem *)buffer;
	memset(buffer, 0, sizeof(*packet));

	packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
						 sizeof(*packet));

	/* End-of-pipe cache flush/invalidate event with TC and TCL1 actions */
	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
	packet->bitfields2.tcl1_action_ena = 1;
	packet->bitfields2.tc_action_ena = 1;
	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	packet->bitfields2.atc = 0;

	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
	packet->bitfields3.int_sel =
		int_sel___release_mem__send_interrupt_after_write_confirm;

	/* The low address field stores the low 32 bits shifted right by 2 */
	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
	packet->address_hi = upper_32_bits(gpu_addr);

	packet->data_lo = 0;

	return sizeof(*packet) / sizeof(unsigned int);
}

int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	pm->dqm = dqm;
+3 −0
Original line number Diff line number Diff line
@@ -494,6 +494,7 @@ struct qcm_process_device {

	/* IB memory */
	uint64_t ib_base;
	void *ib_kaddr;
};

/* KFD Memory Eviction */
@@ -834,6 +835,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,

void pm_release_ib(struct packet_manager *pm);

uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);

uint64_t kfd_get_number_elems(struct kfd_dev *kfd);

/* Events */
+34 −0
Original line number Diff line number Diff line
@@ -149,6 +149,36 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
	return err;
}

/* kfd_process_device_reserve_ib_mem - Reserve memory inside the process
 *	for IB usage.
 *	The reserved memory lets KFD submit IBs to AMDGPU from the kernel.
 *	One page is allocated at qpd->ib_base; on success the CPU/kernel
 *	address is stored in qpd->ib_kaddr. Callers must check ib_kaddr
 *	before accessing the memory.
 *	Return - 0 on success or when there is nothing to do (already
 *	reserved, or ib_base not set), otherwise the error returned by
 *	kfd_process_alloc_gpuvm()
 */
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
	struct qcm_process_device *qpd = &pdd->qpd;
	uint32_t alloc_flags = ALLOC_MEM_FLAGS_GTT |
			       ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
			       ALLOC_MEM_FLAGS_WRITABLE |
			       ALLOC_MEM_FLAGS_EXECUTABLE;
	void *ib_cpu_addr;
	int err;

	/* Already reserved: keep the existing mapping */
	if (qpd->ib_kaddr)
		return 0;

	/* ib_base is only set for dGPU */
	if (!qpd->ib_base)
		return 0;

	err = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE,
				      alloc_flags, &ib_cpu_addr);
	if (!err)
		qpd->ib_kaddr = ib_cpu_addr;

	return err;
}

struct kfd_process *kfd_create_process(struct file *filep)
{
	struct kfd_process *process;
@@ -610,6 +640,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
		return ret;
	}

	ret = kfd_process_device_reserve_ib_mem(pdd);
	if (ret)
		goto err_reserve_ib_mem;
	ret = kfd_process_device_init_cwsr_dgpu(pdd);
	if (ret)
		goto err_init_cwsr;
@@ -619,6 +652,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
	return 0;

err_init_cwsr:
err_reserve_ib_mem:
	kfd_process_device_free_bos(pdd);
	if (!drm_file)
		dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);