Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 58e69886 authored by Lan Xiao, committed by Oded Gabbay
Browse files

drm/amdkfd: fix zero reading of VMID and PASID for Hawaii



Upon a VM fault on Hawaii, the VMID and PASID written by the HW into
ih_ring_entry are zero. Instead of reading them from ih_ring_entry,
read them directly from the registers. This workaround fixes the soft
hang issues caused by mishandled VM faults on Hawaii.

Signed-off-by: Lan Xiao <Lan.Xiao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 2640c3fa
Loading
Loading
Loading
Loading
+19 −1
Original line number Diff line number Diff line
@@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);

/* Because of REG_GET_FIELD() being used, we put this function in the
 * asic specific file.
@@ -216,7 +217,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.invalidate_tlbs = invalidate_tlbs,
	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
	.submit_ib = amdgpu_amdkfd_submit_ib,
	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -912,3 +914,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
	RREG32(mmVM_INVALIDATE_RESPONSE);
	return 0;
}

 /**
  * read_vmid_from_vmfault_reg - read the VMID from the VM fault status register
  *
  * @kgd: kgd device handle; translated to the amdgpu_device through
  *       get_amdgpu_device()
  *
  * Reads mmVM_CONTEXT1_PROTECTION_FAULT_STATUS and extracts its VMID
  * field (CIK).
  *
  * Return: the VMID field of the fault status register.
  */
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
{
	/* NOTE(review): adev is not named again below; presumably RREG32()
	 * picks it up implicitly - confirm before renaming or removing it.
	 */
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);

	return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}
+28 −1
Original line number Diff line number Diff line
@@ -25,12 +25,39 @@
#include "cik_int.h"

static bool cik_event_interrupt_isr(struct kfd_dev *dev,
					const uint32_t *ih_ring_entry)
					const uint32_t *ih_ring_entry,
					uint32_t *patched_ihre,
					bool *patched_flag)
{
	const struct cik_ih_ring_entry *ihre =
			(const struct cik_ih_ring_entry *)ih_ring_entry;
	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
	unsigned int vmid, pasid;

	/* This workaround is due to HW/FW limitation on Hawaii that
	 * VMID and PASID are not written into ih_ring_entry
	 */
	if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
		dev->device_info->asic_family == CHIP_HAWAII) {
		struct cik_ih_ring_entry *tmp_ihre =
			(struct cik_ih_ring_entry *)patched_ihre;

		*patched_flag = true;
		*tmp_ihre = *ihre;

		vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
		pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);

		tmp_ihre->ring_id &= 0x000000ff;
		tmp_ihre->ring_id |= vmid << 8;
		tmp_ihre->ring_id |= pasid << 16;

		return (pasid != 0) &&
			vmid >= dev->vm_info.first_vmid_kfd &&
			vmid <= dev->vm_info.last_vmid_kfd;
	}

	/* Only handle interrupts from KFD VMIDs */
	vmid  = (ihre->ring_id & 0x0000ff00) >> 8;
	if (vmid < dev->vm_info.first_vmid_kfd ||
+12 −2
Original line number Diff line number Diff line
@@ -577,14 +577,24 @@ static int kfd_resume(struct kfd_dev *kfd)
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
	bool is_patched = false;

	if (!kfd->init_complete)
		return;

	if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
		dev_err_once(kfd_device, "Ring entry too small\n");
		return;
	}

	spin_lock(&kfd->interrupt_lock);

	if (kfd->interrupts_active
	    && interrupt_is_wanted(kfd, ih_ring_entry)
	    && enqueue_ih_ring_entry(kfd, ih_ring_entry))
	    && interrupt_is_wanted(kfd, ih_ring_entry,
				   patched_ihre, &is_patched)
	    && enqueue_ih_ring_entry(kfd,
				     is_patched ? patched_ihre : ih_ring_entry))
		queue_work(kfd->ih_wq, &kfd->interrupt_work);

	spin_unlock(&kfd->interrupt_lock);
+3 −1
Original line number Diff line number Diff line
@@ -26,7 +26,9 @@


static bool event_interrupt_isr_v9(struct kfd_dev *dev,
					const uint32_t *ih_ring_entry)
					const uint32_t *ih_ring_entry,
					uint32_t *patched_ihre,
					bool *patched_flag)
{
	uint16_t source_id, client_id, pasid, vmid;
	const uint32_t *data = ih_ring_entry;
+4 −2
Original line number Diff line number Diff line
@@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work)
								ih_ring_entry);
}

bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry)
bool interrupt_is_wanted(struct kfd_dev *dev,
			const uint32_t *ih_ring_entry,
			uint32_t *patched_ihre, bool *flag)
{
	/* integer and bitwise OR so there is no boolean short-circuiting */
	unsigned int wanted = 0;

	wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
								ih_ring_entry);
					 ih_ring_entry, patched_ihre, flag);

	return wanted != 0;
}
Loading