Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 54e2e49c authored by Alex Deucher's avatar Alex Deucher
Browse files

drm/radeon: add fault decode function for cayman/TN (v2)



Helpful for debugging GPUVM errors as we can see what
hw block and page generated the fault in the log.

v2: simplify fault decoding

Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
parent 0a168933
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -139,6 +139,8 @@ void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
void evergreen_program_aspm(struct radeon_device *rdev);
extern void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
				     int ring, u32 cp_int_cntl);
extern void cayman_vm_decode_fault(struct radeon_device *rdev,
				   u32 status, u32 addr);

static const u32 evergreen_golden_registers[] =
{
@@ -4586,6 +4588,7 @@ int evergreen_irq_process(struct radeon_device *rdev)
	bool queue_hotplug = false;
	bool queue_hdmi = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;
@@ -4872,11 +4875,14 @@ int evergreen_irq_process(struct radeon_device *rdev)
			break;
		case 146:
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
				status);
			cayman_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
+161 −0
Original line number Diff line number Diff line
@@ -2450,6 +2450,167 @@ void cayman_vm_fini(struct radeon_device *rdev)
{
}

/**
 * cayman_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 *
 * Print human readable fault information (cayman/TN).
 */
void cayman_vm_decode_fault(struct radeon_device *rdev,
			    u32 status, u32 addr)
{
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	switch (mc_id) {
	case 32:
	case 16:
	case 96:
	case 80:
	case 160:
	case 144:
	case 224:
	case 208:
		block = "CB";
		break;
	case 33:
	case 17:
	case 97:
	case 81:
	case 161:
	case 145:
	case 225:
	case 209:
		block = "CB_FMASK";
		break;
	case 34:
	case 18:
	case 98:
	case 82:
	case 162:
	case 146:
	case 226:
	case 210:
		block = "CB_CMASK";
		break;
	case 35:
	case 19:
	case 99:
	case 83:
	case 163:
	case 147:
	case 227:
	case 211:
		block = "CB_IMMED";
		break;
	case 36:
	case 20:
	case 100:
	case 84:
	case 164:
	case 148:
	case 228:
	case 212:
		block = "DB";
		break;
	case 37:
	case 21:
	case 101:
	case 85:
	case 165:
	case 149:
	case 229:
	case 213:
		block = "DB_HTILE";
		break;
	case 38:
	case 22:
	case 102:
	case 86:
	case 166:
	case 150:
	case 230:
	case 214:
		block = "SX";
		break;
	case 39:
	case 23:
	case 103:
	case 87:
	case 167:
	case 151:
	case 231:
	case 215:
		block = "DB_STEN";
		break;
	case 40:
	case 24:
	case 104:
	case 88:
	case 232:
	case 216:
	case 168:
	case 152:
		block = "TC_TFETCH";
		break;
	case 41:
	case 25:
	case 105:
	case 89:
	case 233:
	case 217:
	case 169:
	case 153:
		block = "TC_VFETCH";
		break;
	case 42:
	case 26:
	case 106:
	case 90:
	case 234:
	case 218:
	case 170:
	case 154:
		block = "VC";
		break;
	case 112:
		block = "CP";
		break;
	case 113:
	case 114:
		block = "SH";
		break;
	case 115:
		block = "VGT";
		break;
	case 178:
		block = "IH";
		break;
	case 51:
		block = "RLC";
		break;
	case 55:
		block = "DMA";
		break;
	case 56:
		block = "HDP";
		break;
	default:
		block = "unknown";
		break;
	}

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}

#define R600_ENTRY_VALID   (1 << 0)
#define R600_PTE_SYSTEM    (1 << 1)
#define R600_PTE_SNOOPED   (1 << 2)
+16 −0
Original line number Diff line number Diff line
@@ -133,6 +133,22 @@
#define VM_CONTEXT1_CNTL2				0x1434
#define VM_INVALIDATE_REQUEST				0x1478
#define VM_INVALIDATE_RESPONSE				0x147c
#define	VM_CONTEXT1_PROTECTION_FAULT_ADDR		0x14FC
#define	VM_CONTEXT1_PROTECTION_FAULT_STATUS		0x14DC
#define		PROTECTIONS_MASK			(0xf << 0)
#define		PROTECTIONS_SHIFT			0
		/* bit 0: range
		 * bit 2: pde0
		 * bit 3: valid
		 * bit 4: read
		 * bit 5: write
		 */
#define		MEMORY_CLIENT_ID_MASK			(0xff << 12)
#define		MEMORY_CLIENT_ID_SHIFT			12
#define		MEMORY_CLIENT_RW_MASK			(1 << 24)
#define		MEMORY_CLIENT_RW_SHIFT			24
#define		FAULT_VMID_MASK				(0x7 << 25)
#define		FAULT_VMID_SHIFT			25
#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR	0x1518
#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR	0x151c
#define	VM_CONTEXT0_PAGE_TABLE_BASE_ADDR		0x153C