Loading drivers/gpu/msm/adreno_a5xx_snapshot.c +234 −201 Original line number Diff line number Diff line Loading @@ -414,48 +414,47 @@ static const unsigned int a5xx_registers[] = { 0xB000, 0xB97F, 0xB9A0, 0xB9BF, }; struct a5xx_hlsq_sp_tp_regs { unsigned int statetype; unsigned int ahbaddr; unsigned int size; }; static const struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_sp_tp_registers[] = { /* HLSQ CTX 0 2D */ { 0x31, 0x2080, 0x1 }, /* HLSQ CTX 1 2D */ { 0x33, 0x2480, 0x1 }, /* HLSQ CTX 0 3D */ { 0x32, 0xE780, 0x7f }, /* HLSQ CTX 1 3D */ { 0x34, 0xEF80, 0x7f }, /* * The HLSQ registers can only be read via the crash dumper (not AHB) so they * need to be in their own array because the array above does double duty for * the fallback path too */ static const unsigned int a5xx_hlsq_registers[] = { /* SP non context */ { 0x3f, 0x0EC0, 0x40 }, 0x0EC0, 0xEC2, 0xED0, 0xEE0, 0xEF0, 0xEF2, 0xEFA, 0xEFF, /* SP CTX 0 2D */ { 0x3d, 0x2040, 0x1 }, 0x2040, 0x2040, /* SP CTX 1 2D */ { 0x3b, 0x2440, 0x1 }, /* SP CTX 0 3D */ { 0x3e, 0xE580, 0x180 }, /* SP CTX 1 3D */ { 0x3c, 0xED80, 0x180 }, 0x2440, 0x2440, /* SP CTXT 0 3D */ 0xE580, 0xE580, 0xE584, 0xE58B, 0xE590, 0xE5B1, 0xE5C0, 0xE5DF, 0xE5F0, 0xE5F9, 0xE600, 0xE608, 0xE610, 0xE631, 0xE640, 0xE661, 0xE670, 0xE673, 0xE6F0, 0xE6F0, /* SP CTXT 1 3D */ 0xED80, 0xED80, 0xED84, 0xED8B, 0xED90, 0xEDB1, 0xEDC0, 0xEDDF, 0xEDF0, 0xEDF9, 0xEE00, 0xEE08, 0xEE10, 0xEE31, 0xEE40, 0xEE61, 0xEE70, 0xEE73, 0xEEF0, 0xEEF0, /* TP non context */ { 0x3a, 0x0F00, 0x40 }, 0xF00, 0xF03, 0xF08, 0xF08, 0xF10, 0xF1B, /* TP CTX 0 2D */ { 0x38, 0x2000, 0x10 }, 0x2000, 0x2009, /* TP CTX 1 2D */ { 0x36, 0x2400, 0x10 }, 0x2400, 0x2409, /* TP CTX 0 3D */ { 0x39, 0xE700, 0x128 }, 0xE700, 0xE707, 0xE70E, 0xE731, 0xE750, 0xE751, 0xE75A, 0xE764, 0xE76C, 0xE77F, /* TP CTX 1 3D */ { 0x37, 0xEF00, 0x128 }, }; /* HLSQ non context registers - can't be read on A530v1 */ static const struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_non_ctx_registers = { 0x35, 0xE00, 0x1C 0xEF00, 0xEF07, 0xEF0E, 0xEF31, 0xEF50, 0xEF51, 0xEF5A, 0xEF64, 0xEF6C, 0xEF7F, /* HLSQ non context */ 0xE00, 0xE01, 0xE04, 0xE06, 0xE08, 0xE09, 0xE10, 0xE17, 0xE20, 0xE25, /* HLSQ CTXT 0 3D */ 0xE784, 0xE789, 0xE78B, 0xE796, 0xE7A0, 0xE7A2, 0xE7B0, 0xE7BB, 0xE7C0, 0xE7DD, 0xE7E0, 0xE7E1, /* HLSQ CTXT 1 3D */ 0xEF84, 0xEF89, 0xEF8B, 0xEF96, 0xEFA0, 0xEFA2, 0xEFB0, 0xEFBB, 0xEFC0, 0xEFDD, 0xEFE0, 0xEFE1, }; #define A5XX_NUM_SHADER_BANKS 4 Loading Loading @@ -520,14 +519,16 @@ enum a5xx_shader_obj { struct a5xx_shader_block { unsigned int statetype; unsigned int sz; uint64_t offset; }; struct a5xx_shader_block_info { const struct a5xx_shader_block *shader_block; unsigned int shader_num; struct a5xx_shader_block *block; unsigned int bank; uint64_t offset; }; static const struct a5xx_shader_block a5xx_shader_blocks[] = { static struct a5xx_shader_block a5xx_shader_blocks[] = { {A5XX_TP_W_MEMOBJ, 0x200}, {A5XX_TP_W_MIPMAP_BASE, 0x3C0}, {A5XX_TP_W_SAMPLER_TAG, 0x40}, Loading Loading @@ -582,181 +583,174 @@ static const struct a5xx_shader_block a5xx_shader_blocks[] = { {A5XX_TP_POWER_RESTORE_RAM, 0x40}, }; static struct kgsl_memdesc capturescript; static struct kgsl_memdesc registers; static bool crash_dump_valid; static size_t a5xx_snapshot_shader_memory(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { struct kgsl_snapshot_shader *header = (struct kgsl_snapshot_shader *) buf; unsigned int *data = (unsigned int *)(buf + sizeof(*header)); unsigned int i; struct a5xx_shader_block_info *shader_block_info = struct a5xx_shader_block_info *info = (struct a5xx_shader_block_info *) priv; unsigned int statetype = shader_block_info->shader_block->statetype; unsigned int size = shader_block_info->shader_block->sz; unsigned int shader_num = shader_block_info->shader_num; struct a5xx_shader_block *block = info->block; unsigned int *data = (unsigned int *) (buf + sizeof(*header)); if (remain < SHADER_SECTION_SZ(size)) { if (remain < SHADER_SECTION_SZ(block->sz)) { SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); return 0; } kgsl_regwrite(device, A5XX_HLSQ_DBG_READ_SEL, ((statetype << A5XX_SHADER_STATETYPE_SHIFT) | shader_num)); header->type = block->statetype; header->index = info->bank; header->size = block->sz; header->type = statetype; header->index = shader_num; header->size = size; for (i = 0; i < size; i++) kgsl_regread(device, A5XX_HLSQ_DBG_AHB_READ_APERTURE + i, data++); memcpy(data, registers.hostptr + info->offset, block->sz); return SHADER_SECTION_SZ(size); return SHADER_SECTION_SZ(block->sz); } static void a5xx_snapshot_shader(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { unsigned int i, j; struct a5xx_shader_block_info blk; struct a5xx_shader_block_info info; /* Shader blocks can only be read by the crash dumper */ if (crash_dump_valid == false) return; for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) { blk.shader_block = &a5xx_shader_blocks[i]; blk.shader_num = j; info.block = &a5xx_shader_blocks[i]; info.bank = j; info.offset = a5xx_shader_blocks[i].offset + (j * a5xx_shader_blocks[i].sz); /* Shader working/shadow memory */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_SHADER, snapshot, a5xx_snapshot_shader_memory, &blk); snapshot, a5xx_snapshot_shader_memory, &info); } } } static int get_hlsq_registers(struct kgsl_device *device, const struct a5xx_hlsq_sp_tp_regs *regs, unsigned int *data) static size_t a5xx_legacy_snapshot_registers(struct kgsl_device *device, u8 *buf, size_t remain) { int j; unsigned int val; kgsl_regwrite(device, A5XX_HLSQ_DBG_READ_SEL, (regs->statetype << A5XX_SHADER_STATETYPE_SHIFT)); struct kgsl_snapshot_registers regs = { .regs = a5xx_registers, .count = ARRAY_SIZE(a5xx_registers) / 2, }; for (j = 0; j < regs->size; j++) { kgsl_regread(device, A5XX_HLSQ_DBG_AHB_READ_APERTURE + j, &val); *data++ = regs->ahbaddr + j; *data++ = val; return kgsl_snapshot_dump_registers(device, buf, remain, ®s); } return (regs->size * 2); } static struct cdregs { const unsigned int *regs; unsigned int size; } _a5xx_cd_registers[] = { { a5xx_registers, ARRAY_SIZE(a5xx_registers) }, { a5xx_hlsq_registers, ARRAY_SIZE(a5xx_hlsq_registers) }, }; static size_t a5xx_snapshot_dump_hlsq_sp_tp_regs(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) #define REG_PAIR_COUNT(_a, _i) \ (((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1) static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; unsigned int *data = (unsigned int *)(buf + sizeof(*header)); int count = 0, i; /* Figure out how many registers we are going to dump */ for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) count += a5xx_hlsq_sp_tp_registers[i].size; unsigned int *src = (unsigned int *) registers.hostptr; unsigned int i, j, k; unsigned int count = 0; /* the HLSQ non context registers cannot be dumped on A530v1 */ if (!adreno_is_a530v1(adreno_dev)) count += a5xx_hlsq_non_ctx_registers.size; if (crash_dump_valid == false) return a5xx_legacy_snapshot_registers(device, buf, remain); if (remain < (count * 8) + sizeof(*header)) { if (remain < sizeof(*header)) { SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); return 0; } for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) data += get_hlsq_registers(device, &a5xx_hlsq_sp_tp_registers[i], data); remain -= sizeof(*header); if (!adreno_is_a530v1(adreno_dev)) data += get_hlsq_registers(device, &a5xx_hlsq_non_ctx_registers, data); for (i = 0; i < ARRAY_SIZE(_a5xx_cd_registers); i++) { struct cdregs *regs = &_a5xx_cd_registers[i]; header->count = count; for (j = 0; j < regs->size / 2; j++) { unsigned int start = regs->regs[2 * j]; unsigned int end = regs->regs[(2 * j) + 1]; /* Return the size of the section */ return (count * 8) + sizeof(*header); if (remain < ((end - start) + 1) * 8) { SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); goto out; } static size_t a5xx_legacy_snapshot_registers(struct kgsl_device *device, u8 *buf, size_t remain) { struct kgsl_snapshot_registers regs = { .regs = a5xx_registers, .count = ARRAY_SIZE(a5xx_registers) / 2, }; remain -= ((end - start) + 1) * 8; return kgsl_snapshot_dump_registers(device, buf, remain, ®s); for (k = start; k <= end; k++, count++) { *data++ = k; *data++ = *src++; } } } static struct kgsl_memdesc capturescript; static struct kgsl_memdesc registers; out: header->count = count; #define REG_PAIR_COUNT(_a, _i) \ (((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1) /* Return the size of the section */ return (count * 8) + sizeof(*header); } static inline unsigned int count_registers(void) /* Snapshot a preemption record buffer */ static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { unsigned int i, count = 0; struct kgsl_memdesc *memdesc = priv; for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) count += REG_PAIR_COUNT(a5xx_registers, i); struct kgsl_snapshot_gpu_object_v2 *header = (struct kgsl_snapshot_gpu_object_v2 *)buf; return count; u8 *ptr = buf + sizeof(*header); if (remain < (SZ_64K + sizeof(*header))) { SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); return 0; } static unsigned int copy_registers(unsigned int *dst) { unsigned int *src = (unsigned int *) registers.hostptr; unsigned int i, count = 0; header->size = SZ_64K >> 2; header->gpuaddr = memdesc->gpuaddr; header->ptbase = kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) { unsigned int j; unsigned int start = a5xx_registers[2 * i]; unsigned int end = a5xx_registers[(2 * i) + 1]; memcpy(ptr, memdesc->hostptr, SZ_64K); for (j = start; j <= end; j++, count++) { *dst++ = j; *dst++ = *src++; } return SZ_64K + sizeof(*header); } return count; } static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) static void _a5xx_do_crashdump(struct kgsl_device *device) { struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; unsigned int *data = (unsigned int *)(buf + sizeof(*header)); unsigned long wait_time; unsigned int reg = 0; unsigned int val; /* Jump to legacy if the crash dump script was not initialized */ crash_dump_valid = false; if (capturescript.gpuaddr == 0 || registers.gpuaddr == 0) return a5xx_legacy_snapshot_registers(device, buf, remain); return; /* * If we got here because we are stalled on fault the crash dumper has * won't work */ /* IF the SMMU is stalled we cannot do a crash dump */ kgsl_regread(device, A5XX_RBBM_STATUS3, &val); if (val & BIT(24)) return a5xx_legacy_snapshot_registers(device, buf, remain); return; if (remain < (count_registers() * 8) + sizeof(*header)) { SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); return 0; } /* Turn on APRIV so we can access the buffers */ kgsl_regwrite(device, A5XX_CP_CNTL, 1); kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_LO, lower_32_bits(capturescript.gpuaddr)); Loading @@ -772,42 +766,14 @@ static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf, cpu_relax(); } kgsl_regwrite(device, A5XX_CP_CNTL, 0); if (!(reg & 0x4)) { KGSL_CORE_ERR("Crash dump timed out: 0x%X\n", reg); return a5xx_legacy_snapshot_registers(device, buf, remain); } header->count = copy_registers(data); /* Return the size of the section */ return (header->count * 8) + sizeof(*header); return; } /* Snapshot a preemption record buffer */ static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { struct kgsl_memdesc *memdesc = priv; struct kgsl_snapshot_gpu_object_v2 *header = (struct kgsl_snapshot_gpu_object_v2 *)buf; u8 *ptr = buf + sizeof(*header); if (remain < (SZ_64K + sizeof(*header))) { SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); return 0; } header->size = SZ_64K >> 2; header->gpuaddr = memdesc->gpuaddr; header->ptbase = kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; memcpy(ptr, memdesc->hostptr, SZ_64K); return SZ_64K + sizeof(*header); crash_dump_valid = true; } /* Loading @@ -830,6 +796,9 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, /* Disable Clock gating temporarily for the debug bus to work */ a5xx_hwcg_set(adreno_dev, false); /* Try to run the crash dumper */ _a5xx_do_crashdump(device); kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, a5xx_snapshot_registers, NULL); Loading @@ -837,10 +806,6 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, a5xx_vbif_snapshot_registers, ARRAY_SIZE(a5xx_vbif_snapshot_registers)); /* Dump SP TP HLSQ registers */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, a5xx_snapshot_dump_hlsq_sp_tp_regs, NULL); /* CP_PFP indexed registers */ kgsl_snapshot_indexed_registers(device, snapshot, A5XX_CP_PFP_STAT_ADDR, A5XX_CP_PFP_STAT_DATA, Loading Loading @@ -913,34 +878,93 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, } static int _a5xx_crashdump_init(struct a5xx_shader_block *block, uint64_t *ptr, uint64_t *offset) { int qwords = 0; unsigned int j; /* Capture each bank in the block */ for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) { /* Program the aperture */ ptr[qwords++] = (block->statetype << A5XX_SHADER_STATETYPE_SHIFT) | j; ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_READ_SEL << 44)) | (1 << 21) | 1; /* Read all the data in one chunk */ ptr[qwords++] = registers.gpuaddr + *offset; ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) | block->sz; /* Remember the offset of the first bank for easy access */ if (j == 0) block->offset = *offset; *offset += block->sz * sizeof(unsigned int); } return qwords; } void a5xx_crashdump_init(struct adreno_device *adreno_dev) { unsigned int i, count; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); unsigned int script_size = 0; unsigned int data_size = 0; unsigned int i, j; uint64_t *ptr; uint64_t gpuaddr; uint64_t offset = 0; if (capturescript.gpuaddr != 0 && registers.gpuaddr != 0) return; /* * For the capture script two blocks of memory are needed: A block of * GPU readonly memory for the special capture script and a destination * block for the register values. The size of the capture script needs * is 128 bits (4 dwords) per register pair and 4 dwords at the end. * The destination block needs to be big enough to hold all the * registers that we will capture. * We need to allocate two buffers: * 1 - the buffer to hold the draw script * 2 - the buffer to hold the data */ if (kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &capturescript, ((ARRAY_SIZE(a5xx_registers) / 2) * 16) + 16, KGSL_MEMFLAGS_GPUREADONLY, 0)) return; /* * To save the registers, we need 16 bytes per register pair for the * script and a dword for each register int the data */ for (i = 0; i < ARRAY_SIZE(_a5xx_cd_registers); i++) { struct cdregs *regs = &_a5xx_cd_registers[i]; /* Each pair needs 16 bytes (2 qwords) */ script_size += (regs->size / 2) * 16; /* Each register needs a dword in the data */ for (j = 0; j < regs->size / 2; j++) data_size += REG_PAIR_COUNT(regs->regs, j) * sizeof(unsigned int); } /* * To save the shader blocks for each block in each type we need 32 * bytes for the script (16 bytes to program the aperture and 16 to * read the data) and then a block specific number of bytes to hold * the data */ for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { script_size += 32 * A5XX_NUM_SHADER_BANKS; data_size += a5xx_shader_blocks[i].sz * sizeof(unsigned int) * A5XX_NUM_SHADER_BANKS; } /* Now allocate the script and data buffers */ /* Count the total number of registers to capture */ count = count_registers(); /* The script buffers needs 2 extra qwords on the end */ if (kgsl_allocate_global(device, &capturescript, script_size + 16, KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED)) return; if (kgsl_allocate_global(KGSL_DEVICE(adreno_dev), ®isters, count * sizeof(unsigned int), 0, 0)) { if (kgsl_allocate_global(device, ®isters, data_size, 0, KGSL_MEMDESC_PRIVILEGED)) { kgsl_free_global(KGSL_DEVICE(adreno_dev), &capturescript); return; } Loading @@ -948,14 +972,23 @@ void a5xx_crashdump_init(struct adreno_device *adreno_dev) /* Build the crash script */ ptr = (uint64_t *) capturescript.hostptr; gpuaddr = registers.gpuaddr; for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) { unsigned int regs = REG_PAIR_COUNT(a5xx_registers, i); *ptr++ = gpuaddr; *ptr++ = (((uint64_t) a5xx_registers[2 * i]) << 44) | regs; /* For the registers, program a read command for each pair */ for (i = 0; i < ARRAY_SIZE(_a5xx_cd_registers); i++) { struct cdregs *regs = &_a5xx_cd_registers[i]; gpuaddr += regs * sizeof(unsigned int); for (j = 0; j < regs->size / 2; j++) { unsigned int r = REG_PAIR_COUNT(regs->regs, j); *ptr++ = registers.gpuaddr + offset; *ptr++ = (((uint64_t) regs->regs[2 * j]) << 44) | r; offset += r * sizeof(unsigned int); } } /* Program each shader block */ for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { ptr += _a5xx_crashdump_init(&a5xx_shader_blocks[i], ptr, &offset); } *ptr++ = 0; Loading Loading
drivers/gpu/msm/adreno_a5xx_snapshot.c +234 −201 Original line number Diff line number Diff line Loading @@ -414,48 +414,47 @@ static const unsigned int a5xx_registers[] = { 0xB000, 0xB97F, 0xB9A0, 0xB9BF, }; struct a5xx_hlsq_sp_tp_regs { unsigned int statetype; unsigned int ahbaddr; unsigned int size; }; static const struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_sp_tp_registers[] = { /* HLSQ CTX 0 2D */ { 0x31, 0x2080, 0x1 }, /* HLSQ CTX 1 2D */ { 0x33, 0x2480, 0x1 }, /* HLSQ CTX 0 3D */ { 0x32, 0xE780, 0x7f }, /* HLSQ CTX 1 3D */ { 0x34, 0xEF80, 0x7f }, /* * The HLSQ registers can only be read via the crash dumper (not AHB) so they * need to be in their own array because the array above does double duty for * the fallback path too */ static const unsigned int a5xx_hlsq_registers[] = { /* SP non context */ { 0x3f, 0x0EC0, 0x40 }, 0x0EC0, 0xEC2, 0xED0, 0xEE0, 0xEF0, 0xEF2, 0xEFA, 0xEFF, /* SP CTX 0 2D */ { 0x3d, 0x2040, 0x1 }, 0x2040, 0x2040, /* SP CTX 1 2D */ { 0x3b, 0x2440, 0x1 }, /* SP CTX 0 3D */ { 0x3e, 0xE580, 0x180 }, /* SP CTX 1 3D */ { 0x3c, 0xED80, 0x180 }, 0x2440, 0x2440, /* SP CTXT 0 3D */ 0xE580, 0xE580, 0xE584, 0xE58B, 0xE590, 0xE5B1, 0xE5C0, 0xE5DF, 0xE5F0, 0xE5F9, 0xE600, 0xE608, 0xE610, 0xE631, 0xE640, 0xE661, 0xE670, 0xE673, 0xE6F0, 0xE6F0, /* SP CTXT 1 3D */ 0xED80, 0xED80, 0xED84, 0xED8B, 0xED90, 0xEDB1, 0xEDC0, 0xEDDF, 0xEDF0, 0xEDF9, 0xEE00, 0xEE08, 0xEE10, 0xEE31, 0xEE40, 0xEE61, 0xEE70, 0xEE73, 0xEEF0, 0xEEF0, /* TP non context */ { 0x3a, 0x0F00, 0x40 }, 0xF00, 0xF03, 0xF08, 0xF08, 0xF10, 0xF1B, /* TP CTX 0 2D */ { 0x38, 0x2000, 0x10 }, 0x2000, 0x2009, /* TP CTX 1 2D */ { 0x36, 0x2400, 0x10 }, 0x2400, 0x2409, /* TP CTX 0 3D */ { 0x39, 0xE700, 0x128 }, 0xE700, 0xE707, 0xE70E, 0xE731, 0xE750, 0xE751, 0xE75A, 0xE764, 0xE76C, 0xE77F, /* TP CTX 1 3D */ { 0x37, 0xEF00, 0x128 }, }; /* HLSQ non context registers - can't be read on A530v1 */ static const struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_non_ctx_registers = { 0x35, 0xE00, 0x1C 0xEF00, 0xEF07, 0xEF0E, 0xEF31, 0xEF50, 0xEF51, 0xEF5A, 0xEF64, 0xEF6C, 0xEF7F, /* HLSQ non context */ 0xE00, 0xE01, 0xE04, 0xE06, 0xE08, 0xE09, 0xE10, 0xE17, 0xE20, 0xE25, /* HLSQ CTXT 0 3D */ 0xE784, 0xE789, 0xE78B, 0xE796, 0xE7A0, 0xE7A2, 0xE7B0, 0xE7BB, 0xE7C0, 0xE7DD, 0xE7E0, 0xE7E1, /* HLSQ CTXT 1 3D */ 0xEF84, 0xEF89, 0xEF8B, 0xEF96, 0xEFA0, 0xEFA2, 0xEFB0, 0xEFBB, 0xEFC0, 0xEFDD, 0xEFE0, 0xEFE1, }; #define A5XX_NUM_SHADER_BANKS 4 Loading Loading @@ -520,14 +519,16 @@ enum a5xx_shader_obj { struct a5xx_shader_block { unsigned int statetype; unsigned int sz; uint64_t offset; }; struct a5xx_shader_block_info { const struct a5xx_shader_block *shader_block; unsigned int shader_num; struct a5xx_shader_block *block; unsigned int bank; uint64_t offset; }; static const struct a5xx_shader_block a5xx_shader_blocks[] = { static struct a5xx_shader_block a5xx_shader_blocks[] = { {A5XX_TP_W_MEMOBJ, 0x200}, {A5XX_TP_W_MIPMAP_BASE, 0x3C0}, {A5XX_TP_W_SAMPLER_TAG, 0x40}, Loading Loading @@ -582,181 +583,174 @@ static const struct a5xx_shader_block a5xx_shader_blocks[] = { {A5XX_TP_POWER_RESTORE_RAM, 0x40}, }; static struct kgsl_memdesc capturescript; static struct kgsl_memdesc registers; static bool crash_dump_valid; static size_t a5xx_snapshot_shader_memory(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { struct kgsl_snapshot_shader *header = (struct kgsl_snapshot_shader *) buf; unsigned int *data = (unsigned int *)(buf + sizeof(*header)); unsigned int i; struct a5xx_shader_block_info *shader_block_info = struct a5xx_shader_block_info *info = (struct a5xx_shader_block_info *) priv; unsigned int statetype = shader_block_info->shader_block->statetype; unsigned int size = shader_block_info->shader_block->sz; unsigned int shader_num = shader_block_info->shader_num; struct a5xx_shader_block *block = info->block; unsigned int *data = (unsigned int *) (buf + sizeof(*header)); if (remain < SHADER_SECTION_SZ(size)) { if (remain < SHADER_SECTION_SZ(block->sz)) { SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); return 0; } kgsl_regwrite(device, A5XX_HLSQ_DBG_READ_SEL, ((statetype << A5XX_SHADER_STATETYPE_SHIFT) | shader_num)); header->type = block->statetype; header->index = info->bank; header->size = block->sz; header->type = statetype; header->index = shader_num; header->size = size; for (i = 0; i < size; i++) kgsl_regread(device, A5XX_HLSQ_DBG_AHB_READ_APERTURE + i, data++); memcpy(data, registers.hostptr + info->offset, block->sz); return SHADER_SECTION_SZ(size); return SHADER_SECTION_SZ(block->sz); } static void a5xx_snapshot_shader(struct kgsl_device *device, struct kgsl_snapshot *snapshot) { unsigned int i, j; struct a5xx_shader_block_info blk; struct a5xx_shader_block_info info; /* Shader blocks can only be read by the crash dumper */ if (crash_dump_valid == false) return; for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) { blk.shader_block = &a5xx_shader_blocks[i]; blk.shader_num = j; info.block = &a5xx_shader_blocks[i]; info.bank = j; info.offset = a5xx_shader_blocks[i].offset + (j * a5xx_shader_blocks[i].sz); /* Shader working/shadow memory */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_SHADER, snapshot, a5xx_snapshot_shader_memory, &blk); snapshot, a5xx_snapshot_shader_memory, &info); } } } static int get_hlsq_registers(struct kgsl_device *device, const struct a5xx_hlsq_sp_tp_regs *regs, unsigned int *data) static size_t a5xx_legacy_snapshot_registers(struct kgsl_device *device, u8 *buf, size_t remain) { int j; unsigned int val; kgsl_regwrite(device, A5XX_HLSQ_DBG_READ_SEL, (regs->statetype << A5XX_SHADER_STATETYPE_SHIFT)); struct kgsl_snapshot_registers regs = { .regs = a5xx_registers, .count = ARRAY_SIZE(a5xx_registers) / 2, }; for (j = 0; j < regs->size; j++) { kgsl_regread(device, A5XX_HLSQ_DBG_AHB_READ_APERTURE + j, &val); *data++ = regs->ahbaddr + j; *data++ = val; return kgsl_snapshot_dump_registers(device, buf, remain, ®s); } return (regs->size * 2); } static struct cdregs { const unsigned int *regs; unsigned int size; } _a5xx_cd_registers[] = { { a5xx_registers, ARRAY_SIZE(a5xx_registers) }, { a5xx_hlsq_registers, ARRAY_SIZE(a5xx_hlsq_registers) }, }; static size_t a5xx_snapshot_dump_hlsq_sp_tp_regs(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) #define REG_PAIR_COUNT(_a, _i) \ (((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1) static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; unsigned int *data = (unsigned int *)(buf + sizeof(*header)); int count = 0, i; /* Figure out how many registers we are going to dump */ for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) count += a5xx_hlsq_sp_tp_registers[i].size; unsigned int *src = (unsigned int *) registers.hostptr; unsigned int i, j, k; unsigned int count = 0; /* the HLSQ non context registers cannot be dumped on A530v1 */ if (!adreno_is_a530v1(adreno_dev)) count += a5xx_hlsq_non_ctx_registers.size; if (crash_dump_valid == false) return a5xx_legacy_snapshot_registers(device, buf, remain); if (remain < (count * 8) + sizeof(*header)) { if (remain < sizeof(*header)) { SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); return 0; } for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) data += get_hlsq_registers(device, &a5xx_hlsq_sp_tp_registers[i], data); remain -= sizeof(*header); if (!adreno_is_a530v1(adreno_dev)) data += get_hlsq_registers(device, &a5xx_hlsq_non_ctx_registers, data); for (i = 0; i < ARRAY_SIZE(_a5xx_cd_registers); i++) { struct cdregs *regs = &_a5xx_cd_registers[i]; header->count = count; for (j = 0; j < regs->size / 2; j++) { unsigned int start = regs->regs[2 * j]; unsigned int end = regs->regs[(2 * j) + 1]; /* Return the size of the section */ return (count * 8) + sizeof(*header); if (remain < ((end - start) + 1) * 8) { SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); goto out; } static size_t a5xx_legacy_snapshot_registers(struct kgsl_device *device, u8 *buf, size_t remain) { struct kgsl_snapshot_registers regs = { .regs = a5xx_registers, .count = ARRAY_SIZE(a5xx_registers) / 2, }; remain -= ((end - start) + 1) * 8; return kgsl_snapshot_dump_registers(device, buf, remain, ®s); for (k = start; k <= end; k++, count++) { *data++ = k; *data++ = *src++; } } } static struct kgsl_memdesc capturescript; static struct kgsl_memdesc registers; out: header->count = count; #define REG_PAIR_COUNT(_a, _i) \ (((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1) /* Return the size of the section */ return (count * 8) + sizeof(*header); } static inline unsigned int count_registers(void) /* Snapshot a preemption record buffer */ static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { unsigned int i, count = 0; struct kgsl_memdesc *memdesc = priv; for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) count += REG_PAIR_COUNT(a5xx_registers, i); struct kgsl_snapshot_gpu_object_v2 *header = (struct kgsl_snapshot_gpu_object_v2 *)buf; return count; u8 *ptr = buf + sizeof(*header); if (remain < (SZ_64K + sizeof(*header))) { SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); return 0; } static unsigned int copy_registers(unsigned int *dst) { unsigned int *src = (unsigned int *) registers.hostptr; unsigned int i, count = 0; header->size = SZ_64K >> 2; header->gpuaddr = memdesc->gpuaddr; header->ptbase = kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) { unsigned int j; unsigned int start = a5xx_registers[2 * i]; unsigned int end = a5xx_registers[(2 * i) + 1]; memcpy(ptr, memdesc->hostptr, SZ_64K); for (j = start; j <= end; j++, count++) { *dst++ = j; *dst++ = *src++; } return SZ_64K + sizeof(*header); } return count; } static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) static void _a5xx_do_crashdump(struct kgsl_device *device) { struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; unsigned int *data = (unsigned int *)(buf + sizeof(*header)); unsigned long wait_time; unsigned int reg = 0; unsigned int val; /* Jump to legacy if the crash dump script was not initialized */ crash_dump_valid = false; if (capturescript.gpuaddr == 0 || registers.gpuaddr == 0) return a5xx_legacy_snapshot_registers(device, buf, remain); return; /* * If we got here because we are stalled on fault the crash dumper has * won't work */ /* IF the SMMU is stalled we cannot do a crash dump */ kgsl_regread(device, A5XX_RBBM_STATUS3, &val); if (val & BIT(24)) return a5xx_legacy_snapshot_registers(device, buf, remain); return; if (remain < (count_registers() * 8) + sizeof(*header)) { SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); return 0; } /* Turn on APRIV so we can access the buffers */ kgsl_regwrite(device, A5XX_CP_CNTL, 1); kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_LO, lower_32_bits(capturescript.gpuaddr)); Loading @@ -772,42 +766,14 @@ static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf, cpu_relax(); } kgsl_regwrite(device, A5XX_CP_CNTL, 0); if (!(reg & 0x4)) { KGSL_CORE_ERR("Crash dump timed out: 0x%X\n", reg); return a5xx_legacy_snapshot_registers(device, buf, remain); } header->count = copy_registers(data); /* Return the size of the section */ return (header->count * 8) + sizeof(*header); return; } /* Snapshot a preemption record buffer */ static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) { struct kgsl_memdesc *memdesc = priv; struct kgsl_snapshot_gpu_object_v2 *header = (struct kgsl_snapshot_gpu_object_v2 *)buf; u8 *ptr = buf + sizeof(*header); if (remain < (SZ_64K + sizeof(*header))) { SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD"); return 0; } header->size = SZ_64K >> 2; header->gpuaddr = memdesc->gpuaddr; header->ptbase = kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; memcpy(ptr, memdesc->hostptr, SZ_64K); return SZ_64K + sizeof(*header); crash_dump_valid = true; } /* Loading @@ -830,6 +796,9 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, /* Disable Clock gating temporarily for the debug bus to work */ a5xx_hwcg_set(adreno_dev, false); /* Try to run the crash dumper */ _a5xx_do_crashdump(device); kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, a5xx_snapshot_registers, NULL); Loading @@ -837,10 +806,6 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, a5xx_vbif_snapshot_registers, ARRAY_SIZE(a5xx_vbif_snapshot_registers)); /* Dump SP TP HLSQ registers */ kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, a5xx_snapshot_dump_hlsq_sp_tp_regs, NULL); /* CP_PFP indexed registers */ kgsl_snapshot_indexed_registers(device, snapshot, A5XX_CP_PFP_STAT_ADDR, A5XX_CP_PFP_STAT_DATA, Loading Loading @@ -913,34 +878,93 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, } static int _a5xx_crashdump_init(struct a5xx_shader_block *block, uint64_t *ptr, uint64_t *offset) { int qwords = 0; unsigned int j; /* Capture each bank in the block */ for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) { /* Program the aperture */ ptr[qwords++] = (block->statetype << A5XX_SHADER_STATETYPE_SHIFT) | j; ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_READ_SEL << 44)) | (1 << 21) | 1; /* Read all the data in one chunk */ ptr[qwords++] = registers.gpuaddr + *offset; ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) | block->sz; /* Remember the offset of the first bank for easy access */ if (j == 0) block->offset = *offset; *offset += block->sz * sizeof(unsigned int); } return qwords; } void a5xx_crashdump_init(struct adreno_device *adreno_dev) { unsigned int i, count; struct kgsl_device *device = KGSL_DEVICE(adreno_dev); unsigned int script_size = 0; unsigned int data_size = 0; unsigned int i, j; uint64_t *ptr; uint64_t gpuaddr; uint64_t offset = 0; if (capturescript.gpuaddr != 0 && registers.gpuaddr != 0) return; /* * For the capture script two blocks of memory are needed: A block of * GPU readonly memory for the special capture script and a destination * block for the register values. The size of the capture script needs * is 128 bits (4 dwords) per register pair and 4 dwords at the end. * The destination block needs to be big enough to hold all the * registers that we will capture. * We need to allocate two buffers: * 1 - the buffer to hold the draw script * 2 - the buffer to hold the data */ if (kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &capturescript, ((ARRAY_SIZE(a5xx_registers) / 2) * 16) + 16, KGSL_MEMFLAGS_GPUREADONLY, 0)) return; /* * To save the registers, we need 16 bytes per register pair for the * script and a dword for each register int the data */ for (i = 0; i < ARRAY_SIZE(_a5xx_cd_registers); i++) { struct cdregs *regs = &_a5xx_cd_registers[i]; /* Each pair needs 16 bytes (2 qwords) */ script_size += (regs->size / 2) * 16; /* Each register needs a dword in the data */ for (j = 0; j < regs->size / 2; j++) data_size += REG_PAIR_COUNT(regs->regs, j) * sizeof(unsigned int); } /* * To save the shader blocks for each block in each type we need 32 * bytes for the script (16 bytes to program the aperture and 16 to * read the data) and then a block specific number of bytes to hold * the data */ for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { script_size += 32 * A5XX_NUM_SHADER_BANKS; data_size += a5xx_shader_blocks[i].sz * sizeof(unsigned int) * A5XX_NUM_SHADER_BANKS; } /* Now allocate the script and data buffers */ /* Count the total number of registers to capture */ count = count_registers(); /* The script buffers needs 2 extra qwords on the end */ if (kgsl_allocate_global(device, &capturescript, script_size + 16, KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED)) return; if (kgsl_allocate_global(KGSL_DEVICE(adreno_dev), ®isters, count * sizeof(unsigned int), 0, 0)) { if (kgsl_allocate_global(device, ®isters, data_size, 0, KGSL_MEMDESC_PRIVILEGED)) { kgsl_free_global(KGSL_DEVICE(adreno_dev), &capturescript); return; } Loading @@ -948,14 +972,23 @@ void a5xx_crashdump_init(struct adreno_device *adreno_dev) /* Build the crash script */ ptr = (uint64_t *) capturescript.hostptr; gpuaddr = registers.gpuaddr; for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) { unsigned int regs = REG_PAIR_COUNT(a5xx_registers, i); *ptr++ = gpuaddr; *ptr++ = (((uint64_t) a5xx_registers[2 * i]) << 44) | regs; /* For the registers, program a read command for each pair */ for (i = 0; i < ARRAY_SIZE(_a5xx_cd_registers); i++) { struct cdregs *regs = &_a5xx_cd_registers[i]; gpuaddr += regs * sizeof(unsigned int); for (j = 0; j < regs->size / 2; j++) { unsigned int r = REG_PAIR_COUNT(regs->regs, j); *ptr++ = registers.gpuaddr + offset; *ptr++ = (((uint64_t) regs->regs[2 * j]) << 44) | r; offset += r * sizeof(unsigned int); } } /* Program each shader block */ for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { ptr += _a5xx_crashdump_init(&a5xx_shader_blocks[i], ptr, &offset); } *ptr++ = 0; Loading