// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

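/*
 * One captured snapshot object: 'handle' points at the static
 * descriptor for the block that was dumped, 'data' at the captured
 * payload.
 */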
struct a6xx_gpu_state_obj {
    const void *handle;
    u32 *data;
};

struct a6xx_gpu_state {
    struct msm_gpu_state base;

    struct a6xx_gpu_state_obj *gmu_registers;
    int nr_gmu_registers;

    struct a6xx_gpu_state_obj *registers;
    int nr_registers;

    struct a6xx_gpu_state_obj *shaders;
    int nr_shaders;

    struct a6xx_gpu_state_obj *clusters;
    int nr_clusters;

    struct a6xx_gpu_state_obj *dbgahb_clusters;
    int nr_dbgahb_clusters;

    struct a6xx_gpu_state_obj *indexed_regs;
    int nr_indexed_regs;

    struct a6xx_gpu_state_obj *debugbus;
    int nr_debugbus;

    struct a6xx_gpu_state_obj *vbif_debugbus;

    struct a6xx_gpu_state_obj *cx_debugbus;
    int nr_cx_debugbus;

    struct msm_gpu_state_bo *gmu_log;
    struct msm_gpu_state_bo *gmu_hfi;
    struct msm_gpu_state_bo *gmu_debug;

    s32 hfi_queue_history[2][HFI_HISTORY_SZ];

    struct list_head objs;

    bool gpu_initialized;
};

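/*
 * Helpers to build the crashdumper script.  Each instruction is a pair
 * of u64s: for a write, the first word is the value and the second
 * packs the register offset (shifted up to bit 44), a write flag
 * (bit 21) and a count of one; for a read, the first word is the
 * target iova and the second packs the offset and the number of
 * dwords to read.  An all-zero pair terminates the script.
 */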
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
    in[0] = val;
    in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

    return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
    in[0] = target;
    in[1] = (((u64) reg) << 44 | dwords);

    return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
    in[0] = 0;
    in[1] = 0;

    return 2;
}

struct a6xx_crashdumper {
    void *ptr;
    struct drm_gem_object *bo;
    u64 iova;
};

struct a6xx_state_memobj {
    struct list_head node;
    unsigned long long data[];
};

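/*
 * Allocations made through state_kcalloc() are tracked on the state's
 * object list so that a6xx_gpu_state_destroy() can free them all in
 * one pass.
 */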
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
    struct a6xx_state_memobj *obj =
        kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

    if (!obj)
        return NULL;

    list_add_tail(&obj->node, &a6xx_state->objs);
    return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
        size_t size)
{
    void *dst = state_kcalloc(a6xx_state, 1, size);

    if (dst)
        memcpy(dst, src, size);
    return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)

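/*
 * Allocate and map the scratch BO for the crashdumper: the script is
 * written through the kernel mapping, while the iova is what gets
 * handed to the CP.
 */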
static int a6xx_crashdumper_init(struct msm_gpu *gpu,
        struct a6xx_crashdumper *dumper)
{
    dumper->ptr = msm_gem_kernel_new(gpu->dev,
        SZ_1M, MSM_BO_WC, gpu->aspace,
        &dumper->bo, &dumper->iova);

    if (!IS_ERR(dumper->ptr))
        msm_gem_object_set_name(dumper->bo, "crashdump");

    return PTR_ERR_OR_ZERO(dumper->ptr);
}

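/*
 * Run a prepared crashdump script: point the CP at the scratch BO,
 * set the control bit to kick off the dump, then poll the status
 * register (for up to 10ms) for completion.
 */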
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
        struct a6xx_crashdumper *dumper)
{
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    u32 val;
    int ret;

    if (IS_ERR_OR_NULL(dumper->ptr))
        return -EINVAL;

    if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
        return -EINVAL;

    /* Make sure all pending memory writes are posted */
    wmb();

    gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
        REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

    gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

    ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
        val & 0x02, 100, 10000);

    gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

    return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
        u32 *data)
{
    u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
        A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

    /* Wait 1 us to make sure the data is flowing */
    udelay(1);

    data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
    data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

    return 2;
}

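/*
 * The CX debug bus controller lives outside the GPU register space and
 * is accessed through a separately mapped pointer.  The offsets are
 * dword indices, hence the << 2 to form a byte offset.
 */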
#define cxdbg_write(ptr, offset, val) \
    msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
    msm_readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
        u32 *data)
{
    u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
        A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

    cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
    cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
    cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
    cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

    /* Wait 1 us to make sure the data is flowing */
    udelay(1);

    data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
    data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

    return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
        u32 reg, int count, u32 *data)
{
    int i;

    gpu_write(gpu, ctrl0, reg);

    for (i = 0; i < count; i++) {
        gpu_write(gpu, ctrl1, i);
        data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
    }

    return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

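/*
 * Total dwords streamed out of the VBIF debug bus: each AXI arbiter
 * block produces 16 dwords, each XIN AXI block 18 and each XIN core
 * block 12.
 */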
#define VBIF_DEBUGBUS_BLOCK_SIZE \
    ((16 * AXI_ARB_BLOCKS) + \
     (18 * XIN_AXI_BLOCKS) + \
     (12 * XIN_CORE_BLOCKS))

static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        struct a6xx_gpu_state_obj *obj)
{
    u32 clk, *ptr;
    int i;

    obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
        sizeof(u32));
    if (!obj->data)
        return;

    obj->handle = NULL;

    /* Get the current clock setting */
    clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

    /* Force on the bus so we can read it */
    gpu_write(gpu, REG_A6XX_VBIF_CLKON,
        clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

    /* We will read from BUS2 first, so disable BUS1 */
    gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

    /* Enable the VBIF bus for reading */
    gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

    ptr = obj->data;

    for (i = 0; i < AXI_ARB_BLOCKS; i++)
        ptr += vbif_debugbus_read(gpu,
            REG_A6XX_VBIF_TEST_BUS2_CTRL0,
            REG_A6XX_VBIF_TEST_BUS2_CTRL1,
            1 << (i + 16), 16, ptr);

    for (i = 0; i < XIN_AXI_BLOCKS; i++)
        ptr += vbif_debugbus_read(gpu,
            REG_A6XX_VBIF_TEST_BUS2_CTRL0,
            REG_A6XX_VBIF_TEST_BUS2_CTRL1,
            1 << i, 18, ptr);

    /* Stop BUS2 so we can turn on BUS1 */
    gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

    for (i = 0; i < XIN_CORE_BLOCKS; i++)
        ptr += vbif_debugbus_read(gpu,
            REG_A6XX_VBIF_TEST_BUS1_CTRL0,
            REG_A6XX_VBIF_TEST_BUS1_CTRL1,
            1 << i, 12, ptr);

    /* Restore the VBIF clock setting */
    gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

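/*
 * Each debugbus read returns two dwords, so the capture buffers below
 * are sized in u64s per sample.
 */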
static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_debugbus_block *block,
        struct a6xx_gpu_state_obj *obj)
{
    int i;
    u32 *ptr;

    obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
    if (!obj->data)
        return;

    obj->handle = block;

    for (ptr = obj->data, i = 0; i < block->count; i++)
        ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_debugbus_block *block,
        struct a6xx_gpu_state_obj *obj)
{
    int i;
    u32 *ptr;

    obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
    if (!obj->data)
        return;

    obj->handle = block;

    for (ptr = obj->data, i = 0; i < block->count; i++)
        ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

static void a6xx_get_debugbus(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state)
{
    struct resource *res;
    void __iomem *cxdbg = NULL;
    int nr_debugbus_blocks;

    /* Set up the GX debug bus */

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
        A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
        A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

    /* Set up the CX debug bus - it lives elsewhere in the system so do a
     * temporary ioremap for the registers
     */
    res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
            "cx_dbgc");

    if (res)
        cxdbg = ioremap(res->start, resource_size(res));

    if (cxdbg) {
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
            A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
            A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
            0x76543210);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
            0xFEDCBA98);

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
    }

    nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
        (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

    a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
            sizeof(*a6xx_state->debugbus));

    if (a6xx_state->debugbus) {
        int i;

        for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
            a6xx_get_debugbus_block(gpu,
                a6xx_state,
                &a6xx_debugbus_blocks[i],
                &a6xx_state->debugbus[i]);

        a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

        /*
         * GBIF has the same debugbus interface as the other GPU
         * blocks and uses the same block ID as VBIF, so collect it
         * through the default path on GBIF targets.
         */
        if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
            a6xx_get_debugbus_block(gpu, a6xx_state,
                &a6xx_gbif_debugbus_block,
                &a6xx_state->debugbus[i]);

            a6xx_state->nr_debugbus += 1;
        }
    }

    /* Dump the VBIF debugbus on applicable targets */
    if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
        a6xx_state->vbif_debugbus =
            state_kcalloc(a6xx_state, 1,
                    sizeof(*a6xx_state->vbif_debugbus));

        if (a6xx_state->vbif_debugbus)
            a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
                    a6xx_state->vbif_debugbus);
    }

    if (cxdbg) {
        a6xx_state->cx_debugbus =
            state_kcalloc(a6xx_state,
            ARRAY_SIZE(a6xx_cx_debugbus_blocks),
            sizeof(*a6xx_state->cx_debugbus));

        if (a6xx_state->cx_debugbus) {
            int i;

            for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
                a6xx_get_cx_debugbus_block(cxdbg,
                    a6xx_state,
                    &a6xx_cx_debugbus_blocks[i],
                    &a6xx_state->cx_debugbus[i]);

            a6xx_state->nr_cx_debugbus =
                ARRAY_SIZE(a6xx_cx_debugbus_blocks);
        }

        iounmap(cxdbg);
    }
}

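/*
 * Register lists are flat arrays of { start, end } pairs; RANGE()
 * returns the inclusive register count of the pair starting at
 * index 'a'.
 */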
#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)

/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_dbgahb_cluster *dbgahb,
        struct a6xx_gpu_state_obj *obj,
        struct a6xx_crashdumper *dumper)
{
    u64 *in = dumper->ptr;
    u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
    size_t datasize;
    int i, regcount = 0;

    for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
        int j;

        in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
            (dbgahb->statetype + i * 2) << 8);

        for (j = 0; j < dbgahb->count; j += 2) {
            int count = RANGE(dbgahb->registers, j);
            u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
                dbgahb->registers[j] - (dbgahb->base >> 2);

            in += CRASHDUMP_READ(in, offset, count, out);

            out += count * sizeof(u32);

            if (i == 0)
                regcount += count;
        }
    }

    CRASHDUMP_FINI(in);

    datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

    if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
        return;

    if (a6xx_crashdumper_run(gpu, dumper))
        return;

    obj->handle = dbgahb;
    obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
        datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        struct a6xx_crashdumper *dumper)
{
    int i;

    a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
        ARRAY_SIZE(a6xx_dbgahb_clusters),
        sizeof(*a6xx_state->dbgahb_clusters));

    if (!a6xx_state->dbgahb_clusters)
        return;

    a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

    for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
        a6xx_get_dbgahb_cluster(gpu, a6xx_state,
            &a6xx_dbgahb_clusters[i],
            &a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_cluster *cluster,
        struct a6xx_gpu_state_obj *obj,
        struct a6xx_crashdumper *dumper)
{
    u64 *in = dumper->ptr;
    u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
    size_t datasize;
    int i, regcount = 0;

    /* Some clusters need a selector register to be programmed too */
    if (cluster->sel_reg)
        in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

    for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
        int j;

        in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
            (cluster->id << 8) | (i << 4) | i);

        for (j = 0; j < cluster->count; j += 2) {
            int count = RANGE(cluster->registers, j);

            in += CRASHDUMP_READ(in, cluster->registers[j],
                count, out);

            out += count * sizeof(u32);

            if (i == 0)
                regcount += count;
        }
    }

    CRASHDUMP_FINI(in);

    datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

    if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
        return;

    if (a6xx_crashdumper_run(gpu, dumper))
        return;

    obj->handle = cluster;
    obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
        datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        struct a6xx_crashdumper *dumper)
{
    int i;

    a6xx_state->clusters = state_kcalloc(a6xx_state,
        ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

    if (!a6xx_state->clusters)
        return;

    a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

    for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
        a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
            &a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_shader_block *block,
        struct a6xx_gpu_state_obj *obj,
        struct a6xx_crashdumper *dumper)
{
    u64 *in = dumper->ptr;
    u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
    size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
    int i;

    if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
        return;

    for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
        in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
            (block->type << 8) | i);

        /* Each bank lands in its own slice of the data region */
        in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
            block->size, out);

        out += block->size * sizeof(u32);
    }

    CRASHDUMP_FINI(in);

    if (a6xx_crashdumper_run(gpu, dumper))
        return;

    obj->handle = block;
    obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
        datasize);
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        struct a6xx_crashdumper *dumper)
{
    int i;

    a6xx_state->shaders = state_kcalloc(a6xx_state,
        ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

    if (!a6xx_state->shaders)
        return;

    a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

    for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
        a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
            &a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_registers *regs,
        struct a6xx_gpu_state_obj *obj,
        struct a6xx_crashdumper *dumper)
{
    u64 *in = dumper->ptr;
    u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
    int i, regcount = 0;

    in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

    for (i = 0; i < regs->count; i += 2) {
        u32 count = RANGE(regs->registers, i);
        u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
            regs->registers[i] - (regs->val0 >> 2);

        in += CRASHDUMP_READ(in, offset, count, out);

        out += count * sizeof(u32);
        regcount += count;
    }

    CRASHDUMP_FINI(in);

    if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
        return;

    if (a6xx_crashdumper_run(gpu, dumper))
        return;

    obj->handle = regs;
    obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
        regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_registers *regs,
        struct a6xx_gpu_state_obj *obj,
        struct a6xx_crashdumper *dumper)
{
    u64 *in = dumper->ptr;
    u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
    int i, regcount = 0;

    /* Some blocks might need to program a selector register first */
    if (regs->val0)
        in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

    for (i = 0; i < regs->count; i += 2) {
        u32 count = RANGE(regs->registers, i);

        in += CRASHDUMP_READ(in, regs->registers[i], count, out);

        out += count * sizeof(u32);
        regcount += count;
    }

    CRASHDUMP_FINI(in);

    if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
        return;

    if (a6xx_crashdumper_run(gpu, dumper))
        return;

    obj->handle = regs;
    obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
        regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_registers *regs,
        struct a6xx_gpu_state_obj *obj)
{
    int i, regcount = 0, index = 0;

    for (i = 0; i < regs->count; i += 2)
        regcount += RANGE(regs->registers, i);

    obj->handle = (const void *) regs;
    obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
    if (!obj->data)
        return;

    for (i = 0; i < regs->count; i += 2) {
        u32 count = RANGE(regs->registers, i);
        int j;

        for (j = 0; j < count; j++)
            obj->data[index++] = gpu_read(gpu,
                regs->registers[i] + j);
    }
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_registers *regs,
        struct a6xx_gpu_state_obj *obj,
        bool rscc)
{
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
    int i, regcount = 0, index = 0;

    for (i = 0; i < regs->count; i += 2)
        regcount += RANGE(regs->registers, i);

    obj->handle = (const void *) regs;
    obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
    if (!obj->data)
        return;

    for (i = 0; i < regs->count; i += 2) {
        u32 count = RANGE(regs->registers, i);
        int j;

        for (j = 0; j < count; j++) {
            u32 offset = regs->registers[i] + j;
            u32 val;

            if (rscc)
                val = gmu_read_rscc(gmu, offset);
            else
                val = gmu_read(gmu, offset);

            obj->data[index++] = val;
        }
    }
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state)
{
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

    a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
        3, sizeof(*a6xx_state->gmu_registers));

    if (!a6xx_state->gmu_registers)
        return;

    a6xx_state->nr_gmu_registers = 3;

    /* Get the CX GMU registers from AHB */
    _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
        &a6xx_state->gmu_registers[0], false);
    _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
        &a6xx_state->gmu_registers[1], true);

    if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
        return;

    /* Set the fence to ALLOW mode so we can access the registers */
    gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

    _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
        &a6xx_state->gmu_registers[2], false);
}

static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
        struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
    struct msm_gpu_state_bo *snapshot;

    snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
    if (!snapshot)
        return NULL;

    snapshot->iova = bo->iova;
    snapshot->size = bo->size;
    snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
    if (!snapshot->data)
        return NULL;

    memcpy(snapshot->data, bo->virt, bo->size);

    return snapshot;
}

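/*
 * Copy each HFI queue's history ring into the snapshot, unrolled
 * starting from the queue's current history index.
 */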
static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
                      struct a6xx_gpu_state *a6xx_state)
{
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
    unsigned i, j;

    BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));

    for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
        struct a6xx_hfi_queue *queue = &gmu->queues[i];
        for (j = 0; j < HFI_HISTORY_SZ; j++) {
            unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
            a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
        }
    }
}

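/*
 * One reglist slot is used for either the GBIF or the VBIF registers,
 * depending on the target.
 */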
#define A6XX_GBIF_REGLIST_SIZE   1
static void a6xx_get_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        struct a6xx_crashdumper *dumper)
{
    int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
        ARRAY_SIZE(a6xx_reglist) +
        ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
    int index = 0;
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

    a6xx_state->registers = state_kcalloc(a6xx_state,
        count, sizeof(*a6xx_state->registers));

    if (!a6xx_state->registers)
        return;

    a6xx_state->nr_registers = count;

    for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
        a6xx_get_ahb_gpu_registers(gpu,
            a6xx_state, &a6xx_ahb_reglist[i],
            &a6xx_state->registers[index++]);

    if (a6xx_has_gbif(adreno_gpu))
        a6xx_get_ahb_gpu_registers(gpu,
                a6xx_state, &a6xx_gbif_reglist,
                &a6xx_state->registers[index++]);
    else
        a6xx_get_ahb_gpu_registers(gpu,
                a6xx_state, &a6xx_vbif_reglist,
                &a6xx_state->registers[index++]);

    if (!dumper) {
        /*
         * We can't use the crashdumper when the SMMU is stalled,
         * because the GPU has no memory access until we resume
         * translation (but we don't want to do that until after
         * we have captured as much useful GPU state as possible).
         * So instead collect registers via the CPU:
         */
        for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
            a6xx_get_ahb_gpu_registers(gpu,
                a6xx_state, &a6xx_reglist[i],
                &a6xx_state->registers[index++]);
        return;
    }

    for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
        a6xx_get_crashdumper_registers(gpu,
            a6xx_state, &a6xx_reglist[i],
            &a6xx_state->registers[index++],
            dumper);

    for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
        a6xx_get_crashdumper_hlsq_registers(gpu,
            a6xx_state, &a6xx_hlsq_reglist[i],
            &a6xx_state->registers[index++],
            dumper);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_indexed_registers *indexed,
        struct a6xx_gpu_state_obj *obj)
{
    int i;

    obj->handle = (const void *) indexed;
    obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
    if (!obj->data)
        return;

    /* All the indexed banks start at address 0 */
    gpu_write(gpu, indexed->addr, 0);

    /* Read the data - each read increments the internal address by 1 */
    for (i = 0; i < indexed->count; i++)
        obj->data[i] = gpu_read(gpu, indexed->data);
}

static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state)
{
    u32 mempool_size;
    int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
    int i;

    a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
        sizeof(*a6xx_state->indexed_regs));
    if (!a6xx_state->indexed_regs)
        return;

    for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
        a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
            &a6xx_state->indexed_regs[i]);

    /* Set the CP mempool size to 0 to stabilize it while dumping */
    mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
    gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

    /* Get the contents of the CP mempool */
    a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
        &a6xx_state->indexed_regs[i]);

    /*
     * Offset 0x2000 in the mempool is the size - copy the saved size over
     * so the data is consistent
     */
    a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

    /* Restore the size in the hardware */
    gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

    a6xx_state->nr_indexed_regs = count;
}

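/*
 * Top-level snapshot entry point: capture the GMU state first, then,
 * if the GX domain is powered, the indexed registers, the GPU
 * registers and (when it is safe to run) the crashdumper-assisted
 * shader and cluster dumps.
 */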
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
    struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
        GFP_KERNEL);
    bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
            A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

    if (!a6xx_state)
        return ERR_PTR(-ENOMEM);

    INIT_LIST_HEAD(&a6xx_state->objs);

    /* Get the generic state from the adreno core */
    adreno_gpu_state_get(gpu, &a6xx_state->base);

    a6xx_get_gmu_registers(gpu, a6xx_state);

    a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
    a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
    a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

    a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);

    /* If GX isn't on, the rest of the data isn't going to be accessible */
    if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
        return &a6xx_state->base;

    /* Get the banks of indexed registers */
    a6xx_get_indexed_registers(gpu, a6xx_state);

    /*
     * Try to initialize the crashdumper, if we are not dumping state
     * with the SMMU stalled.  The crashdumper needs memory access to
     * write out GPU state, so we need to skip this when the SMMU is
     * stalled in response to an iova fault
     */
    if (!stalled && !gpu->needs_hw_init &&
        !a6xx_crashdumper_init(gpu, &_dumper)) {
        dumper = &_dumper;
    }

    a6xx_get_registers(gpu, a6xx_state, dumper);

    if (dumper) {
        a6xx_get_shaders(gpu, a6xx_state, dumper);
        a6xx_get_clusters(gpu, a6xx_state, dumper);
        a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

        msm_gem_kernel_put(dumper->bo, gpu->aspace);
    }

    if (snapshot_debugbus)
        a6xx_get_debugbus(gpu, a6xx_state);

    a6xx_state->gpu_initialized = !gpu->needs_hw_init;

    return &a6xx_state->base;
}

static void a6xx_gpu_state_destroy(struct kref *kref)
{
    struct a6xx_state_memobj *obj, *tmp;
    struct msm_gpu_state *state = container_of(kref,
            struct msm_gpu_state, ref);
    struct a6xx_gpu_state *a6xx_state = container_of(state,
            struct a6xx_gpu_state, base);

    if (a6xx_state->gmu_log)
        kvfree(a6xx_state->gmu_log->data);

    if (a6xx_state->gmu_hfi)
        kvfree(a6xx_state->gmu_hfi->data);

    /* The gmu_debug snapshot data is kvzalloc'd too - don't leak it */
    if (a6xx_state->gmu_debug)
        kvfree(a6xx_state->gmu_debug->data);

    list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
        kfree(obj);

    adreno_gpu_state_destroy(state);
    kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
    if (IS_ERR_OR_NULL(state))
        return 1;

    return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
        struct drm_printer *p)
{
    int i, index = 0;

    if (!data)
        return;

    for (i = 0; i < count; i += 2) {
        u32 count = RANGE(registers, i);
        u32 offset = registers[i];
        int j;

        for (j = 0; j < count; index++, offset++, j++) {
            if (data[index] == 0xdeafbead)
                continue;

            drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
                offset << 2, data[index]);
        }
    }
}

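/*
 * Encode a buffer as ascii85 for the dump, trimming trailing zero
 * dwords and skipping the block entirely if it is empty.
 */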
static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
    char out[ASCII85_BUFSZ];
    long i, l, datalen = 0;

    for (i = 0; i < len >> 2; i++) {
        if (data[i])
            datalen = (i + 1) << 2;
    }

    if (datalen == 0)
        return;

    drm_puts(p, "    data: !!ascii85 |\n");
    drm_puts(p, "      ");

    l = ascii85_encode_len(datalen);

    for (i = 0; i < l; i++)
        drm_puts(p, ascii85_encode(data[i], out));

    drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
    drm_puts(p, fmt);
    drm_puts(p, name);
    drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
        struct drm_printer *p)
{
    const struct a6xx_shader_block *block = obj->handle;
    int i;

    if (!obj->handle)
        return;

    print_name(p, "  - type: ", block->name);

    for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
        drm_printf(p, "    - bank: %d\n", i);
        drm_printf(p, "      size: %d\n", block->size);

        if (!obj->data)
            continue;

        print_ascii85(p, block->size << 2,
            obj->data + (block->size * i));
    }
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
        struct drm_printer *p)
{
    int ctx, index = 0;

    for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
        int j;

        drm_printf(p, "    - context: %d\n", ctx);

        for (j = 0; j < size; j += 2) {
            u32 count = RANGE(registers, j);
            u32 offset = registers[j];
            int k;

            for (k = 0; k < count; index++, offset++, k++) {
                if (data[index] == 0xdeafbead)
                    continue;

                drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
                    offset << 2, data[index]);
            }
        }
    }
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
        struct drm_printer *p)
{
    const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

    if (dbgahb) {
        print_name(p, "  - cluster-name: ", dbgahb->name);
        a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
            obj->data, p);
    }
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
        struct drm_printer *p)
{
    const struct a6xx_cluster *cluster = obj->handle;

    if (cluster) {
        print_name(p, "  - cluster-name: ", cluster->name);
        a6xx_show_cluster_data(cluster->registers, cluster->count,
            obj->data, p);
    }
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
        struct drm_printer *p)
{
    const struct a6xx_indexed_registers *indexed = obj->handle;

    if (!indexed)
        return;

    print_name(p, "  - regs-name: ", indexed->name);
    drm_printf(p, "    dwords: %d\n", indexed->count);

    print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
        u32 *data, struct drm_printer *p)
{
    if (block) {
        print_name(p, "  - debugbus-block: ", block->name);

        /*
         * count for regular debugbus data is in quadwords,
         * but print the size in dwords for consistency
         */
        drm_printf(p, "    count: %d\n", block->count << 1);

        print_ascii85(p, block->count << 3, data);
    }
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
        struct drm_printer *p)
{
    int i;

    for (i = 0; i < a6xx_state->nr_debugbus; i++) {
        struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

        a6xx_show_debugbus_block(obj->handle, obj->data, p);
    }

    if (a6xx_state->vbif_debugbus) {
        struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

        drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
        drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

        /* vbif debugbus data is in dwords.  Confusing, huh? */
        print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
    }

    for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
        struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

        a6xx_show_debugbus_block(obj->handle, obj->data, p);
    }
}

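/* Top-level printer for the a6xx devcoredump */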
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
        struct drm_printer *p)
{
    struct a6xx_gpu_state *a6xx_state = container_of(state,
            struct a6xx_gpu_state, base);
    int i;

    if (IS_ERR_OR_NULL(state))
        return;

    drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

    adreno_show(gpu, state, p);

    drm_puts(p, "gmu-log:\n");
    if (a6xx_state->gmu_log) {
        struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

        drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
        drm_printf(p, "    size: %zu\n", gmu_log->size);
        adreno_show_object(p, &gmu_log->data, gmu_log->size,
                &gmu_log->encoded);
    }

    drm_puts(p, "gmu-hfi:\n");
    if (a6xx_state->gmu_hfi) {
        struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
        unsigned i, j;

        drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
        drm_printf(p, "    size: %zu\n", gmu_hfi->size);
        for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
            drm_printf(p, "    queue-history[%u]:", i);
            for (j = 0; j < HFI_HISTORY_SZ; j++) {
                drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
            }
            drm_printf(p, "\n");
        }
        adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
                &gmu_hfi->encoded);
    }

    drm_puts(p, "gmu-debug:\n");
    if (a6xx_state->gmu_debug) {
        struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

        drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
        drm_printf(p, "    size: %zu\n", gmu_debug->size);
        adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
                &gmu_debug->encoded);
    }

    drm_puts(p, "registers:\n");
    for (i = 0; i < a6xx_state->nr_registers; i++) {
        struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
        const struct a6xx_registers *regs = obj->handle;

        if (!obj->handle)
            continue;

        a6xx_show_registers(regs->registers, obj->data, regs->count, p);
    }

    drm_puts(p, "registers-gmu:\n");
    for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
        struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
        const struct a6xx_registers *regs = obj->handle;

        if (!obj->handle)
            continue;

        a6xx_show_registers(regs->registers, obj->data, regs->count, p);
    }

    drm_puts(p, "indexed-registers:\n");
    for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
        a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

    drm_puts(p, "shader-blocks:\n");
    for (i = 0; i < a6xx_state->nr_shaders; i++)
        a6xx_show_shader(&a6xx_state->shaders[i], p);

    drm_puts(p, "clusters:\n");
    for (i = 0; i < a6xx_state->nr_clusters; i++)
        a6xx_show_cluster(&a6xx_state->clusters[i], p);

    for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
        a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

    drm_puts(p, "debugbus:\n");
    a6xx_show_debugbus(a6xx_state, p);
}