// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

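/*
 * One captured snapshot object: 'handle' points at the static
 * descriptor for the block that was dumped, 'data' at the captured
 * payload.
 */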
struct a6xx_gpu_state_obj {
    const void *handle;
    u32 *data;
};

struct a6xx_gpu_state {
    struct msm_gpu_state base;

    struct a6xx_gpu_state_obj *gmu_registers;
    int nr_gmu_registers;

    struct a6xx_gpu_state_obj *registers;
    int nr_registers;

    struct a6xx_gpu_state_obj *shaders;
    int nr_shaders;

    struct a6xx_gpu_state_obj *clusters;
    int nr_clusters;

    struct a6xx_gpu_state_obj *dbgahb_clusters;
    int nr_dbgahb_clusters;

    struct a6xx_gpu_state_obj *indexed_regs;
    int nr_indexed_regs;

    struct a6xx_gpu_state_obj *debugbus;
    int nr_debugbus;

    struct a6xx_gpu_state_obj *vbif_debugbus;

    struct a6xx_gpu_state_obj *cx_debugbus;
    int nr_cx_debugbus;

    struct msm_gpu_state_bo *gmu_log;
    struct msm_gpu_state_bo *gmu_hfi;
    struct msm_gpu_state_bo *gmu_debug;

    s32 hfi_queue_history[2][HFI_HISTORY_SZ];

    struct list_head objs;

    bool gpu_initialized;
};

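/*
 * Helpers to build the crashdumper script.  Each instruction is a pair
 * of u64s: for a write, the first word is the value and the second
 * packs the register offset (shifted up to bit 44), a write flag
 * (bit 21) and a count of one; for a read, the first word is the
 * target iova and the second packs the offset and the number of
 * dwords to read.  An all-zero pair terminates the script.
 */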
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
    in[0] = val;
    in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

    return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
    in[0] = target;
    in[1] = (((u64) reg) << 44 | dwords);

    return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
    in[0] = 0;
    in[1] = 0;

    return 2;
}

struct a6xx_crashdumper {
    void *ptr;
    struct drm_gem_object *bo;
    u64 iova;
};

struct a6xx_state_memobj {
    struct list_head node;
    unsigned long long data[];
};

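/*
 * Allocations made through state_kcalloc() are tracked on the state's
 * object list so that a6xx_gpu_state_destroy() can free them all in
 * one pass.
 */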
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
    struct a6xx_state_memobj *obj =
        kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

    if (!obj)
        return NULL;

    list_add_tail(&obj->node, &a6xx_state->objs);
    return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
        size_t size)
{
    void *dst = state_kcalloc(a6xx_state, 1, size);

    if (dst)
        memcpy(dst, src, size);
    return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)

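/*
 * Allocate and map the scratch BO for the crashdumper: the script is
 * written through the kernel mapping, while the iova is what gets
 * handed to the CP.
 */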
static int a6xx_crashdumper_init(struct msm_gpu *gpu,
        struct a6xx_crashdumper *dumper)
{
    dumper->ptr = msm_gem_kernel_new(gpu->dev,
        SZ_1M, MSM_BO_WC, gpu->aspace,
        &dumper->bo, &dumper->iova);

    if (!IS_ERR(dumper->ptr))
        msm_gem_object_set_name(dumper->bo, "crashdump");

    return PTR_ERR_OR_ZERO(dumper->ptr);
}

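/*
 * Run a prepared crashdump script: point the CP at the scratch BO,
 * set the control bit to kick off the dump, then poll the status
 * register (for up to 10ms) for completion.
 */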
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
        struct a6xx_crashdumper *dumper)
{
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    u32 val;
    int ret;

    if (IS_ERR_OR_NULL(dumper->ptr))
        return -EINVAL;

    if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
        return -EINVAL;

    /* Make sure all pending memory writes are posted */
    wmb();

    gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
        REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

    gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

    ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
        val & 0x02, 100, 10000);

    gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

    return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
        u32 *data)
{
    u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
        A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

    /* Wait 1 us to make sure the data is flowing */
    udelay(1);

    data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
    data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

    return 2;
}

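/*
 * The CX debug bus controller lives outside the GPU register space and
 * is accessed through a separately mapped pointer.  The offsets are
 * dword indices, hence the << 2 to form a byte offset.
 */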
#define cxdbg_write(ptr, offset, val) \
    msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
    msm_readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
        u32 *data)
{
    u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
        A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

    cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
    cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
    cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
    cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

    /* Wait 1 us to make sure the data is flowing */
    udelay(1);

    data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
    data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

    return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
        u32 reg, int count, u32 *data)
{
    int i;

    gpu_write(gpu, ctrl0, reg);

    for (i = 0; i < count; i++) {
        gpu_write(gpu, ctrl1, i);
        data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
    }

    return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

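/*
 * Total dwords streamed out of the VBIF debug bus: each AXI arbiter
 * block produces 16 dwords, each XIN AXI block 18 and each XIN core
 * block 12.
 */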
#define VBIF_DEBUGBUS_BLOCK_SIZE \
    ((16 * AXI_ARB_BLOCKS) + \
     (18 * XIN_AXI_BLOCKS) + \
     (12 * XIN_CORE_BLOCKS))

static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        struct a6xx_gpu_state_obj *obj)
{
    u32 clk, *ptr;
    int i;

    obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
        sizeof(u32));
    if (!obj->data)
        return;

    obj->handle = NULL;

    /* Get the current clock setting */
    clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

    /* Force on the bus so we can read it */
    gpu_write(gpu, REG_A6XX_VBIF_CLKON,
        clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

    /* We will read from BUS2 first, so disable BUS1 */
    gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

    /* Enable the VBIF bus for reading */
    gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

    ptr = obj->data;

    for (i = 0; i < AXI_ARB_BLOCKS; i++)
        ptr += vbif_debugbus_read(gpu,
            REG_A6XX_VBIF_TEST_BUS2_CTRL0,
            REG_A6XX_VBIF_TEST_BUS2_CTRL1,
            1 << (i + 16), 16, ptr);

    for (i = 0; i < XIN_AXI_BLOCKS; i++)
        ptr += vbif_debugbus_read(gpu,
            REG_A6XX_VBIF_TEST_BUS2_CTRL0,
            REG_A6XX_VBIF_TEST_BUS2_CTRL1,
            1 << i, 18, ptr);

    /* Stop BUS2 so we can turn on BUS1 */
    gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

    for (i = 0; i < XIN_CORE_BLOCKS; i++)
        ptr += vbif_debugbus_read(gpu,
            REG_A6XX_VBIF_TEST_BUS1_CTRL0,
            REG_A6XX_VBIF_TEST_BUS1_CTRL1,
            1 << i, 12, ptr);

    /* Restore the VBIF clock setting */
    gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

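/*
 * Each debugbus read returns two dwords, so the capture buffers below
 * are sized in u64s per sample.
 */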
static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_debugbus_block *block,
        struct a6xx_gpu_state_obj *obj)
{
    int i;
    u32 *ptr;

    obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
    if (!obj->data)
        return;

    obj->handle = block;

    for (ptr = obj->data, i = 0; i < block->count; i++)
        ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_debugbus_block *block,
        struct a6xx_gpu_state_obj *obj)
{
    int i;
    u32 *ptr;

    obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
    if (!obj->data)
        return;

    obj->handle = block;

    for (ptr = obj->data, i = 0; i < block->count; i++)
        ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

static void a6xx_get_debugbus(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state)
{
    struct resource *res;
    void __iomem *cxdbg = NULL;
    int nr_debugbus_blocks;

    /* Set up the GX debug bus */

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
        A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
        A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
    gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

    /* Set up the CX debug bus - it lives elsewhere in the system so do a
     * temporary ioremap for the registers
     */
    res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
            "cx_dbgc");

    if (res)
        cxdbg = ioremap(res->start, resource_size(res));

    if (cxdbg) {
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
            A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
            A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
            0x76543210);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
            0xFEDCBA98);

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
    }

    nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
        (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

    a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
            sizeof(*a6xx_state->debugbus));

    if (a6xx_state->debugbus) {
        int i;

        for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
            a6xx_get_debugbus_block(gpu,
                a6xx_state,
                &a6xx_debugbus_blocks[i],
                &a6xx_state->debugbus[i]);

        a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

        /*
         * GBIF has the same debugbus interface as the other GPU
         * blocks and uses the same block ID as VBIF, so collect it
         * through the default path on GBIF targets.
         */
        if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
            a6xx_get_debugbus_block(gpu, a6xx_state,
                &a6xx_gbif_debugbus_block,
                &a6xx_state->debugbus[i]);

            a6xx_state->nr_debugbus += 1;
        }
    }

    /* Dump the VBIF debugbus on applicable targets */
    if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
        a6xx_state->vbif_debugbus =
            state_kcalloc(a6xx_state, 1,
                    sizeof(*a6xx_state->vbif_debugbus));

        if (a6xx_state->vbif_debugbus)
            a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
                    a6xx_state->vbif_debugbus);
    }

    if (cxdbg) {
        a6xx_state->cx_debugbus =
            state_kcalloc(a6xx_state,
            ARRAY_SIZE(a6xx_cx_debugbus_blocks),
            sizeof(*a6xx_state->cx_debugbus));

        if (a6xx_state->cx_debugbus) {
            int i;

            for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
                a6xx_get_cx_debugbus_block(cxdbg,
                    a6xx_state,
                    &a6xx_cx_debugbus_blocks[i],
                    &a6xx_state->cx_debugbus[i]);

            a6xx_state->nr_cx_debugbus =
                ARRAY_SIZE(a6xx_cx_debugbus_blocks);
        }

        iounmap(cxdbg);
    }
}

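/*
 * Register lists are flat arrays of { start, end } pairs; RANGE()
 * returns the inclusive register count of the pair starting at
 * index 'a'.
 */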
#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)

/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_dbgahb_cluster *dbgahb,
        struct a6xx_gpu_state_obj *obj,
        struct a6xx_crashdumper *dumper)
{
    u64 *in = dumper->ptr;
    u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
    size_t datasize;
    int i, regcount = 0;

    for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
        int j;

        in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
            (dbgahb->statetype + i * 2) << 8);

        for (j = 0; j < dbgahb->count; j += 2) {
            int count = RANGE(dbgahb->registers, j);
            u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
                dbgahb->registers[j] - (dbgahb->base >> 2);

            in += CRASHDUMP_READ(in, offset, count, out);

            out += count * sizeof(u32);

            if (i == 0)
                regcount += count;
        }
    }

    CRASHDUMP_FINI(in);

    datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

    if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
        return;

    if (a6xx_crashdumper_run(gpu, dumper))
        return;

    obj->handle = dbgahb;
    obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
        datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        struct a6xx_crashdumper *dumper)
{
    int i;

    a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
        ARRAY_SIZE(a6xx_dbgahb_clusters),
        sizeof(*a6xx_state->dbgahb_clusters));

    if (!a6xx_state->dbgahb_clusters)
        return;

    a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

    for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
        a6xx_get_dbgahb_cluster(gpu, a6xx_state,
            &a6xx_dbgahb_clusters[i],
            &a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_cluster *cluster,
        struct a6xx_gpu_state_obj *obj,
        struct a6xx_crashdumper *dumper)
{
    u64 *in = dumper->ptr;
    u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
    size_t datasize;
    int i, regcount = 0;

    /* Some clusters need a selector register to be programmed too */
    if (cluster->sel_reg)
        in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

    for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
        int j;

        in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
            (cluster->id << 8) | (i << 4) | i);

        for (j = 0; j < cluster->count; j += 2) {
            int count = RANGE(cluster->registers, j);

            in += CRASHDUMP_READ(in, cluster->registers[j],
                count, out);

            out += count * sizeof(u32);

            if (i == 0)
                regcount += count;
        }
    }

    CRASHDUMP_FINI(in);

    datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

    if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
        return;

    if (a6xx_crashdumper_run(gpu, dumper))
        return;

    obj->handle = cluster;
    obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
        datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        struct a6xx_crashdumper *dumper)
{
    int i;

    a6xx_state->clusters = state_kcalloc(a6xx_state,
        ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

    if (!a6xx_state->clusters)
        return;

    a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

    for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
        a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
            &a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_shader_block *block,
        struct a6xx_gpu_state_obj *obj,
        struct a6xx_crashdumper *dumper)
{
    u64 *in = dumper->ptr;
    u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
    size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
    int i;

    if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
        return;

    for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
        in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
            (block->type << 8) | i);

        /* Each bank lands in its own slice of the data region */
        in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
            block->size, out);

        out += block->size * sizeof(u32);
    }

    CRASHDUMP_FINI(in);

    if (a6xx_crashdumper_run(gpu, dumper))
        return;

    obj->handle = block;
    obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
        datasize);
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        struct a6xx_crashdumper *dumper)
{
    int i;

    a6xx_state->shaders = state_kcalloc(a6xx_state,
        ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

    if (!a6xx_state->shaders)
        return;

    a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

    for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
        a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
            &a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_registers *regs,
        struct a6xx_gpu_state_obj *obj,
        struct a6xx_crashdumper *dumper)
{
    u64 *in = dumper->ptr;
    u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
    int i, regcount = 0;

    in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

    for (i = 0; i < regs->count; i += 2) {
        u32 count = RANGE(regs->registers, i);
        u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
            regs->registers[i] - (regs->val0 >> 2);

        in += CRASHDUMP_READ(in, offset, count, out);

        out += count * sizeof(u32);
        regcount += count;
    }

    CRASHDUMP_FINI(in);

    if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
        return;

    if (a6xx_crashdumper_run(gpu, dumper))
        return;

    obj->handle = regs;
    obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
        regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_registers *regs,
        struct a6xx_gpu_state_obj *obj,
        struct a6xx_crashdumper *dumper)
{
    u64 *in = dumper->ptr;
    u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
    int i, regcount = 0;

    /* Some blocks might need to program a selector register first */
    if (regs->val0)
        in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

    for (i = 0; i < regs->count; i += 2) {
        u32 count = RANGE(regs->registers, i);

        in += CRASHDUMP_READ(in, regs->registers[i], count, out);

        out += count * sizeof(u32);
        regcount += count;
    }

    CRASHDUMP_FINI(in);

    if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
        return;

    if (a6xx_crashdumper_run(gpu, dumper))
        return;

    obj->handle = regs;
    obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
        regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_registers *regs,
        struct a6xx_gpu_state_obj *obj)
{
    int i, regcount = 0, index = 0;

    for (i = 0; i < regs->count; i += 2)
        regcount += RANGE(regs->registers, i);

    obj->handle = (const void *) regs;
    obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
    if (!obj->data)
        return;

    for (i = 0; i < regs->count; i += 2) {
        u32 count = RANGE(regs->registers, i);
        int j;

        for (j = 0; j < count; j++)
            obj->data[index++] = gpu_read(gpu,
                regs->registers[i] + j);
    }
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_registers *regs,
        struct a6xx_gpu_state_obj *obj,
        bool rscc)
{
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
    int i, regcount = 0, index = 0;

    for (i = 0; i < regs->count; i += 2)
        regcount += RANGE(regs->registers, i);

    obj->handle = (const void *) regs;
    obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
    if (!obj->data)
        return;

    for (i = 0; i < regs->count; i += 2) {
        u32 count = RANGE(regs->registers, i);
        int j;

        for (j = 0; j < count; j++) {
            u32 offset = regs->registers[i] + j;
            u32 val;

            if (rscc)
                val = gmu_read_rscc(gmu, offset);
            else
                val = gmu_read(gmu, offset);

            obj->data[index++] = val;
        }
    }
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state)
{
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

    a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
        3, sizeof(*a6xx_state->gmu_registers));

    if (!a6xx_state->gmu_registers)
        return;

    a6xx_state->nr_gmu_registers = 3;

    /* Get the CX GMU registers from AHB */
    _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
        &a6xx_state->gmu_registers[0], false);
    _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
        &a6xx_state->gmu_registers[1], true);

    if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
        return;

    /* Set the fence to ALLOW mode so we can access the registers */
    gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

    _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
        &a6xx_state->gmu_registers[2], false);
}

static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
        struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
    struct msm_gpu_state_bo *snapshot;

    snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
    if (!snapshot)
        return NULL;

    snapshot->iova = bo->iova;
    snapshot->size = bo->size;
    snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
    if (!snapshot->data)
        return NULL;

    memcpy(snapshot->data, bo->virt, bo->size);

    return snapshot;
}

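/*
 * Copy each HFI queue's history ring into the snapshot, unrolled
 * starting from the queue's current history index.
 */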
static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
                      struct a6xx_gpu_state *a6xx_state)
{
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
    unsigned i, j;

    BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));

    for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
        struct a6xx_hfi_queue *queue = &gmu->queues[i];
        for (j = 0; j < HFI_HISTORY_SZ; j++) {
            unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
            a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
        }
    }
}

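/*
 * One reglist slot is used for either the GBIF or the VBIF registers,
 * depending on the target.
 */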
#define A6XX_GBIF_REGLIST_SIZE   1
static void a6xx_get_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        struct a6xx_crashdumper *dumper)
{
    int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
        ARRAY_SIZE(a6xx_reglist) +
        ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
    int index = 0;
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

    a6xx_state->registers = state_kcalloc(a6xx_state,
        count, sizeof(*a6xx_state->registers));

    if (!a6xx_state->registers)
        return;

    a6xx_state->nr_registers = count;

    for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
        a6xx_get_ahb_gpu_registers(gpu,
            a6xx_state, &a6xx_ahb_reglist[i],
            &a6xx_state->registers[index++]);

    if (a6xx_has_gbif(adreno_gpu))
        a6xx_get_ahb_gpu_registers(gpu,
                a6xx_state, &a6xx_gbif_reglist,
                &a6xx_state->registers[index++]);
    else
        a6xx_get_ahb_gpu_registers(gpu,
                a6xx_state, &a6xx_vbif_reglist,
                &a6xx_state->registers[index++]);

    if (!dumper) {
        /*
         * We can't use the crashdumper when the SMMU is stalled,
         * because the GPU has no memory access until we resume
         * translation (but we don't want to do that until after
         * we have captured as much useful GPU state as possible).
         * So instead collect registers via the CPU:
         */
        for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
            a6xx_get_ahb_gpu_registers(gpu,
                a6xx_state, &a6xx_reglist[i],
                &a6xx_state->registers[index++]);
        return;
    }

    for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
        a6xx_get_crashdumper_registers(gpu,
            a6xx_state, &a6xx_reglist[i],
            &a6xx_state->registers[index++],
            dumper);

    for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
        a6xx_get_crashdumper_hlsq_registers(gpu,
            a6xx_state, &a6xx_hlsq_reglist[i],
            &a6xx_state->registers[index++],
            dumper);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state,
        const struct a6xx_indexed_registers *indexed,
        struct a6xx_gpu_state_obj *obj)
{
    int i;

    obj->handle = (const void *) indexed;
    obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
    if (!obj->data)
        return;

    /* All the indexed banks start at address 0 */
    gpu_write(gpu, indexed->addr, 0);

    /* Read the data - each read increments the internal address by 1 */
    for (i = 0; i < indexed->count; i++)
        obj->data[i] = gpu_read(gpu, indexed->data);
}

static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
        struct a6xx_gpu_state *a6xx_state)
{
    u32 mempool_size;
    int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
    int i;

    a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
        sizeof(*a6xx_state->indexed_regs));
    if (!a6xx_state->indexed_regs)
        return;

    for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
        a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
            &a6xx_state->indexed_regs[i]);

    /* Set the CP mempool size to 0 to stabilize it while dumping */
    mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
    gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

    /* Get the contents of the CP mempool */
    a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
        &a6xx_state->indexed_regs[i]);

    /*
     * Offset 0x2000 in the mempool is the size - copy the saved size over
     * so the data is consistent
     */
    a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

    /* Restore the size in the hardware */
    gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

    a6xx_state->nr_indexed_regs = count;
}

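/*
 * Top-level snapshot entry point: capture the GMU state first, then,
 * if the GX domain is powered, the indexed registers, the GPU
 * registers and (when it is safe to run) the crashdumper-assisted
 * shader and cluster dumps.
 */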
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
    struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
    struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
        GFP_KERNEL);
    bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
            A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

    if (!a6xx_state)
        return ERR_PTR(-ENOMEM);

    INIT_LIST_HEAD(&a6xx_state->objs);

    /* Get the generic state from the adreno core */
    adreno_gpu_state_get(gpu, &a6xx_state->base);

    a6xx_get_gmu_registers(gpu, a6xx_state);

    a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
    a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
    a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

    a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);

    /* If GX isn't on, the rest of the data isn't going to be accessible */
    if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
        return &a6xx_state->base;

    /* Get the banks of indexed registers */
    a6xx_get_indexed_registers(gpu, a6xx_state);

    /*
     * Try to initialize the crashdumper, if we are not dumping state
     * with the SMMU stalled.  The crashdumper needs memory access to
     * write out GPU state, so we need to skip this when the SMMU is
     * stalled in response to an iova fault
     */
    if (!stalled && !gpu->needs_hw_init &&
        !a6xx_crashdumper_init(gpu, &_dumper)) {
        dumper = &_dumper;
    }

    a6xx_get_registers(gpu, a6xx_state, dumper);

    if (dumper) {
        a6xx_get_shaders(gpu, a6xx_state, dumper);
        a6xx_get_clusters(gpu, a6xx_state, dumper);
        a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

        msm_gem_kernel_put(dumper->bo, gpu->aspace);
    }

    if (snapshot_debugbus)
        a6xx_get_debugbus(gpu, a6xx_state);

    a6xx_state->gpu_initialized = !gpu->needs_hw_init;

    return &a6xx_state->base;
}

static void a6xx_gpu_state_destroy(struct kref *kref)
{
    struct a6xx_state_memobj *obj, *tmp;
    struct msm_gpu_state *state = container_of(kref,
            struct msm_gpu_state, ref);
    struct a6xx_gpu_state *a6xx_state = container_of(state,
            struct a6xx_gpu_state, base);

    if (a6xx_state->gmu_log)
        kvfree(a6xx_state->gmu_log->data);

    if (a6xx_state->gmu_hfi)
        kvfree(a6xx_state->gmu_hfi->data);

    /* The gmu_debug snapshot data is kvzalloc'd too - don't leak it */
    if (a6xx_state->gmu_debug)
        kvfree(a6xx_state->gmu_debug->data);

    list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
        kfree(obj);

    adreno_gpu_state_destroy(state);
    kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
    if (IS_ERR_OR_NULL(state))
        return 1;

    return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
        struct drm_printer *p)
{
    int i, index = 0;

    if (!data)
        return;

    for (i = 0; i < count; i += 2) {
        u32 count = RANGE(registers, i);
        u32 offset = registers[i];
        int j;

        for (j = 0; j < count; index++, offset++, j++) {
            if (data[index] == 0xdeafbead)
                continue;

            drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
                offset << 2, data[index]);
        }
    }
}

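/*
 * Encode a buffer as ascii85 for the dump, trimming trailing zero
 * dwords and skipping the block entirely if it is empty.
 */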
static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
    char out[ASCII85_BUFSZ];
    long i, l, datalen = 0;

    for (i = 0; i < len >> 2; i++) {
        if (data[i])
            datalen = (i + 1) << 2;
    }

    if (datalen == 0)
        return;

    drm_puts(p, "    data: !!ascii85 |\n");
    drm_puts(p, "      ");

    l = ascii85_encode_len(datalen);

    for (i = 0; i < l; i++)
        drm_puts(p, ascii85_encode(data[i], out));

    drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
    drm_puts(p, fmt);
    drm_puts(p, name);
    drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
        struct drm_printer *p)
{
    const struct a6xx_shader_block *block = obj->handle;
    int i;

    if (!obj->handle)
        return;

    print_name(p, "  - type: ", block->name);

    for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
        drm_printf(p, "    - bank: %d\n", i);
        drm_printf(p, "      size: %d\n", block->size);

        if (!obj->data)
            continue;

        print_ascii85(p, block->size << 2,
            obj->data + (block->size * i));
    }
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
        struct drm_printer *p)
{
    int ctx, index = 0;

    for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
        int j;

        drm_printf(p, "    - context: %d\n", ctx);

        for (j = 0; j < size; j += 2) {
            u32 count = RANGE(registers, j);
            u32 offset = registers[j];
            int k;

            for (k = 0; k < count; index++, offset++, k++) {
                if (data[index] == 0xdeafbead)
                    continue;

                drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
                    offset << 2, data[index]);
            }
        }
    }
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
        struct drm_printer *p)
{
    const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

    if (dbgahb) {
        print_name(p, "  - cluster-name: ", dbgahb->name);
        a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
            obj->data, p);
    }
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
        struct drm_printer *p)
{
    const struct a6xx_cluster *cluster = obj->handle;

    if (cluster) {
        print_name(p, "  - cluster-name: ", cluster->name);
        a6xx_show_cluster_data(cluster->registers, cluster->count,
            obj->data, p);
    }
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
        struct drm_printer *p)
{
    const struct a6xx_indexed_registers *indexed = obj->handle;

    if (!indexed)
        return;

    print_name(p, "  - regs-name: ", indexed->name);
    drm_printf(p, "    dwords: %d\n", indexed->count);

    print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
        u32 *data, struct drm_printer *p)
{
    if (block) {
        print_name(p, "  - debugbus-block: ", block->name);

        /*
         * count for regular debugbus data is in quadwords,
         * but print the size in dwords for consistency
         */
        drm_printf(p, "    count: %d\n", block->count << 1);

        print_ascii85(p, block->count << 3, data);
    }
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
        struct drm_printer *p)
{
    int i;

    for (i = 0; i < a6xx_state->nr_debugbus; i++) {
        struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

        a6xx_show_debugbus_block(obj->handle, obj->data, p);
    }

    if (a6xx_state->vbif_debugbus) {
        struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

        drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
        drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

        /* vbif debugbus data is in dwords.  Confusing, huh? */
        print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
    }

    for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
        struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

        a6xx_show_debugbus_block(obj->handle, obj->data, p);
    }
}

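/* Top-level printer for the a6xx devcoredump */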
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
        struct drm_printer *p)
{
    struct a6xx_gpu_state *a6xx_state = container_of(state,
            struct a6xx_gpu_state, base);
    int i;

    if (IS_ERR_OR_NULL(state))
        return;

    drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

    adreno_show(gpu, state, p);

    drm_puts(p, "gmu-log:\n");
    if (a6xx_state->gmu_log) {
        struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

        drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
        drm_printf(p, "    size: %zu\n", gmu_log->size);
        adreno_show_object(p, &gmu_log->data, gmu_log->size,
                &gmu_log->encoded);
    }

    drm_puts(p, "gmu-hfi:\n");
    if (a6xx_state->gmu_hfi) {
        struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
        unsigned i, j;

        drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
        drm_printf(p, "    size: %zu\n", gmu_hfi->size);
        for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
            drm_printf(p, "    queue-history[%u]:", i);
            for (j = 0; j < HFI_HISTORY_SZ; j++) {
                drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
            }
            drm_printf(p, "\n");
        }
        adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
                &gmu_hfi->encoded);
    }

    drm_puts(p, "gmu-debug:\n");
    if (a6xx_state->gmu_debug) {
        struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

        drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
        drm_printf(p, "    size: %zu\n", gmu_debug->size);
        adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
                &gmu_debug->encoded);
    }

    drm_puts(p, "registers:\n");
    for (i = 0; i < a6xx_state->nr_registers; i++) {
        struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
        const struct a6xx_registers *regs = obj->handle;

        if (!obj->handle)
            continue;

        a6xx_show_registers(regs->registers, obj->data, regs->count, p);
    }

    drm_puts(p, "registers-gmu:\n");
    for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
        struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
        const struct a6xx_registers *regs = obj->handle;

        if (!obj->handle)
            continue;

        a6xx_show_registers(regs->registers, obj->data, regs->count, p);
    }

    drm_puts(p, "indexed-registers:\n");
    for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
        a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

    drm_puts(p, "shader-blocks:\n");
    for (i = 0; i < a6xx_state->nr_shaders; i++)
        a6xx_show_shader(&a6xx_state->shaders[i], p);

    drm_puts(p, "clusters:\n");
    for (i = 0; i < a6xx_state->nr_clusters; i++)
        a6xx_show_cluster(&a6xx_state->clusters[i], p);

    for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
        a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

    drm_puts(p, "debugbus:\n");
    a6xx_show_debugbus(a6xx_state, p);
}