// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include "a3xx_gpu.h"

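/*
 * IRQs unmasked at init.  Everything here except CACHE_FLUSH_TS is an
 * error/fault source; CACHE_FLUSH_TS is the fence interrupt raised by the
 * CP_EVENT_WRITE emitted at the end of a3xx_submit() below.
 */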
#define A3XX_INT0_MASK \
    (A3XX_INT0_RBBM_AHB_ERROR |        \
     A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
     A3XX_INT0_CP_T0_PACKET_IN_IB |    \
     A3XX_INT0_CP_OPCODE_ERROR |       \
     A3XX_INT0_CP_RESERVED_BIT_ERROR | \
     A3XX_INT0_CP_HW_FAULT |           \
     A3XX_INT0_CP_IB1_INT |            \
     A3XX_INT0_CP_IB2_INT |            \
     A3XX_INT0_CP_RB_INT |             \
     A3XX_INT0_CP_REG_PROTECT_FAULT |  \
     A3XX_INT0_CP_AHB_ERROR_HALT |     \
     A3XX_INT0_CACHE_FLUSH_TS |        \
     A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
    struct msm_ringbuffer *ring = submit->ring;
    unsigned int i;

    for (i = 0; i < submit->nr_cmds; i++) {
        switch (submit->cmd[i].type) {
        case MSM_SUBMIT_CMD_IB_TARGET_BUF:
            /* ignore IB-targets */
            break;
        case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
            /* ignore if there has not been a ctx switch: */
            if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
                break;
            fallthrough;
        case MSM_SUBMIT_CMD_BUF:
            OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
            OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
            OUT_RING(ring, submit->cmd[i].size);
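            /* PKT2 is a CP no-op, seemingly emitted here as padding: */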
            OUT_PKT2(ring);
            break;
        }
    }

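    /*
     * Stash the seqno in CP_SCRATCH_REG2 as well; a3xx_recover() dumps the
     * scratch registers, which shows how far the CP got before a hang.
     */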
    OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
    OUT_RING(ring, submit->seqno);

    /* Flush HLSQ lazy updates to make sure there is nothing
     * pending for indirect loads after the timestamp has
     * passed:
     */
    OUT_PKT3(ring, CP_EVENT_WRITE, 1);
    OUT_RING(ring, HLSQ_FLUSH);

    /* wait for idle before cache flush/interrupt */
    OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
    OUT_RING(ring, 0x00000000);

    /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
    OUT_PKT3(ring, CP_EVENT_WRITE, 3);
    OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
    OUT_RING(ring, rbmemptr(ring, fence));
    OUT_RING(ring, submit->seqno);

#if 0
    /* Dummy set-constant to trigger context rollover */
    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
    OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
    OUT_RING(ring, 0x00000000);
#endif

    adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
}

static bool a3xx_me_init(struct msm_gpu *gpu)
{
    struct msm_ringbuffer *ring = gpu->rb[0];

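    /*
     * CP_ME_INIT with a 17-dword payload; the values appear to mirror the
     * downstream (kgsl) a3xx init sequence.
     */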
    OUT_PKT3(ring, CP_ME_INIT, 17);
    OUT_RING(ring, 0x000003f7);
    OUT_RING(ring, 0x00000000);
    OUT_RING(ring, 0x00000000);
    OUT_RING(ring, 0x00000000);
    OUT_RING(ring, 0x00000080);
    OUT_RING(ring, 0x00000100);
    OUT_RING(ring, 0x00000180);
    OUT_RING(ring, 0x00006600);
    OUT_RING(ring, 0x00000150);
    OUT_RING(ring, 0x0000014e);
    OUT_RING(ring, 0x00000154);
    OUT_RING(ring, 0x00000001);
    OUT_RING(ring, 0x00000000);
    OUT_RING(ring, 0x00000000);
    OUT_RING(ring, 0x00000000);
    OUT_RING(ring, 0x00000000);
    OUT_RING(ring, 0x00000000);

    adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
    return a3xx_idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
    uint32_t *ptr, len;
    int i, ret;

    DBG("%s", gpu->name);

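    /*
     * VBIF (bus interface) tuning is per-variant; the magic values below
     * presumably come from the downstream driver.
     */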
    if (adreno_is_a305(adreno_gpu)) {
        /* Set up 16 deep read/write request queues: */
        gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
        gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
        gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
        gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
        gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
        /* Enable WR-REQ: */
        gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
        /* Set up round robin arbitration between both AXI ports: */
        gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
        /* Set up AOOO: */
        gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
    } else if (adreno_is_a306(adreno_gpu)) {
        gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
    } else if (adreno_is_a320(adreno_gpu)) {
        /* Set up 16 deep read/write request queues: */
        gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
        gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
        gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
        gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
        gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
        /* Enable WR-REQ: */
        gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
        /* Set up round robin arbitration between both AXI ports: */
        gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
        /* Set up AOOO: */
        gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
        /* Enable 1K sort: */
        gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
        gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

    } else if (adreno_is_a330v2(adreno_gpu)) {
        /*
         * Most of the VBIF registers on 8974v2 have the correct
         * values at power on, so we won't modify those if we don't
         * need to
         */
        /* Enable 1k sort: */
        gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
        gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
        /* Enable WR-REQ: */
        gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
        gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
        /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
        gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

    } else if (adreno_is_a330(adreno_gpu)) {
        /* Set up 16 deep read/write request queues: */
        gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
        gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
        gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
        gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
        gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
        /* Enable WR-REQ: */
        gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
        /* Set up round robin arbitration between both AXI ports: */
        gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
        /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
        gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
        /* Set up AOOO: */
        gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
        gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
        /* Enable 1K sort: */
        gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
        gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
        /* Disable VBIF clock gating.  This allows the AXI bus to run at a
         * higher frequency than the GPU:
         */
        gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

    } else {
        BUG();
    }

    /* Make all blocks contribute to the GPU BUSY perf counter: */
    gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

    /* Tune the hysteresis counters for SP and CP idle detection: */
    gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
    gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

    /* Enable the RBBM error reporting bits.  This lets us get
     * useful information on failure:
     */
    gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

    /* Enable AHB error reporting: */
    gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

    /* Turn on the power counters: */
    gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

    /* Turn on hang detection - this spews a lot of useful information
     * into the RBBM registers on a hang:
     */
    gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

    /* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
    gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

    /* Enable Clock gating: */
    if (adreno_is_a306(adreno_gpu))
        gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
    else if (adreno_is_a320(adreno_gpu))
        gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
    else if (adreno_is_a330v2(adreno_gpu))
        gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
    else if (adreno_is_a330(adreno_gpu))
        gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

    if (adreno_is_a330v2(adreno_gpu))
        gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
    else if (adreno_is_a330(adreno_gpu))
        gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

    /* Set the OCMEM base address for A330, etc */
    if (a3xx_gpu->ocmem.hdl) {
        gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
            (unsigned int)(a3xx_gpu->ocmem.base >> 14));
    }

    /* Turn on performance counters: */
    gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

    /* Enable the perfcntrs that we use.. */
    for (i = 0; i < gpu->num_perfcntrs; i++) {
        const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
        gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
    }

    gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

    ret = adreno_hw_init(gpu);
    if (ret)
        return ret;

    /*
     * Use the default ringbuffer size and block size but disable the RPTR
     * shadow
     */
    gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
        MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

    /* Set the ringbuffer address */
    gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

    /* setup access protection: */
    gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

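    /*
     * Each CP_PROTECT entry appears to encode a register range the CP may
     * not touch from the ring; violations raise the CP_REG_PROTECT_FAULT
     * IRQ enabled in A3XX_INT0_MASK above.
     */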
    /* RBBM registers */
    gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
    gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
    gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
    gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
    gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
    gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

    /* CP registers */
    gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
    gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
    gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
    gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
    gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

    /* RB registers */
    gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

    /* VBIF registers */
    gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

    /* NOTE: PM4/micro-engine firmware registers look to be the same
     * for a2xx and a3xx.. we could possibly push that part down to
     * adreno_gpu base class.  Or push both PM4 and PFP but
     * parameterize the pfp ucode addr/data registers..
     */

    /* Load PM4: */
    ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
    len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
    DBG("loading PM4 ucode version: %x", ptr[1]);

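    /*
     * Note the upload loops below start at i = 1: dword 0 of the firmware
     * image is skipped, as it appears to be a header rather than ucode.
     */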
    gpu_write(gpu, REG_AXXX_CP_DEBUG,
            AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
            AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
    gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
    for (i = 1; i < len; i++)
        gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

    /* Load PFP: */
    ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
    len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
    DBG("loading PFP ucode version: %x", ptr[5]);

    gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
    for (i = 1; i < len; i++)
        gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

    /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
    if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
            adreno_is_a320(adreno_gpu)) {
        gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
                AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
                AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
                AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
    } else if (adreno_is_a330(adreno_gpu)) {
        /* NOTE: this value, taken from the downstream Android driver,
         * includes some bits outside of the known bitfields.  But
         * A330 has this "MERCIU queue" thing too, which might
         * explain a new bitfield or reshuffling:
         */
        gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
    }

    /* clear ME_HALT to start micro engine */
    gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

    return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
    int i;

    adreno_dump_info(gpu);

    for (i = 0; i < 8; i++) {
        printk("CP_SCRATCH_REG%d: %u\n", i,
            gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
    }

    /* dump registers before resetting gpu, if enabled: */
    if (hang_debug)
        a3xx_dump(gpu);

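    /* assert SW reset, read back to post the write, then deassert: */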
    gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
    gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
    gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
    adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
    struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
    struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

    DBG("%s", gpu->name);

    adreno_gpu_cleanup(adreno_gpu);

    adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);

    kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
    /* wait for ringbuffer to drain: */
    if (!adreno_idle(gpu, gpu->rb[0]))
        return false;

    /* then wait for GPU to finish: */
    if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
            A3XX_RBBM_STATUS_GPU_BUSY))) {
        DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

        /* TODO maybe we need to reset GPU here to recover from hang? */
        return false;
    }

    return true;
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
    uint32_t status;

    status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
    DBG("%s: %08x", gpu->name, status);

    // TODO

    gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

    msm_gpu_retire(gpu);

    return IRQ_HANDLED;
}

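/*
 * Pairs of (first, last) register offsets, inclusive, that get dumped for
 * debugfs and GPU state snapshots via adreno_gpu->registers:
 */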
static const unsigned int a3xx_registers[] = {
    0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
    0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
    0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
    0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
    0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
    0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
    0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
    0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
    0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
    0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
    0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
    0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
    0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
    0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
    0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
    0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
    0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
    0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
    0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
    0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
    0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
    0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
    0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
    0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
    0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
    0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
    0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
    0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
    0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
    0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
    0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
    0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
    0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
    0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
    ~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
    printk("status:   %08x\n",
            gpu_read(gpu, REG_A3XX_RBBM_STATUS));
    adreno_dump(gpu);
}

static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
    struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

    if (!state)
        return ERR_PTR(-ENOMEM);

    adreno_gpu_state_get(gpu, state);

    state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

    return state;
}

static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
    ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
    return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
    .base = {
        .get_param = adreno_get_param,
        .set_param = adreno_set_param,
        .hw_init = a3xx_hw_init,
        .pm_suspend = msm_gpu_pm_suspend,
        .pm_resume = msm_gpu_pm_resume,
        .recover = a3xx_recover,
        .submit = a3xx_submit,
        .active_ring = adreno_active_ring,
        .irq = a3xx_irq,
        .destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
        .show = adreno_show,
#endif
        .gpu_state_get = a3xx_gpu_state_get,
        .gpu_state_put = adreno_gpu_state_put,
        .create_address_space = adreno_iommu_create_address_space,
        .get_rptr = a3xx_get_rptr,
    },
};

static const struct msm_gpu_perfcntr perfcntrs[] = {
    { REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
            SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
    { REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
            SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
    struct a3xx_gpu *a3xx_gpu = NULL;
    struct adreno_gpu *adreno_gpu;
    struct msm_gpu *gpu;
    struct msm_drm_private *priv = dev->dev_private;
    struct platform_device *pdev = priv->gpu_pdev;
    struct icc_path *ocmem_icc_path;
    struct icc_path *icc_path;
    int ret;

    if (!pdev) {
        DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
        ret = -ENXIO;
        goto fail;
    }

    a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
    if (!a3xx_gpu) {
        ret = -ENOMEM;
        goto fail;
    }

    adreno_gpu = &a3xx_gpu->base;
    gpu = &adreno_gpu->base;

    gpu->perfcntrs = perfcntrs;
    gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

    adreno_gpu->registers = a3xx_registers;

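    /* a3xx uses a single ringbuffer, hence nr_rings = 1: */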
    ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
    if (ret)
        goto fail;

    /* if needed, allocate gmem: */
    if (adreno_is_a330(adreno_gpu)) {
        ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
                        adreno_gpu, &a3xx_gpu->ocmem);
        if (ret)
            goto fail;
    }

    if (!gpu->aspace) {
        /* TODO we think it is possible to configure the GPU to
         * restrict access to VRAM carveout.  But the required
         * registers are unknown.  For now just bail out and
         * limp along with just modesetting.  If it turns out
         * to not be possible to restrict access, then we must
         * implement a cmdstream validator.
         */
        DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
        if (!allow_vram_carveout) {
            ret = -ENXIO;
            goto fail;
        }
    }

    icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
    if (IS_ERR(icc_path)) {
        ret = PTR_ERR(icc_path);
        goto fail;
    }

    ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
    if (IS_ERR(ocmem_icc_path)) {
        ret = PTR_ERR(ocmem_icc_path);
        /* allow -ENODATA, ocmem icc is optional */
        if (ret != -ENODATA)
            goto fail;
        ocmem_icc_path = NULL;
    }

    /*
     * Set the ICC path to maximum speed for now by multiplying the fastest
     * frequency by the bus width (8). We'll want to scale this later on to
     * improve battery life.
     */
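    /* icc_set_bw() treats a NULL path as a no-op, so the optional ocmem
     * path is safe to pass unconditionally:
     */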
    icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
    icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

    return gpu;

fail:
    if (a3xx_gpu)
        a3xx_destroy(&a3xx_gpu->base.base);

    return ERR_PTR(ret);
}