0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
0003  */
0004 
0005 #include <linux/kernel.h>
0006 #include <linux/types.h>
0007 #include <linux/cpumask.h>
0008 #include <linux/qcom_scm.h>
0009 #include <linux/pm_opp.h>
0010 #include <linux/nvmem-consumer.h>
0011 #include <linux/slab.h>
0012 #include "msm_gem.h"
0013 #include "msm_mmu.h"
0014 #include "a5xx_gpu.h"
0015 
0016 extern bool hang_debug;
0017 static void a5xx_dump(struct msm_gpu *gpu);
0018 
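/* Peripheral Authentication Service (PAS) id for the GPU, passed to the SCM
 * calls below that load and resume the zap shader.
 */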
0019 #define GPU_PAS_ID 13
0020 
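/* Ask the CP to write its current read pointer to the per-ring shadow buffer,
 * so the kernel can observe ringbuffer progress while the hardware RPTR
 * shadow itself is kept disabled (see a5xx_hw_init()).
 */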
0021 static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
0022 {
0023     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
0024     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
0025 
0026     if (a5xx_gpu->has_whereami) {
0027         OUT_PKT7(ring, CP_WHERE_AM_I, 2);
0028         OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
0029         OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
0030     }
0031 }
0032 
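/* Publish new commands to the CP: commit ring->next to ring->cur under the
 * preempt lock, then poke CP_RB_WPTR only if this ring is currently active
 * and a preemption switch is not in flight.
 */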
0033 void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
0034         bool sync)
0035 {
0036     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
0037     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
0038     uint32_t wptr;
0039     unsigned long flags;
0040 
0041     /*
0042      * Most flush operations need to issue a WHERE_AM_I opcode to sync up
0043      * the rptr shadow
0044      */
0045     if (sync)
0046         update_shadow_rptr(gpu, ring);
0047 
0048     spin_lock_irqsave(&ring->preempt_lock, flags);
0049 
0050     /* Copy the shadow to the actual register */
0051     ring->cur = ring->next;
0052 
0053     /* Make sure to wrap wptr if we need to */
0054     wptr = get_wptr(ring);
0055 
0056     spin_unlock_irqrestore(&ring->preempt_lock, flags);
0057 
0058     /* Make sure everything is posted before making a decision */
0059     mb();
0060 
0061     /* Update HW if this is the current ring and we are not in preempt */
0062     if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
0063         gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
0064 }
0065 
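/* Debug-only submit path (CONFIG_DRM_MSM_GPU_SUDO): the command stream is
 * copied directly into the ringbuffer instead of being referenced through
 * indirect buffers.
 */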
0066 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
0067 {
0068     struct msm_ringbuffer *ring = submit->ring;
0069     struct msm_gem_object *obj;
0070     uint32_t *ptr, dwords;
0071     unsigned int i;
0072 
0073     for (i = 0; i < submit->nr_cmds; i++) {
0074         switch (submit->cmd[i].type) {
0075         case MSM_SUBMIT_CMD_IB_TARGET_BUF:
0076             break;
0077         case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
0078             if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
0079                 break;
0080             fallthrough;
0081         case MSM_SUBMIT_CMD_BUF:
0082             /* copy commands into RB: */
0083             obj = submit->bos[submit->cmd[i].idx].obj;
0084             dwords = submit->cmd[i].size;
0085 
0086             ptr = msm_gem_get_vaddr(&obj->base);
0087 
0088             /* _get_vaddr() shouldn't fail at this point,
0089              * since we've already mapped it once in
0090              * submit_reloc()
0091              */
0092             if (WARN_ON(!ptr))
0093                 return;
0094 
0095             for (i = 0; i < dwords; i++) {
0096                 /* normally the OUT_PKTn() would wait
0097                  * for space for the packet.  But since
0098                  * we just OUT_RING() the whole thing,
0099                  * we need to call adreno_wait_ring()
0100                  * ourselves:
0101                  */
0102                 adreno_wait_ring(ring, 1);
0103                 OUT_RING(ring, ptr[i]);
0104             }
0105 
0106             msm_gem_put_vaddr(&obj->base);
0107 
0108             break;
0109         }
0110     }
0111 
0112     a5xx_flush(gpu, ring, true);
0113     a5xx_preempt_trigger(gpu);
0114 
0115     /* we might not necessarily have a cmd from userspace to
0116      * trigger an event to know that submit has completed, so
0117      * do this manually:
0118      */
0119     a5xx_idle(gpu, ring);
0120     ring->memptrs->fence = submit->seqno;
0121     msm_gpu_retire(gpu);
0122 }
0123 
0124 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
0125 {
0126     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
0127     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
0128     struct msm_ringbuffer *ring = submit->ring;
0129     unsigned int i, ibs = 0;
0130 
0131     if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
0132         gpu->cur_ctx_seqno = 0;
0133         a5xx_submit_in_rb(gpu, submit);
0134         return;
0135     }
0136 
0137     OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
0138     OUT_RING(ring, 0x02);
0139 
0140     /* Turn off protected mode to write to special registers */
0141     OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
0142     OUT_RING(ring, 0);
0143 
0144     /* Set the save preemption record for the ring/command */
0145     OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
0146     OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
0147     OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
0148 
0149     /* Turn back on protected mode */
0150     OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
0151     OUT_RING(ring, 1);
0152 
0153     /* Enable local preemption for finegrain preemption */
0154     OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
0155     OUT_RING(ring, 0x02);
0156 
0157     /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
0158     OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
0159     OUT_RING(ring, 0x02);
0160 
0161     /* Submit the commands */
0162     for (i = 0; i < submit->nr_cmds; i++) {
0163         switch (submit->cmd[i].type) {
0164         case MSM_SUBMIT_CMD_IB_TARGET_BUF:
0165             break;
0166         case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
0167             if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
0168                 break;
0169             fallthrough;
0170         case MSM_SUBMIT_CMD_BUF:
0171             OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
0172             OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
0173             OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
0174             OUT_RING(ring, submit->cmd[i].size);
0175             ibs++;
0176             break;
0177         }
0178 
0179         /*
0180          * Periodically update shadow-wptr if needed, so that we
0181          * can see partial progress of submits with large # of
0182          * cmds.. otherwise we could needlessly stall waiting for
0183          * ringbuffer state, simply due to looking at a shadow
0184          * rptr value that has not been updated
0185          */
0186         if ((ibs % 32) == 0)
0187             update_shadow_rptr(gpu, ring);
0188     }
0189 
0190     /*
0191      * Write the render mode to NULL (0) to indicate to the CP that the IBs
0192      * are done rendering - otherwise a lucky preemption would start
0193      * replaying from the last checkpoint
0194      */
0195     OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
0196     OUT_RING(ring, 0);
0197     OUT_RING(ring, 0);
0198     OUT_RING(ring, 0);
0199     OUT_RING(ring, 0);
0200     OUT_RING(ring, 0);
0201 
0202     /* Turn off IB level preemptions */
0203     OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
0204     OUT_RING(ring, 0x01);
0205 
0206     /* Write the fence to the scratch register */
0207     OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
0208     OUT_RING(ring, submit->seqno);
0209 
0210     /*
0211      * Execute a CACHE_FLUSH_TS event. This will ensure that the
0212      * timestamp is written to the memory and then triggers the interrupt
0213      */
0214     OUT_PKT7(ring, CP_EVENT_WRITE, 4);
0215     OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
0216         CP_EVENT_WRITE_0_IRQ);
0217     OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
0218     OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
0219     OUT_RING(ring, submit->seqno);
0220 
0221     /* Yield the floor on command completion */
0222     OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
0223     /*
0224      * If dword[2:1] are non-zero, they specify an address for the CP to
0225      * write the value of dword[3] to on preemption complete. Write 0 to
0226      * skip the write
0227      */
0228     OUT_RING(ring, 0x00);
0229     OUT_RING(ring, 0x00);
0230     /* Data value - not used if the address above is 0 */
0231     OUT_RING(ring, 0x01);
0232     /* Set bit 0 to trigger an interrupt on preempt complete */
0233     OUT_RING(ring, 0x01);
0234 
0235     /* A WHERE_AM_I packet is not needed after a YIELD */
0236     a5xx_flush(gpu, ring, false);
0237 
0238     /* Check to see if we need to start preemption */
0239     a5xx_preempt_trigger(gpu);
0240 }
0241 
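/* Static hardware clock gating (HWCG) register/value tables applied by
 * a5xx_set_hwcg() below: a5xx_hwcg is the default table, a50x_hwcg is used
 * for A506/A508 and a512_hwcg for A509/A512.
 */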
0242 static const struct adreno_five_hwcg_regs {
0243     u32 offset;
0244     u32 value;
0245 } a5xx_hwcg[] = {
0246     {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
0247     {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
0248     {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
0249     {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
0250     {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
0251     {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
0252     {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
0253     {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
0254     {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
0255     {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
0256     {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
0257     {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
0258     {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
0259     {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
0260     {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
0261     {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
0262     {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
0263     {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
0264     {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
0265     {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
0266     {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
0267     {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
0268     {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
0269     {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
0270     {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
0271     {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
0272     {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
0273     {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
0274     {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
0275     {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
0276     {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
0277     {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
0278     {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
0279     {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
0280     {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
0281     {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
0282     {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
0283     {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
0284     {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
0285     {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
0286     {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
0287     {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
0288     {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
0289     {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
0290     {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
0291     {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
0292     {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
0293     {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
0294     {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
0295     {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
0296     {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
0297     {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
0298     {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
0299     {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
0300     {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
0301     {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
0302     {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
0303     {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
0304     {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
0305     {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
0306     {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
0307     {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
0308     {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
0309     {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
0310     {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
0311     {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
0312     {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
0313     {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
0314     {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
0315     {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
0316     {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
0317     {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
0318     {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
0319     {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
0320     {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
0321     {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
0322     {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
0323     {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
0324     {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
0325     {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
0326     {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
0327     {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
0328     {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
0329     {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
0330     {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
0331     {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
0332     {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
0333     {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
0334     {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
0335     {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
0336     {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
0337     {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
0338 }, a50x_hwcg[] = {
0339     {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
0340     {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
0341     {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
0342     {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
0343     {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
0344     {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
0345     {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
0346     {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
0347     {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
0348     {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
0349     {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
0350     {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
0351     {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
0352     {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
0353     {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
0354     {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
0355     {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
0356     {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
0357     {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
0358     {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
0359     {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
0360     {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
0361     {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
0362     {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
0363     {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
0364     {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
0365     {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
0366     {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
0367     {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
0368     {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
0369     {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
0370     {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
0371     {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
0372     {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
0373     {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
0374     {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
0375     {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
0376     {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
0377 }, a512_hwcg[] = {
0378     {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
0379     {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
0380     {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
0381     {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
0382     {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
0383     {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
0384     {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
0385     {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
0386     {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
0387     {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
0388     {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
0389     {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
0390     {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
0391     {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
0392     {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
0393     {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
0394     {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
0395     {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
0396     {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
0397     {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
0398     {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
0399     {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
0400     {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
0401     {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
0402     {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
0403     {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
0404     {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
0405     {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
0406     {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
0407     {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
0408     {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
0409     {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
0410     {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
0411     {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
0412     {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
0413     {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
0414     {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
0415     {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
0416     {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
0417     {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
0418     {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
0419     {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
0420     {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
0421     {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
0422     {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
0423     {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
0424     {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
0425     {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
0426     {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
0427     {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
0428     {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
0429     {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
0430     {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
0431     {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
0432     {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
0433     {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
0434 };
0435 
0436 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
0437 {
0438     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
0439     const struct adreno_five_hwcg_regs *regs;
0440     unsigned int i, sz;
0441 
0442     if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu)) {
0443         regs = a50x_hwcg;
0444         sz = ARRAY_SIZE(a50x_hwcg);
0445     } else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) {
0446         regs = a512_hwcg;
0447         sz = ARRAY_SIZE(a512_hwcg);
0448     } else {
0449         regs = a5xx_hwcg;
0450         sz = ARRAY_SIZE(a5xx_hwcg);
0451     }
0452 
0453     for (i = 0; i < sz; i++)
0454         gpu_write(gpu, regs[i].offset,
0455               state ? regs[i].value : 0);
0456 
0457     if (adreno_is_a540(adreno_gpu)) {
0458         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
0459         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
0460     }
0461 
0462     gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
0463     gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
0464 }
0465 
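/* Bootstrap the CP microengine with a CP_ME_INIT packet on ring 0 and wait
 * for the GPU to go idle before continuing.
 */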
0466 static int a5xx_me_init(struct msm_gpu *gpu)
0467 {
0468     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
0469     struct msm_ringbuffer *ring = gpu->rb[0];
0470 
0471     OUT_PKT7(ring, CP_ME_INIT, 8);
0472 
0473     OUT_RING(ring, 0x0000002F);
0474 
0475     /* Enable multiple hardware contexts */
0476     OUT_RING(ring, 0x00000003);
0477 
0478     /* Enable error detection */
0479     OUT_RING(ring, 0x20000000);
0480 
0481     /* Don't enable header dump */
0482     OUT_RING(ring, 0x00000000);
0483     OUT_RING(ring, 0x00000000);
0484 
0485     /* Specify workarounds for various microcode issues */
0486     if (adreno_is_a506(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
0487         /* Workaround for token end syncs
0488          * Force a WFI after every direct-render 3D mode draw and every
0489          * 2D mode 3 draw
0490          */
0491         OUT_RING(ring, 0x0000000B);
0492     } else if (adreno_is_a510(adreno_gpu)) {
0493         /* Workaround for token and syncs */
0494         OUT_RING(ring, 0x00000001);
0495     } else {
0496         /* No workarounds enabled */
0497         OUT_RING(ring, 0x00000000);
0498     }
0499 
0500     OUT_RING(ring, 0x00000000);
0501     OUT_RING(ring, 0x00000000);
0502 
0503     a5xx_flush(gpu, ring, true);
0504     return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
0505 }
0506 
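/* Prime ring 0 for preemption: program the save record address, enable
 * yields and issue an initial CP_CONTEXT_SWITCH_YIELD. Skipped when only a
 * single ring is in use.
 */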
0507 static int a5xx_preempt_start(struct msm_gpu *gpu)
0508 {
0509     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
0510     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
0511     struct msm_ringbuffer *ring = gpu->rb[0];
0512 
0513     if (gpu->nr_rings == 1)
0514         return 0;
0515 
0516     /* Turn off protected mode to write to special registers */
0517     OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
0518     OUT_RING(ring, 0);
0519 
0520     /* Set the save preemption record for the ring/command */
0521     OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
0522     OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
0523     OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
0524 
0525     /* Turn back on protected mode */
0526     OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
0527     OUT_RING(ring, 1);
0528 
0529     OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
0530     OUT_RING(ring, 0x00);
0531 
0532     OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
0533     OUT_RING(ring, 0x01);
0534 
0535     OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
0536     OUT_RING(ring, 0x01);
0537 
0538     /* Yield the floor on command completion */
0539     OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
0540     OUT_RING(ring, 0x00);
0541     OUT_RING(ring, 0x00);
0542     OUT_RING(ring, 0x01);
0543     OUT_RING(ring, 0x01);
0544 
0545     /* The WHERE_AM_I packet is not needed after a YIELD is issued */
0546     a5xx_flush(gpu, ring, false);
0547 
0548     return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
0549 }
0550 
0551 static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
0552         struct drm_gem_object *obj)
0553 {
0554     u32 *buf = msm_gem_get_vaddr(obj);
0555 
0556     if (IS_ERR(buf))
0557         return;
0558 
0559     /*
0560      * If the lowest nibble is 0xa that is an indication that this microcode
0561      * has been patched. The actual version is in dword [3] but we only care
0562      * about the patchlevel which is the lowest nibble of dword [3]
0563      */
0564     if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
0565         a5xx_gpu->has_whereami = true;
0566 
0567     msm_gem_put_vaddr(obj);
0568 }
0569 
0570 static int a5xx_ucode_init(struct msm_gpu *gpu)
0571 {
0572     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
0573     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
0574     int ret;
0575 
0576     if (!a5xx_gpu->pm4_bo) {
0577         a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
0578             adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
0579 
0580 
0581         if (IS_ERR(a5xx_gpu->pm4_bo)) {
0582             ret = PTR_ERR(a5xx_gpu->pm4_bo);
0583             a5xx_gpu->pm4_bo = NULL;
0584             DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
0585                 ret);
0586             return ret;
0587         }
0588 
0589         msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
0590     }
0591 
0592     if (!a5xx_gpu->pfp_bo) {
0593         a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
0594             adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
0595 
0596         if (IS_ERR(a5xx_gpu->pfp_bo)) {
0597             ret = PTR_ERR(a5xx_gpu->pfp_bo);
0598             a5xx_gpu->pfp_bo = NULL;
0599             DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
0600                 ret);
0601             return ret;
0602         }
0603 
0604         msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
0605         a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
0606     }
0607 
0608     gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
0609         REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
0610 
0611     gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
0612         REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
0613 
0614     return 0;
0615 }
0616 
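/* Remote state value passed to qcom_scm_set_remote_state() to ask the secure
 * world to re-initialize an already-loaded zap shader.
 */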
0617 #define SCM_GPU_ZAP_SHADER_RESUME 0
0618 
0619 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
0620 {
0621     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
0622     int ret;
0623 
0624     /*
0625      * The Adreno 506 has the CPZ retention feature and doesn't require
0626      * the zap shader to be resumed
0627      */
0628     if (adreno_is_a506(adreno_gpu))
0629         return 0;
0630 
0631     ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
0632     if (ret)
0633         DRM_ERROR("%s: zap-shader resume failed: %d\n",
0634             gpu->name, ret);
0635 
0636     return ret;
0637 }
0638 
0639 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
0640 {
0641     static bool loaded;
0642     int ret;
0643 
0644     /*
0645      * If the zap shader is already loaded into memory we just need to kick
0646      * the remote processor to reinitialize it
0647      */
0648     if (loaded)
0649         return a5xx_zap_shader_resume(gpu);
0650 
0651     ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
0652 
0653     loaded = !ret;
0654     return ret;
0655 }
0656 
0657 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
0658       A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
0659       A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
0660       A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
0661       A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
0662       A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
0663       A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
0664       A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
0665       A5XX_RBBM_INT_0_MASK_CP_SW | \
0666       A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
0667       A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
0668       A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
0669 
0670 static int a5xx_hw_init(struct msm_gpu *gpu)
0671 {
0672     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
0673     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
0674     u32 regbit;
0675     int ret;
0676 
0677     gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
0678 
0679     if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
0680         adreno_is_a540(adreno_gpu))
0681         gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
0682 
0683     /* Make all blocks contribute to the GPU BUSY perf counter */
0684     gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
0685 
0686     /* Enable RBBM error reporting bits */
0687     gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
0688 
0689     if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
0690         /*
0691          * Mask out the activity signals from RB1-3 to avoid false
0692          * positives
0693          */
0694 
0695         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
0696             0xF0000000);
0697         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
0698             0xFFFFFFFF);
0699         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
0700             0xFFFFFFFF);
0701         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
0702             0xFFFFFFFF);
0703         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
0704             0xFFFFFFFF);
0705         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
0706             0xFFFFFFFF);
0707         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
0708             0xFFFFFFFF);
0709         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
0710             0xFFFFFFFF);
0711     }
0712 
0713     /* Enable fault detection */
0714     gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
0715         (1 << 30) | 0xFFFF);
0716 
0717     /* Turn on performance counters */
0718     gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
0719 
0720     /* Select CP0 to always count cycles */
0721     gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
0722 
0723     /* Select countable 6 for RBBM perf counter 0 to get the busy status for devfreq */
0724     gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
0725 
0726     /* Increase VFD cache access so LRZ and other data gets evicted less */
0727     gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
0728 
0729     /* Disable L2 bypass in the UCHE */
0730     gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
0731     gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
0732     gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
0733     gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
0734 
0735     /* Set the GMEM VA range (0x100000 to 0x100000 + gpu->gmem - 1) */
0736     gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
0737     gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
0738     gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
0739         0x00100000 + adreno_gpu->gmem - 1);
0740     gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
0741 
0742     if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
0743         adreno_is_a510(adreno_gpu)) {
0744         gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
0745         if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu))
0746             gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
0747         else
0748             gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
0749         gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
0750         gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
0751     } else {
0752         gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
0753         if (adreno_is_a530(adreno_gpu))
0754             gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
0755         else
0756             gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
0757         gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
0758         gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
0759     }
0760 
0761     if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu))
0762         gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
0763               (0x100 << 11 | 0x100 << 22));
0764     else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) ||
0765          adreno_is_a512(adreno_gpu))
0766         gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
0767               (0x200 << 11 | 0x200 << 22));
0768     else
0769         gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
0770               (0x400 << 11 | 0x300 << 22));
0771 
0772     if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
0773         gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
0774 
0775     /*
0776      * Disable the RB sampler datapath DP2 clock gating optimization
0777      * for 1-SP GPUs, as it is enabled by default.
0778      */
0779     if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
0780         adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu))
0781         gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9));
0782 
0783     /* Disable UCHE global filter as SP can invalidate/flush independently */
0784     gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29));
0785 
0786     /* Enable USE_RETENTION_FLOPS */
0787     gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
0788 
0789     /* Enable ME/PFP split notification */
0790     gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
0791 
0792     /*
0793      *  In A5x, the CCU can send a context_done event for a particular
0794      *  context to the UCHE, which ultimately reaches the CP even while a
0795      *  valid transaction for that context is still inside the CCU. This can
0796      *  let the CP program config registers, which makes the "valid
0797      *  transaction" inside the CCU be interpreted differently and can cause
0798      *  a GPU fault. This bug is fixed in the latest A510 revision. To enable
0799      *  the fix, bit[11] of RB_DBG_ECO_CNTL needs to be set to 0 (the default
0800      *  is 1, i.e. disabled). For older A510 revisions this bit is unused.
0801      */
0802     if (adreno_is_a510(adreno_gpu))
0803         gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
0804 
0805     /* Enable HWCG */
0806     a5xx_set_hwcg(gpu, true);
0807 
0808     gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
0809 
0810     /* Set the highest bank bit */
0811     if (adreno_is_a540(adreno_gpu))
0812         regbit = 2;
0813     else
0814         regbit = 1;
0815 
0816     gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
0817     gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
0818 
0819     if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
0820         adreno_is_a540(adreno_gpu))
0821         gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
0822 
0823     /* Disable All flat shading optimization (ALLFLATOPTDIS) */
0824     gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
0825 
0826     /* Protect registers from the CP */
0827     gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
0828 
0829     /* RBBM */
0830     gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
0831     gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
0832     gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
0833     gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
0834     gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
0835     gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
0836 
0837     /* Content protect */
0838     gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
0839         ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
0840             16));
0841     gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
0842         ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
0843 
0844     /* CP */
0845     gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
0846     gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
0847     gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
0848     gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
0849 
0850     /* RB */
0851     gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
0852     gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
0853 
0854     /* VPC */
0855     gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
0856     gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16));
0857 
0858     /* UCHE */
0859     gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
0860 
0861     /* SMMU */
0862     gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
0863             ADRENO_PROTECT_RW(0x10000, 0x8000));
0864 
0865     gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
0866     /*
0867      * Disable the trusted memory range - we don't actually support secure
0868      * memory rendering at this point in time and we don't want to block off
0869      * part of the virtual memory space.
0870      */
0871     gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
0872         REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
0873     gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
0874 
0875     /* Put the GPU into 64 bit by default */
0876     gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
0877     gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
0878     gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
0879     gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
0880     gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
0881     gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
0882     gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
0883     gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
0884     gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
0885     gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
0886     gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
0887     gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
0888 
0889     /*
0890      * VPC corner case with local memory load kill leads to corrupt
0891      * internal state. The normal disable does not work for all a5x chips,
0892      * so apply the following settings to disable it.
0893      */
0894     if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
0895         gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
0896         gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
0897     }
0898 
0899     ret = adreno_hw_init(gpu);
0900     if (ret)
0901         return ret;
0902 
0903     if (adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))
0904         a5xx_gpmu_ucode_init(gpu);
0905 
0906     ret = a5xx_ucode_init(gpu);
0907     if (ret)
0908         return ret;
0909 
0910     /* Set the ringbuffer address */
0911     gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
0912         gpu->rb[0]->iova);
0913 
0914     /*
0915      * If the microcode supports the WHERE_AM_I opcode then we can use that
0916      * in lieu of the RPTR shadow and enable preemption. Otherwise, we
0917      * can't safely use the RPTR shadow or preemption. In either case, the
0918      * RPTR shadow should be disabled in hardware.
0919      */
0920     gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
0921         MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
0922 
0923     /* Create a privileged buffer for the RPTR shadow */
0924     if (a5xx_gpu->has_whereami) {
0925         if (!a5xx_gpu->shadow_bo) {
0926             a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
0927                 sizeof(u32) * gpu->nr_rings,
0928                 MSM_BO_WC | MSM_BO_MAP_PRIV,
0929                 gpu->aspace, &a5xx_gpu->shadow_bo,
0930                 &a5xx_gpu->shadow_iova);
0931 
0932             if (IS_ERR(a5xx_gpu->shadow))
0933                 return PTR_ERR(a5xx_gpu->shadow);
0934 
0935             msm_gem_object_set_name(a5xx_gpu->shadow_bo, "shadow");
0936         }
0937 
0938         gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
0939             REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
0940     } else if (gpu->nr_rings > 1) {
0941         /* Disable preemption if WHERE_AM_I isn't available */
0942         a5xx_preempt_fini(gpu);
0943         gpu->nr_rings = 1;
0944     }
0945 
0946     a5xx_preempt_hw_init(gpu);
0947 
0948     /* Set up the RBBM interrupt mask used through the initial bringup stage */
0949     gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
0950 
0951     /* Clear ME_HALT to start the micro engine */
0952     gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
0953     ret = a5xx_me_init(gpu);
0954     if (ret)
0955         return ret;
0956 
0957     ret = a5xx_power_init(gpu);
0958     if (ret)
0959         return ret;
0960 
0961     /*
0962      * Send a pipeline event stat to get misbehaving counters to start
0963      * ticking correctly
0964      */
0965     if (adreno_is_a530(adreno_gpu)) {
0966         OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
0967         OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
0968 
0969         a5xx_flush(gpu, gpu->rb[0], true);
0970         if (!a5xx_idle(gpu, gpu->rb[0]))
0971             return -EINVAL;
0972     }
0973 
0974     /*
0975      * If the chip we are using supports loading one, try to load a zap
0976      * shader into the secure world. If successful we can use the CP to
0977      * switch out of secure mode. If not, we have no recourse but to try
0978      * to switch ourselves out manually. If we
0979      * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
0980      * be blocked and a permissions violation will soon follow.
0981      */
0982     ret = a5xx_zap_shader_init(gpu);
0983     if (!ret) {
0984         OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
0985         OUT_RING(gpu->rb[0], 0x00000000);
0986 
0987         a5xx_flush(gpu, gpu->rb[0], true);
0988         if (!a5xx_idle(gpu, gpu->rb[0]))
0989             return -EINVAL;
0990     } else if (ret == -ENODEV) {
0991         /*
0992          * This device does not use zap shader (but print a warning
0993          * just in case someone got their dt wrong.. hopefully they
0994          * have a debug UART to realize the error of their ways...
0995          * if you mess this up you are about to crash horribly)
0996          */
0997         dev_warn_once(gpu->dev->dev,
0998             "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
0999         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1000     } else {
1001         return ret;
1002     }
1003 
1004     /* Last step - yield the ringbuffer */
1005     a5xx_preempt_start(gpu);
1006 
1007     return 0;
1008 }
1009 
1010 static void a5xx_recover(struct msm_gpu *gpu)
1011 {
1012     int i;
1013 
1014     adreno_dump_info(gpu);
1015 
1016     for (i = 0; i < 8; i++) {
1017         printk("CP_SCRATCH_REG%d: %u\n", i,
1018             gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
1019     }
1020 
1021     if (hang_debug)
1022         a5xx_dump(gpu);
1023 
1024     gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
1025     gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
1026     gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
1027     adreno_recover(gpu);
1028 }
1029 
1030 static void a5xx_destroy(struct msm_gpu *gpu)
1031 {
1032     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1033     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1034 
1035     DBG("%s", gpu->name);
1036 
1037     a5xx_preempt_fini(gpu);
1038 
1039     if (a5xx_gpu->pm4_bo) {
1040         msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
1041         drm_gem_object_put(a5xx_gpu->pm4_bo);
1042     }
1043 
1044     if (a5xx_gpu->pfp_bo) {
1045         msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
1046         drm_gem_object_put(a5xx_gpu->pfp_bo);
1047     }
1048 
1049     if (a5xx_gpu->gpmu_bo) {
1050         msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
1051         drm_gem_object_put(a5xx_gpu->gpmu_bo);
1052     }
1053 
1054     if (a5xx_gpu->shadow_bo) {
1055         msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
1056         drm_gem_object_put(a5xx_gpu->shadow_bo);
1057     }
1058 
1059     adreno_gpu_cleanup(adreno_gpu);
1060     kfree(a5xx_gpu);
1061 }
1062 
1063 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
1064 {
1065     if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
1066         return false;
1067 
1068     /*
1069      * Nearly every abnormality ends up pausing the GPU and triggering a
1070      * fault so we can safely just watch for this one interrupt to fire
1071      */
1072     return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
1073         A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
1074 }
1075 
1076 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1077 {
1078     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1079     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1080 
1081     if (ring != a5xx_gpu->cur_ring) {
1082         WARN(1, "Tried to idle a non-current ringbuffer\n");
1083         return false;
1084     }
1085 
1086     /* wait for CP to drain ringbuffer: */
1087     if (!adreno_idle(gpu, ring))
1088         return false;
1089 
1090     if (spin_until(_a5xx_check_idle(gpu))) {
1091         DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
1092             gpu->name, __builtin_return_address(0),
1093             gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1094             gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
1095             gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1096             gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
1097         return false;
1098     }
1099 
1100     return true;
1101 }
1102 
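/* IOMMU pagefault callback: report the faulting iova and flags along with
 * the current values of CP scratch registers 4-7 for debugging.
 */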
1103 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1104 {
1105     struct msm_gpu *gpu = arg;
1106     pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
1107             iova, flags,
1108             gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
1109             gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
1110             gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
1111             gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
1112 
1113     return 0;
1114 }
1115 
1116 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
1117 {
1118     u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
1119 
1120     if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
1121         u32 val;
1122 
1123         gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
1124 
1125         /*
1126          * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
1127          * read it twice
1128          */
1129 
1130         gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1131         val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
1132 
1133         dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
1134             val);
1135     }
1136 
1137     if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
1138         dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
1139             gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
1140 
1141     if (status & A5XX_CP_INT_CP_DMA_ERROR)
1142         dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
1143 
1144     if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1145         u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
1146 
1147         dev_err_ratelimited(gpu->dev->dev,
1148             "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1149             val & (1 << 24) ? "WRITE" : "READ",
1150             (val & 0xFFFFF) >> 2, val);
1151     }
1152 
1153     if (status & A5XX_CP_INT_CP_AHB_ERROR) {
1154         u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
1155         const char *access[16] = { "reserved", "reserved",
1156             "timestamp lo", "timestamp hi", "pfp read", "pfp write",
1157             "", "", "me read", "me write", "", "", "crashdump read",
1158             "crashdump write" };
1159 
1160         dev_err_ratelimited(gpu->dev->dev,
1161             "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
1162             status & 0xFFFFF, access[(status >> 24) & 0xF],
1163             (status & (1 << 31)), status);
1164     }
1165 }
1166 
1167 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
1168 {
1169     if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
1170         u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
1171 
1172         dev_err_ratelimited(gpu->dev->dev,
1173             "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
1174             val & (1 << 28) ? "WRITE" : "READ",
1175             (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
1176             (val >> 24) & 0xF);
1177 
1178         /* Clear the error */
1179         gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
1180 
1181         /* Clear the interrupt */
1182         gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1183             A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1184     }
1185 
1186     if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
1187         dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
1188 
1189     if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
1190         dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
1191             gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
1192 
1193     if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
1194         dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
1195             gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
1196 
1197     if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1198         dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1199             gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1200 
1201     if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1202         dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1203 
1204     if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1205         dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1206 }
1207 
1208 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1209 {
1210     uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
1211 
1212     addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1213 
1214     dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1215         addr);
1216 }
1217 
1218 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1219 {
1220     dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1221 }
1222 
1223 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1224 {
1225     struct drm_device *dev = gpu->dev;
1226     struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1227 
1228     /*
1229      * If stalled on SMMU fault, we could trip the GPU's hang detection,
1230      * but the fault handler will trigger the devcore dump, and we want
1231      * to otherwise resume normally rather than killing the submit, so
1232      * just bail.
1233      */
1234     if (gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24))
1235         return;
1236 
1237     DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1238         ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1239         gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1240         gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1241         gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1242         gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1243         gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1244         gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1245         gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1246 
1247     /* Turn off the hangcheck timer to keep it from bothering us */
1248     del_timer(&gpu->hangcheck_timer);
1249 
1250     kthread_queue_work(gpu->worker, &gpu->recover_work);
1251 }
1252 
1253 #define RBBM_ERROR_MASK \
1254     (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1255     A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1256     A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1257     A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1258     A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1259     A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1260 
1261 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1262 {
1263     struct msm_drm_private *priv = gpu->dev->dev_private;
1264     u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1265 
1266     /*
1267      * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1268      * before the source is cleared the interrupt will storm.
1269      */
1270     gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1271         status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1272 
1273     if (priv->disable_err_irq) {
1274         status &= A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS |
1275               A5XX_RBBM_INT_0_MASK_CP_SW;
1276     }
1277 
1278     /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1279     if (status & RBBM_ERROR_MASK)
1280         a5xx_rbbm_err_irq(gpu, status);
1281 
1282     if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1283         a5xx_cp_err_irq(gpu);
1284 
1285     if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1286         a5xx_fault_detect_irq(gpu);
1287 
1288     if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1289         a5xx_uche_err_irq(gpu);
1290 
1291     if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1292         a5xx_gpmu_err_irq(gpu);
1293 
1294     if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1295         a5xx_preempt_trigger(gpu);
1296         msm_gpu_retire(gpu);
1297     }
1298 
1299     if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1300         a5xx_preempt_irq(gpu);
1301 
1302     return IRQ_HANDLED;
1303 }
1304 
1305 static const u32 a5xx_registers[] = {
1306     0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1307     0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1308     0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1309     0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1310     0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1311     0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1312     0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1313     0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1314     0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1315     0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1316     0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1317     0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1318     0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1319     0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1320     0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1321     0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1322     0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1323     0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1324     0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1325     0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1326     0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1327     0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1328     0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1329     0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1330     0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1331     0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1332     0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1333     0xAC60, 0xAC60, ~0,
1334 };
1335 
1336 static void a5xx_dump(struct msm_gpu *gpu)
1337 {
1338     DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1339         gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1340     adreno_dump(gpu);
1341 }
1342 
1343 static int a5xx_pm_resume(struct msm_gpu *gpu)
1344 {
1345     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1346     int ret;
1347 
1348     /* Turn on the core power */
1349     ret = msm_gpu_pm_resume(gpu);
1350     if (ret)
1351         return ret;
1352 
1353     /* Adreno 506, 508, 509, 510 and 512 need manual RBBM suspend/resume control */
1354     if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) {
1355         /* Halt the sp_input_clk at HM level */
1356         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1357         a5xx_set_hwcg(gpu, true);
1358         /* Turn on sp_input_clk at HM level */
1359         gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1360         return 0;
1361     }
1362 
1363     /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1364     gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1365 
1366     /* Wait 3 usecs before polling */
1367     udelay(3);
1368 
1369     ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1370         (1 << 20), (1 << 20));
1371     if (ret) {
1372         DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1373             gpu->name,
1374             gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1375         return ret;
1376     }
1377 
1378     /* Turn on the SP domain */
1379     gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1380     ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1381         (1 << 20), (1 << 20));
1382     if (ret)
1383         DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1384             gpu->name);
1385 
1386     return ret;
1387 }
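
/*
 * Editorial sketch, not part of the original file: spin_usecs() (defined
 * outside this excerpt) polls a status register until the masked value
 * matches or the timeout expires.  The GDSC-enable waits above amount to
 * something like the following, assuming only gpu_read() and udelay():
 */
static int __maybe_unused a5xx_poll_status_sketch(struct msm_gpu *gpu,
		u32 timeout_us, u32 reg, u32 mask, u32 value)
{
	while (timeout_us--) {
		if ((gpu_read(gpu, reg) & mask) == value)
			return 0;

		udelay(1);
	}

	return -ETIMEDOUT;
}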
1388 
1389 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1390 {
1391     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1392     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1393     u32 mask = 0xf;
1394     int i, ret;
1395 
1396     /* A506, A508, A510 have 3 XIN ports in VBIF */
1397     if (adreno_is_a506(adreno_gpu) || adreno_is_a508(adreno_gpu) ||
1398         adreno_is_a510(adreno_gpu))
1399         mask = 0x7;
1400 
1401     /* Clear the VBIF pipe before shutting down */
1402     gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1403     spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1404                 mask) == mask);
1405 
1406     gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1407 
1408     /*
1409      * Reset the VBIF before power collapse to avoid issues with FIFO
1410      * entries on Adreno A510 and A530 (this reset tends to lock up other GPUs)
1411      */
1412     if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
1413         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1414         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1415     }
1416 
1417     ret = msm_gpu_pm_suspend(gpu);
1418     if (ret)
1419         return ret;
1420 
1421     if (a5xx_gpu->has_whereami)
1422         for (i = 0; i < gpu->nr_rings; i++)
1423             a5xx_gpu->shadow[i] = 0;
1424 
1425     return 0;
1426 }
1427 
1428 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1429 {
1430     *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
1431         REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
1432 
1433     return 0;
1434 }
1435 
1436 struct a5xx_crashdumper {
1437     void *ptr;
1438     struct drm_gem_object *bo;
1439     u64 iova;
1440 };
1441 
1442 struct a5xx_gpu_state {
1443     struct msm_gpu_state base;
1444     u32 *hlsqregs;
1445 };
1446 
1447 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1448         struct a5xx_crashdumper *dumper)
1449 {
1450     dumper->ptr = msm_gem_kernel_new(gpu->dev,
1451         SZ_1M, MSM_BO_WC, gpu->aspace,
1452         &dumper->bo, &dumper->iova);
1453 
1454     if (!IS_ERR(dumper->ptr))
1455         msm_gem_object_set_name(dumper->bo, "crashdump");
1456 
1457     return PTR_ERR_OR_ZERO(dumper->ptr);
1458 }
1459 
1460 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1461         struct a5xx_crashdumper *dumper)
1462 {
1463     u32 val;
1464 
1465     if (IS_ERR_OR_NULL(dumper->ptr))
1466         return -EINVAL;
1467 
1468     gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1469         REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1470 
1471     gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1472 
1473     return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1474         val & 0x04, 100, 10000);
1475 }
1476 
1477 /*
1478  * This is a list of the registers that need to be read through the HLSQ
1479  * aperture via the crashdumper.  They are not normally accessible from
1480  * the CPU on a secure platform.
1481  */
1482 static const struct {
1483     u32 type;
1484     u32 regoffset;
1485     u32 count;
1486 } a5xx_hlsq_aperture_regs[] = {
1487     { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1488     { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1489     { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1490     { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1491     { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1492     { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1493     { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1494     { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1495     { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1496     { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1497     { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1498     { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1499     { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1500     { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1501     { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1502 };
1503 
1504 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1505         struct a5xx_gpu_state *a5xx_state)
1506 {
1507     struct a5xx_crashdumper dumper = { 0 };
1508     u32 offset, count = 0;
1509     u64 *ptr;
1510     int i;
1511 
1512     if (a5xx_crashdumper_init(gpu, &dumper))
1513         return;
1514 
1515     /* The script will be written at offset 0 */
1516     ptr = dumper.ptr;
1517 
1518     /* Start writing the data at offset 256k */
1519     offset = dumper.iova + (256 * SZ_1K);
1520 
1521     /* Count how many additional registers to get from the HLSQ aperture */
1522     for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1523         count += a5xx_hlsq_aperture_regs[i].count;
1524 
1525     a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1526     if (!a5xx_state->hlsqregs)
1527         return;
1528 
1529     /* Build the crashdump script */
1530     for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1531         u32 type = a5xx_hlsq_aperture_regs[i].type;
1532         u32 c = a5xx_hlsq_aperture_regs[i].count;
1533 
1534         /* Write the register to select the desired bank */
1535         *ptr++ = ((u64) type << 8);
1536         *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1537             (1 << 21) | 1;
1538 
1539         *ptr++ = offset;
1540         *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1541             | c;
1542 
1543         offset += c * sizeof(u32);
1544     }
1545 
1546     /* Write two zeros to close off the script */
1547     *ptr++ = 0;
1548     *ptr++ = 0;
1549 
1550     if (a5xx_crashdumper_run(gpu, &dumper)) {
1551         kfree(a5xx_state->hlsqregs);
1552         msm_gem_kernel_put(dumper.bo, gpu->aspace);
1553         return;
1554     }
1555 
1556     /* Copy the data from the crashdumper to the state */
1557     memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1558         count * sizeof(u32));
1559 
1560     msm_gem_kernel_put(dumper.bo, gpu->aspace);
1561 }
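
/*
 * Editorial sketch, not part of the original file: each crashdump script
 * entry built above is a pair of 64-bit words.  For a read, the first word
 * is the target iova and the second packs the register offset in the high
 * bits (<< 44) together with the dword count; the bank-select entries also
 * set bit 21, which appears to mark the entry as a register write of the
 * first word's value.  A helper that appends one "read count dwords into
 * iova" entry would look roughly like:
 */
static u64 * __maybe_unused a5xx_crashdump_script_read(u64 *ptr, u32 reg,
		u32 count, u64 target_iova)
{
	/* Word 0: where the crashdumper should deposit the data */
	*ptr++ = target_iova;
	/* Word 1: register dword offset in bits 63:44, dword count below */
	*ptr++ = ((u64) reg << 44) | count;

	return ptr;
}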
1562 
1563 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1564 {
1565     struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1566             GFP_KERNEL);
1567     bool stalled = !!(gpu_read(gpu, REG_A5XX_RBBM_STATUS3) & BIT(24));
1568 
1569     if (!a5xx_state)
1570         return ERR_PTR(-ENOMEM);
1571 
1572     /* Temporarily disable hardware clock gating before reading the hw */
1573     a5xx_set_hwcg(gpu, false);
1574 
1575     /* First get the generic state from the adreno core */
1576     adreno_gpu_state_get(gpu, &(a5xx_state->base));
1577 
1578     a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1579 
1580     /*
1581      * Get the HLSQ regs with the help of the crashdumper, but only if
1582      * we are not stalled in an iommu fault (in which case the crashdumper
1583      * would not have access to memory)
1584      */
1585     if (!stalled)
1586         a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1587 
1588     a5xx_set_hwcg(gpu, true);
1589 
1590     return &a5xx_state->base;
1591 }
1592 
1593 static void a5xx_gpu_state_destroy(struct kref *kref)
1594 {
1595     struct msm_gpu_state *state = container_of(kref,
1596         struct msm_gpu_state, ref);
1597     struct a5xx_gpu_state *a5xx_state = container_of(state,
1598         struct a5xx_gpu_state, base);
1599 
1600     kfree(a5xx_state->hlsqregs);
1601 
1602     adreno_gpu_state_destroy(state);
1603     kfree(a5xx_state);
1604 }
1605 
1606 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1607 {
1608     if (IS_ERR_OR_NULL(state))
1609         return 1;
1610 
1611     return kref_put(&state->ref, a5xx_gpu_state_destroy);
1612 }
1613 
1614 
1615 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1616 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1617               struct drm_printer *p)
1618 {
1619     int i, j;
1620     u32 pos = 0;
1621     struct a5xx_gpu_state *a5xx_state = container_of(state,
1622         struct a5xx_gpu_state, base);
1623 
1624     if (IS_ERR_OR_NULL(state))
1625         return;
1626 
1627     adreno_show(gpu, state, p);
1628 
1629     /* Dump the additional a5xx HLSQ registers */
1630     if (!a5xx_state->hlsqregs)
1631         return;
1632 
1633     drm_printf(p, "registers-hlsq:\n");
1634 
1635     for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1636         u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1637         u32 c = a5xx_hlsq_aperture_regs[i].count;
1638 
1639         for (j = 0; j < c; j++, pos++, o++) {
1640             /*
1641              * To keep the crashdump simple we pull the entire range
1642              * for each register type, but not all of the registers
1643              * in the range are valid. Fortunately, invalid registers
1644              * stick out like a sore thumb with a value of
1645              * 0xdeadbeef.
1646              */
1647             if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1648                 continue;
1649 
1650             drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1651                 o << 2, a5xx_state->hlsqregs[pos]);
1652         }
1653     }
1654 }
1655 #endif
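
/*
 * Editorial note, not part of the original file: with the format string
 * above, the HLSQ section of a core dump comes out as YAML-ish lines, with
 * the dword offset converted to a byte offset (o << 2).  Illustrative output
 * only (the values are placeholders):
 *
 * registers-hlsq:
 *   - { offset: 0x3800, value: 0x00000000 }
 *   - { offset: 0x3804, value: 0x00000010 }
 */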
1656 
1657 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1658 {
1659     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1660     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1661 
1662     return a5xx_gpu->cur_ring;
1663 }
1664 
1665 static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
1666 {
1667     u64 busy_cycles;
1668 
1669     busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1670             REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1671     *out_sample_rate = clk_get_rate(gpu->core_clk);
1672 
1673     return busy_cycles;
1674 }
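
/*
 * Editorial sketch, not part of the original file: callers typically turn
 * the (busy cycle counter, core clock rate) pair returned above into a
 * utilization figure by differencing two samples.  A rough illustration,
 * assuming div64_u64() from <linux/math64.h> is available:
 */
static unsigned int __maybe_unused a5xx_busy_percent_sketch(u64 busy_delta,
		unsigned long sample_rate, u64 elapsed_ns)
{
	u64 total_cycles;

	if (!sample_rate || !elapsed_ns)
		return 0;

	/* Cycles the core could have run during the elapsed window (ns -> s) */
	total_cycles = div64_u64((u64) sample_rate * elapsed_ns, 1000000000ULL);
	if (!total_cycles)
		return 0;

	/* Clamp to 100% in case the two counters were sampled slightly apart */
	return min_t(u64, div64_u64(busy_delta * 100, total_cycles), 100);
}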
1675 
1676 static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1677 {
1678     struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1679     struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1680 
1681     if (a5xx_gpu->has_whereami)
1682         return a5xx_gpu->shadow[ring->id];
1683 
1684     return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
1685 }
1686 
1687 static const struct adreno_gpu_funcs funcs = {
1688     .base = {
1689         .get_param = adreno_get_param,
1690         .set_param = adreno_set_param,
1691         .hw_init = a5xx_hw_init,
1692         .pm_suspend = a5xx_pm_suspend,
1693         .pm_resume = a5xx_pm_resume,
1694         .recover = a5xx_recover,
1695         .submit = a5xx_submit,
1696         .active_ring = a5xx_active_ring,
1697         .irq = a5xx_irq,
1698         .destroy = a5xx_destroy,
1699 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1700         .show = a5xx_show,
1701 #endif
1702 #if defined(CONFIG_DEBUG_FS)
1703         .debugfs_init = a5xx_debugfs_init,
1704 #endif
1705         .gpu_busy = a5xx_gpu_busy,
1706         .gpu_state_get = a5xx_gpu_state_get,
1707         .gpu_state_put = a5xx_gpu_state_put,
1708         .create_address_space = adreno_iommu_create_address_space,
1709         .get_rptr = a5xx_get_rptr,
1710     },
1711     .get_timestamp = a5xx_get_timestamp,
1712 };
1713 
1714 static void check_speed_bin(struct device *dev)
1715 {
1716     struct nvmem_cell *cell;
1717     u32 val;
1718 
1719     /*
1720      * If the OPP table specifies an opp-supported-hw property then we have
1721      * to set something with dev_pm_opp_set_supported_hw() or the table
1722      * doesn't get populated.  Pick an arbitrary value that should ensure
1723      * the default frequencies are selected but doesn't conflict with any
1724      * actual bins.
1725      */
1726     val = 0x80;
1727 
1728     cell = nvmem_cell_get(dev, "speed_bin");
1729 
1730     if (!IS_ERR(cell)) {
1731         void *buf = nvmem_cell_read(cell, NULL);
1732 
1733         if (!IS_ERR(buf)) {
1734             u8 bin = *((u8 *) buf);
1735 
1736             val = (1 << bin);
1737             kfree(buf);
1738         }
1739 
1740         nvmem_cell_put(cell);
1741     }
1742 
1743     devm_pm_opp_set_supported_hw(dev, &val, 1);
1744 }
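
/*
 * Editorial note, not part of the original file: the value passed to
 * devm_pm_opp_set_supported_hw() is matched bitwise against each OPP's
 * opp-supported-hw property, so an OPP stays enabled only when the two
 * share a set bit.  For example, a speed_bin fuse value of 2 gives
 * val = (1 << 2) = 0x4 and would keep an OPP declared roughly like the
 * following (illustrative device-tree values) in the table:
 *
 *	opp-430000000 {
 *		opp-hz = /bits/ 64 <430000000>;
 *		opp-supported-hw = <0x4>;
 *	};
 */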
1745 
1746 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1747 {
1748     struct msm_drm_private *priv = dev->dev_private;
1749     struct platform_device *pdev = priv->gpu_pdev;
1750     struct a5xx_gpu *a5xx_gpu = NULL;
1751     struct adreno_gpu *adreno_gpu;
1752     struct msm_gpu *gpu;
1753     int ret;
1754 
1755     if (!pdev) {
1756         DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1757         return ERR_PTR(-ENXIO);
1758     }
1759 
1760     a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1761     if (!a5xx_gpu)
1762         return ERR_PTR(-ENOMEM);
1763 
1764     adreno_gpu = &a5xx_gpu->base;
1765     gpu = &adreno_gpu->base;
1766 
1767     adreno_gpu->registers = a5xx_registers;
1768 
1769     a5xx_gpu->lm_leakage = 0x4E001A;
1770 
1771     check_speed_bin(&pdev->dev);
1772 
1773     ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1774     if (ret) {
1775         a5xx_destroy(&(a5xx_gpu->base.base));
1776         return ERR_PTR(ret);
1777     }
1778 
1779     if (gpu->aspace)
1780         msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1781 
1782     /* Set up the preemption specific bits and pieces for each ringbuffer */
1783     a5xx_preempt_init(gpu);
1784 
1785     return gpu;
1786 }