Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright 2021 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  */
0023 #include <linux/delay.h>
0024 #include <linux/kernel.h>
0025 #include <linux/firmware.h>
0026 #include <linux/module.h>
0027 #include <linux/pci.h>
0028 #include "amdgpu.h"
0029 #include "amdgpu_gfx.h"
0030 #include "amdgpu_psp.h"
0031 #include "amdgpu_smu.h"
0032 #include "amdgpu_atomfirmware.h"
0033 #include "imu_v11_0.h"
0034 #include "soc21.h"
0035 #include "nvd.h"
0036 
0037 #include "gc/gc_11_0_0_offset.h"
0038 #include "gc/gc_11_0_0_sh_mask.h"
0039 #include "smuio/smuio_13_0_6_offset.h"
0040 #include "smuio/smuio_13_0_6_sh_mask.h"
0041 #include "navi10_enum.h"
0042 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
0043 
0044 #include "soc15.h"
0045 #include "soc15d.h"
0046 #include "clearstate_gfx11.h"
0047 #include "v11_structs.h"
0048 #include "gfx_v11_0.h"
0049 #include "nbio_v4_3.h"
0050 #include "mes_v11_0.h"
0051 
0052 #define GFX11_NUM_GFX_RINGS     1
0053 #define GFX11_MEC_HPD_SIZE  2048
0054 
0055 #define RLCG_UCODE_LOADING_START_ADDRESS    0x00002000L
0056 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1    0x1388
0057 
0058 #define regCGTT_WD_CLK_CTRL     0x5086
0059 #define regCGTT_WD_CLK_CTRL_BASE_IDX    1
0060 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1   0x4e7e
0061 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX  1
0062 
0063 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
0064 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
0065 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
0066 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
0067 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
0068 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
0069 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
0070 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
0071 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
0072 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
0073 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
0074 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
0075 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
0076 
0077 static const struct soc15_reg_golden golden_settings_gc_11_0[] =
0078 {
0079     /* Pending on emulation bring up */
0080 };
0081 
0082 static const struct soc15_reg_golden golden_settings_gc_11_0_0[] =
0083 {
0084     /* Pending on emulation bring up */
0085 };
0086 
0087 static const struct soc15_reg_golden golden_settings_gc_rlc_spm_11_0[] =
0088 {
0089     /* Pending on emulation bring up */
0090 };
0091 
0092 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
0093 {
0094     SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
0095     SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
0096     SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
0097     SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
0098     SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
0099     SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
0100     SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
0101     SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
0102     SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
0103 };
0104 
0105 #define DEFAULT_SH_MEM_CONFIG \
0106     ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
0107      (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
0108      (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
0109 
0110 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
0111 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
0112 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
0113 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
0114 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
0115 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
0116 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
0117 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
0118                                  struct amdgpu_cu_info *cu_info);
0119 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
0120 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
0121                    u32 sh_num, u32 instance);
0122 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
0123 
0124 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
0125 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
0126 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
0127                      uint32_t val);
0128 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
0129 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
0130                        uint16_t pasid, uint32_t flush_type,
0131                        bool all_hub, uint8_t dst_sel);
0132 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
0133 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
0134 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
0135                       bool enable);
0136 
0137 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
0138 {
0139     amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
0140     amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
0141               PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
0142     amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
0143     amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
0144     amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
0145     amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
0146     amdgpu_ring_write(kiq_ring, 0); /* oac mask */
0147     amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
0148 }
0149 
0150 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
0151                  struct amdgpu_ring *ring)
0152 {
0153     uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
0154     uint64_t wptr_addr = ring->wptr_gpu_addr;
0155     uint32_t me = 0, eng_sel = 0;
0156 
0157     switch (ring->funcs->type) {
0158     case AMDGPU_RING_TYPE_COMPUTE:
0159         me = 1;
0160         eng_sel = 0;
0161         break;
0162     case AMDGPU_RING_TYPE_GFX:
0163         me = 0;
0164         eng_sel = 4;
0165         break;
0166     case AMDGPU_RING_TYPE_MES:
0167         me = 2;
0168         eng_sel = 5;
0169         break;
0170     default:
0171         WARN_ON(1);
0172     }
0173 
0174     amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
0175     /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
0176     amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
0177               PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
0178               PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
0179               PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
0180               PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
0181               PACKET3_MAP_QUEUES_ME((me)) |
0182               PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
0183               PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
0184               PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
0185               PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
0186     amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
0187     amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
0188     amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
0189     amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
0190     amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
0191 }
0192 
0193 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
0194                    struct amdgpu_ring *ring,
0195                    enum amdgpu_unmap_queues_action action,
0196                    u64 gpu_addr, u64 seq)
0197 {
0198     struct amdgpu_device *adev = kiq_ring->adev;
0199     uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
0200 
0201     if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
0202         amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
0203         return;
0204     }
0205 
0206     amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
0207     amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
0208               PACKET3_UNMAP_QUEUES_ACTION(action) |
0209               PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
0210               PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
0211               PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
0212     amdgpu_ring_write(kiq_ring,
0213           PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
0214 
0215     if (action == PREEMPT_QUEUES_NO_UNMAP) {
0216         amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
0217         amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
0218         amdgpu_ring_write(kiq_ring, seq);
0219     } else {
0220         amdgpu_ring_write(kiq_ring, 0);
0221         amdgpu_ring_write(kiq_ring, 0);
0222         amdgpu_ring_write(kiq_ring, 0);
0223     }
0224 }
0225 
0226 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
0227                    struct amdgpu_ring *ring,
0228                    u64 addr,
0229                    u64 seq)
0230 {
0231     uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
0232 
0233     amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
0234     amdgpu_ring_write(kiq_ring,
0235               PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
0236               PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
0237               PACKET3_QUERY_STATUS_COMMAND(2));
0238     amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
0239               PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
0240               PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
0241     amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
0242     amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
0243     amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
0244     amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
0245 }
0246 
0247 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
0248                 uint16_t pasid, uint32_t flush_type,
0249                 bool all_hub)
0250 {
0251     gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
0252 }
0253 
0254 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
0255     .kiq_set_resources = gfx11_kiq_set_resources,
0256     .kiq_map_queues = gfx11_kiq_map_queues,
0257     .kiq_unmap_queues = gfx11_kiq_unmap_queues,
0258     .kiq_query_status = gfx11_kiq_query_status,
0259     .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
0260     .set_resources_size = 8,
0261     .map_queues_size = 7,
0262     .unmap_queues_size = 6,
0263     .query_status_size = 7,
0264     .invalidate_tlbs_size = 2,
0265 };
0266 
0267 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
0268 {
0269     adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
0270 }
0271 
0272 static void gfx_v11_0_init_spm_golden_registers(struct amdgpu_device *adev)
0273 {
0274     switch (adev->ip_versions[GC_HWIP][0]) {
0275     case IP_VERSION(11, 0, 0):
0276         soc15_program_register_sequence(adev,
0277                         golden_settings_gc_rlc_spm_11_0,
0278                         (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_11_0));
0279         break;
0280     default:
0281         break;
0282     }
0283 }
0284 
0285 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
0286 {
0287     switch (adev->ip_versions[GC_HWIP][0]) {
0288     case IP_VERSION(11, 0, 0):
0289         soc15_program_register_sequence(adev,
0290                         golden_settings_gc_11_0,
0291                         (const u32)ARRAY_SIZE(golden_settings_gc_11_0));
0292         soc15_program_register_sequence(adev,
0293                         golden_settings_gc_11_0_0,
0294                         (const u32)ARRAY_SIZE(golden_settings_gc_11_0_0));
0295         break;
0296     case IP_VERSION(11, 0, 1):
0297         soc15_program_register_sequence(adev,
0298                         golden_settings_gc_11_0,
0299                         (const u32)ARRAY_SIZE(golden_settings_gc_11_0));
0300         soc15_program_register_sequence(adev,
0301                         golden_settings_gc_11_0_1,
0302                         (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
0303         break;
0304     default:
0305         break;
0306     }
0307     gfx_v11_0_init_spm_golden_registers(adev);
0308 }
0309 
0310 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
0311                        bool wc, uint32_t reg, uint32_t val)
0312 {
0313     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
0314     amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
0315               WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
0316     amdgpu_ring_write(ring, reg);
0317     amdgpu_ring_write(ring, 0);
0318     amdgpu_ring_write(ring, val);
0319 }
0320 
0321 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
0322                   int mem_space, int opt, uint32_t addr0,
0323                   uint32_t addr1, uint32_t ref, uint32_t mask,
0324                   uint32_t inv)
0325 {
0326     amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
0327     amdgpu_ring_write(ring,
0328               /* memory (1) or register (0) */
0329               (WAIT_REG_MEM_MEM_SPACE(mem_space) |
0330                WAIT_REG_MEM_OPERATION(opt) | /* wait */
0331                WAIT_REG_MEM_FUNCTION(3) |  /* equal */
0332                WAIT_REG_MEM_ENGINE(eng_sel)));
0333 
0334     if (mem_space)
0335         BUG_ON(addr0 & 0x3); /* Dword align */
0336     amdgpu_ring_write(ring, addr0);
0337     amdgpu_ring_write(ring, addr1);
0338     amdgpu_ring_write(ring, ref);
0339     amdgpu_ring_write(ring, mask);
0340     amdgpu_ring_write(ring, inv); /* poll interval */
0341 }
0342 
0343 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
0344 {
0345     struct amdgpu_device *adev = ring->adev;
0346     uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
0347     uint32_t tmp = 0;
0348     unsigned i;
0349     int r;
0350 
0351     WREG32(scratch, 0xCAFEDEAD);
0352     r = amdgpu_ring_alloc(ring, 5);
0353     if (r) {
0354         DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
0355               ring->idx, r);
0356         return r;
0357     }
0358 
0359     if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
0360         gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
0361     } else {
0362         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
0363         amdgpu_ring_write(ring, scratch -
0364                   PACKET3_SET_UCONFIG_REG_START);
0365         amdgpu_ring_write(ring, 0xDEADBEEF);
0366     }
0367     amdgpu_ring_commit(ring);
0368 
0369     for (i = 0; i < adev->usec_timeout; i++) {
0370         tmp = RREG32(scratch);
0371         if (tmp == 0xDEADBEEF)
0372             break;
0373         if (amdgpu_emu_mode == 1)
0374             msleep(1);
0375         else
0376             udelay(1);
0377     }
0378 
0379     if (i >= adev->usec_timeout)
0380         r = -ETIMEDOUT;
0381     return r;
0382 }
0383 
0384 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
0385 {
0386     struct amdgpu_device *adev = ring->adev;
0387     struct amdgpu_ib ib;
0388     struct dma_fence *f = NULL;
0389     unsigned index;
0390     uint64_t gpu_addr;
0391     volatile uint32_t *cpu_ptr;
0392     long r;
0393 
0394     /* MES KIQ fw hasn't indirect buffer support for now */
0395     if (adev->enable_mes_kiq &&
0396         ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
0397         return 0;
0398 
0399     memset(&ib, 0, sizeof(ib));
0400 
0401     if (ring->is_mes_queue) {
0402         uint32_t padding, offset;
0403 
0404         offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
0405         padding = amdgpu_mes_ctx_get_offs(ring,
0406                           AMDGPU_MES_CTX_PADDING_OFFS);
0407 
0408         ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
0409         ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
0410 
0411         gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
0412         cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
0413         *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
0414     } else {
0415         r = amdgpu_device_wb_get(adev, &index);
0416         if (r)
0417             return r;
0418 
0419         gpu_addr = adev->wb.gpu_addr + (index * 4);
0420         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
0421         cpu_ptr = &adev->wb.wb[index];
0422 
0423         r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
0424         if (r) {
0425             DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
0426             goto err1;
0427         }
0428     }
0429 
0430     ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
0431     ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
0432     ib.ptr[2] = lower_32_bits(gpu_addr);
0433     ib.ptr[3] = upper_32_bits(gpu_addr);
0434     ib.ptr[4] = 0xDEADBEEF;
0435     ib.length_dw = 5;
0436 
0437     r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
0438     if (r)
0439         goto err2;
0440 
0441     r = dma_fence_wait_timeout(f, false, timeout);
0442     if (r == 0) {
0443         r = -ETIMEDOUT;
0444         goto err2;
0445     } else if (r < 0) {
0446         goto err2;
0447     }
0448 
0449     if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
0450         r = 0;
0451     else
0452         r = -EINVAL;
0453 err2:
0454     if (!ring->is_mes_queue)
0455         amdgpu_ib_free(adev, &ib, NULL);
0456     dma_fence_put(f);
0457 err1:
0458     if (!ring->is_mes_queue)
0459         amdgpu_device_wb_free(adev, index);
0460     return r;
0461 }
0462 
0463 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
0464 {
0465     release_firmware(adev->gfx.pfp_fw);
0466     adev->gfx.pfp_fw = NULL;
0467     release_firmware(adev->gfx.me_fw);
0468     adev->gfx.me_fw = NULL;
0469     release_firmware(adev->gfx.rlc_fw);
0470     adev->gfx.rlc_fw = NULL;
0471     release_firmware(adev->gfx.mec_fw);
0472     adev->gfx.mec_fw = NULL;
0473 
0474     kfree(adev->gfx.rlc.register_list_format);
0475 }
0476 
0477 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
0478 {
0479     char fw_name[40];
0480     char ucode_prefix[30];
0481     int err;
0482     struct amdgpu_firmware_info *info = NULL;
0483     const struct common_firmware_header *header = NULL;
0484     const struct gfx_firmware_header_v1_0 *cp_hdr;
0485     const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
0486     const struct rlc_firmware_header_v2_0 *rlc_hdr;
0487     uint16_t version_major;
0488     uint16_t version_minor;
0489 
0490     DRM_DEBUG("\n");
0491 
0492     amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
0493 
0494     snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
0495     err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
0496     if (err)
0497         goto out;
0498     err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
0499     if (err)
0500         goto out;
0501     /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
0502     adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
0503                 (union amdgpu_firmware_header *)
0504                 adev->gfx.pfp_fw->data, 2, 0);
0505     if (adev->gfx.rs64_enable) {
0506         dev_info(adev->dev, "CP RS64 enable\n");
0507         cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
0508         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
0509         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
0510         
0511     } else {
0512         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
0513         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
0514         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
0515     }
0516 
0517     snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
0518     err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
0519     if (err)
0520         goto out;
0521     err = amdgpu_ucode_validate(adev->gfx.me_fw);
0522     if (err)
0523         goto out;
0524     if (adev->gfx.rs64_enable) {
0525         cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
0526         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
0527         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
0528         
0529     } else {
0530         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
0531         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
0532         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
0533     }
0534 
0535     if (!amdgpu_sriov_vf(adev)) {
0536         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
0537         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
0538         if (err)
0539             goto out;
0540         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
0541         if (err)
0542             goto out;
0543         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
0544         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
0545         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
0546         err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
0547         if (err)
0548             goto out;
0549     }
0550 
0551     snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
0552     err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
0553     if (err)
0554         goto out;
0555     err = amdgpu_ucode_validate(adev->gfx.mec_fw);
0556     if (err)
0557         goto out;
0558     if (adev->gfx.rs64_enable) {
0559         cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
0560         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
0561         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
0562         
0563     } else {
0564         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
0565         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
0566         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
0567     }
0568 
0569     /* only one MEC for gfx 11.0.0. */
0570     adev->gfx.mec2_fw = NULL;
0571 
0572     if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
0573         if (adev->gfx.rs64_enable) {
0574             cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
0575             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP];
0576             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP;
0577             info->fw = adev->gfx.pfp_fw;
0578             header = (const struct common_firmware_header *)info->fw->data;
0579             adev->firmware.fw_size +=
0580                 ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
0581 
0582             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK];
0583             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK;
0584             info->fw = adev->gfx.pfp_fw;
0585             header = (const struct common_firmware_header *)info->fw->data;
0586             adev->firmware.fw_size +=
0587                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
0588 
0589             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK];
0590             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK;
0591             info->fw = adev->gfx.pfp_fw;
0592             header = (const struct common_firmware_header *)info->fw->data;
0593             adev->firmware.fw_size +=
0594                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
0595 
0596             cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
0597             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME];
0598             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME;
0599             info->fw = adev->gfx.me_fw;
0600             header = (const struct common_firmware_header *)info->fw->data;
0601             adev->firmware.fw_size +=
0602                 ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
0603 
0604             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK];
0605             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK;
0606             info->fw = adev->gfx.me_fw;
0607             header = (const struct common_firmware_header *)info->fw->data;
0608             adev->firmware.fw_size +=
0609                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
0610 
0611             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK];
0612             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK;
0613             info->fw = adev->gfx.me_fw;
0614             header = (const struct common_firmware_header *)info->fw->data;
0615             adev->firmware.fw_size +=
0616                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
0617 
0618             cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
0619             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC];
0620             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC;
0621             info->fw = adev->gfx.mec_fw;
0622             header = (const struct common_firmware_header *)info->fw->data;
0623             adev->firmware.fw_size +=
0624                 ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
0625 
0626             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK];
0627             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK;
0628             info->fw = adev->gfx.mec_fw;
0629             header = (const struct common_firmware_header *)info->fw->data;
0630             adev->firmware.fw_size +=
0631                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
0632 
0633             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK];
0634             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK;
0635             info->fw = adev->gfx.mec_fw;
0636             header = (const struct common_firmware_header *)info->fw->data;
0637             adev->firmware.fw_size +=
0638                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
0639 
0640             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK];
0641             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK;
0642             info->fw = adev->gfx.mec_fw;
0643             header = (const struct common_firmware_header *)info->fw->data;
0644             adev->firmware.fw_size +=
0645                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
0646 
0647             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK];
0648             info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK;
0649             info->fw = adev->gfx.mec_fw;
0650             header = (const struct common_firmware_header *)info->fw->data;
0651             adev->firmware.fw_size +=
0652                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
0653         } else {
0654             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
0655             info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
0656             info->fw = adev->gfx.pfp_fw;
0657             header = (const struct common_firmware_header *)info->fw->data;
0658             adev->firmware.fw_size +=
0659                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
0660 
0661             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
0662             info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
0663             info->fw = adev->gfx.me_fw;
0664             header = (const struct common_firmware_header *)info->fw->data;
0665             adev->firmware.fw_size +=
0666                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
0667 
0668             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
0669             info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
0670             info->fw = adev->gfx.mec_fw;
0671             header = (const struct common_firmware_header *)info->fw->data;
0672             cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
0673             adev->firmware.fw_size +=
0674                 ALIGN(le32_to_cpu(header->ucode_size_bytes) -
0675                       le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
0676 
0677             info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
0678             info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
0679             info->fw = adev->gfx.mec_fw;
0680             adev->firmware.fw_size +=
0681                 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
0682         }
0683     }
0684 
0685 out:
0686     if (err) {
0687         dev_err(adev->dev,
0688             "gfx11: Failed to load firmware \"%s\"\n",
0689             fw_name);
0690         release_firmware(adev->gfx.pfp_fw);
0691         adev->gfx.pfp_fw = NULL;
0692         release_firmware(adev->gfx.me_fw);
0693         adev->gfx.me_fw = NULL;
0694         release_firmware(adev->gfx.rlc_fw);
0695         adev->gfx.rlc_fw = NULL;
0696         release_firmware(adev->gfx.mec_fw);
0697         adev->gfx.mec_fw = NULL;
0698     }
0699 
0700     return err;
0701 }
0702 
0703 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
0704 {
0705     const struct psp_firmware_header_v1_0 *toc_hdr;
0706     int err = 0;
0707     char fw_name[40];
0708     char ucode_prefix[30];
0709 
0710     amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
0711 
0712     snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
0713     err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
0714     if (err)
0715         goto out;
0716 
0717     err = amdgpu_ucode_validate(adev->psp.toc_fw);
0718     if (err)
0719         goto out;
0720 
0721     toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
0722     adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
0723     adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
0724     adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
0725     adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
0726                 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
0727     return 0;
0728 out:
0729     dev_err(adev->dev, "Failed to load TOC microcode\n");
0730     release_firmware(adev->psp.toc_fw);
0731     adev->psp.toc_fw = NULL;
0732     return err;
0733 }
0734 
0735 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
0736 {
0737     u32 count = 0;
0738     const struct cs_section_def *sect = NULL;
0739     const struct cs_extent_def *ext = NULL;
0740 
0741     /* begin clear state */
0742     count += 2;
0743     /* context control state */
0744     count += 3;
0745 
0746     for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
0747         for (ext = sect->section; ext->extent != NULL; ++ext) {
0748             if (sect->id == SECT_CONTEXT)
0749                 count += 2 + ext->reg_count;
0750             else
0751                 return 0;
0752         }
0753     }
0754 
0755     /* set PA_SC_TILE_STEERING_OVERRIDE */
0756     count += 3;
0757     /* end clear state */
0758     count += 2;
0759     /* clear state */
0760     count += 2;
0761 
0762     return count;
0763 }
0764 
0765 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
0766                     volatile u32 *buffer)
0767 {
0768     u32 count = 0, i;
0769     const struct cs_section_def *sect = NULL;
0770     const struct cs_extent_def *ext = NULL;
0771     int ctx_reg_offset;
0772 
0773     if (adev->gfx.rlc.cs_data == NULL)
0774         return;
0775     if (buffer == NULL)
0776         return;
0777 
0778     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
0779     buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
0780 
0781     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
0782     buffer[count++] = cpu_to_le32(0x80000000);
0783     buffer[count++] = cpu_to_le32(0x80000000);
0784 
0785     for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
0786         for (ext = sect->section; ext->extent != NULL; ++ext) {
0787             if (sect->id == SECT_CONTEXT) {
0788                 buffer[count++] =
0789                     cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
0790                 buffer[count++] = cpu_to_le32(ext->reg_index -
0791                         PACKET3_SET_CONTEXT_REG_START);
0792                 for (i = 0; i < ext->reg_count; i++)
0793                     buffer[count++] = cpu_to_le32(ext->extent[i]);
0794             } else {
0795                 return;
0796             }
0797         }
0798     }
0799 
0800     ctx_reg_offset =
0801         SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
0802     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
0803     buffer[count++] = cpu_to_le32(ctx_reg_offset);
0804     buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
0805 
0806     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
0807     buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
0808 
0809     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
0810     buffer[count++] = cpu_to_le32(0);
0811 }
0812 
0813 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
0814 {
0815     /* clear state block */
0816     amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
0817             &adev->gfx.rlc.clear_state_gpu_addr,
0818             (void **)&adev->gfx.rlc.cs_ptr);
0819 
0820     /* jump table block */
0821     amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
0822             &adev->gfx.rlc.cp_table_gpu_addr,
0823             (void **)&adev->gfx.rlc.cp_table_ptr);
0824 }
0825 
0826 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
0827 {
0828     struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
0829 
0830     reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
0831     reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
0832     reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
0833     reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
0834     reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
0835     reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
0836     reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
0837     reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
0838     adev->gfx.rlc.rlcg_reg_access_supported = true;
0839 }
0840 
0841 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
0842 {
0843     const struct cs_section_def *cs_data;
0844     int r;
0845 
0846     adev->gfx.rlc.cs_data = gfx11_cs_data;
0847 
0848     cs_data = adev->gfx.rlc.cs_data;
0849 
0850     if (cs_data) {
0851         /* init clear state block */
0852         r = amdgpu_gfx_rlc_init_csb(adev);
0853         if (r)
0854             return r;
0855     }
0856 
0857     /* init spm vmid with 0xf */
0858     if (adev->gfx.rlc.funcs->update_spm_vmid)
0859         adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
0860 
0861     return 0;
0862 }
0863 
0864 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
0865 {
0866     amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
0867     amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
0868     amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
0869 }
0870 
0871 static int gfx_v11_0_me_init(struct amdgpu_device *adev)
0872 {
0873     int r;
0874 
0875     bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
0876 
0877     amdgpu_gfx_graphics_queue_acquire(adev);
0878 
0879     r = gfx_v11_0_init_microcode(adev);
0880     if (r)
0881         DRM_ERROR("Failed to load gfx firmware!\n");
0882 
0883     return r;
0884 }
0885 
0886 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
0887 {
0888     int r;
0889     u32 *hpd;
0890     size_t mec_hpd_size;
0891 
0892     bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
0893 
0894     /* take ownership of the relevant compute queues */
0895     amdgpu_gfx_compute_queue_acquire(adev);
0896     mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
0897 
0898     if (mec_hpd_size) {
0899         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
0900                           AMDGPU_GEM_DOMAIN_GTT,
0901                           &adev->gfx.mec.hpd_eop_obj,
0902                           &adev->gfx.mec.hpd_eop_gpu_addr,
0903                           (void **)&hpd);
0904         if (r) {
0905             dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
0906             gfx_v11_0_mec_fini(adev);
0907             return r;
0908         }
0909 
0910         memset(hpd, 0, mec_hpd_size);
0911 
0912         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
0913         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
0914     }
0915 
0916     return 0;
0917 }
0918 
0919 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
0920 {
0921     WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
0922         (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
0923         (address << SQ_IND_INDEX__INDEX__SHIFT));
0924     return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
0925 }
0926 
0927 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
0928                uint32_t thread, uint32_t regno,
0929                uint32_t num, uint32_t *out)
0930 {
0931     WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
0932         (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
0933         (regno << SQ_IND_INDEX__INDEX__SHIFT) |
0934         (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
0935         (SQ_IND_INDEX__AUTO_INCR_MASK));
0936     while (num--)
0937         *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
0938 }
0939 
0940 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
0941 {
0942     /* in gfx11 the SIMD_ID is specified as part of the INSTANCE
0943      * field when performing a select_se_sh so it should be
0944      * zero here */
0945     WARN_ON(simd != 0);
0946 
0947     /* type 2 wave data */
0948     dst[(*no_fields)++] = 2;
0949     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
0950     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
0951     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
0952     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
0953     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
0954     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
0955     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
0956     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
0957     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
0958     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
0959     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
0960     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
0961     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
0962     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
0963     dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
0964 }
0965 
0966 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
0967                      uint32_t wave, uint32_t start,
0968                      uint32_t size, uint32_t *dst)
0969 {
0970     WARN_ON(simd != 0);
0971 
0972     wave_read_regs(
0973         adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
0974         dst);
0975 }
0976 
0977 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
0978                       uint32_t wave, uint32_t thread,
0979                       uint32_t start, uint32_t size,
0980                       uint32_t *dst)
0981 {
0982     wave_read_regs(
0983         adev, wave, thread,
0984         start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
0985 }
0986 
0987 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
0988                                       u32 me, u32 pipe, u32 q, u32 vm)
0989 {
0990     soc21_grbm_select(adev, me, pipe, q, vm);
0991 }
0992 
0993 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
0994     .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
0995     .select_se_sh = &gfx_v11_0_select_se_sh,
0996     .read_wave_data = &gfx_v11_0_read_wave_data,
0997     .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
0998     .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
0999     .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
1000     .init_spm_golden = &gfx_v11_0_init_spm_golden_registers,
1001     .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
1002 };
1003 
1004 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
1005 {
1006     adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
1007 
1008     switch (adev->ip_versions[GC_HWIP][0]) {
1009     case IP_VERSION(11, 0, 0):
1010     case IP_VERSION(11, 0, 2):
1011         adev->gfx.config.max_hw_contexts = 8;
1012         adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1013         adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1014         adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1015         adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1016         break;
1017     case IP_VERSION(11, 0, 1):
1018         adev->gfx.config.max_hw_contexts = 8;
1019         adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1020         adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1021         adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1022         adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
1023         break;
1024     default:
1025         BUG();
1026         break;
1027     }
1028 
1029     return 0;
1030 }
1031 
1032 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
1033                    int me, int pipe, int queue)
1034 {
1035     int r;
1036     struct amdgpu_ring *ring;
1037     unsigned int irq_type;
1038 
1039     ring = &adev->gfx.gfx_ring[ring_id];
1040 
1041     ring->me = me;
1042     ring->pipe = pipe;
1043     ring->queue = queue;
1044 
1045     ring->ring_obj = NULL;
1046     ring->use_doorbell = true;
1047 
1048     if (!ring_id)
1049         ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1050     else
1051         ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
1052     sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1053 
1054     irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
1055     r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1056                  AMDGPU_RING_PRIO_DEFAULT, NULL);
1057     if (r)
1058         return r;
1059     return 0;
1060 }
1061 
1062 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1063                        int mec, int pipe, int queue)
1064 {
1065     int r;
1066     unsigned irq_type;
1067     struct amdgpu_ring *ring;
1068     unsigned int hw_prio;
1069 
1070     ring = &adev->gfx.compute_ring[ring_id];
1071 
1072     /* mec0 is me1 */
1073     ring->me = mec + 1;
1074     ring->pipe = pipe;
1075     ring->queue = queue;
1076 
1077     ring->ring_obj = NULL;
1078     ring->use_doorbell = true;
1079     ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1080     ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1081                 + (ring_id * GFX11_MEC_HPD_SIZE);
1082     sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1083 
1084     irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1085         + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1086         + ring->pipe;
1087     hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1088             AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
1089     /* type-2 packets are deprecated on MEC, use type-3 instead */
1090     r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1091                  hw_prio, NULL);
1092     if (r)
1093         return r;
1094 
1095     return 0;
1096 }
1097 
1098 static struct {
1099     SOC21_FIRMWARE_ID   id;
1100     unsigned int        offset;
1101     unsigned int        size;
1102 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
1103 
1104 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
1105 {
1106     RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
1107 
1108     while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
1109             (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
1110         rlc_autoload_info[ucode->id].id = ucode->id;
1111         rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
1112         rlc_autoload_info[ucode->id].size = ucode->size * 4;
1113 
1114         ucode++;
1115     }
1116 }
1117 
1118 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
1119 {
1120     uint32_t total_size = 0;
1121     SOC21_FIRMWARE_ID id;
1122 
1123     gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
1124 
1125     for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
1126         total_size += rlc_autoload_info[id].size;
1127 
1128     /* In case the offset in rlc toc ucode is aligned */
1129     if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
1130         total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
1131             rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
1132 
1133     return total_size;
1134 }
1135 
1136 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1137 {
1138     int r;
1139     uint32_t total_size;
1140 
1141     total_size = gfx_v11_0_calc_toc_total_size(adev);
1142 
1143     r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1144             AMDGPU_GEM_DOMAIN_VRAM,
1145             &adev->gfx.rlc.rlc_autoload_bo,
1146             &adev->gfx.rlc.rlc_autoload_gpu_addr,
1147             (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1148 
1149     if (r) {
1150         dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1151         return r;
1152     }
1153 
1154     return 0;
1155 }
1156 
1157 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1158                           SOC21_FIRMWARE_ID id,
1159                               const void *fw_data,
1160                           uint32_t fw_size,
1161                           uint32_t *fw_autoload_mask)
1162 {
1163     uint32_t toc_offset;
1164     uint32_t toc_fw_size;
1165     char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1166 
1167     if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1168         return;
1169 
1170     toc_offset = rlc_autoload_info[id].offset;
1171     toc_fw_size = rlc_autoload_info[id].size;
1172 
1173     if (fw_size == 0)
1174         fw_size = toc_fw_size;
1175 
1176     if (fw_size > toc_fw_size)
1177         fw_size = toc_fw_size;
1178 
1179     memcpy(ptr + toc_offset, fw_data, fw_size);
1180 
1181     if (fw_size < toc_fw_size)
1182         memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1183 
1184     if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1185         *(uint64_t *)fw_autoload_mask |= 1ULL << id;
1186 }
1187 
1188 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1189                             uint32_t *fw_autoload_mask)
1190 {
1191     void *data;
1192     uint32_t size;
1193     uint64_t *toc_ptr;
1194 
1195     *(uint64_t *)fw_autoload_mask |= 0x1;
1196 
1197     DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1198 
1199     data = adev->psp.toc.start_addr;
1200     size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1201 
1202     toc_ptr = (uint64_t *)data + size / 8 - 1;
1203     *toc_ptr = *(uint64_t *)fw_autoload_mask;
1204 
1205     gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1206                     data, size, fw_autoload_mask);
1207 }
1208 
1209 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1210                             uint32_t *fw_autoload_mask)
1211 {
1212     const __le32 *fw_data;
1213     uint32_t fw_size;
1214     const struct gfx_firmware_header_v1_0 *cp_hdr;
1215     const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1216     const struct rlc_firmware_header_v2_0 *rlc_hdr;
1217     const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1218     uint16_t version_major, version_minor;
1219 
1220     if (adev->gfx.rs64_enable) {
1221         /* pfp ucode */
1222         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1223             adev->gfx.pfp_fw->data;
1224         /* instruction */
1225         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1226             le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1227         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1228         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1229                         fw_data, fw_size, fw_autoload_mask);
1230         /* data */
1231         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1232             le32_to_cpu(cpv2_hdr->data_offset_bytes));
1233         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1234         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1235                         fw_data, fw_size, fw_autoload_mask);
1236         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1237                         fw_data, fw_size, fw_autoload_mask);
1238         /* me ucode */
1239         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1240             adev->gfx.me_fw->data;
1241         /* instruction */
1242         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1243             le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1244         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1245         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1246                         fw_data, fw_size, fw_autoload_mask);
1247         /* data */
1248         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1249             le32_to_cpu(cpv2_hdr->data_offset_bytes));
1250         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1251         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1252                         fw_data, fw_size, fw_autoload_mask);
1253         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1254                         fw_data, fw_size, fw_autoload_mask);
1255         /* mec ucode */
1256         cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1257             adev->gfx.mec_fw->data;
1258         /* instruction */
1259         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1260             le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1261         fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1262         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1263                         fw_data, fw_size, fw_autoload_mask);
1264         /* data */
1265         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1266             le32_to_cpu(cpv2_hdr->data_offset_bytes));
1267         fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1268         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1269                         fw_data, fw_size, fw_autoload_mask);
1270         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1271                         fw_data, fw_size, fw_autoload_mask);
1272         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1273                         fw_data, fw_size, fw_autoload_mask);
1274         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1275                         fw_data, fw_size, fw_autoload_mask);
1276     } else {
1277         /* pfp ucode */
1278         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1279             adev->gfx.pfp_fw->data;
1280         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1281                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1282         fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1283         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1284                         fw_data, fw_size, fw_autoload_mask);
1285 
1286         /* me ucode */
1287         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1288             adev->gfx.me_fw->data;
1289         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1290                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1291         fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1292         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1293                         fw_data, fw_size, fw_autoload_mask);
1294 
1295         /* mec ucode */
1296         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1297             adev->gfx.mec_fw->data;
1298         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1299                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1300         fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1301             cp_hdr->jt_size * 4;
1302         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1303                         fw_data, fw_size, fw_autoload_mask);
1304     }
1305 
1306     /* rlc ucode */
1307     rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1308         adev->gfx.rlc_fw->data;
1309     fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1310             le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1311     fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1312     gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1313                     fw_data, fw_size, fw_autoload_mask);
1314 
1315     version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1316     version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1317     if (version_major == 2) {
1318         if (version_minor >= 2) {
1319             rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1320 
1321             fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1322                     le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1323             fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1324             gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1325                     fw_data, fw_size, fw_autoload_mask);
1326 
1327             fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1328                     le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1329             fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1330             gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1331                     fw_data, fw_size, fw_autoload_mask);
1332         }
1333     }
1334 }
1335 
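/*
 * A descriptive note on the helper below: it stages both SDMA microcode
 * threads from the instance 0 firmware image into the RLC autoload
 * buffer - the context ucode as TH0 and the control ucode as TH1.
 */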
1336 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1337                             uint32_t *fw_autoload_mask)
1338 {
1339     const __le32 *fw_data;
1340     uint32_t fw_size;
1341     const struct sdma_firmware_header_v2_0 *sdma_hdr;
1342 
1343     sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1344         adev->sdma.instance[0].fw->data;
1345     fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1346             le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1347     fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1348 
1349     gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1350             SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1351 
1352     fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1353             le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1354     fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1355 
1356     gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1357             SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1358 }
1359 
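/*
 * Stage the RS64 MES firmware for both pipes into the autoload buffer:
 * for each pipe, copy the instruction ucode and its data/stack image.
 */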
1360 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1361                             uint32_t *fw_autoload_mask)
1362 {
1363     const __le32 *fw_data;
1364     unsigned fw_size;
1365     const struct mes_firmware_header_v1_0 *mes_hdr;
1366     int pipe, ucode_id, data_id;
1367 
1368     for (pipe = 0; pipe < 2; pipe++) {
1369         if (pipe == 0) {
1370             ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1371             data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1372         } else {
1373             ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1374             data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1375         }
1376 
1377         mes_hdr = (const struct mes_firmware_header_v1_0 *)
1378             adev->mes.fw[pipe]->data;
1379 
1380         fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1381                 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1382         fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1383 
1384         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1385                 ucode_id, fw_data, fw_size, fw_autoload_mask);
1386 
1387         fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1388                 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1389         fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1390 
1391         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1392                 data_id, fw_data, fw_size, fw_autoload_mask);
1393     }
1394 }
1395 
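/*
 * Run the RLC backdoor autoload sequence: stage the SDMA, CP/GFX, MES and
 * TOC firmware into the autoload buffer, point the IMU bootloader
 * registers at the RLC_G ucode within that buffer, then load, set up and
 * start the IMU firmware before disabling GPA mode.
 */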
1396 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1397 {
1398     uint32_t rlc_g_offset, rlc_g_size;
1399     uint64_t gpu_addr;
1400     uint32_t autoload_fw_id[2];
1401 
1402     memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
1403 
1404     /* RLC autoload sequence 2: copy ucode */
1405     gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1406     gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1407     gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1408     gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1409 
1410     rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1411     rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1412     gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1413 
1414     WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1415     WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1416 
1417     WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1418 
1419     /* RLC autoload sequence 3: load IMU fw */
1420     if (adev->gfx.imu.funcs->load_microcode)
1421         adev->gfx.imu.funcs->load_microcode(adev);
1422     /* RLC autoload sequence 4: init IMU fw */
1423     if (adev->gfx.imu.funcs->setup_imu)
1424         adev->gfx.imu.funcs->setup_imu(adev);
1425     if (adev->gfx.imu.funcs->start_imu)
1426         adev->gfx.imu.funcs->start_imu(adev);
1427 
1428     /* RLC autoload sequence 5: disable gpa mode */
1429     gfx_v11_0_disable_gpa_mode(adev);
1430 
1431     return 0;
1432 }
1433 
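/*
 * sw_init IP callback: pick the per-ASIC ME/MEC topology, register the
 * EOP and privileged register/instruction interrupt sources, create the
 * PFP/ME/RLC/MEC BOs and the gfx/compute rings (plus KIQ when MES KIQ is
 * not in use), allocate the MQDs and, for backdoor autoload, load the TOC
 * firmware and allocate the autoload buffer.
 */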
1434 static int gfx_v11_0_sw_init(void *handle)
1435 {
1436     int i, j, k, r, ring_id = 0;
1437     struct amdgpu_kiq *kiq;
1438     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1439 
1440     adev->gfxhub.funcs->init(adev);
1441 
1442     switch (adev->ip_versions[GC_HWIP][0]) {
1443     case IP_VERSION(11, 0, 0):
1444     case IP_VERSION(11, 0, 1):
1445     case IP_VERSION(11, 0, 2):
1446         adev->gfx.me.num_me = 1;
1447         adev->gfx.me.num_pipe_per_me = 1;
1448         adev->gfx.me.num_queue_per_pipe = 1;
1449         adev->gfx.mec.num_mec = 2;
1450         adev->gfx.mec.num_pipe_per_mec = 4;
1451         adev->gfx.mec.num_queue_per_pipe = 4;
1452         break;
1453     default:
1454         adev->gfx.me.num_me = 1;
1455         adev->gfx.me.num_pipe_per_me = 1;
1456         adev->gfx.me.num_queue_per_pipe = 1;
1457         adev->gfx.mec.num_mec = 1;
1458         adev->gfx.mec.num_pipe_per_mec = 4;
1459         adev->gfx.mec.num_queue_per_pipe = 8;
1460         break;
1461     }
1462 
1463     /* EOP Event */
1464     r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1465                   GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1466                   &adev->gfx.eop_irq);
1467     if (r)
1468         return r;
1469 
1470     /* Privileged reg */
1471     r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1472                   GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1473                   &adev->gfx.priv_reg_irq);
1474     if (r)
1475         return r;
1476 
1477     /* Privileged inst */
1478     r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1479                   GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1480                   &adev->gfx.priv_inst_irq);
1481     if (r)
1482         return r;
1483 
1484     adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1485 
1486     if (adev->gfx.imu.funcs) {
1487         if (adev->gfx.imu.funcs->init_microcode) {
1488             r = adev->gfx.imu.funcs->init_microcode(adev);
1489             if (r)
1490                 DRM_ERROR("Failed to load imu firmware!\n");
1491         }
1492     }
1493 
1494     r = gfx_v11_0_me_init(adev);
1495     if (r)
1496         return r;
1497 
1498     r = gfx_v11_0_rlc_init(adev);
1499     if (r) {
1500         DRM_ERROR("Failed to init rlc BOs!\n");
1501         return r;
1502     }
1503 
1504     r = gfx_v11_0_mec_init(adev);
1505     if (r) {
1506         DRM_ERROR("Failed to init MEC BOs!\n");
1507         return r;
1508     }
1509 
1510     /* set up the gfx ring */
1511     for (i = 0; i < adev->gfx.me.num_me; i++) {
1512         for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1513             for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1514                 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1515                     continue;
1516 
1517                 r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1518                                 i, k, j);
1519                 if (r)
1520                     return r;
1521                 ring_id++;
1522             }
1523         }
1524     }
1525 
1526     ring_id = 0;
1527     /* set up the compute queues - allocate horizontally across pipes */
1528     for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1529         for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1530             for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1531                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
1532                                      j))
1533                     continue;
1534 
1535                 r = gfx_v11_0_compute_ring_init(adev, ring_id,
1536                                 i, k, j);
1537                 if (r)
1538                     return r;
1539 
1540                 ring_id++;
1541             }
1542         }
1543     }
1544 
1545     if (!adev->enable_mes_kiq) {
1546         r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
1547         if (r) {
1548             DRM_ERROR("Failed to init KIQ BOs!\n");
1549             return r;
1550         }
1551 
1552         kiq = &adev->gfx.kiq;
1553         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1554         if (r)
1555             return r;
1556     }
1557 
1558     r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
1559     if (r)
1560         return r;
1561 
1562     /* allocate visible FB for rlc auto-loading fw */
1563     if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1564         r = gfx_v11_0_init_toc_microcode(adev);
1565         if (r)
1566             dev_err(adev->dev, "Failed to load toc firmware!\n");
1567         r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1568         if (r)
1569             return r;
1570     }
1571 
1572     r = gfx_v11_0_gpu_early_init(adev);
1573     if (r)
1574         return r;
1575 
1576     return 0;
1577 }
1578 
1579 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1580 {
1581     amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1582                   &adev->gfx.pfp.pfp_fw_gpu_addr,
1583                   (void **)&adev->gfx.pfp.pfp_fw_ptr);
1584 
1585     amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1586                   &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1587                   (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1588 }
1589 
1590 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1591 {
1592     amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1593                   &adev->gfx.me.me_fw_gpu_addr,
1594                   (void **)&adev->gfx.me.me_fw_ptr);
1595 
1596     amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1597                    &adev->gfx.me.me_fw_data_gpu_addr,
1598                    (void **)&adev->gfx.me.me_fw_data_ptr);
1599 }
1600 
1601 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1602 {
1603     amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1604             &adev->gfx.rlc.rlc_autoload_gpu_addr,
1605             (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1606 }
1607 
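/*
 * sw_fini IP callback: tear down the rings, KIQ, MQDs, firmware BOs and
 * the RLC autoload buffer created in sw_init, then release the microcode.
 */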
1608 static int gfx_v11_0_sw_fini(void *handle)
1609 {
1610     int i;
1611     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1612 
1613     for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1614         amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1615     for (i = 0; i < adev->gfx.num_compute_rings; i++)
1616         amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1617 
1618     amdgpu_gfx_mqd_sw_fini(adev);
1619 
1620     if (!adev->enable_mes_kiq) {
1621         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
1622         amdgpu_gfx_kiq_fini(adev);
1623     }
1624 
1625     gfx_v11_0_pfp_fini(adev);
1626     gfx_v11_0_me_fini(adev);
1627     gfx_v11_0_rlc_fini(adev);
1628     gfx_v11_0_mec_fini(adev);
1629 
1630     if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1631         gfx_v11_0_rlc_autoload_buffer_fini(adev);
1632 
1633     gfx_v11_0_free_microcode(adev);
1634 
1635     return 0;
1636 }
1637 
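/*
 * Program GRBM_GFX_INDEX to target a specific shader engine / shader
 * array / instance, or to broadcast when 0xffffffff is passed for a field.
 */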
1638 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1639                    u32 sh_num, u32 instance)
1640 {
1641     u32 data;
1642 
1643     if (instance == 0xffffffff)
1644         data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1645                      INSTANCE_BROADCAST_WRITES, 1);
1646     else
1647         data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1648                      instance);
1649 
1650     if (se_num == 0xffffffff)
1651         data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1652                      1);
1653     else
1654         data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1655 
1656     if (sh_num == 0xffffffff)
1657         data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1658                      1);
1659     else
1660         data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1661 
1662     WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1663 }
1664 
1665 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1666 {
1667     u32 data, mask;
1668 
1669     data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1670     data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1671 
1672     data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1673     data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1674 
1675     mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1676                      adev->gfx.config.max_sh_per_se);
1677 
1678     return (~data) & mask;
1679 }
1680 
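/*
 * Walk every SE/SA selection, collect the active render backend bitmap
 * per shader array, and cache the aggregate enable mask and RB count in
 * adev->gfx.config.
 */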
1681 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1682 {
1683     int i, j;
1684     u32 data;
1685     u32 active_rbs = 0;
1686     u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1687                     adev->gfx.config.max_sh_per_se;
1688 
1689     mutex_lock(&adev->grbm_idx_mutex);
1690     for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1691         for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1692             gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
1693             data = gfx_v11_0_get_rb_active_bitmap(adev);
1694             active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1695                            rb_bitmap_width_per_sh);
1696         }
1697     }
1698     gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1699     mutex_unlock(&adev->grbm_idx_mutex);
1700 
1701     adev->gfx.config.backend_enable_mask = active_rbs;
1702     adev->gfx.config.num_rbs = hweight32(active_rbs);
1703 }
1704 
1705 #define DEFAULT_SH_MEM_BASES    (0x6000)
1706 #define LDS_APP_BASE           0x1
1707 #define SCRATCH_APP_BASE       0x2
1708 
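/*
 * Set up the compute (KFD) VMIDs: program the default SH_MEM config and
 * aperture bases for each of them and clear their GDS/GWS/OA allocations.
 */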
1709 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1710 {
1711     int i;
1712     uint32_t sh_mem_bases;
1713     uint32_t data;
1714 
1715     /*
1716      * Configure apertures:
1717      * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1718      * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1719      * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1720      */
1721     sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1722             SCRATCH_APP_BASE;
1723 
1724     mutex_lock(&adev->srbm_mutex);
1725     for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1726         soc21_grbm_select(adev, 0, 0, 0, i);
1727         /* CP and shaders */
1728         WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1729         WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1730 
1731         /* Enable trap for each kfd vmid. */
1732         data = RREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL));
1733         data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1734     }
1735     soc21_grbm_select(adev, 0, 0, 0, 0);
1736     mutex_unlock(&adev->srbm_mutex);
1737 
1738     /* Initialize all compute VMIDs to have no GDS, GWS, or OA
1739        access. These should be enabled by FW for target VMIDs. */
1740     for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1741         WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1742         WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1743         WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1744         WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1745     }
1746 }
1747 
1748 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1749 {
1750     int vmid;
1751 
1752     /*
1753      * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1754      * access. Compute VMIDs should be enabled by FW for target VMIDs;
1755      * the driver can enable them for graphics. VMID0 should maintain
1756      * access so that HWS firmware can save/restore entries.
1757      */
1758     for (vmid = 1; vmid < 16; vmid++) {
1759         WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1760         WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1761         WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1762         WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1763     }
1764 }
1765 
1766 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1767 {
1768     /* TODO: harvest feature to be added later. */
1769 }
1770 
1771 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1772 {
1773     /* TCCs are global (not instanced). */
1774     uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1775                    RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1776 
1777     adev->gfx.config.tcc_disabled_mask =
1778         REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
1779         (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
1780 }
1781 
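/*
 * One-time constant programming at hw init: read back the RB/CU/TCC
 * harvesting state, then program the SH_MEM config and aperture bases for
 * all GFXHUB VMIDs before initializing the compute and GDS VMIDs.
 */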
1782 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
1783 {
1784     u32 tmp;
1785     int i;
1786 
1787     WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1788 
1789     gfx_v11_0_setup_rb(adev);
1790     gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1791     gfx_v11_0_get_tcc_info(adev);
1792     adev->gfx.config.pa_sc_tile_steering_override = 0;
1793 
1794     /* XXX SH_MEM regs */
1795     /* where to put LDS, scratch, GPUVM in FSA64 space */
1796     mutex_lock(&adev->srbm_mutex);
1797     for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
1798         soc21_grbm_select(adev, 0, 0, 0, i);
1799         /* CP and shaders */
1800         WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1801         if (i != 0) {
1802             tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1803                 (adev->gmc.private_aperture_start >> 48));
1804             tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1805                 (adev->gmc.shared_aperture_start >> 48));
1806             WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1807         }
1808     }
1809     soc21_grbm_select(adev, 0, 0, 0, 0);
1810 
1811     mutex_unlock(&adev->srbm_mutex);
1812 
1813     gfx_v11_0_init_compute_vmid(adev);
1814     gfx_v11_0_init_gds_vmid(adev);
1815 }
1816 
1817 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1818                            bool enable)
1819 {
1820     u32 tmp;
1821 
1822     if (amdgpu_sriov_vf(adev))
1823         return;
1824 
1825     tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1826 
1827     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1828                 enable ? 1 : 0);
1829     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1830                 enable ? 1 : 0);
1831     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1832                 enable ? 1 : 0);
1833     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1834                 enable ? 1 : 0);
1835 
1836     WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1837 }
1838 
1839 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
1840 {
1841     adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1842 
1843     WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1844             adev->gfx.rlc.clear_state_gpu_addr >> 32);
1845     WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1846             adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1847     WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1848 
1849     return 0;
1850 }
1851 
1852 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
1853 {
1854     u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1855 
1856     tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1857     WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1858 }
1859 
1860 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
1861 {
1862     WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1863     udelay(50);
1864     WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1865     udelay(50);
1866 }
1867 
1868 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1869                          bool enable)
1870 {
1871     uint32_t rlc_pg_cntl;
1872 
1873     rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
1874 
1875     if (!enable) {
1876         /* RLC_PG_CNTL[23] = 0 (default)
1877          * RLC will wait for handshake acks with SMU
1878          * GFXOFF will be enabled
1879          * RLC_PG_CNTL[23] = 1
1880          * RLC will not issue any message to SMU
1881          * hence no handshake between SMU & RLC
1882          * GFXOFF will be disabled
1883          */
1884         rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1885     } else
1886         rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1887     WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
1888 }
1889 
1890 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
1891 {
1892     /* TODO: keep the rlc & smu handshake disabled until the smu
1893      * and gfxoff features work as expected */
1894     if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1895         gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
1896 
1897     WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1898     udelay(50);
1899 }
1900 
1901 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
1902 {
1903     uint32_t tmp;
1904 
1905     /* enable Save Restore Machine */
1906     tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
1907     tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1908     tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1909     WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
1910 }
1911 
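/*
 * Legacy direct load of the RLC_G ucode: stream the firmware image one
 * dword at a time into RLC_GPM_UCODE_DATA starting at the RLCG load
 * address, then write the firmware version to the address register.
 */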
1912 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
1913 {
1914     const struct rlc_firmware_header_v2_0 *hdr;
1915     const __le32 *fw_data;
1916     unsigned i, fw_size;
1917 
1918     hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1919     fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1920                le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1921     fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1922 
1923     WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
1924              RLCG_UCODE_LOADING_START_ADDRESS);
1925 
1926     for (i = 0; i < fw_size; i++)
1927         WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
1928                  le32_to_cpup(fw_data++));
1929 
1930     WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1931 }
1932 
1933 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
1934 {
1935     const struct rlc_firmware_header_v2_2 *hdr;
1936     const __le32 *fw_data;
1937     unsigned i, fw_size;
1938     u32 tmp;
1939 
1940     hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1941 
1942     fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1943             le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1944     fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1945 
1946     WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
1947 
1948     for (i = 0; i < fw_size; i++) {
1949         if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1950             msleep(1);
1951         WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
1952                 le32_to_cpup(fw_data++));
1953     }
1954 
1955     WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1956 
1957     fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1958             le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1959     fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1960 
1961     WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
1962     for (i = 0; i < fw_size; i++) {
1963         if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1964             msleep(1);
1965         WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
1966                 le32_to_cpup(fw_data++));
1967     }
1968 
1969     WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1970 
1971     tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
1972     tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1973     tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1974     WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
1975 }
1976 
1977 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
1978 {
1979     const struct rlc_firmware_header_v2_3 *hdr;
1980     const __le32 *fw_data;
1981     unsigned i, fw_size;
1982     u32 tmp;
1983 
1984     hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
1985 
1986     fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1987             le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
1988     fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
1989 
1990     WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
1991 
1992     for (i = 0; i < fw_size; i++) {
1993         if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1994             msleep(1);
1995         WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
1996                 le32_to_cpup(fw_data++));
1997     }
1998 
1999     WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
2000 
2001     tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
2002     tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
2003     WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
2004 
2005     fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2006             le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
2007     fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
2008 
2009     WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
2010 
2011     for (i = 0; i < fw_size; i++) {
2012         if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2013             msleep(1);
2014         WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
2015                 le32_to_cpup(fw_data++));
2016     }
2017 
2018     WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
2019 
2020     tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
2021     tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
2022     WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
2023 }
2024 
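/*
 * Direct-load entry point for the RLC firmware family: always load the
 * RLC_G ucode and, when DPM is enabled, also load the LX6 IRAM/DRAM and
 * RLCP/RLCV images depending on the header minor version.
 */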
2025 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
2026 {
2027     const struct rlc_firmware_header_v2_0 *hdr;
2028     uint16_t version_major;
2029     uint16_t version_minor;
2030 
2031     if (!adev->gfx.rlc_fw)
2032         return -EINVAL;
2033 
2034     hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2035     amdgpu_ucode_print_rlc_hdr(&hdr->header);
2036 
2037     version_major = le16_to_cpu(hdr->header.header_version_major);
2038     version_minor = le16_to_cpu(hdr->header.header_version_minor);
2039 
2040     if (version_major == 2) {
2041         gfx_v11_0_load_rlcg_microcode(adev);
2042         if (amdgpu_dpm == 1) {
2043             if (version_minor >= 2)
2044                 gfx_v11_0_load_rlc_iram_dram_microcode(adev);
2045             if (version_minor == 3)
2046                 gfx_v11_0_load_rlcp_rlcv_microcode(adev);
2047         }
2048
2049         return 0;
2050     }
2051 
2052     return -EINVAL;
2053 }
2054 
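/*
 * Bring the RLC up according to the firmware load type: PSP-loaded
 * firmware only needs the CSB (and SRM on bare metal); otherwise stop the
 * RLC, clear CG/PG, optionally direct-load the microcode, program the CSB
 * and restart it.
 */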
2055 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
2056 {
2057     int r;
2058 
2059     if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
2060         gfx_v11_0_init_csb(adev);
2061 
2062         if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
2063             gfx_v11_0_rlc_enable_srm(adev);
2064     } else {
2065         if (amdgpu_sriov_vf(adev)) {
2066             gfx_v11_0_init_csb(adev);
2067             return 0;
2068         }
2069 
2070         adev->gfx.rlc.funcs->stop(adev);
2071 
2072         /* disable CG */
2073         WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
2074 
2075         /* disable PG */
2076         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
2077 
2078         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2079             /* legacy rlc firmware loading */
2080             r = gfx_v11_0_rlc_load_microcode(adev);
2081             if (r)
2082                 return r;
2083         }
2084 
2085         gfx_v11_0_init_csb(adev);
2086 
2087         adev->gfx.rlc.funcs->start(adev);
2088     }
2089     return 0;
2090 }
2091 
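/*
 * Point the ME L1 instruction cache at the autoloaded ucode: invalidate
 * the cache, wait for completion, then program the base control and the
 * 4K-aligned base address. The PFP and MEC variants below follow the
 * same pattern.
 */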
2092 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
2093 {
2094     uint32_t usec_timeout = 50000;  /* wait for 50ms */
2095     uint32_t tmp;
2096     int i;
2097 
2098     /* Trigger an invalidation of the L1 instruction caches */
2099     tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2100     tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2101     WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2102 
2103     /* Wait for invalidation complete */
2104     for (i = 0; i < usec_timeout; i++) {
2105         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2106         if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2107                     INVALIDATE_CACHE_COMPLETE))
2108             break;
2109         udelay(1);
2110     }
2111 
2112     if (i >= usec_timeout) {
2113         dev_err(adev->dev, "failed to invalidate instruction cache\n");
2114         return -EINVAL;
2115     }
2116 
2117     if (amdgpu_emu_mode == 1)
2118         adev->hdp.funcs->flush_hdp(adev, NULL);
2119 
2120     tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2121     tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2122     tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2123     tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2124     tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2125     WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2126 
2127     /* Program me ucode address into instruction cache address register */
2128     WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2129             lower_32_bits(addr) & 0xFFFFF000);
2130     WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2131             upper_32_bits(addr));
2132 
2133     return 0;
2134 }
2135 
2136 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
2137 {
2138     uint32_t usec_timeout = 50000;  /* wait for 50ms */
2139     uint32_t tmp;
2140     int i;
2141 
2142     /* Trigger an invalidation of the L1 instruction caches */
2143     tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2144     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2145     WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2146 
2147     /* Wait for invalidation complete */
2148     for (i = 0; i < usec_timeout; i++) {
2149         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2150         if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2151                     INVALIDATE_CACHE_COMPLETE))
2152             break;
2153         udelay(1);
2154     }
2155 
2156     if (i >= usec_timeout) {
2157         dev_err(adev->dev, "failed to invalidate instruction cache\n");
2158         return -EINVAL;
2159     }
2160 
2161     if (amdgpu_emu_mode == 1)
2162         adev->hdp.funcs->flush_hdp(adev, NULL);
2163 
2164     tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2165     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2166     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2167     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2168     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2169     WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2170 
2171     /* Program pfp ucode address into instruction cache address register */
2172     WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2173             lower_32_bits(addr) & 0xFFFFF000);
2174     WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2175             upper_32_bits(addr));
2176 
2177     return 0;
2178 }
2179 
2180 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2181 {
2182     uint32_t usec_timeout = 50000;  /* wait for 50ms */
2183     uint32_t tmp;
2184     int i;
2185 
2186     /* Trigger an invalidation of the L1 instruction caches */
2187     tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2188     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2189 
2190     WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2191 
2192     /* Wait for invalidation complete */
2193     for (i = 0; i < usec_timeout; i++) {
2194         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2195         if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2196                     INVALIDATE_CACHE_COMPLETE))
2197             break;
2198         udelay(1);
2199     }
2200 
2201     if (i >= usec_timeout) {
2202         dev_err(adev->dev, "failed to invalidate instruction cache\n");
2203         return -EINVAL;
2204     }
2205 
2206     if (amdgpu_emu_mode == 1)
2207         adev->hdp.funcs->flush_hdp(adev, NULL);
2208 
2209     tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2210     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2211     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2212     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2213     WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2214 
2215     /* Program mec1 ucode address into instruction cache address register */
2216     WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2217             lower_32_bits(addr) & 0xFFFFF000);
2218     WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2219             upper_32_bits(addr));
2220 
2221     return 0;
2222 }
2223 
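/*
 * RS64 variant of the PFP cache setup: program the instruction cache
 * base, wait for the implicit invalidation, prime the cache, then per
 * pipe set the program counter start address, pulse the pipe reset and
 * program the data cache base before invalidating the data cache.
 */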
2224 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2225 {
2226     uint32_t usec_timeout = 50000;  /* wait for 50ms */
2227     uint32_t tmp;
2228     unsigned i, pipe_id;
2229     const struct gfx_firmware_header_v2_0 *pfp_hdr;
2230 
2231     pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2232         adev->gfx.pfp_fw->data;
2233 
2234     WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2235         lower_32_bits(addr));
2236     WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2237         upper_32_bits(addr));
2238 
2239     tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2240     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2241     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2242     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2243     WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2244 
2245     /*
2246      * Programming any of the CP_PFP_IC_BASE registers
2247      * forces an invalidation of the PFP L1 I$. Wait for the
2248      * invalidation to complete.
2249      */
2250     for (i = 0; i < usec_timeout; i++) {
2251         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2252         if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2253             INVALIDATE_CACHE_COMPLETE))
2254             break;
2255         udelay(1);
2256     }
2257 
2258     if (i >= usec_timeout) {
2259         dev_err(adev->dev, "failed to invalidate instruction cache\n");
2260         return -EINVAL;
2261     }
2262 
2263     /* Prime the L1 instruction caches */
2264     tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2265     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2266     WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2267     /* Wait for the cache to be primed */
2268     for (i = 0; i < usec_timeout; i++) {
2269         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2270         if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2271             ICACHE_PRIMED))
2272             break;
2273         udelay(1);
2274     }
2275 
2276     if (i >= usec_timeout) {
2277         dev_err(adev->dev, "failed to prime instruction cache\n");
2278         return -EINVAL;
2279     }
2280 
2281     mutex_lock(&adev->srbm_mutex);
2282     for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2283         soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2284         WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2285             (pfp_hdr->ucode_start_addr_hi << 30) |
2286             (pfp_hdr->ucode_start_addr_lo >> 2));
2287         WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2288             pfp_hdr->ucode_start_addr_hi >> 2);
2289 
2290         /*
2291          * Program CP_ME_CNTL to reset the given pipe so that
2292          * the new CP_PFP_PRGRM_CNTR_START value takes effect.
2293          */
2294         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2295         if (pipe_id == 0)
2296             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2297                     PFP_PIPE0_RESET, 1);
2298         else
2299             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2300                     PFP_PIPE1_RESET, 1);
2301         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2302 
2303         /* Clear the pfp pipe reset bit. */
2304         if (pipe_id == 0)
2305             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2306                     PFP_PIPE0_RESET, 0);
2307         else
2308             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2309                     PFP_PIPE1_RESET, 0);
2310         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2311 
2312         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2313             lower_32_bits(addr2));
2314         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2315             upper_32_bits(addr2));
2316     }
2317     soc21_grbm_select(adev, 0, 0, 0, 0);
2318     mutex_unlock(&adev->srbm_mutex);
2319 
2320     tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2321     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2322     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2323     WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2324 
2325     /* Invalidate the data caches */
2326     tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2327     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2328     WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2329 
2330     for (i = 0; i < usec_timeout; i++) {
2331         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2332         if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2333             INVALIDATE_DCACHE_COMPLETE))
2334             break;
2335         udelay(1);
2336     }
2337 
2338     if (i >= usec_timeout) {
2339         dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2340         return -EINVAL;
2341     }
2342 
2343     return 0;
2344 }
2345 
2346 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2347 {
2348     uint32_t usec_timeout = 50000;  /* wait for 50ms */
2349     uint32_t tmp;
2350     unsigned i, pipe_id;
2351     const struct gfx_firmware_header_v2_0 *me_hdr;
2352 
2353     me_hdr = (const struct gfx_firmware_header_v2_0 *)
2354         adev->gfx.me_fw->data;
2355 
2356     WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2357         lower_32_bits(addr));
2358     WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2359         upper_32_bits(addr));
2360 
2361     tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2362     tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2363     tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2364     tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2365     WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2366 
2367     /*
2368      * Programming any of the CP_ME_IC_BASE registers
2369      * forces an invalidation of the ME L1 I$. Wait for the
2370      * invalidation to complete.
2371      */
2372     for (i = 0; i < usec_timeout; i++) {
2373         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2374         if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2375             INVALIDATE_CACHE_COMPLETE))
2376             break;
2377         udelay(1);
2378     }
2379 
2380     if (i >= usec_timeout) {
2381         dev_err(adev->dev, "failed to invalidate instruction cache\n");
2382         return -EINVAL;
2383     }
2384 
2385     /* Prime the instruction caches */
2386     tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2387     tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2388     WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2389 
2390     /* Wait for the instruction cache to be primed */
2391     for (i = 0; i < usec_timeout; i++) {
2392         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2393         if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2394             ICACHE_PRIMED))
2395             break;
2396         udelay(1);
2397     }
2398 
2399     if (i >= usec_timeout) {
2400         dev_err(adev->dev, "failed to prime instruction cache\n");
2401         return -EINVAL;
2402     }
2403 
2404     mutex_lock(&adev->srbm_mutex);
2405     for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2406         soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2407         WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2408             (me_hdr->ucode_start_addr_hi << 30) |
2409             (me_hdr->ucode_start_addr_lo >> 2));
2410         WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2411             me_hdr->ucode_start_addr_hi >> 2);
2412 
2413         /*
2414          * Program CP_ME_CNTL to reset the given pipe so that
2415          * the new CP_ME_PRGRM_CNTR_START value takes effect.
2416          */
2417         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2418         if (pipe_id == 0)
2419             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2420                     ME_PIPE0_RESET, 1);
2421         else
2422             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2423                     ME_PIPE1_RESET, 1);
2424         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2425 
2426         /* Clear the me pipe reset bit. */
2427         if (pipe_id == 0)
2428             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2429                     ME_PIPE0_RESET, 0);
2430         else
2431             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2432                     ME_PIPE1_RESET, 0);
2433         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2434 
2435         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2436             lower_32_bits(addr2));
2437         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2438             upper_32_bits(addr2));
2439     }
2440     soc21_grbm_select(adev, 0, 0, 0, 0);
2441     mutex_unlock(&adev->srbm_mutex);
2442 
2443     tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2444     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2445     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2446     WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2447 
2448     /* Invalidate the data caches */
2449     tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2450     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2451     WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2452 
2453     for (i = 0; i < usec_timeout; i++) {
2454         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2455         if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2456             INVALIDATE_DCACHE_COMPLETE))
2457             break;
2458         udelay(1);
2459     }
2460 
2461     if (i >= usec_timeout) {
2462         dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2463         return -EINVAL;
2464     }
2465 
2466     return 0;
2467 }
2468 
2469 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2470 {
2471     uint32_t usec_timeout = 50000;  /* wait for 50ms */
2472     uint32_t tmp;
2473     unsigned i;
2474     const struct gfx_firmware_header_v2_0 *mec_hdr;
2475 
2476     mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2477         adev->gfx.mec_fw->data;
2478 
2479     tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2480     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2481     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2482     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2483     WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2484 
2485     tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2486     tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2487     tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2488     WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2489 
2490     mutex_lock(&adev->srbm_mutex);
2491     for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2492         soc21_grbm_select(adev, 1, i, 0, 0);
2493 
2494         WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
2495         WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2496              upper_32_bits(addr2));
2497 
2498         WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2499                     mec_hdr->ucode_start_addr_lo >> 2 |
2500                     mec_hdr->ucode_start_addr_hi << 30);
2501         WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2502                     mec_hdr->ucode_start_addr_hi >> 2);
2503 
2504         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
2505         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2506              upper_32_bits(addr));
2507     }
2508     soc21_grbm_select(adev, 0, 0, 0, 0);
2509     mutex_unlock(&adev->srbm_mutex);
2510 
2511     /* Trigger an invalidation of the MEC data cache */
2512     tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2513     tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2514     WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2515 
2516     /* Wait for invalidation complete */
2517     for (i = 0; i < usec_timeout; i++) {
2518         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2519         if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2520                        INVALIDATE_DCACHE_COMPLETE))
2521             break;
2522         udelay(1);
2523     }
2524 
2525     if (i >= usec_timeout) {
2526         dev_err(adev->dev, "failed to invalidate MEC data cache\n");
2527         return -EINVAL;
2528     }
2529 
2530     /* Trigger an invalidation of the L1 instruction caches */
2531     tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2532     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2533     WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2534 
2535     /* Wait for invalidation complete */
2536     for (i = 0; i < usec_timeout; i++) {
2537         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2538         if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2539                        INVALIDATE_CACHE_COMPLETE))
2540             break;
2541         udelay(1);
2542     }
2543 
2544     if (i >= usec_timeout) {
2545         dev_err(adev->dev, "failed to invalidate instruction cache\n");
2546         return -EINVAL;
2547     }
2548 
2549     return 0;
2550 }
2551 
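/*
 * Program the RS64 program counter start addresses for PFP, ME and MEC
 * from their firmware headers, pulsing the PFP/ME pipe resets so the new
 * start addresses take effect.
 */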
2552 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2553 {
2554     const struct gfx_firmware_header_v2_0 *pfp_hdr;
2555     const struct gfx_firmware_header_v2_0 *me_hdr;
2556     const struct gfx_firmware_header_v2_0 *mec_hdr;
2557     uint32_t pipe_id, tmp;
2558 
2559     mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2560         adev->gfx.mec_fw->data;
2561     me_hdr = (const struct gfx_firmware_header_v2_0 *)
2562         adev->gfx.me_fw->data;
2563     pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2564         adev->gfx.pfp_fw->data;
2565 
2566     /* config pfp program start addr */
2567     for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2568         soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2569         WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2570             (pfp_hdr->ucode_start_addr_hi << 30) |
2571             (pfp_hdr->ucode_start_addr_lo >> 2));
2572         WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2573             pfp_hdr->ucode_start_addr_hi >> 2);
2574     }
2575     soc21_grbm_select(adev, 0, 0, 0, 0);
2576 
2577     /* reset pfp pipe */
2578     tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2579     tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2580     tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2581     WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2582 
2583     /* clear pfp pipe reset */
2584     tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2585     tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2586     WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2587 
2588     /* config me program start addr */
2589     for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2590         soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2591         WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2592             (me_hdr->ucode_start_addr_hi << 30) |
2593             (me_hdr->ucode_start_addr_lo >> 2));
2594         WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2595             me_hdr->ucode_start_addr_hi >> 2);
2596     }
2597     soc21_grbm_select(adev, 0, 0, 0, 0);
2598 
2599     /* reset me pipe */
2600     tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2601     tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2602     tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2603     WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2604 
2605     /* clear me pipe reset */
2606     tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2607     tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2608     WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2609 
2610     /* config mec program start addr */
2611     for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2612         soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2613         WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2614                     mec_hdr->ucode_start_addr_lo >> 2 |
2615                     mec_hdr->ucode_start_addr_hi << 30);
2616         WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2617                     mec_hdr->ucode_start_addr_hi >> 2);
2618     }
2619     soc21_grbm_select(adev, 0, 0, 0, 0);
2620 }
2621 
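/*
 * Poll CP_STAT and the RLC bootload status until the autoloaded GC ucode
 * reports completion, then (for backdoor autoload) point the ME/PFP/MEC
 * instruction and data caches at their images in the autoload buffer.
 */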
2622 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2623 {
2624     uint32_t cp_status;
2625     uint32_t bootload_status;
2626     int i, r;
2627     uint64_t addr, addr2;
2628 
2629     for (i = 0; i < adev->usec_timeout; i++) {
2630         cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2631 
2632         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1))
2633             bootload_status = RREG32_SOC15(GC, 0,
2634                     regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2635         else
2636             bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2637 
2638         if ((cp_status == 0) &&
2639             (REG_GET_FIELD(bootload_status,
2640             RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2641             break;
2642         }
2643         udelay(1);
2644     }
2645 
2646     if (i >= adev->usec_timeout) {
2647         dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2648         return -ETIMEDOUT;
2649     }
2650 
2651     if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2652         if (adev->gfx.rs64_enable) {
2653             addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2654                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2655             addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2656                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2657             r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2658             if (r)
2659                 return r;
2660             addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2661                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2662             addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2663                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2664             r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2665             if (r)
2666                 return r;
2667             addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2668                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2669             addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2670                 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2671             r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2672             if (r)
2673                 return r;
2674         } else {
2675             addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2676                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2677             r = gfx_v11_0_config_me_cache(adev, addr);
2678             if (r)
2679                 return r;
2680             addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2681                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2682             r = gfx_v11_0_config_pfp_cache(adev, addr);
2683             if (r)
2684                 return r;
2685             addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2686                 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2687             r = gfx_v11_0_config_mec_cache(adev, addr);
2688             if (r)
2689                 return r;
2690         }
2691     }
2692 
2693     return 0;
2694 }
2695 
2696 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2697 {
2698     int i;
2699     u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2700 
2701     tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2702     tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2703     WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2704 
2705     for (i = 0; i < adev->usec_timeout; i++) {
2706         if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2707             break;
2708         udelay(1);
2709     }
2710 
2711     if (i >= adev->usec_timeout)
2712         DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2713 
2714     return 0;
2715 }
2716 
2717 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2718 {
2719     int r;
2720     const struct gfx_firmware_header_v1_0 *pfp_hdr;
2721     const __le32 *fw_data;
2722     unsigned i, fw_size;
2723 
2724     pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2725         adev->gfx.pfp_fw->data;
2726 
2727     amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2728 
2729     fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2730         le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2731     fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2732 
2733     r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2734                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2735                       &adev->gfx.pfp.pfp_fw_obj,
2736                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2737                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2738     if (r) {
2739         dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2740         gfx_v11_0_pfp_fini(adev);
2741         return r;
2742     }
2743 
2744     memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2745 
2746     amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2747     amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2748 
2749     gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2750 
2751     WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
2752 
2753     for (i = 0; i < pfp_hdr->jt_size; i++)
2754         WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
2755                  le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
2756 
2757     WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2758 
2759     return 0;
2760 }
2761 
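/*
 * RS64 PFP direct load: copy the instruction and data images into
 * dedicated 64KB-aligned VRAM BOs, then program the PFP instruction cache
 * base from the ucode BO and wait for the invalidation before priming it.
 */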
2762 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2763 {
2764     int r;
2765     const struct gfx_firmware_header_v2_0 *pfp_hdr;
2766     const __le32 *fw_ucode, *fw_data;
2767     unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2768     uint32_t tmp;
2769     uint32_t usec_timeout = 50000;  /* wait for 50ms */
2770 
2771     pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2772         adev->gfx.pfp_fw->data;
2773 
2774     amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2775 
2776     /* instruction */
2777     fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2778         le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2779     fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2780     /* data */
2781     fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2782         le32_to_cpu(pfp_hdr->data_offset_bytes));
2783     fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2784 
2785     /* 64kb align */
2786     r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2787                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2788                       &adev->gfx.pfp.pfp_fw_obj,
2789                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2790                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2791     if (r) {
2792         dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2793         gfx_v11_0_pfp_fini(adev);
2794         return r;
2795     }
2796 
2797     r = amdgpu_bo_create_reserved(adev, fw_data_size,
2798                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
2799                       &adev->gfx.pfp.pfp_fw_data_obj,
2800                       &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2801                       (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2802     if (r) {
2803         dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2804         gfx_v11_0_pfp_fini(adev);
2805         return r;
2806     }
2807 
2808     memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2809     memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2810 
2811     amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2812     amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2813     amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2814     amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2815 
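         /*
          * Flush HDP when running on the emulator, likely to make sure the
          * firmware just copied into the BOs is visible to the GPU.
          */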
2816     if (amdgpu_emu_mode == 1)
2817         adev->hdp.funcs->flush_hdp(adev, NULL);
2818 
2819     WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2820         lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2821     WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2822         upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2823 
2824     tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2825     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2826     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2827     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2828     WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2829 
2830     /*
2831      * Programming any of the CP_PFP_IC_BASE registers
2832      * forces invalidation of the PFP L1 I$. Wait for the
2833      * invalidation to complete.
2834      */
2835     for (i = 0; i < usec_timeout; i++) {
2836         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2837         if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2838             INVALIDATE_CACHE_COMPLETE))
2839             break;
2840         udelay(1);
2841     }
2842 
2843     if (i >= usec_timeout) {
2844         dev_err(adev->dev, "failed to invalidate instruction cache\n");
2845         return -EINVAL;
2846     }
2847 
2848     /* Prime the L1 instruction caches */
2849     tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2850     tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2851     WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2852     /* Wait for the cache to be primed */
2853     for (i = 0; i < usec_timeout; i++) {
2854         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2855         if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2856             ICACHE_PRIMED))
2857             break;
2858         udelay(1);
2859     }
2860 
2861     if (i >= usec_timeout) {
2862         dev_err(adev->dev, "failed to prime instruction cache\n");
2863         return -EINVAL;
2864     }
2865 
2866     mutex_lock(&adev->srbm_mutex);
2867     for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2868         soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2869         WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2870             (pfp_hdr->ucode_start_addr_hi << 30) |
2871             (pfp_hdr->ucode_start_addr_lo >> 2) );
2872         WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2873             pfp_hdr->ucode_start_addr_hi>>2);
2874 
2875         /*
2876          * Program CP_ME_CNTL to reset the given PIPE so that
2877          * CP_PFP_PRGRM_CNTR_START takes effect.
2878          */
2879         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2880         if (pipe_id == 0)
2881             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2882                     PFP_PIPE0_RESET, 1);
2883         else
2884             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2885                     PFP_PIPE1_RESET, 1);
2886         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2887 
2888         /* Clear the pfp pipe reset bit again. */
2889         if (pipe_id == 0)
2890             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2891                     PFP_PIPE0_RESET, 0);
2892         else
2893             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2894                     PFP_PIPE1_RESET, 0);
2895         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2896 
2897         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2898             lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2899         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2900             upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
2901     }
2902     soc21_grbm_select(adev, 0, 0, 0, 0);
2903     mutex_unlock(&adev->srbm_mutex);
2904 
2905     tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2906     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2907     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2908     WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2909 
2910     /* Invalidate the data caches */
2911     tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2912     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2913     WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2914 
2915     for (i = 0; i < usec_timeout; i++) {
2916         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2917         if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2918             INVALIDATE_DCACHE_COMPLETE))
2919             break;
2920         udelay(1);
2921     }
2922 
2923     if (i >= usec_timeout) {
2924         dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2925         return -EINVAL;
2926     }
2927 
2928     return 0;
2929 }
2930 
2931 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
2932 {
2933     int r;
2934     const struct gfx_firmware_header_v1_0 *me_hdr;
2935     const __le32 *fw_data;
2936     unsigned i, fw_size;
2937 
2938     me_hdr = (const struct gfx_firmware_header_v1_0 *)
2939         adev->gfx.me_fw->data;
2940 
2941     amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2942 
2943     fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2944         le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2945     fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
2946 
2947     r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
2948                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2949                       &adev->gfx.me.me_fw_obj,
2950                       &adev->gfx.me.me_fw_gpu_addr,
2951                       (void **)&adev->gfx.me.me_fw_ptr);
2952     if (r) {
2953         dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2954         gfx_v11_0_me_fini(adev);
2955         return r;
2956     }
2957 
2958     memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
2959 
2960     amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2961     amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2962 
2963     gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
2964 
2965     WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
2966 
2967     for (i = 0; i < me_hdr->jt_size; i++)
2968         WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
2969                  le32_to_cpup(fw_data + me_hdr->jt_offset + i));
2970 
2971     WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
2972 
2973     return 0;
2974 }
2975 
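     /*
      * RS64 ME load: same sequence as the RS64 PFP path above, but using the
      * CP_ME_IC_* registers, the ME pipe reset bits in CP_ME_CNTL and
      * CP_GFX_RS64_DC_BASE1_* for the ME data section.
      */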
2976 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
2977 {
2978     int r;
2979     const struct gfx_firmware_header_v2_0 *me_hdr;
2980     const __le32 *fw_ucode, *fw_data;
2981     unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2982     uint32_t tmp;
2983     uint32_t usec_timeout = 50000;  /* wait for 50ms */
2984 
2985     me_hdr = (const struct gfx_firmware_header_v2_0 *)
2986         adev->gfx.me_fw->data;
2987 
2988     amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2989 
2990     /* instruction */
2991     fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
2992         le32_to_cpu(me_hdr->ucode_offset_bytes));
2993     fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
2994     /* data */
2995     fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2996         le32_to_cpu(me_hdr->data_offset_bytes));
2997     fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
2998 
2999     /* 64kb align*/
3000     r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3001                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3002                       &adev->gfx.me.me_fw_obj,
3003                       &adev->gfx.me.me_fw_gpu_addr,
3004                       (void **)&adev->gfx.me.me_fw_ptr);
3005     if (r) {
3006         dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
3007         gfx_v11_0_me_fini(adev);
3008         return r;
3009     }
3010 
3011     r = amdgpu_bo_create_reserved(adev, fw_data_size,
3012                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3013                       &adev->gfx.me.me_fw_data_obj,
3014                       &adev->gfx.me.me_fw_data_gpu_addr,
3015                       (void **)&adev->gfx.me.me_fw_data_ptr);
3016     if (r) {
3017         dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
3018         gfx_v11_0_me_fini(adev);
3019         return r;
3020     }
3021 
3022     memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
3023     memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
3024 
3025     amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3026     amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
3027     amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3028     amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
3029 
3030     if (amdgpu_emu_mode == 1)
3031         adev->hdp.funcs->flush_hdp(adev, NULL);
3032 
3033     WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
3034         lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
3035     WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
3036         upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
3037 
3038     tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
3039     tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
3040     tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
3041     tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
3042     WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
3043 
3044     /*
3045      * Programming any of the CP_ME_IC_BASE registers
3046      * forces invalidation of the ME L1 I$. Wait for the
3047      * invalidation to complete.
3048      */
3049     for (i = 0; i < usec_timeout; i++) {
3050         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3051         if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3052             INVALIDATE_CACHE_COMPLETE))
3053             break;
3054         udelay(1);
3055     }
3056 
3057     if (i >= usec_timeout) {
3058         dev_err(adev->dev, "failed to invalidate instruction cache\n");
3059         return -EINVAL;
3060     }
3061 
3062     /* Prime the instruction caches */
3063     tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3064     tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
3065     WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
3066 
3067     /* Wait for the instruction cache to be primed */
3068     for (i = 0; i < usec_timeout; i++) {
3069         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3070         if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3071             ICACHE_PRIMED))
3072             break;
3073         udelay(1);
3074     }
3075 
3076     if (i >= usec_timeout) {
3077         dev_err(adev->dev, "failed to prime instruction cache\n");
3078         return -EINVAL;
3079     }
3080 
3081     mutex_lock(&adev->srbm_mutex);
3082     for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3083         soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3084         WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3085             (me_hdr->ucode_start_addr_hi << 30) |
3086             (me_hdr->ucode_start_addr_lo >> 2) );
3087         WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3088             me_hdr->ucode_start_addr_hi>>2);
3089 
3090         /*
3091          * Program CP_ME_CNTL to reset the given PIPE so that
3092          * CP_ME_PRGRM_CNTR_START takes effect.
3093          */
3094         tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3095         if (pipe_id == 0)
3096             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3097                     ME_PIPE0_RESET, 1);
3098         else
3099             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3100                     ME_PIPE1_RESET, 1);
3101         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3102 
3103         /* Clear the me pipe reset bit again. */
3104         if (pipe_id == 0)
3105             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3106                     ME_PIPE0_RESET, 0);
3107         else
3108             tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3109                     ME_PIPE1_RESET, 0);
3110         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3111 
3112         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
3113             lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3114         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
3115             upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3116     }
3117     soc21_grbm_select(adev, 0, 0, 0, 0);
3118     mutex_unlock(&adev->srbm_mutex);
3119 
3120     tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3121     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3122     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3123     WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3124 
3125     /* Invalidate the data caches */
3126     tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3127     tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3128     WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3129 
3130     for (i = 0; i < usec_timeout; i++) {
3131         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3132         if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3133             INVALIDATE_DCACHE_COMPLETE))
3134             break;
3135         udelay(1);
3136     }
3137 
3138     if (i >= usec_timeout) {
3139         dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3140         return -EINVAL;
3141     }
3142 
3143     return 0;
3144 }
3145 
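     /*
      * Direct (non-PSP) gfx firmware load: halt the CP, then pick the RS64 or
      * legacy loader for the PFP and ME depending on adev->gfx.rs64_enable.
      */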
3146 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3147 {
3148     int r;
3149 
3150     if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3151         return -EINVAL;
3152 
3153     gfx_v11_0_cp_gfx_enable(adev, false);
3154 
3155     if (adev->gfx.rs64_enable)
3156         r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3157     else
3158         r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3159     if (r) {
3160         dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3161         return r;
3162     }
3163 
3164     if (adev->gfx.rs64_enable)
3165         r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3166     else
3167         r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3168     if (r) {
3169         dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3170         return r;
3171     }
3172 
3173     return 0;
3174 }
3175 
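     /*
      * Initialize the CP state on gfx ring 0: emit the clear-state preamble
      * built from gfx11_cs_data (SECT_CONTEXT register extents), set
      * PA_SC_TILE_STEERING_OVERRIDE, and finish with a CLEAR_STATE packet.
      * If a second gfx ring exists, a bare CLEAR_STATE is submitted on it as
      * well.
      */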
3176 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3177 {
3178     struct amdgpu_ring *ring;
3179     const struct cs_section_def *sect = NULL;
3180     const struct cs_extent_def *ext = NULL;
3181     int r, i;
3182     int ctx_reg_offset;
3183 
3184     /* init the CP */
3185     WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3186              adev->gfx.config.max_hw_contexts - 1);
3187     WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3188 
3189     if (!amdgpu_async_gfx_ring)
3190         gfx_v11_0_cp_gfx_enable(adev, true);
3191 
3192     ring = &adev->gfx.gfx_ring[0];
3193     r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3194     if (r) {
3195         DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3196         return r;
3197     }
3198 
3199     amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3200     amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3201 
3202     amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3203     amdgpu_ring_write(ring, 0x80000000);
3204     amdgpu_ring_write(ring, 0x80000000);
3205 
3206     for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3207         for (ext = sect->section; ext->extent != NULL; ++ext) {
3208             if (sect->id == SECT_CONTEXT) {
3209                 amdgpu_ring_write(ring,
3210                           PACKET3(PACKET3_SET_CONTEXT_REG,
3211                               ext->reg_count));
3212                 amdgpu_ring_write(ring, ext->reg_index -
3213                           PACKET3_SET_CONTEXT_REG_START);
3214                 for (i = 0; i < ext->reg_count; i++)
3215                     amdgpu_ring_write(ring, ext->extent[i]);
3216             }
3217         }
3218     }
3219 
3220     ctx_reg_offset =
3221         SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3222     amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3223     amdgpu_ring_write(ring, ctx_reg_offset);
3224     amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3225 
3226     amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3227     amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3228 
3229     amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3230     amdgpu_ring_write(ring, 0);
3231 
3232     amdgpu_ring_commit(ring);
3233 
3234     /* submit cs packet to copy state 0 to next available state */
3235     if (adev->gfx.num_gfx_rings > 1) {
3236         /* maximum supported gfx ring is 2 */
3237         ring = &adev->gfx.gfx_ring[1];
3238         r = amdgpu_ring_alloc(ring, 2);
3239         if (r) {
3240             DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3241             return r;
3242         }
3243 
3244         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3245         amdgpu_ring_write(ring, 0);
3246 
3247         amdgpu_ring_commit(ring);
3248     }
3249     return 0;
3250 }
3251 
3252 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3253                      CP_PIPE_ID pipe)
3254 {
3255     u32 tmp;
3256 
3257     tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3258     tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3259 
3260     WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3261 }
3262 
3263 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3264                       struct amdgpu_ring *ring)
3265 {
3266     u32 tmp;
3267 
3268     tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3269     if (ring->use_doorbell) {
3270         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3271                     DOORBELL_OFFSET, ring->doorbell_index);
3272         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3273                     DOORBELL_EN, 1);
3274     } else {
3275         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3276                     DOORBELL_EN, 0);
3277     }
3278     WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3279 
3280     tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3281                 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3282     WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3283 
3284     WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3285              CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3286 }
3287 
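     /*
      * Program the CP ring buffer registers for the hardware gfx ring(s):
      * buffer/block size, read and write pointers, rptr write-back and wptr
      * poll addresses, ring base and doorbell, then call
      * gfx_v11_0_cp_gfx_start().  RB_BUFSZ is derived from the ring size as
      * order_base_2(ring_size / 8), e.g. a 4KB ring gives
      * order_base_2(512) = 9.
      */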
3288 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3289 {
3290     struct amdgpu_ring *ring;
3291     u32 tmp;
3292     u32 rb_bufsz;
3293     u64 rb_addr, rptr_addr, wptr_gpu_addr;
3294     u32 i;
3295 
3296     /* Set the write pointer delay */
3297     WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3298 
3299     /* set the RB to use vmid 0 */
3300     WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3301 
3302     /* Init gfx ring 0 for pipe 0 */
3303     mutex_lock(&adev->srbm_mutex);
3304     gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3305 
3306     /* Set ring buffer size */
3307     ring = &adev->gfx.gfx_ring[0];
3308     rb_bufsz = order_base_2(ring->ring_size / 8);
3309     tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3310     tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3311     WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3312 
3313     /* Initialize the ring buffer's write pointers */
3314     ring->wptr = 0;
3315     WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3316     WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3317 
3318     /* set the wb address whether it's enabled or not */
3319     rptr_addr = ring->rptr_gpu_addr;
3320     WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3321     WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3322              CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3323 
3324     wptr_gpu_addr = ring->wptr_gpu_addr;
3325     WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3326              lower_32_bits(wptr_gpu_addr));
3327     WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3328              upper_32_bits(wptr_gpu_addr));
3329 
3330     mdelay(1);
3331     WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3332 
3333     rb_addr = ring->gpu_addr >> 8;
3334     WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3335     WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3336 
3337     WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3338 
3339     gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3340     mutex_unlock(&adev->srbm_mutex);
3341 
3342     /* Init gfx ring 1 for pipe 1 */
3343     if (adev->gfx.num_gfx_rings > 1) {
3344         mutex_lock(&adev->srbm_mutex);
3345         gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3346         /* maximum supported gfx ring is 2 */
3347         ring = &adev->gfx.gfx_ring[1];
3348         rb_bufsz = order_base_2(ring->ring_size / 8);
3349         tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3350         tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3351         WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3352         /* Initialize the ring buffer's write pointers */
3353         ring->wptr = 0;
3354         WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3355         WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3356         /* Set the wb address whether it's enabled or not */
3357         rptr_addr = ring->rptr_gpu_addr;
3358         WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3359         WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3360                  CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3361         wptr_gpu_addr = ring->wptr_gpu_addr;
3362         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3363                  lower_32_bits(wptr_gpu_addr));
3364         WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3365                  upper_32_bits(wptr_gpu_addr));
3366 
3367         mdelay(1);
3368         WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3369 
3370         rb_addr = ring->gpu_addr >> 8;
3371         WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3372         WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3373         WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3374 
3375         gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3376         mutex_unlock(&adev->srbm_mutex);
3377     }
3378     /* Switch to pipe 0 */
3379     mutex_lock(&adev->srbm_mutex);
3380     gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3381     mutex_unlock(&adev->srbm_mutex);
3382 
3383     /* start the ring */
3384     gfx_v11_0_cp_gfx_start(adev);
3385 
3386     for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3387         ring = &adev->gfx.gfx_ring[i];
3388         ring->sched.ready = true;
3389     }
3390 
3391     return 0;
3392 }
3393 
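     /*
      * Halt or release the compute MECs.  With RS64 firmware the per-pipe
      * reset/active bits and MEC_HALT all live in CP_MEC_RS64_CNTL; with
      * legacy firmware only the ME1/ME2 halt bits in CP_MEC_CNTL are toggled,
      * and ME2 is only un-halted when the MES KIQ is not in use.
      */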
3394 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3395 {
3396     u32 data;
3397 
3398     if (adev->gfx.rs64_enable) {
3399         data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3400         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3401                              enable ? 0 : 1);
3402         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3403                              enable ? 0 : 1);
3404         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3405                              enable ? 0 : 1);
3406         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3407                              enable ? 0 : 1);
3408         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3409                              enable ? 0 : 1);
3410         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3411                              enable ? 1 : 0);
3412         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3413                                          enable ? 1 : 0);
3414         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3415                              enable ? 1 : 0);
3416         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3417                              enable ? 1 : 0);
3418         data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3419                              enable ? 0 : 1);
3420         WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3421     } else {
3422         data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3423 
3424         if (enable) {
3425             data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3426             if (!adev->enable_mes_kiq)
3427                 data = REG_SET_FIELD(data, CP_MEC_CNTL,
3428                              MEC_ME2_HALT, 0);
3429         } else {
3430             data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3431             data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3432         }
3433         WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3434     }
3435 
3436     adev->gfx.kiq.ring.sched.ready = enable;
3437 
3438     udelay(50);
3439 }
3440 
3441 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3442 {
3443     const struct gfx_firmware_header_v1_0 *mec_hdr;
3444     const __le32 *fw_data;
3445     unsigned i, fw_size;
3446     u32 *fw = NULL;
3447     int r;
3448 
3449     if (!adev->gfx.mec_fw)
3450         return -EINVAL;
3451 
3452     gfx_v11_0_cp_compute_enable(adev, false);
3453 
3454     mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3455     amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3456 
3457     fw_data = (const __le32 *)
3458         (adev->gfx.mec_fw->data +
3459          le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3460     fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3461 
3462     r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3463                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3464                       &adev->gfx.mec.mec_fw_obj,
3465                       &adev->gfx.mec.mec_fw_gpu_addr,
3466                       (void **)&fw);
3467     if (r) {
3468         dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3469         gfx_v11_0_mec_fini(adev);
3470         return r;
3471     }
3472 
3473     memcpy(fw, fw_data, fw_size);
3474
3475     amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3476     amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3477 
3478     gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3479 
3480     /* MEC1 */
3481     WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3482 
3483     for (i = 0; i < mec_hdr->jt_size; i++)
3484         WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3485                  le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3486 
3487     WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3488 
3489     return 0;
3490 }
3491 
3492 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3493 {
3494     const struct gfx_firmware_header_v2_0 *mec_hdr;
3495     const __le32 *fw_ucode, *fw_data;
3496     u32 tmp, fw_ucode_size, fw_data_size;
3497     u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3498     u32 *fw_ucode_ptr, *fw_data_ptr;
3499     int r;
3500 
3501     if (!adev->gfx.mec_fw)
3502         return -EINVAL;
3503 
3504     gfx_v11_0_cp_compute_enable(adev, false);
3505 
3506     mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3507     amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3508 
3509     fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3510                 le32_to_cpu(mec_hdr->ucode_offset_bytes));
3511     fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3512 
3513     fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3514                 le32_to_cpu(mec_hdr->data_offset_bytes));
3515     fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3516 
3517     r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3518                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3519                       &adev->gfx.mec.mec_fw_obj,
3520                       &adev->gfx.mec.mec_fw_gpu_addr,
3521                       (void **)&fw_ucode_ptr);
3522     if (r) {
3523         dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3524         gfx_v11_0_mec_fini(adev);
3525         return r;
3526     }
3527 
3528     r = amdgpu_bo_create_reserved(adev, fw_data_size,
3529                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
3530                       &adev->gfx.mec.mec_fw_data_obj,
3531                       &adev->gfx.mec.mec_fw_data_gpu_addr,
3532                       (void **)&fw_data_ptr);
3533     if (r) {
3534         dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3535         gfx_v11_0_mec_fini(adev);
3536         return r;
3537     }
3538 
3539     memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3540     memcpy(fw_data_ptr, fw_data, fw_data_size);
3541 
3542     amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3543     amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3544     amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3545     amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3546 
3547     tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3548     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3549     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3550     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3551     WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3552 
3553     tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3554     tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3555     tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3556     WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3557 
3558     mutex_lock(&adev->srbm_mutex);
3559     for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3560         soc21_grbm_select(adev, 1, i, 0, 0);
3561 
3562         WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3563         WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3564              upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3565 
3566         WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3567                     mec_hdr->ucode_start_addr_lo >> 2 |
3568                     mec_hdr->ucode_start_addr_hi << 30);
3569         WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3570                     mec_hdr->ucode_start_addr_hi >> 2);
3571 
3572         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3573         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3574              upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3575     }
3576     soc21_grbm_select(adev, 0, 0, 0, 0);
3577     mutex_unlock(&adev->srbm_mutex);
3578 
3579     /* Invalidate the data caches */
3580     tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3581     tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3582     WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3583 
3584     /* Wait for invalidation complete */
3585     for (i = 0; i < usec_timeout; i++) {
3586         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3587         if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3588                        INVALIDATE_DCACHE_COMPLETE))
3589             break;
3590         udelay(1);
3591     }
3592 
3593     if (i >= usec_timeout) {
3594         dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3595         return -EINVAL;
3596     }
3597 
3598     /* Trigger an invalidation of the L1 instruction caches */
3599     tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3600     tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3601     WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3602 
3603     /* Wait for invalidation complete */
3604     for (i = 0; i < usec_timeout; i++) {
3605         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3606         if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3607                        INVALIDATE_CACHE_COMPLETE))
3608             break;
3609         udelay(1);
3610     }
3611 
3612     if (i >= usec_timeout) {
3613         dev_err(adev->dev, "failed to invalidate instruction cache\n");
3614         return -EINVAL;
3615     }
3616 
3617     return 0;
3618 }
3619 
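     /*
      * Tell the RLC which me/pipe/queue is the KIQ by packing them into the
      * low byte of RLC_CP_SCHEDULERS, then set bit 7 in a second write
      * (presumably the valid/enable bit for that scheduler entry).
      */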
3620 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3621 {
3622     uint32_t tmp;
3623     struct amdgpu_device *adev = ring->adev;
3624 
3625     /* tell RLC which is KIQ queue */
3626     tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3627     tmp &= 0xffffff00;
3628     tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3629     WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3630     tmp |= 0x80;
3631     WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3632 }
3633 
3634 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3635 {
3636     /* set graphics engine doorbell range */
3637     WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3638              (adev->doorbell_index.gfx_ring0 * 2) << 2);
3639     WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3640              (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3641 
3642     /* set compute engine doorbell range */
3643     WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3644              (adev->doorbell_index.kiq * 2) << 2);
3645     WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3646              (adev->doorbell_index.userqueue_end * 2) << 2);
3647 }
3648 
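     /*
      * Fill a v11 gfx MQD from the generic amdgpu_mqd_prop: MQD and ring base
      * addresses, rptr/wptr write-back addresses, CP_GFX_HQD_CNTL sizing and
      * doorbell control.  These values mirror the CP_RB0_* programming done
      * in gfx_v11_0_cp_gfx_resume() and are consumed when the queue is later
      * mapped (e.g. through the KIQ).
      */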
3649 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3650                   struct amdgpu_mqd_prop *prop)
3651 {
3652     struct v11_gfx_mqd *mqd = m;
3653     uint64_t hqd_gpu_addr, wb_gpu_addr;
3654     uint32_t tmp;
3655     uint32_t rb_bufsz;
3656 
3657     /* set up gfx hqd wptr */
3658     mqd->cp_gfx_hqd_wptr = 0;
3659     mqd->cp_gfx_hqd_wptr_hi = 0;
3660 
3661     /* set the pointer to the MQD */
3662     mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3663     mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3664 
3665     /* set up mqd control */
3666     tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3667     tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3668     tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3669     tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3670     mqd->cp_gfx_mqd_control = tmp;
3671 
3672     /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3673     tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3674     tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3675     mqd->cp_gfx_hqd_vmid = 0;
3676 
3677     /* set up default queue priority level
3678      * 0x0 = low priority, 0x1 = high priority */
3679     tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3680     tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
3681     mqd->cp_gfx_hqd_queue_priority = tmp;
3682 
3683     /* set up time quantum */
3684     tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3685     tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
3686     mqd->cp_gfx_hqd_quantum = tmp;
3687 
3688     /* set up gfx hqd base. this is similar to CP_RB_BASE */
3689     hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3690     mqd->cp_gfx_hqd_base = hqd_gpu_addr;
3691     mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
3692 
3693     /* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
3694     wb_gpu_addr = prop->rptr_gpu_addr;
3695     mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
3696     mqd->cp_gfx_hqd_rptr_addr_hi =
3697         upper_32_bits(wb_gpu_addr) & 0xffff;
3698 
3699     /* set up rb_wptr_poll addr */
3700     wb_gpu_addr = prop->wptr_gpu_addr;
3701     mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3702     mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3703 
3704     /* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
3705     rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
3706     tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
3707     tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
3708     tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
3709 #ifdef __BIG_ENDIAN
3710     tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3711 #endif
3712     mqd->cp_gfx_hqd_cntl = tmp;
3713 
3714     /* set up cp_doorbell_control */
3715     tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3716     if (prop->use_doorbell) {
3717         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3718                     DOORBELL_OFFSET, prop->doorbell_index);
3719         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3720                     DOORBELL_EN, 1);
3721     } else
3722         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3723                     DOORBELL_EN, 0);
3724     mqd->cp_rb_doorbell_control = tmp;
3725 
3726     /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3727     mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
3728 
3729     /* activate the queue */
3730     mqd->cp_gfx_hqd_active = 1;
3731 
3732     return 0;
3733 }
3734 
3735 #ifdef BRING_UP_DEBUG
3736 static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
3737 {
3738     struct amdgpu_device *adev = ring->adev;
3739     struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3740 
3741     /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
3742     WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
3743     WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
3744 
3745     /* set GFX_MQD_BASE */
3746     WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
3747     WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3748 
3749     /* set GFX_MQD_CONTROL */
3750     WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
3751 
3752     /* set GFX_HQD_VMID to 0 */
3753     WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
3754 
3755     WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
3756             mqd->cp_gfx_hqd_queue_priority);
3757     WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
3758 
3759     /* set GFX_HQD_BASE, similar as CP_RB_BASE */
3760     WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
3761     WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
3762 
3763     /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
3764     WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
3765     WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
3766 
3767     /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
3768     WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
3769 
3770     /* set RB_WPTR_POLL_ADDR */
3771     WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
3772     WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
3773 
3774     /* set RB_DOORBELL_CONTROL */
3775     WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
3776 
3777     /* activate the queue */
3778     WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
3779 
3780     return 0;
3781 }
3782 #endif
3783 
3784 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3785 {
3786     struct amdgpu_device *adev = ring->adev;
3787     struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3788     int mqd_idx = ring - &adev->gfx.gfx_ring[0];
3789 
3790     if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3791         memset((void *)mqd, 0, sizeof(*mqd));
3792         mutex_lock(&adev->srbm_mutex);
3793         soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3794         amdgpu_ring_init_mqd(ring);
3795 #ifdef BRING_UP_DEBUG
3796         gfx_v11_0_gfx_queue_init_register(ring);
3797 #endif
3798         soc21_grbm_select(adev, 0, 0, 0, 0);
3799         mutex_unlock(&adev->srbm_mutex);
3800         if (adev->gfx.me.mqd_backup[mqd_idx])
3801             memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3802     } else if (amdgpu_in_reset(adev)) {
3803         /* reset mqd with the backup copy */
3804         if (adev->gfx.me.mqd_backup[mqd_idx])
3805             memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
3806         /* reset the ring */
3807         ring->wptr = 0;
3808         *ring->wptr_cpu_addr = 0;
3809         amdgpu_ring_clear_ring(ring);
3810 #ifdef BRING_UP_DEBUG
3811         mutex_lock(&adev->srbm_mutex);
3812         soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3813         gfx_v11_0_gfx_queue_init_register(ring);
3814         soc21_grbm_select(adev, 0, 0, 0, 0);
3815         mutex_unlock(&adev->srbm_mutex);
3816 #endif
3817     } else {
3818         amdgpu_ring_clear_ring(ring);
3819     }
3820 
3821     return 0;
3822 }
3823 
3824 #ifndef BRING_UP_DEBUG
3825 static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev)
3826 {
3827     struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3828     struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3829     int r, i;
3830 
3831     if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
3832         return -EINVAL;
3833 
3834     r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
3835                     adev->gfx.num_gfx_rings);
3836     if (r) {
3837         DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3838         return r;
3839     }
3840 
3841     for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3842         kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
3843 
3844     return amdgpu_ring_test_helper(kiq_ring);
3845 }
3846 #endif
3847 
3848 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3849 {
3850     int r, i;
3851     struct amdgpu_ring *ring;
3852 
3853     for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3854         ring = &adev->gfx.gfx_ring[i];
3855 
3856         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3857         if (unlikely(r != 0))
3858             goto done;
3859 
3860         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3861         if (!r) {
3862             r = gfx_v11_0_gfx_init_queue(ring);
3863             amdgpu_bo_kunmap(ring->mqd_obj);
3864             ring->mqd_ptr = NULL;
3865         }
3866         amdgpu_bo_unreserve(ring->mqd_obj);
3867         if (r)
3868             goto done;
3869     }
3870 #ifndef BRING_UP_DEBUG
3871     r = gfx_v11_0_kiq_enable_kgq(adev);
3872     if (r)
3873         goto done;
3874 #endif
3875     r = gfx_v11_0_cp_gfx_start(adev);
3876     if (r)
3877         goto done;
3878 
3879     for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3880         ring = &adev->gfx.gfx_ring[i];
3881         ring->sched.ready = true;
3882     }
3883 done:
3884     return r;
3885 }
3886 
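     /*
      * Fill a v11 compute MQD.  Note the EOP size encoding: the register
      * holds 2^(EOP_SIZE+1) dwords, so with GFX11_MEC_HPD_SIZE of 2048 bytes
      * the field is order_base_2(2048 / 4) - 1 = 8, i.e. 512 dwords.
      */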
3887 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
3888                       struct amdgpu_mqd_prop *prop)
3889 {
3890     struct v11_compute_mqd *mqd = m;
3891     uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3892     uint32_t tmp;
3893 
3894     mqd->header = 0xC0310800;
3895     mqd->compute_pipelinestat_enable = 0x00000001;
3896     mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3897     mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3898     mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3899     mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3900     mqd->compute_misc_reserved = 0x00000007;
3901 
3902     eop_base_addr = prop->eop_gpu_addr >> 8;
3903     mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3904     mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3905 
3906     /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3907     tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3908     tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3909             (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
3910 
3911     mqd->cp_hqd_eop_control = tmp;
3912 
3913     /* enable doorbell? */
3914     tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3915 
3916     if (prop->use_doorbell) {
3917         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3918                     DOORBELL_OFFSET, prop->doorbell_index);
3919         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3920                     DOORBELL_EN, 1);
3921         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3922                     DOORBELL_SOURCE, 0);
3923         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3924                     DOORBELL_HIT, 0);
3925     } else {
3926         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3927                     DOORBELL_EN, 0);
3928     }
3929 
3930     mqd->cp_hqd_pq_doorbell_control = tmp;
3931 
3932     /* disable the queue if it's active */
3933     mqd->cp_hqd_dequeue_request = 0;
3934     mqd->cp_hqd_pq_rptr = 0;
3935     mqd->cp_hqd_pq_wptr_lo = 0;
3936     mqd->cp_hqd_pq_wptr_hi = 0;
3937 
3938     /* set the pointer to the MQD */
3939     mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
3940     mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3941 
3942     /* set MQD vmid to 0 */
3943     tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
3944     tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3945     mqd->cp_mqd_control = tmp;
3946 
3947     /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3948     hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3949     mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3950     mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3951 
3952     /* set up the HQD, this is similar to CP_RB0_CNTL */
3953     tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
3954     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3955                 (order_base_2(prop->queue_size / 4) - 1));
3956     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3957                 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3958     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3959     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3960     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3961     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3962     mqd->cp_hqd_pq_control = tmp;
3963 
3964     /* set the wb address whether it's enabled or not */
3965     wb_gpu_addr = prop->rptr_gpu_addr;
3966     mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3967     mqd->cp_hqd_pq_rptr_report_addr_hi =
3968         upper_32_bits(wb_gpu_addr) & 0xffff;
3969 
3970     /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3971     wb_gpu_addr = prop->wptr_gpu_addr;
3972     mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3973     mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3974 
3975     tmp = 0;
3976     /* enable the doorbell if requested */
3977     if (prop->use_doorbell) {
3978         tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3979         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3980                 DOORBELL_OFFSET, prop->doorbell_index);
3981 
3982         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3983                     DOORBELL_EN, 1);
3984         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3985                     DOORBELL_SOURCE, 0);
3986         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3987                     DOORBELL_HIT, 0);
3988     }
3989 
3990     mqd->cp_hqd_pq_doorbell_control = tmp;
3991 
3992     /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3993     mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
3994 
3995     /* set the vmid for the queue */
3996     mqd->cp_hqd_vmid = 0;
3997 
3998     tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
3999     tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
4000     mqd->cp_hqd_persistent_state = tmp;
4001 
4002     /* set MIN_IB_AVAIL_SIZE */
4003     tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
4004     tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4005     mqd->cp_hqd_ib_control = tmp;
4006 
4007     /* set static priority for a compute queue/ring */
4008     mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
4009     mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
4010 
4011     mqd->cp_hqd_active = prop->hqd_active;
4012 
4013     return 0;
4014 }
4015 
4016 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
4017 {
4018     struct amdgpu_device *adev = ring->adev;
4019     struct v11_compute_mqd *mqd = ring->mqd_ptr;
4020     int j;
4021 
4022     /* deactivate the queue */
4023     if (amdgpu_sriov_vf(adev))
4024         WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
4025 
4026     /* disable wptr polling */
4027     WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4028 
4029     /* write the EOP addr */
4030     WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
4031            mqd->cp_hqd_eop_base_addr_lo);
4032     WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
4033            mqd->cp_hqd_eop_base_addr_hi);
4034 
4035     /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4036     WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
4037            mqd->cp_hqd_eop_control);
4038 
4039     /* enable doorbell? */
4040     WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4041            mqd->cp_hqd_pq_doorbell_control);
4042 
4043     /* disable the queue if it's active */
4044     if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
4045         WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
4046         for (j = 0; j < adev->usec_timeout; j++) {
4047             if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
4048                 break;
4049             udelay(1);
4050         }
4051         WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
4052                mqd->cp_hqd_dequeue_request);
4053         WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
4054                mqd->cp_hqd_pq_rptr);
4055         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4056                mqd->cp_hqd_pq_wptr_lo);
4057         WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4058                mqd->cp_hqd_pq_wptr_hi);
4059     }
4060 
4061     /* set the pointer to the MQD */
4062     WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
4063            mqd->cp_mqd_base_addr_lo);
4064     WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
4065            mqd->cp_mqd_base_addr_hi);
4066 
4067     /* set MQD vmid to 0 */
4068     WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
4069            mqd->cp_mqd_control);
4070 
4071     /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4072     WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
4073            mqd->cp_hqd_pq_base_lo);
4074     WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
4075            mqd->cp_hqd_pq_base_hi);
4076 
4077     /* set up the HQD, this is similar to CP_RB0_CNTL */
4078     WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
4079            mqd->cp_hqd_pq_control);
4080 
4081     /* set the wb address whether it's enabled or not */
4082     WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
4083         mqd->cp_hqd_pq_rptr_report_addr_lo);
4084     WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4085         mqd->cp_hqd_pq_rptr_report_addr_hi);
4086 
4087     /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4088     WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
4089            mqd->cp_hqd_pq_wptr_poll_addr_lo);
4090     WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4091            mqd->cp_hqd_pq_wptr_poll_addr_hi);
4092 
4093     /* enable the doorbell if requested */
4094     if (ring->use_doorbell) {
4095         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
4096             (adev->doorbell_index.kiq * 2) << 2);
4097         WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
4098             (adev->doorbell_index.userqueue_end * 2) << 2);
4099     }
4100 
4101     WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4102            mqd->cp_hqd_pq_doorbell_control);
4103 
4104     /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4105     WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4106            mqd->cp_hqd_pq_wptr_lo);
4107     WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4108            mqd->cp_hqd_pq_wptr_hi);
4109 
4110     /* set the vmid for the queue */
4111     WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
4112 
4113     WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
4114            mqd->cp_hqd_persistent_state);
4115 
4116     /* activate the queue */
4117     WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
4118            mqd->cp_hqd_active);
4119 
4120     if (ring->use_doorbell)
4121         WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4122 
4123     return 0;
4124 }
4125 
4126 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
4127 {
4128     struct amdgpu_device *adev = ring->adev;
4129     struct v11_compute_mqd *mqd = ring->mqd_ptr;
4130     int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4131 
4132     gfx_v11_0_kiq_setting(ring);
4133 
4134     if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4135         /* reset MQD to a clean status */
4136         if (adev->gfx.mec.mqd_backup[mqd_idx])
4137             memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4138 
4139         /* reset ring buffer */
4140         ring->wptr = 0;
4141         amdgpu_ring_clear_ring(ring);
4142 
4143         mutex_lock(&adev->srbm_mutex);
4144         soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4145         gfx_v11_0_kiq_init_register(ring);
4146         soc21_grbm_select(adev, 0, 0, 0, 0);
4147         mutex_unlock(&adev->srbm_mutex);
4148     } else {
4149         memset((void *)mqd, 0, sizeof(*mqd));
4150         mutex_lock(&adev->srbm_mutex);
4151         soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4152         amdgpu_ring_init_mqd(ring);
4153         gfx_v11_0_kiq_init_register(ring);
4154         soc21_grbm_select(adev, 0, 0, 0, 0);
4155         mutex_unlock(&adev->srbm_mutex);
4156 
4157         if (adev->gfx.mec.mqd_backup[mqd_idx])
4158             memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4159     }
4160 
4161     return 0;
4162 }
4163 
4164 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4165 {
4166     struct amdgpu_device *adev = ring->adev;
4167     struct v11_compute_mqd *mqd = ring->mqd_ptr;
4168     int mqd_idx = ring - &adev->gfx.compute_ring[0];
4169 
4170     if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4171         memset((void *)mqd, 0, sizeof(*mqd));
4172         mutex_lock(&adev->srbm_mutex);
4173         soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4174         amdgpu_ring_init_mqd(ring);
4175         soc21_grbm_select(adev, 0, 0, 0, 0);
4176         mutex_unlock(&adev->srbm_mutex);
4177 
4178         if (adev->gfx.mec.mqd_backup[mqd_idx])
4179             memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4180     } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4181         /* reset MQD to a clean status */
4182         if (adev->gfx.mec.mqd_backup[mqd_idx])
4183             memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4184 
4185         /* reset ring buffer */
4186         ring->wptr = 0;
4187         atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4188         amdgpu_ring_clear_ring(ring);
4189     } else {
4190         amdgpu_ring_clear_ring(ring);
4191     }
4192 
4193     return 0;
4194 }
4195 
4196 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4197 {
4198     struct amdgpu_ring *ring;
4199     int r;
4200 
4201     ring = &adev->gfx.kiq.ring;
4202 
4203     r = amdgpu_bo_reserve(ring->mqd_obj, false);
4204     if (unlikely(r != 0))
4205         return r;
4206 
4207     r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4208     if (unlikely(r != 0)) {
4209         amdgpu_bo_unreserve(ring->mqd_obj);
4210         return r;
4211     }
4212 
4213     gfx_v11_0_kiq_init_queue(ring);
4214     amdgpu_bo_kunmap(ring->mqd_obj);
4215     ring->mqd_ptr = NULL;
4216     amdgpu_bo_unreserve(ring->mqd_obj);
4217     ring->sched.ready = true;
4218     return 0;
4219 }
4220 
4221 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4222 {
4223     struct amdgpu_ring *ring = NULL;
4224     int r = 0, i;
4225 
4226     if (!amdgpu_async_gfx_ring)
4227         gfx_v11_0_cp_compute_enable(adev, true);
4228 
4229     for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4230         ring = &adev->gfx.compute_ring[i];
4231 
4232         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4233         if (unlikely(r != 0))
4234             goto done;
4235         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4236         if (!r) {
4237             r = gfx_v11_0_kcq_init_queue(ring);
4238             amdgpu_bo_kunmap(ring->mqd_obj);
4239             ring->mqd_ptr = NULL;
4240         }
4241         amdgpu_bo_unreserve(ring->mqd_obj);
4242         if (r)
4243             goto done;
4244     }
4245 
4246     r = amdgpu_gfx_enable_kcq(adev);
4247 done:
4248     return r;
4249 }
4250 
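/*
 * CP resume sequence, roughly: load the CP microcode through the legacy
 * (direct) path when PSP/RLC autoload is not used, program the doorbell
 * range, bring up the compute and gfx CP engines, resume the KIQ (or the
 * MES KIQ when enabled), map the compute queues, resume the gfx ring(s),
 * and finally run a ring test on every gfx and compute ring.
 */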
4251 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4252 {
4253     int r, i;
4254     struct amdgpu_ring *ring;
4255 
4256     if (!(adev->flags & AMD_IS_APU))
4257         gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4258 
4259     if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4260         /* legacy firmware loading */
4261         r = gfx_v11_0_cp_gfx_load_microcode(adev);
4262         if (r)
4263             return r;
4264 
4265         if (adev->gfx.rs64_enable)
4266             r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4267         else
4268             r = gfx_v11_0_cp_compute_load_microcode(adev);
4269         if (r)
4270             return r;
4271     }
4272 
4273     gfx_v11_0_cp_set_doorbell_range(adev);
4274 
4275     if (amdgpu_async_gfx_ring) {
4276         gfx_v11_0_cp_compute_enable(adev, true);
4277         gfx_v11_0_cp_gfx_enable(adev, true);
4278     }
4279 
4280     if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4281         r = amdgpu_mes_kiq_hw_init(adev);
4282     else
4283         r = gfx_v11_0_kiq_resume(adev);
4284     if (r)
4285         return r;
4286 
4287     r = gfx_v11_0_kcq_resume(adev);
4288     if (r)
4289         return r;
4290 
4291     if (!amdgpu_async_gfx_ring) {
4292         r = gfx_v11_0_cp_gfx_resume(adev);
4293         if (r)
4294             return r;
4295     } else {
4296         r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4297         if (r)
4298             return r;
4299     }
4300 
4301     for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4302         ring = &adev->gfx.gfx_ring[i];
4303         r = amdgpu_ring_test_helper(ring);
4304         if (r)
4305             return r;
4306     }
4307 
4308     for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4309         ring = &adev->gfx.compute_ring[i];
4310         r = amdgpu_ring_test_helper(ring);
4311         if (r)
4312             return r;
4313     }
4314 
4315     return 0;
4316 }
4317 
4318 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4319 {
4320     gfx_v11_0_cp_gfx_enable(adev, enable);
4321     gfx_v11_0_cp_compute_enable(adev, enable);
4322 }
4323 
4324 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4325 {
4326     int r;
4327     bool value;
4328 
4329     r = adev->gfxhub.funcs->gart_enable(adev);
4330     if (r)
4331         return r;
4332 
4333     adev->hdp.funcs->flush_hdp(adev, NULL);
4334 
4335     value = (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS);

4337 
4338     adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4339     amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
4340 
4341     return 0;
4342 }
4343 
4344 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4345 {
4346     u32 tmp;
4347 
4348     /* select RS64 */
4349     if (adev->gfx.rs64_enable) {
4350         tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4351         tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4352         WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4353 
4354         tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4355         tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4356         WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4357     }
4358 
4359     if (amdgpu_emu_mode == 1)
4360         msleep(100);
4361 }
4362 
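/*
 * The GB_ADDR_CONFIG decode below expands each field into a power of two.
 * With a hypothetical register value whose NUM_PIPES field reads 3 and
 * whose NUM_SHADER_ENGINES field reads 1 (example values only):
 *
 *   num_pipes = 1 << 3 = 8;
 *   num_se    = 1 << 1 = 2;
 *
 * PIPE_INTERLEAVE_SIZE is biased by 8, so a field value of 0 means
 * 1 << 8 = 256 bytes.
 */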
4363 static int get_gb_addr_config(struct amdgpu_device *adev)
4364 {
4365     u32 gb_addr_config;
4366 
4367     gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4368     if (gb_addr_config == 0)
4369         return -EINVAL;
4370 
4371     adev->gfx.config.gb_addr_config_fields.num_pkrs =
4372         1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4373 
4374     adev->gfx.config.gb_addr_config = gb_addr_config;
4375 
4376     adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4377             REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4378                       GB_ADDR_CONFIG, NUM_PIPES);
4379 
4380     adev->gfx.config.max_tile_pipes =
4381         adev->gfx.config.gb_addr_config_fields.num_pipes;
4382 
4383     adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4384             REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4385                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4386     adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4387             REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4388                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
4389     adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4390             REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4391                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4392     adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4393             REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4394                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4395 
4396     return 0;
4397 }
4398 
4399 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4400 {
4401     uint32_t data;
4402 
4403     data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4404     data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4405     WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4406 
4407     data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4408     data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4409     WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4410 }
4411 
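/*
 * hw_init below covers three firmware load types: RLC backdoor autoload
 * (program the RLC RAM via the IMU, then kick the autoload), direct
 * loading (optionally bring up the IMU and disable GPA mode before the
 * driver loads each ucode itself), and PSP loading; for both the autoload
 * and PSP paths the code then waits for the RLC autoload status to report
 * completion before touching the rest of the block.
 */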
4412 static int gfx_v11_0_hw_init(void *handle)
4413 {
4414     int r;
4415     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4416 
4417     if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4418         if (adev->gfx.imu.funcs) {
4419             /* RLC autoload sequence 1: Program rlc ram */
4420             if (adev->gfx.imu.funcs->program_rlc_ram)
4421                 adev->gfx.imu.funcs->program_rlc_ram(adev);
4422         }
4423         /* rlc autoload firmware */
4424         r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4425         if (r)
4426             return r;
4427     } else {
4428         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4429             if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4430                 if (adev->gfx.imu.funcs->load_microcode)
4431                     adev->gfx.imu.funcs->load_microcode(adev);
4432                 if (adev->gfx.imu.funcs->setup_imu)
4433                     adev->gfx.imu.funcs->setup_imu(adev);
4434                 if (adev->gfx.imu.funcs->start_imu)
4435                     adev->gfx.imu.funcs->start_imu(adev);
4436             }
4437 
4438             /* disable gpa mode in backdoor loading */
4439             gfx_v11_0_disable_gpa_mode(adev);
4440         }
4441     }
4442 
4443     if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4444         (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4445         r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4446         if (r) {
4447             dev_err(adev->dev, "(%d) failed to wait for rlc autoload to complete\n", r);
4448             return r;
4449         }
4450     }
4451 
4452     adev->gfx.is_poweron = true;
4453 
4454     if (get_gb_addr_config(adev))
4455         DRM_WARN("Invalid gb_addr_config!\n");
4456 
4457     if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4458         adev->gfx.rs64_enable)
4459         gfx_v11_0_config_gfx_rs64(adev);
4460 
4461     r = gfx_v11_0_gfxhub_enable(adev);
4462     if (r)
4463         return r;
4464 
4465     if (!amdgpu_emu_mode)
4466         gfx_v11_0_init_golden_registers(adev);
4467 
4468     if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4469         (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4470         /*
4471          * For gfx 11, RLC firmware loading relies on the SMU firmware
4472          * being loaded first, so for the direct load type the SMC ucode
4473          * has to be loaded here before the RLC.
4474          */
4475         if (!(adev->flags & AMD_IS_APU)) {
4476             r = amdgpu_pm_load_smu_firmware(adev, NULL);
4477             if (r)
4478                 return r;
4479         }
4480     }
4481 
4482     gfx_v11_0_constants_init(adev);
4483 
4484     if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4485         gfx_v11_0_select_cp_fw_arch(adev);
4486 
4487     if (adev->nbio.funcs->gc_doorbell_init)
4488         adev->nbio.funcs->gc_doorbell_init(adev);
4489 
4490     r = gfx_v11_0_rlc_resume(adev);
4491     if (r)
4492         return r;
4493 
4494     /*
4495      * golden register init and rlc resume may override some registers,
4496      * so reconfigure them here
4497      */
4498     gfx_v11_0_tcp_harvest(adev);
4499 
4500     r = gfx_v11_0_cp_resume(adev);
4501     if (r)
4502         return r;
4503 
4504     return r;
4505 }
4506 
4507 #ifndef BRING_UP_DEBUG
4508 static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
4509 {
4510     struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4511     struct amdgpu_ring *kiq_ring = &kiq->ring;
4512     int i, r = 0;
4513 
4514     if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4515         return -EINVAL;
4516 
4517     if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
4518                     adev->gfx.num_gfx_rings))
4519         return -ENOMEM;
4520 
4521     for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4522         kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
4523                        PREEMPT_QUEUES, 0, 0);
4524 
4525     if (adev->gfx.kiq.ring.sched.ready)
4526         r = amdgpu_ring_test_helper(kiq_ring);
4527 
4528     return r;
4529 }
4530 #endif
4531 
4532 static int gfx_v11_0_hw_fini(void *handle)
4533 {
4534     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4535     int r;
4536     uint32_t tmp;
4537 
4538     amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4539     amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4540 
4541     if (!adev->no_hw_access) {
4542 #ifndef BRING_UP_DEBUG
4543         if (amdgpu_async_gfx_ring) {
4544             r = gfx_v11_0_kiq_disable_kgq(adev);
4545             if (r)
4546                 DRM_ERROR("KGQ disable failed\n");
4547         }
4548 #endif
4549         if (amdgpu_gfx_disable_kcq(adev))
4550             DRM_ERROR("KCQ disable failed\n");
4551 
4552         amdgpu_mes_kiq_hw_fini(adev);
4553     }
4554 
4555     if (amdgpu_sriov_vf(adev)) {
4556         gfx_v11_0_cp_gfx_enable(adev, false);
4557         /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
4558         tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
4559         tmp &= 0xffffff00;
4560         WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
4561 
4562         return 0;
4563     }
4564     gfx_v11_0_cp_enable(adev, false);
4565     gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4566 
4567     adev->gfxhub.funcs->gart_disable(adev);
4568 
4569     adev->gfx.is_poweron = false;
4570 
4571     return 0;
4572 }
4573 
4574 static int gfx_v11_0_suspend(void *handle)
4575 {
4576     return gfx_v11_0_hw_fini(handle);
4577 }
4578 
4579 static int gfx_v11_0_resume(void *handle)
4580 {
4581     return gfx_v11_0_hw_init(handle);
4582 }
4583 
4584 static bool gfx_v11_0_is_idle(void *handle)
4585 {
4586     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4587 
4588     if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4589                 GRBM_STATUS, GUI_ACTIVE))
4590         return false;
4591     else
4592         return true;
4593 }
4594 
4595 static int gfx_v11_0_wait_for_idle(void *handle)
4596 {
4597     unsigned i;
4598     u32 tmp;
4599     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4600 
4601     for (i = 0; i < adev->usec_timeout; i++) {
4602         /* read GRBM_STATUS */
4603         tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4604             GRBM_STATUS__GUI_ACTIVE_MASK;
4605 
4606         if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4607             return 0;
4608         udelay(1);
4609     }
4610     return -ETIMEDOUT;
4611 }
4612 
4613 static int gfx_v11_0_soft_reset(void *handle)
4614 {
4615     u32 grbm_soft_reset = 0;
4616     u32 tmp;
4617     int i, j, k;
4618     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4619 
4620     tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4621     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4622     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4623     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4624     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4625     WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4626 
4627     gfx_v11_0_set_safe_mode(adev);
4628 
4629     for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4630         for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4631             for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4632                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4633                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4634                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4635                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4636                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4637 
4638                 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4639                 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4640             }
4641         }
4642     }
4643     for (i = 0; i < adev->gfx.me.num_me; ++i) {
4644         for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4645             for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4646                 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
4647                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
4648                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
4649                 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
4650                 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
4651 
4652                 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4653             }
4654         }
4655     }
4656 
4657     WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4658 
4659     // Read the CP_VMID_RESET register three times
4660     // to give GFX_HQD_ACTIVE sufficient time to reach 0.
4661     RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4662     RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4663     RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4664 
4665     for (i = 0; i < adev->usec_timeout; i++) {
4666         if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4667             !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4668             break;
4669         udelay(1);
4670     }
4671     if (i >= adev->usec_timeout) {
4672         printk("Failed to wait all pipes clean\n");
4673         return -EINVAL;
4674     }
4675 
4676     /**********  trigger soft reset  ***********/
4677     grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4678     grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4679                     SOFT_RESET_CP, 1);
4680     grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4681                     SOFT_RESET_GFX, 1);
4682     grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4683                     SOFT_RESET_CPF, 1);
4684     grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4685                     SOFT_RESET_CPC, 1);
4686     grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4687                     SOFT_RESET_CPG, 1);
4688     WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4689     /**********  exit soft reset  ***********/
4690     grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4691     grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4692                     SOFT_RESET_CP, 0);
4693     grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4694                     SOFT_RESET_GFX, 0);
4695     grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4696                     SOFT_RESET_CPF, 0);
4697     grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4698                     SOFT_RESET_CPC, 0);
4699     grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4700                     SOFT_RESET_CPG, 0);
4701     WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4702 
4703     tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4704     tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4705     WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4706 
4707     WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4708     WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4709 
4710     for (i = 0; i < adev->usec_timeout; i++) {
4711         if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4712             break;
4713         udelay(1);
4714     }
4715     if (i >= adev->usec_timeout) {
4716         printk("Failed to wait CP_VMID_RESET to 0\n");
4717         return -EINVAL;
4718     }
4719 
4720     tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4721     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4722     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4723     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4724     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4725     WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4726 
4727     gfx_v11_0_unset_safe_mode(adev);
4728 
4729     return gfx_v11_0_cp_resume(adev);
4730 }
4731 
4732 static bool gfx_v11_0_check_soft_reset(void *handle)
4733 {
4734     int i, r;
4735     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4736     struct amdgpu_ring *ring;
4737     long tmo = msecs_to_jiffies(1000);
4738 
4739     for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4740         ring = &adev->gfx.gfx_ring[i];
4741         r = amdgpu_ring_test_ib(ring, tmo);
4742         if (r)
4743             return true;
4744     }
4745 
4746     for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4747         ring = &adev->gfx.compute_ring[i];
4748         r = amdgpu_ring_test_ib(ring, tmo);
4749         if (r)
4750             return true;
4751     }
4752 
4753     return false;
4754 }
4755 
4756 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4757 {
4758     uint64_t clock;
4759 
4760     amdgpu_gfx_off_ctrl(adev, false);
4761     mutex_lock(&adev->gfx.gpu_clock_mutex);
4762     clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) |
4763         ((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL);
4764     mutex_unlock(&adev->gfx.gpu_clock_mutex);
4765     amdgpu_gfx_off_ctrl(adev, true);
4766     return clock;
4767 }
4768 
4769 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4770                        uint32_t vmid,
4771                        uint32_t gds_base, uint32_t gds_size,
4772                        uint32_t gws_base, uint32_t gws_size,
4773                        uint32_t oa_base, uint32_t oa_size)
4774 {
4775     struct amdgpu_device *adev = ring->adev;
4776 
4777     /* GDS Base */
4778     gfx_v11_0_write_data_to_reg(ring, 0, false,
4779                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
4780                     gds_base);
4781 
4782     /* GDS Size */
4783     gfx_v11_0_write_data_to_reg(ring, 0, false,
4784                     SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
4785                     gds_size);
4786 
4787     /* GWS */
4788     gfx_v11_0_write_data_to_reg(ring, 0, false,
4789                     SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
4790                     gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4791 
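    /*
     * The OA value below builds a contiguous bitmask covering
     * [oa_base, oa_base + oa_size). For example, oa_base = 4 and
     * oa_size = 2 gives (1 << 6) - (1 << 4) = 0x30, i.e. bits 4 and 5.
     */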
4792     /* OA */
4793     gfx_v11_0_write_data_to_reg(ring, 0, false,
4794                     SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
4795                     (1 << (oa_size + oa_base)) - (1 << oa_base));
4796 }
4797 
4798 static int gfx_v11_0_early_init(void *handle)
4799 {
4800     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4801 
4802     adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
4803     adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4804                       AMDGPU_MAX_COMPUTE_RINGS);
4805 
4806     gfx_v11_0_set_kiq_pm4_funcs(adev);
4807     gfx_v11_0_set_ring_funcs(adev);
4808     gfx_v11_0_set_irq_funcs(adev);
4809     gfx_v11_0_set_gds_init(adev);
4810     gfx_v11_0_set_rlc_funcs(adev);
4811     gfx_v11_0_set_mqd_funcs(adev);
4812     gfx_v11_0_set_imu_funcs(adev);
4813 
4814     gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
4815 
4816     return 0;
4817 }
4818 
4819 static int gfx_v11_0_late_init(void *handle)
4820 {
4821     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4822     int r;
4823 
4824     r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4825     if (r)
4826         return r;
4827 
4828     r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4829     if (r)
4830         return r;
4831 
4832     return 0;
4833 }
4834 
4835 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
4836 {
4837     uint32_t rlc_cntl;
4838 
4839     /* if RLC is not enabled, do nothing */
4840     rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
4841     return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
4842 }
4843 
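/*
 * RLC safe-mode handshake used below: the driver writes RLC_SAFE_MODE with
 * the CMD bit set and a message code of 1, then polls the same register
 * until the RLC firmware clears CMD to acknowledge that safe mode has been
 * entered. Exiting safe mode writes CMD alone (message 0) and, as
 * implemented here, does not wait for an acknowledgement.
 */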
4844 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
4845 {
4846     uint32_t data;
4847     unsigned i;
4848 
4849     data = RLC_SAFE_MODE__CMD_MASK;
4850     data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4851 
4852     WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
4853 
4854     /* wait for RLC_SAFE_MODE */
4855     for (i = 0; i < adev->usec_timeout; i++) {
4856         if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
4857                    RLC_SAFE_MODE, CMD))
4858             break;
4859         udelay(1);
4860     }
4861 }
4862 
4863 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev)
4864 {
4865     WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
4866 }
4867 
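/*
 * The clock-gating helpers below share one read-modify-write pattern:
 * read RLC_CGTT_MGCG_OVERRIDE into both 'def' and 'data', clear the
 * relevant override bit to enable the feature (or set it to disable), and
 * write the register back only if the value actually changed. A minimal
 * sketch, where SOME_OVERRIDE_MASK is a stand-in for the per-feature mask:
 *
 *   def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
 *   if (enable)
 *       data &= ~SOME_OVERRIDE_MASK;
 *   else
 *       data |= SOME_OVERRIDE_MASK;
 *   if (def != data)
 *       WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
 */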
4868 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
4869                       bool enable)
4870 {
4871     uint32_t def, data;
4872 
4873     if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
4874         return;
4875 
4876     def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4877 
4878     if (enable)
4879         data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4880     else
4881         data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4882 
4883     if (def != data)
4884         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4885 }
4886 
4887 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
4888                        bool enable)
4889 {
4890     uint32_t def, data;
4891 
4892     if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4893         return;
4894 
4895     def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4896 
4897     if (enable)
4898         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4899     else
4900         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
4901 
4902     if (def != data)
4903         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4904 }
4905 
4906 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
4907                        bool enable)
4908 {
4909     uint32_t def, data;
4910 
4911     if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
4912         return;
4913 
4914     def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4915 
4916     if (enable)
4917         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4918     else
4919         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
4920 
4921     if (def != data)
4922         WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4923 }
4924 
4925 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4926                                bool enable)
4927 {
4928     uint32_t data, def;
4929 
4930     if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
4931         return;
4932 
4933     /* It is disabled by HW by default */
4934     if (enable) {
4935         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4936             /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4937             def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4938 
4939             data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4940                   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4941                   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4942 
4943             if (def != data)
4944                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4945         }
4946     } else {
4947         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
4948             def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4949 
4950             data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4951                  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4952                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
4953 
4954             if (def != data)
4955                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4956         }
4957     }
4958 }
4959 
4960 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4961                                bool enable)
4962 {
4963     uint32_t def, data;
4964 
4965     if (!(adev->cg_flags &
4966           (AMD_CG_SUPPORT_GFX_CGCG |
4967           AMD_CG_SUPPORT_GFX_CGLS |
4968           AMD_CG_SUPPORT_GFX_3D_CGCG |
4969           AMD_CG_SUPPORT_GFX_3D_CGLS)))
4970         return;
4971 
4972     if (enable) {
4973         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4974 
4975         /* unset CGCG override */
4976         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
4977             data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4978         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4979             data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4980         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
4981             adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4982             data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4983 
4984         /* update CGCG override bits */
4985         if (def != data)
4986             WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4987 
4988         /* enable cgcg FSM(0x0000363F) */
4989         def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
4990 
4991         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
4992             data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
4993             data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4994                  RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4995         }
4996 
4997         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
4998             data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
4999             data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5000                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5001         }
5002 
5003         if (def != data)
5004             WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5005 
5006         /* Program RLC_CGCG_CGLS_CTRL_3D */
5007         def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5008 
5009         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5010             data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
5011             data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5012                  RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5013         }
5014 
5015         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5016             data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
5017             data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5018                  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5019         }
5020 
5021         if (def != data)
5022             WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5023 
5024         /* set IDLE_POLL_COUNT(0x00900100) */
5025         def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
5026 
5027         data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
5028         data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5029             (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5030 
5031         if (def != data)
5032             WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
5033 
5034         data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5035         data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
5036         data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
5037         data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
5038         data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
5039         WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
5040 
5041         data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5042         data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5043         WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5044 
5045         /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
5046         if (adev->sdma.num_instances > 1) {
5047             data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5048             data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5049             WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5050         }
5051     } else {
5052         /* Program RLC_CGCG_CGLS_CTRL */
5053         def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5054 
5055         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5056             data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5057 
5058         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5059             data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5060 
5061         if (def != data)
5062             WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5063 
5064         /* Program RLC_CGCG_CGLS_CTRL_3D */
5065         def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5066 
5067         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5068             data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5069         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5070             data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5071 
5072         if (def != data)
5073             WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5074 
5075         data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5076         data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5077         WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5078 
5079         /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
5080         if (adev->sdma.num_instances > 1) {
5081             data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5082             data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5083             WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5084         }
5085     }
5086 }
5087 
5088 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5089                         bool enable)
5090 {
5091     amdgpu_gfx_rlc_enter_safe_mode(adev);
5092 
5093     gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
5094 
5095     gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
5096 
5097     gfx_v11_0_update_repeater_fgcg(adev, enable);
5098 
5099     gfx_v11_0_update_sram_fgcg(adev, enable);
5100 
5101     gfx_v11_0_update_perf_clk(adev, enable);
5102 
5103     if (adev->cg_flags &
5104         (AMD_CG_SUPPORT_GFX_MGCG |
5105          AMD_CG_SUPPORT_GFX_CGLS |
5106          AMD_CG_SUPPORT_GFX_CGCG |
5107          AMD_CG_SUPPORT_GFX_3D_CGCG |
5108          AMD_CG_SUPPORT_GFX_3D_CGLS))
5109             gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
5110 
5111     amdgpu_gfx_rlc_exit_safe_mode(adev);
5112 
5113     return 0;
5114 }
5115 
5116 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5117 {
5118     u32 reg, data;
5119 
5120     amdgpu_gfx_off_ctrl(adev, false);
5121 
5122     reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5123     if (amdgpu_sriov_is_pp_one_vf(adev))
5124         data = RREG32_NO_KIQ(reg);
5125     else
5126         data = RREG32(reg);
5127 
5128     data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5129     data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5130 
5131     if (amdgpu_sriov_is_pp_one_vf(adev))
5132         WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5133     else
5134         WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5135 
5136     amdgpu_gfx_off_ctrl(adev, true);
5137 }
5138 
5139 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5140     .is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5141     .set_safe_mode = gfx_v11_0_set_safe_mode,
5142     .unset_safe_mode = gfx_v11_0_unset_safe_mode,
5143     .init = gfx_v11_0_rlc_init,
5144     .get_csb_size = gfx_v11_0_get_csb_size,
5145     .get_csb_buffer = gfx_v11_0_get_csb_buffer,
5146     .resume = gfx_v11_0_rlc_resume,
5147     .stop = gfx_v11_0_rlc_stop,
5148     .reset = gfx_v11_0_rlc_reset,
5149     .start = gfx_v11_0_rlc_start,
5150     .update_spm_vmid = gfx_v11_0_update_spm_vmid,
5151 };
5152 
5153 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5154 {
5155     u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5156 
5157     if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5158         data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5159     else
5160         data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5161 
5162     WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5163 
5164     // Program RLC_PG_DELAY_3 for CGPG hysteresis
5165     if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5166         switch (adev->ip_versions[GC_HWIP][0]) {
5167         case IP_VERSION(11, 0, 1):
5168             WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5169             break;
5170         default:
5171             break;
5172         }
5173     }
5174 }
5175 
5176 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5177 {
5178     amdgpu_gfx_rlc_enter_safe_mode(adev);
5179 
5180     gfx_v11_cntl_power_gating(adev, enable);
5181 
5182     amdgpu_gfx_rlc_exit_safe_mode(adev);
5183 }
5184 
5185 static int gfx_v11_0_set_powergating_state(void *handle,
5186                        enum amd_powergating_state state)
5187 {
5188     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5189     bool enable = (state == AMD_PG_STATE_GATE);
5190 
5191     if (amdgpu_sriov_vf(adev))
5192         return 0;
5193 
5194     switch (adev->ip_versions[GC_HWIP][0]) {
5195     case IP_VERSION(11, 0, 0):
5196     case IP_VERSION(11, 0, 2):
5197         amdgpu_gfx_off_ctrl(adev, enable);
5198         break;
5199     case IP_VERSION(11, 0, 1):
5200         gfx_v11_cntl_pg(adev, enable);
5201         amdgpu_gfx_off_ctrl(adev, enable);
5202         break;
5203     default:
5204         break;
5205     }
5206 
5207     return 0;
5208 }
5209 
5210 static int gfx_v11_0_set_clockgating_state(void *handle,
5211                       enum amd_clockgating_state state)
5212 {
5213     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5214 
5215     if (amdgpu_sriov_vf(adev))
5216         return 0;
5217 
5218     switch (adev->ip_versions[GC_HWIP][0]) {
5219     case IP_VERSION(11, 0, 0):
5220     case IP_VERSION(11, 0, 1):
5221     case IP_VERSION(11, 0, 2):
5222         gfx_v11_0_update_gfx_clock_gating(adev,
5223                 state == AMD_CG_STATE_GATE);
5224         break;
5225     default:
5226         break;
5227     }
5228 
5229     return 0;
5230 }
5231 
5232 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5233 {
5234     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5235     int data;
5236 
5237     /* AMD_CG_SUPPORT_GFX_MGCG */
5238     data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5239     if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5240         *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5241 
5242     /* AMD_CG_SUPPORT_REPEATER_FGCG */
5243     if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5244         *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5245 
5246     /* AMD_CG_SUPPORT_GFX_FGCG */
5247     if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5248         *flags |= AMD_CG_SUPPORT_GFX_FGCG;
5249 
5250     /* AMD_CG_SUPPORT_GFX_PERF_CLK */
5251     if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5252         *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5253 
5254     /* AMD_CG_SUPPORT_GFX_CGCG */
5255     data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5256     if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5257         *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5258 
5259     /* AMD_CG_SUPPORT_GFX_CGLS */
5260     if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5261         *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5262 
5263     /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5264     data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5265     if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5266         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5267 
5268     /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5269     if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5270         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5271 }
5272 
5273 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5274 {
5275     /* gfx11 is 32bit rptr */
5276     return *(uint32_t *)ring->rptr_cpu_addr;
5277 }
5278 
5279 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5280 {
5281     struct amdgpu_device *adev = ring->adev;
5282     u64 wptr;
5283 
5284     /* XXX check if swapping is necessary on BE */
5285     if (ring->use_doorbell) {
5286         wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5287     } else {
5288         wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5289         wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5290     }
5291 
5292     return wptr;
5293 }
5294 
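/*
 * Write-pointer updates below take one of three routes: for MES-managed
 * queues the masked wptr is mirrored into wptr_cpu_addr and into the save
 * slot placed right after the MQD, then the queue doorbell (and, if the
 * queue has been unmapped, the aggregated doorbell) is rung; for kernel
 * queues with a doorbell only wptr_cpu_addr and the doorbell are written;
 * otherwise the wptr goes to CP_RB0_WPTR/_HI over MMIO.
 */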
5295 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5296 {
5297     struct amdgpu_device *adev = ring->adev;
5298     uint32_t *wptr_saved;
5299     uint32_t *is_queue_unmap;
5300     uint64_t aggregated_db_index;
5301     uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
5302     uint64_t wptr_tmp;
5303 
5304     if (ring->is_mes_queue) {
5305         wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5306         is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5307                           sizeof(uint32_t));
5308         aggregated_db_index =
5309             amdgpu_mes_get_aggregated_doorbell_index(adev,
5310                                  ring->hw_prio);
5311 
5312         wptr_tmp = ring->wptr & ring->buf_mask;
5313         atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5314         *wptr_saved = wptr_tmp;
5315         /* assume the doorbell is always used by a mes mapped queue */
5316         if (*is_queue_unmap) {
5317             WDOORBELL64(aggregated_db_index, wptr_tmp);
5318             WDOORBELL64(ring->doorbell_index, wptr_tmp);
5319         } else {
5320             WDOORBELL64(ring->doorbell_index, wptr_tmp);
5321 
5322             if (*is_queue_unmap)
5323                 WDOORBELL64(aggregated_db_index, wptr_tmp);
5324         }
5325     } else {
5326         if (ring->use_doorbell) {
5327             /* XXX check if swapping is necessary on BE */
5328             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5329                      ring->wptr);
5330             WDOORBELL64(ring->doorbell_index, ring->wptr);
5331         } else {
5332             WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5333                      lower_32_bits(ring->wptr));
5334             WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5335                      upper_32_bits(ring->wptr));
5336         }
5337     }
5338 }
5339 
5340 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5341 {
5342     /* gfx11 hardware is 32bit rptr */
5343     return *(uint32_t *)ring->rptr_cpu_addr;
5344 }
5345 
5346 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5347 {
5348     u64 wptr;
5349 
5350     /* XXX check if swapping is necessary on BE */
5351     if (ring->use_doorbell)
5352         wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5353     else
5354         BUG();
5355     return wptr;
5356 }
5357 
5358 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5359 {
5360     struct amdgpu_device *adev = ring->adev;
5361     uint32_t *wptr_saved;
5362     uint32_t *is_queue_unmap;
5363     uint64_t aggregated_db_index;
5364     uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
5365     uint64_t wptr_tmp;
5366 
5367     if (ring->is_mes_queue) {
5368         wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
5369         is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
5370                           sizeof(uint32_t));
5371         aggregated_db_index =
5372             amdgpu_mes_get_aggregated_doorbell_index(adev,
5373                                  ring->hw_prio);
5374 
5375         wptr_tmp = ring->wptr & ring->buf_mask;
5376         atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
5377         *wptr_saved = wptr_tmp;
5378         /* assume the doorbell is always used by a mes mapped queue */
5379         if (*is_queue_unmap) {
5380             WDOORBELL64(aggregated_db_index, wptr_tmp);
5381             WDOORBELL64(ring->doorbell_index, wptr_tmp);
5382         } else {
5383             WDOORBELL64(ring->doorbell_index, wptr_tmp);
5384 
5385             if (*is_queue_unmap)
5386                 WDOORBELL64(aggregated_db_index, wptr_tmp);
5387         }
5388     } else {
5389         /* XXX check if swapping is necessary on BE */
5390         if (ring->use_doorbell) {
5391             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5392                      ring->wptr);
5393             WDOORBELL64(ring->doorbell_index, ring->wptr);
5394         } else {
5395             BUG(); /* only DOORBELL method supported on gfx11 now */
5396         }
5397     }
5398 }
5399 
5400 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5401 {
5402     struct amdgpu_device *adev = ring->adev;
5403     u32 ref_and_mask, reg_mem_engine;
5404     const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5405 
5406     if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5407         switch (ring->me) {
5408         case 1:
5409             ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5410             break;
5411         case 2:
5412             ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5413             break;
5414         default:
5415             return;
5416         }
5417         reg_mem_engine = 0;
5418     } else {
5419         ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5420         reg_mem_engine = 1; /* pfp */
5421     }
5422 
5423     gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5424                    adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5425                    adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5426                    ref_and_mask, ref_and_mask, 0x20);
5427 }
5428 
5429 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5430                        struct amdgpu_job *job,
5431                        struct amdgpu_ib *ib,
5432                        uint32_t flags)
5433 {
5434     unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5435     u32 header, control = 0;
5436 
5437     BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5438 
5439     header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5440 
5441     control |= ib->length_dw | (vmid << 24);
5442 
5443     if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5444         control |= INDIRECT_BUFFER_PRE_ENB(1);
5445 
5446         if (flags & AMDGPU_IB_PREEMPTED)
5447             control |= INDIRECT_BUFFER_PRE_RESUME(1);
5448 
5449         if (vmid)
5450             gfx_v11_0_ring_emit_de_meta(ring,
5451                     (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
5452     }
5453 
5454     if (ring->is_mes_queue)
5455         /* inherit vmid from mqd */
5456         control |= 0x400000;
5457 
5458     amdgpu_ring_write(ring, header);
5459     BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5460     amdgpu_ring_write(ring,
5461 #ifdef __BIG_ENDIAN
5462         (2 << 0) |
5463 #endif
5464         lower_32_bits(ib->gpu_addr));
5465     amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5466     amdgpu_ring_write(ring, control);
5467 }
5468 
5469 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5470                        struct amdgpu_job *job,
5471                        struct amdgpu_ib *ib,
5472                        uint32_t flags)
5473 {
5474     unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5475     u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5476 
5477     if (ring->is_mes_queue)
5478         /* inherit vmid from mqd */
5479         control |= 0x40000000;
5480 
5481     /* Currently, there is a high probability of a wave ID mismatch
5482      * between ME and GDS, leading to a hw deadlock, because ME generates
5483      * different wave IDs than the GDS expects. This situation happens
5484      * randomly when at least 5 compute pipes use GDS ordered append.
5485      * The wave IDs generated by ME are also wrong after suspend/resume.
5486      * Those are probably bugs somewhere else in the kernel driver.
5487      *
5488      * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5489      * GDS to 0 for this ring (me/pipe).
5490      */
5491     if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5492         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5493         amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5494         amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5495     }
5496 
5497     amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5498     BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5499     amdgpu_ring_write(ring,
5500 #ifdef __BIG_ENDIAN
5501                 (2 << 0) |
5502 #endif
5503                 lower_32_bits(ib->gpu_addr));
5504     amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5505     amdgpu_ring_write(ring, control);
5506 }
5507 
5508 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5509                      u64 seq, unsigned flags)
5510 {
5511     bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5512     bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5513 
5514     /* RELEASE_MEM - flush caches, send int */
5515     amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5516     amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5517                  PACKET3_RELEASE_MEM_GCR_GL2_WB |
5518                  PACKET3_RELEASE_MEM_GCR_GL2_INV |
5519                  PACKET3_RELEASE_MEM_GCR_GL2_US |
5520                  PACKET3_RELEASE_MEM_GCR_GL1_INV |
5521                  PACKET3_RELEASE_MEM_GCR_GLV_INV |
5522                  PACKET3_RELEASE_MEM_GCR_GLM_INV |
5523                  PACKET3_RELEASE_MEM_GCR_GLM_WB |
5524                  PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5525                  PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5526                  PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5527     amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5528                  PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5529 
5530     /*
5531      * the address should be Qword aligned for a 64-bit write, and Dword
5532      * aligned if only the low 32 bits of data are sent (data high is discarded)
5533      */
5534     if (write64bit)
5535         BUG_ON(addr & 0x7);
5536     else
5537         BUG_ON(addr & 0x3);
5538     amdgpu_ring_write(ring, lower_32_bits(addr));
5539     amdgpu_ring_write(ring, upper_32_bits(addr));
5540     amdgpu_ring_write(ring, lower_32_bits(seq));
5541     amdgpu_ring_write(ring, upper_32_bits(seq));
5542     amdgpu_ring_write(ring, ring->is_mes_queue ?
5543              (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5544 }
5545 
5546 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5547 {
5548     int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5549     uint32_t seq = ring->fence_drv.sync_seq;
5550     uint64_t addr = ring->fence_drv.gpu_addr;
5551 
5552     gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5553                    upper_32_bits(addr), seq, 0xffffffff, 4);
5554 }
5555 
5556 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5557                    uint16_t pasid, uint32_t flush_type,
5558                    bool all_hub, uint8_t dst_sel)
5559 {
5560     amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5561     amdgpu_ring_write(ring,
5562               PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5563               PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5564               PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5565               PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5566 }
5567 
5568 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5569                      unsigned vmid, uint64_t pd_addr)
5570 {
5571     if (ring->is_mes_queue)
5572         gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5573     else
5574         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5575 
5576     /* compute doesn't have PFP */
5577     if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5578         /* sync PFP to ME, otherwise we might get invalid PFP reads */
5579         amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5580         amdgpu_ring_write(ring, 0x0);
5581     }
5582 }
5583 
5584 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5585                       u64 seq, unsigned int flags)
5586 {
5587     struct amdgpu_device *adev = ring->adev;
5588 
5589     /* we only allocate 32bit for each seq wb address */
5590     BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5591 
5592     /* write fence seq to the "addr" */
5593     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5594     amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5595                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5596     amdgpu_ring_write(ring, lower_32_bits(addr));
5597     amdgpu_ring_write(ring, upper_32_bits(addr));
5598     amdgpu_ring_write(ring, lower_32_bits(seq));
5599 
5600     if (flags & AMDGPU_FENCE_FLAG_INT) {
5601         /* set register to trigger INT */
5602         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5603         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5604                      WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5605         amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5606         amdgpu_ring_write(ring, 0);
5607         amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5608     }
5609 }
5610 
5611 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5612                      uint32_t flags)
5613 {
5614     uint32_t dw2 = 0;
5615 
5616     dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5617     if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5618         /* set load_global_config & load_global_uconfig */
5619         dw2 |= 0x8001;
5620         /* set load_cs_sh_regs */
5621         dw2 |= 0x01000000;
5622         /* set load_per_context_state & load_gfx_sh_regs for GFX */
5623         dw2 |= 0x10002;
5624     }
5625 
5626     amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5627     amdgpu_ring_write(ring, dw2);
5628     amdgpu_ring_write(ring, 0);
5629 }
5630 
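/*
 * Conditional-execution patching below works in two steps:
 * emit_init_cond_exec emits a COND_EXEC packet whose dword count is a
 * 0x55aa55aa placeholder and returns that dword's offset in the ring;
 * emit_patch_cond_exec later overwrites the placeholder with the distance
 * (in dwords) from the placeholder to the current write position, adding
 * buf_mask + 1 when the ring has wrapped in between.
 */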
5631 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5632 {
5633     unsigned ret;
5634 
5635     amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5636     amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5637     amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5638     amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5639     ret = ring->wptr & ring->buf_mask;
5640     amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5641 
5642     return ret;
5643 }
5644 
5645 static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5646 {
5647     unsigned cur;
5648     BUG_ON(offset > ring->buf_mask);
5649     BUG_ON(ring->ring[offset] != 0x55aa55aa);
5650 
5651     cur = (ring->wptr - 1) & ring->buf_mask;
5652     if (likely(cur > offset))
5653         ring->ring[offset] = cur - offset;
5654     else
5655         ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
5656 }
5657 
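/*
 * Mid-IB preemption below: assert the preemption condition via
 * amdgpu_ring_set_preempt_cond_exec(ring, false), ask the KIQ to unmap the
 * queue with PREEMPT_QUEUES_NO_UNMAP while emitting a trailing fence, then
 * busy-poll the trailing fence CPU address until ring->trail_seq shows up
 * or the usec timeout expires, and finally deassert the condition.
 */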
5658 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5659 {
5660     int i, r = 0;
5661     struct amdgpu_device *adev = ring->adev;
5662     struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5663     struct amdgpu_ring *kiq_ring = &kiq->ring;
5664     unsigned long flags;
5665 
5666     if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5667         return -EINVAL;
5668 
5669     spin_lock_irqsave(&kiq->ring_lock, flags);
5670 
5671     if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5672         spin_unlock_irqrestore(&kiq->ring_lock, flags);
5673         return -ENOMEM;
5674     }
5675 
5676     /* assert preemption condition */
5677     amdgpu_ring_set_preempt_cond_exec(ring, false);
5678 
5679     /* assert IB preemption, emit the trailing fence */
5680     kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5681                    ring->trail_fence_gpu_addr,
5682                    ++ring->trail_seq);
5683     amdgpu_ring_commit(kiq_ring);
5684 
5685     spin_unlock_irqrestore(&kiq->ring_lock, flags);
5686 
5687     /* poll the trailing fence */
5688     for (i = 0; i < adev->usec_timeout; i++) {
5689         if (ring->trail_seq ==
5690             le32_to_cpu(*(ring->trail_fence_cpu_addr)))
5691             break;
5692         udelay(1);
5693     }
5694 
5695     if (i >= adev->usec_timeout) {
5696         r = -EINVAL;
5697         DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
5698     }
5699 
5700     /* deassert preemption condition */
5701     amdgpu_ring_set_preempt_cond_exec(ring, true);
5702     return r;
5703 }
5704 
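     /*
      * Emit the draw engine (DE) metadata payload via WRITE_DATA.  The
      * payload lives in the MES context buffer or in the CSA; on resume
      * the previously saved copy is written back from the CPU-side
      * address instead of a freshly zeroed struct.
      */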
5705 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5706 {
5707     struct amdgpu_device *adev = ring->adev;
5708     struct v10_de_ib_state de_payload = {0};
5709     uint64_t offset, gds_addr, de_payload_gpu_addr;
5710     void *de_payload_cpu_addr;
5711     int cnt;
5712 
5713     if (ring->is_mes_queue) {
5714         offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5715                   gfx[0].gfx_meta_data) +
5716             offsetof(struct v10_gfx_meta_data, de_payload);
5717         de_payload_gpu_addr =
5718             amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5719         de_payload_cpu_addr =
5720             amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5721 
5722         offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5723                   gfx[0].gds_backup) +
5724             offsetof(struct v10_gfx_meta_data, de_payload);
5725         gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5726     } else {
5727         offset = offsetof(struct v10_gfx_meta_data, de_payload);
5728         de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5729         de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5730 
5731         gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5732                  AMDGPU_CSA_SIZE - adev->gds.gds_size,
5733                  PAGE_SIZE);
5734     }
5735 
5736     de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5737     de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5738 
5739     cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5740     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5741     amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5742                  WRITE_DATA_DST_SEL(8) |
5743                  WR_CONFIRM) |
5744                  WRITE_DATA_CACHE_POLICY(0));
5745     amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5746     amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5747 
5748     if (resume)
5749         amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5750                        sizeof(de_payload) >> 2);
5751     else
5752         amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5753                        sizeof(de_payload) >> 2);
5754 }
5755 
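     /*
      * Emit a FRAME_CONTROL packet marking frame start (cmd 0) or end
      * (cmd 1), with FRAME_TMZ set for secure submissions.
      */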
5756 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5757                     bool secure)
5758 {
5759     uint32_t v = secure ? FRAME_TMZ : 0;
5760 
5761     amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5762     amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5763 }
5764 
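     /*
      * Read a register from the ring (used by the KIQ): COPY_DATA from
      * the register into the writeback slot at reg_val_offs, with write
      * confirmation.
      */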
5765 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5766                      uint32_t reg_val_offs)
5767 {
5768     struct amdgpu_device *adev = ring->adev;
5769 
5770     amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5771     amdgpu_ring_write(ring, 0 | /* src: register */
5772                 (5 << 8) |  /* dst: memory */
5773                 (1 << 20)); /* write confirm */
5774     amdgpu_ring_write(ring, reg);
5775     amdgpu_ring_write(ring, 0);
5776     amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5777                 reg_val_offs * 4));
5778     amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5779                 reg_val_offs * 4));
5780 }
5781 
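     /*
      * Write a register from the ring.  The command bits depend on the
      * ring type: GFX selects engine 1 (PFP) with write confirmation, the
      * KIQ uses the no-increment-address encoding, and everything else
      * just requests write confirmation.
      */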
5782 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5783                    uint32_t val)
5784 {
5785     uint32_t cmd = 0;
5786 
5787     switch (ring->funcs->type) {
5788     case AMDGPU_RING_TYPE_GFX:
5789         cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5790         break;
5791     case AMDGPU_RING_TYPE_KIQ:
5792         cmd = (1 << 16); /* no inc addr */
5793         break;
5794     default:
5795         cmd = WR_CONFIRM;
5796         break;
5797     }
5798     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5799     amdgpu_ring_write(ring, cmd);
5800     amdgpu_ring_write(ring, reg);
5801     amdgpu_ring_write(ring, 0);
5802     amdgpu_ring_write(ring, val);
5803 }
5804 
5805 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5806                     uint32_t val, uint32_t mask)
5807 {
5808     gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5809 }
5810 
5811 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5812                            uint32_t reg0, uint32_t reg1,
5813                            uint32_t ref, uint32_t mask)
5814 {
5815     int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5816 
5817     gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5818                    ref, mask, 0x20);
5819 }
5820 
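     /*
      * Soft recovery: issue an SQ_CMD (wave kill) scoped to the given
      * vmid so a hung job can be stopped without a full GPU reset.
      */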
5821 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
5822                      unsigned vmid)
5823 {
5824     struct amdgpu_device *adev = ring->adev;
5825     uint32_t value = 0;
5826 
5827     value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5828     value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5829     value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5830     value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5831     WREG32_SOC15(GC, 0, regSQ_CMD, value);
5832 }
5833 
5834 static void
5835 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5836                       uint32_t me, uint32_t pipe,
5837                       enum amdgpu_interrupt_state state)
5838 {
5839     uint32_t cp_int_cntl, cp_int_cntl_reg;
5840 
5841     if (!me) {
5842         switch (pipe) {
5843         case 0:
5844             cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
5845             break;
5846         case 1:
5847             cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
5848             break;
5849         default:
5850             DRM_DEBUG("invalid pipe %d\n", pipe);
5851             return;
5852         }
5853     } else {
5854         DRM_DEBUG("invalid me %d\n", me);
5855         return;
5856     }
5857 
5858     switch (state) {
5859     case AMDGPU_IRQ_STATE_DISABLE:
5860         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5861         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5862                         TIME_STAMP_INT_ENABLE, 0);
5863         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5864                         GENERIC0_INT_ENABLE, 0);
5865         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5866         break;
5867     case AMDGPU_IRQ_STATE_ENABLE:
5868         cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
5869         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5870                         TIME_STAMP_INT_ENABLE, 1);
5871         cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5872                         GENERIC0_INT_ENABLE, 1);
5873         WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
5874         break;
5875     default:
5876         break;
5877     }
5878 }
5879 
5880 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5881                              int me, int pipe,
5882                              enum amdgpu_interrupt_state state)
5883 {
5884     u32 mec_int_cntl, mec_int_cntl_reg;
5885 
5886     /*
5887      * amdgpu controls only the first MEC. That's why this function only
5888      * handles the setting of interrupts for this specific MEC. All other
5889      * pipes' interrupts are set by amdkfd.
5890      */
5891 
5892     if (me == 1) {
5893         switch (pipe) {
5894         case 0:
5895             mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
5896             break;
5897         case 1:
5898             mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
5899             break;
5900         case 2:
5901             mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
5902             break;
5903         case 3:
5904             mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
5905             break;
5906         default:
5907             DRM_DEBUG("invalid pipe %d\n", pipe);
5908             return;
5909         }
5910     } else {
5911         DRM_DEBUG("invalid me %d\n", me);
5912         return;
5913     }
5914 
5915     switch (state) {
5916     case AMDGPU_IRQ_STATE_DISABLE:
5917         mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5918         mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5919                          TIME_STAMP_INT_ENABLE, 0);
5920         mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5921                          GENERIC0_INT_ENABLE, 0);
5922         WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5923         break;
5924     case AMDGPU_IRQ_STATE_ENABLE:
5925         mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5926         mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5927                          TIME_STAMP_INT_ENABLE, 1);
5928         mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5929                          GENERIC0_INT_ENABLE, 1);
5930         WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5931         break;
5932     default:
5933         break;
5934     }
5935 }
5936 
5937 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5938                         struct amdgpu_irq_src *src,
5939                         unsigned type,
5940                         enum amdgpu_interrupt_state state)
5941 {
5942     switch (type) {
5943     case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5944         gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
5945         break;
5946     case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
5947         gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
5948         break;
5949     case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5950         gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5951         break;
5952     case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5953         gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5954         break;
5955     case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5956         gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5957         break;
5958     case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5959         gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5960         break;
5961     default:
5962         break;
5963     }
5964     return 0;
5965 }
5966 
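     /*
      * EOP interrupt handler.  With MES enabled the queue is looked up by
      * the MES queue id carried in src_data[0]; otherwise me/pipe/queue
      * are decoded from ring_id and fences of the matching gfx or compute
      * ring are processed.
      */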
5967 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
5968                  struct amdgpu_irq_src *source,
5969                  struct amdgpu_iv_entry *entry)
5970 {
5971     int i;
5972     u8 me_id, pipe_id, queue_id;
5973     struct amdgpu_ring *ring;
5974     uint32_t mes_queue_id = entry->src_data[0];
5975 
5976     DRM_DEBUG("IH: CP EOP\n");
5977 
5978     if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
5979         struct amdgpu_mes_queue *queue;
5980 
5981         mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
5982 
5983         spin_lock(&adev->mes.queue_id_lock);
5984         queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
5985         if (queue) {
5986             DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
5987             amdgpu_fence_process(queue->ring);
5988         }
5989         spin_unlock(&adev->mes.queue_id_lock);
5990     } else {
5991         me_id = (entry->ring_id & 0x0c) >> 2;
5992         pipe_id = (entry->ring_id & 0x03) >> 0;
5993         queue_id = (entry->ring_id & 0x70) >> 4;
5994 
5995         switch (me_id) {
5996         case 0:
5997             if (pipe_id == 0)
5998                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5999             else
6000                 amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
6001             break;
6002         case 1:
6003         case 2:
6004             for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6005                 ring = &adev->gfx.compute_ring[i];
6006                 /* Per-queue interrupt is supported for MEC starting from VI.
6007                  * The interrupt can only be enabled/disabled per pipe instead
6008                  * of per queue.
6009                  */
6010                 if ((ring->me == me_id) &&
6011                     (ring->pipe == pipe_id) &&
6012                     (ring->queue == queue_id))
6013                     amdgpu_fence_process(ring);
6014             }
6015             break;
6016         }
6017     }
6018 
6019     return 0;
6020 }
6021 
6022 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6023                           struct amdgpu_irq_src *source,
6024                           unsigned type,
6025                           enum amdgpu_interrupt_state state)
6026 {
6027     switch (state) {
6028     case AMDGPU_IRQ_STATE_DISABLE:
6029     case AMDGPU_IRQ_STATE_ENABLE:
6030         WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
6031                    PRIV_REG_INT_ENABLE,
6032                    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6033         break;
6034     default:
6035         break;
6036     }
6037 
6038     return 0;
6039 }
6040 
6041 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6042                            struct amdgpu_irq_src *source,
6043                            unsigned type,
6044                            enum amdgpu_interrupt_state state)
6045 {
6046     switch (state) {
6047     case AMDGPU_IRQ_STATE_DISABLE:
6048     case AMDGPU_IRQ_STATE_ENABLE:
6049         WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
6050                    PRIV_INSTR_INT_ENABLE,
6051                    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6052         break;
6053     default:
6054         break;
6055     }
6056 
6057     return 0;
6058 }
6059 
6060 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
6061                     struct amdgpu_iv_entry *entry)
6062 {
6063     u8 me_id, pipe_id, queue_id;
6064     struct amdgpu_ring *ring;
6065     int i;
6066 
6067     me_id = (entry->ring_id & 0x0c) >> 2;
6068     pipe_id = (entry->ring_id & 0x03) >> 0;
6069     queue_id = (entry->ring_id & 0x70) >> 4;
6070 
6071     switch (me_id) {
6072     case 0:
6073         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
6074             ring = &adev->gfx.gfx_ring[i];
6075             /* we only enable 1 gfx queue per pipe for now */
6076             if (ring->me == me_id && ring->pipe == pipe_id)
6077                 drm_sched_fault(&ring->sched);
6078         }
6079         break;
6080     case 1:
6081     case 2:
6082         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6083             ring = &adev->gfx.compute_ring[i];
6084             if (ring->me == me_id && ring->pipe == pipe_id &&
6085                 ring->queue == queue_id)
6086                 drm_sched_fault(&ring->sched);
6087         }
6088         break;
6089     default:
6090         BUG();
6091         break;
6092     }
6093 }
6094 
6095 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
6096                   struct amdgpu_irq_src *source,
6097                   struct amdgpu_iv_entry *entry)
6098 {
6099     DRM_ERROR("Illegal register access in command stream\n");
6100     gfx_v11_0_handle_priv_fault(adev, entry);
6101     return 0;
6102 }
6103 
6104 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
6105                    struct amdgpu_irq_src *source,
6106                    struct amdgpu_iv_entry *entry)
6107 {
6108     DRM_ERROR("Illegal instruction in command stream\n");
6109     gfx_v11_0_handle_priv_fault(adev, entry);
6110     return 0;
6111 }
6112 
6113 #if 0
6114 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6115                          struct amdgpu_irq_src *src,
6116                          unsigned int type,
6117                          enum amdgpu_interrupt_state state)
6118 {
6119     uint32_t tmp, target;
6120     struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6121 
6122     target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6123     target += ring->pipe;
6124 
6125     switch (type) {
6126     case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6127         if (state == AMDGPU_IRQ_STATE_DISABLE) {
6128             tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6129             tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6130                         GENERIC2_INT_ENABLE, 0);
6131             WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6132 
6133             tmp = RREG32_SOC15_IP(GC, target);
6134             tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6135                         GENERIC2_INT_ENABLE, 0);
6136             WREG32_SOC15_IP(GC, target, tmp);
6137         } else {
6138             tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6139             tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6140                         GENERIC2_INT_ENABLE, 1);
6141             WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6142 
6143             tmp = RREG32_SOC15_IP(GC, target);
6144             tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6145                         GENERIC2_INT_ENABLE, 1);
6146             WREG32_SOC15_IP(GC, target, tmp);
6147         }
6148         break;
6149     default:
6150         BUG(); /* KIQ only supports GENERIC2_INT for now */
6151         break;
6152     }
6153     return 0;
6154 }
6155 #endif
6156 
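     /*
      * Emit an ACQUIRE_MEM that writes back and/or invalidates the GL2,
      * GLM, GL1, GLV, GLK and GLI caches over the full address range so
      * that subsequent work sees coherent memory.
      */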
6157 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6158 {
6159     const unsigned int gcr_cntl =
6160             PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6161             PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6162             PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6163             PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6164             PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6165             PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6166             PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6167             PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6168 
6169     /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6170     amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6171     amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6172     amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6173     amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6174     amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6175     amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6176     amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6177     amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6178 }
6179 
6180 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6181     .name = "gfx_v11_0",
6182     .early_init = gfx_v11_0_early_init,
6183     .late_init = gfx_v11_0_late_init,
6184     .sw_init = gfx_v11_0_sw_init,
6185     .sw_fini = gfx_v11_0_sw_fini,
6186     .hw_init = gfx_v11_0_hw_init,
6187     .hw_fini = gfx_v11_0_hw_fini,
6188     .suspend = gfx_v11_0_suspend,
6189     .resume = gfx_v11_0_resume,
6190     .is_idle = gfx_v11_0_is_idle,
6191     .wait_for_idle = gfx_v11_0_wait_for_idle,
6192     .soft_reset = gfx_v11_0_soft_reset,
6193     .check_soft_reset = gfx_v11_0_check_soft_reset,
6194     .set_clockgating_state = gfx_v11_0_set_clockgating_state,
6195     .set_powergating_state = gfx_v11_0_set_powergating_state,
6196     .get_clockgating_state = gfx_v11_0_get_clockgating_state,
6197 };
6198 
6199 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6200     .type = AMDGPU_RING_TYPE_GFX,
6201     .align_mask = 0xff,
6202     .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6203     .support_64bit_ptrs = true,
6204     .vmhub = AMDGPU_GFXHUB_0,
6205     .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6206     .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6207     .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6208     .emit_frame_size = /* 242 dwords maximum in total, assuming 16 IBs */
6209         5 + /* COND_EXEC */
6210         7 + /* PIPELINE_SYNC */
6211         SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6212         SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6213         2 + /* VM_FLUSH */
6214         8 + /* FENCE for VM_FLUSH */
6215         20 + /* GDS switch */
6216         5 + /* COND_EXEC */
6217         7 + /* HDP_flush */
6218         4 + /* VGT_flush */
6219         31 + /* DE_META */
6220         3 + /* CNTX_CTRL */
6221         5 + /* HDP_INVL */
6222         8 + 8 + /* FENCE x2 */
6223         8, /* gfx_v11_0_emit_mem_sync */
6224     .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
6225     .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6226     .emit_fence = gfx_v11_0_ring_emit_fence,
6227     .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6228     .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6229     .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6230     .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6231     .test_ring = gfx_v11_0_ring_test_ring,
6232     .test_ib = gfx_v11_0_ring_test_ib,
6233     .insert_nop = amdgpu_ring_insert_nop,
6234     .pad_ib = amdgpu_ring_generic_pad_ib,
6235     .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6236     .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6237     .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
6238     .preempt_ib = gfx_v11_0_ring_preempt_ib,
6239     .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6240     .emit_wreg = gfx_v11_0_ring_emit_wreg,
6241     .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6242     .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6243     .soft_recovery = gfx_v11_0_ring_soft_recovery,
6244     .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6245 };
6246 
6247 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6248     .type = AMDGPU_RING_TYPE_COMPUTE,
6249     .align_mask = 0xff,
6250     .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6251     .support_64bit_ptrs = true,
6252     .vmhub = AMDGPU_GFXHUB_0,
6253     .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6254     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6255     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6256     .emit_frame_size =
6257         20 + /* gfx_v11_0_ring_emit_gds_switch */
6258         7 + /* gfx_v11_0_ring_emit_hdp_flush */
6259         5 + /* hdp invalidate */
6260         7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6261         SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6262         SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6263         2 + /* gfx_v11_0_ring_emit_vm_flush */
6264         8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6265         8, /* gfx_v11_0_emit_mem_sync */
6266     .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6267     .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6268     .emit_fence = gfx_v11_0_ring_emit_fence,
6269     .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6270     .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6271     .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6272     .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6273     .test_ring = gfx_v11_0_ring_test_ring,
6274     .test_ib = gfx_v11_0_ring_test_ib,
6275     .insert_nop = amdgpu_ring_insert_nop,
6276     .pad_ib = amdgpu_ring_generic_pad_ib,
6277     .emit_wreg = gfx_v11_0_ring_emit_wreg,
6278     .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6279     .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6280     .emit_mem_sync = gfx_v11_0_emit_mem_sync,
6281 };
6282 
6283 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6284     .type = AMDGPU_RING_TYPE_KIQ,
6285     .align_mask = 0xff,
6286     .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6287     .support_64bit_ptrs = true,
6288     .vmhub = AMDGPU_GFXHUB_0,
6289     .get_rptr = gfx_v11_0_ring_get_rptr_compute,
6290     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
6291     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
6292     .emit_frame_size =
6293         20 + /* gfx_v11_0_ring_emit_gds_switch */
6294         7 + /* gfx_v11_0_ring_emit_hdp_flush */
6295         5 + /* hdp invalidate */
6296         7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6297         SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6298         SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6299         2 + /* gfx_v11_0_ring_emit_vm_flush */
6300         8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6301     .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
6302     .emit_ib = gfx_v11_0_ring_emit_ib_compute,
6303     .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6304     .test_ring = gfx_v11_0_ring_test_ring,
6305     .test_ib = gfx_v11_0_ring_test_ib,
6306     .insert_nop = amdgpu_ring_insert_nop,
6307     .pad_ib = amdgpu_ring_generic_pad_ib,
6308     .emit_rreg = gfx_v11_0_ring_emit_rreg,
6309     .emit_wreg = gfx_v11_0_ring_emit_wreg,
6310     .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6311     .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6312 };
6313 
6314 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6315 {
6316     int i;
6317 
6318     adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6319 
6320     for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6321         adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6322 
6323     for (i = 0; i < adev->gfx.num_compute_rings; i++)
6324         adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6325 }
6326 
6327 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6328     .set = gfx_v11_0_set_eop_interrupt_state,
6329     .process = gfx_v11_0_eop_irq,
6330 };
6331 
6332 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6333     .set = gfx_v11_0_set_priv_reg_fault_state,
6334     .process = gfx_v11_0_priv_reg_irq,
6335 };
6336 
6337 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
6338     .set = gfx_v11_0_set_priv_inst_fault_state,
6339     .process = gfx_v11_0_priv_inst_irq,
6340 };
6341 
6342 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
6343 {
6344     adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6345     adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
6346 
6347     adev->gfx.priv_reg_irq.num_types = 1;
6348     adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
6349 
6350     adev->gfx.priv_inst_irq.num_types = 1;
6351     adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
6352 }
6353 
6354 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
6355 {
6356     if (adev->flags & AMD_IS_APU)
6357         adev->gfx.imu.mode = MISSION_MODE;
6358     else
6359         adev->gfx.imu.mode = DEBUG_MODE;
6360 
6361     adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
6362 }
6363 
6364 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
6365 {
6366     adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
6367 }
6368 
6369 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
6370 {
6371     unsigned total_cu = adev->gfx.config.max_cu_per_sh *
6372                 adev->gfx.config.max_sh_per_se *
6373                 adev->gfx.config.max_shader_engines;
6374 
6375     adev->gds.gds_size = 0x1000;
6376     adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
6377     adev->gds.gws_size = 64;
6378     adev->gds.oa_size = 16;
6379 }
6380 
6381 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
6382 {
6383     /* set gfx eng mqd */
6384     adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
6385         sizeof(struct v11_gfx_mqd);
6386     adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
6387         gfx_v11_0_gfx_mqd_init;
6388     /* set compute eng mqd */
6389     adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
6390         sizeof(struct v11_compute_mqd);
6391     adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
6392         gfx_v11_0_compute_mqd_init;
6393 }
6394 
6395 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
6396                               u32 bitmap)
6397 {
6398     u32 data;
6399 
6400     if (!bitmap)
6401         return;
6402 
6403     data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6404     data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6405 
6406     WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
6407 }
6408 
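     /*
      * The active WGP bitmap is derived from the hardware and user
      * inactive-WGP masks: OR them together, then invert within the mask
      * of possible WGPs (max_cu_per_sh / 2, as each WGP holds two CUs).
      */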
6409 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
6410 {
6411     u32 data, wgp_bitmask;
6412     data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
6413     data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
6414 
6415     data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6416     data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6417 
6418     wgp_bitmask =
6419         amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6420 
6421     return (~data) & wgp_bitmask;
6422 }
6423 
6424 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
6425 {
6426     u32 wgp_idx, wgp_active_bitmap;
6427     u32 cu_bitmap_per_wgp, cu_active_bitmap;
6428 
6429     wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
6430     cu_active_bitmap = 0;
6431 
6432     for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
6433         /* each enabled WGP means 2 enabled CUs */
6434         cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
6435         if (wgp_active_bitmap & (1 << wgp_idx))
6436             cu_active_bitmap |= cu_bitmap_per_wgp;
6437     }
6438 
6439     return cu_active_bitmap;
6440 }
6441 
6442 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
6443                  struct amdgpu_cu_info *cu_info)
6444 {
6445     int i, j, k, counter, active_cu_number = 0;
6446     u32 mask, bitmap;
6447     unsigned disable_masks[8 * 2];
6448 
6449     if (!adev || !cu_info)
6450         return -EINVAL;
6451 
6452     amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
6453 
6454     mutex_lock(&adev->grbm_idx_mutex);
6455     for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6456         for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6457             mask = 1;
6458             counter = 0;
6459             gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
6460             if (i < 8 && j < 2)
6461                 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
6462                     adev, disable_masks[i * 2 + j]);
6463             bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
6464 
6465             /*
6466              * GFX11 can support more than 4 SEs, while the bitmap in
6467              * the cu_info struct is 4x4 and the ioctl interface struct
6468              * drm_amdgpu_info_device must stay stable.
6469              * So we use the last two columns of the bitmap to store the
6470              * cu mask for SEs 4 to 7; the layout (worked example below):
6471              *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
6472              *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
6473              *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
6474              *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
6475              *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
6476              *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
6477              *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
6478              *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
6479              */
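                 /*
                  * E.g. SE5/SH1 (i = 5, j = 1) lands in
                  * bitmap[5 % 4][1 + (5 / 4) * 2] == bitmap[1][3].
                  */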
6480             cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
6481 
6482             for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6483                 if (bitmap & mask)
6484                     counter++;
6485 
6486                 mask <<= 1;
6487             }
6488             active_cu_number += counter;
6489         }
6490     }
6491     gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6492     mutex_unlock(&adev->grbm_idx_mutex);
6493 
6494     cu_info->number = active_cu_number;
6495     cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6496 
6497     return 0;
6498 }
6499 
6500 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
6501 {
6502     .type = AMD_IP_BLOCK_TYPE_GFX,
6503     .major = 11,
6504     .minor = 0,
6505     .rev = 0,
6506     .funcs = &gfx_v11_0_ip_funcs,
6507 };