/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_ring.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 4096

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)              ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
    BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
    BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
    BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
    BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
    BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
    BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

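/*
 * CP (PFP/ME/CE/MEC) and RLC microcode images.  The Polaris "_2.bin"
 * variants are updated firmware releases; gfx_v8_0_init_microcode()
 * tries them first and falls back to the original file names when they
 * are missing.
 */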
0101 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
0102 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
0103 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
0104 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
0105 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
0106 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
0107 
0108 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
0109 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
0110 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
0111 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
0112 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
0113 
0114 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
0115 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
0116 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
0117 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
0118 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
0119 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
0120 
0121 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
0122 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
0123 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
0124 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
0125 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
0126 
0127 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
0128 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
0129 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
0130 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
0131 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
0132 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
0133 
0134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
0135 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
0136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
0137 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
0138 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
0139 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
0140 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
0141 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
0142 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
0143 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
0144 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
0145 
0146 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
0147 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
0148 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
0149 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
0150 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
0151 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
0152 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
0153 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
0154 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
0155 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
0156 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
0157 
0158 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
0159 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
0160 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
0161 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
0162 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
0163 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
0164 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
0165 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
0166 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
0167 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
0168 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
0169 
0170 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
0171 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
0172 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
0173 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
0174 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
0175 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
0176 
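/*
 * Per-VMID GDS register offsets: a {BASE, SIZE, GWS, OA} tuple for each of
 * the 16 VMIDs, used when programming global data share, global wave sync
 * and ordered-append allocations.
 */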
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
    {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
    {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
    {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
    {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
    {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
    {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
    {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
    {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
    {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
    {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
    {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
    {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
    {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
    {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
    {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
    {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

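/*
 * "Golden" register tables.  Entries are {register, AND mask, OR value}
 * triplets consumed by amdgpu_device_program_register_sequence(), which
 * read-modify-writes each register, or writes the value directly when the
 * mask is 0xffffffff.
 */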
static const u32 golden_settings_tonga_a11[] =
{
    mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
    mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
    mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    mmGB_GPU_ID, 0x0000000f, 0x00000000,
    mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
    mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
    mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
    mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
    mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
    mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
    mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
    mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
    mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
    mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
    mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
    mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
    mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
    mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
    mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
    mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
    mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
    mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
    mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
    mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
    mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
    mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
    mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
    mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
    mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
    mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
    mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
    mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
    mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
    mmSQ_CONFIG, 0x07f80000, 0x01180000,
    mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
    mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
    mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
    mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
    mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
    mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
    mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
    mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
    mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
    mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
    mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
    mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
    mmSQ_CONFIG, 0x07f80000, 0x01180000,
    mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
    mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
    mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
    mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
    mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
    mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
    mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
    mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
    mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
    mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
    mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
    mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
    mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
    mmSQ_CONFIG, 0x07f80000, 0x07180000,
    mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
    mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
    mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
    mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
    mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
    mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
    mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
    mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
    mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
    mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
    mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
    mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
    mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
    mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
    mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
    mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
    mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
    mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
    mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
    mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
    mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
    mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
    mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
    mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
    mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
    mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
    mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
    mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    mmDB_DEBUG3, 0xc0000000, 0xc0000000,
    mmGB_GPU_ID, 0x0000000f, 0x00000000,
    mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
    mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
    mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
    mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
    mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
    mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
    mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
    mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
    mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
    mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
    mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
    mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
    mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
    mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
    mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
    mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
    mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
    mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
    mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
    mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
    mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
    mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
    mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
    mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
    mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
    mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
    mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
    mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    mmGB_GPU_ID, 0x0000000f, 0x00000000,
    mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
    mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
    mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
    mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
    mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
    mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
    mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
    mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
    mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
    mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
    mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
    mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
    mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
    mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
    mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
    mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
    mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
    mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
    mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
    mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
    mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
    mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
    mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
    mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
    mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
    mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
    mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
    mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
    mmDB_DEBUG2, 0xf00fffff, 0x00000400,
    mmGB_GPU_ID, 0x0000000f, 0x00000000,
    mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
    mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
    mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
    mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
    mmTCC_CTRL, 0x00100000, 0xf31fff7f,
    mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
    mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
    mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
    mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
    mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
    mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
    mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
    mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
    mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
    mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
    mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
    mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
    mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

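/* Human-readable decode of the SQ_EDC_INFO SOURCE field, indexed by value. */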
static const char * const sq_edc_source_names[] = {
    "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
    "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
    "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
    "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
    "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
    "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
    "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L

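/*
 * Apply the per-ASIC golden register tables defined above.  On Polaris10
 * this additionally programs the ACLK divider and, for a few specific
 * board SKUs, issues ATOM BIOS I2C writes as a hardware workaround.
 */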
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
    uint32_t data;

    switch (adev->asic_type) {
    case CHIP_TOPAZ:
        amdgpu_device_program_register_sequence(adev,
                            iceland_mgcg_cgcg_init,
                            ARRAY_SIZE(iceland_mgcg_cgcg_init));
        amdgpu_device_program_register_sequence(adev,
                            golden_settings_iceland_a11,
                            ARRAY_SIZE(golden_settings_iceland_a11));
        amdgpu_device_program_register_sequence(adev,
                            iceland_golden_common_all,
                            ARRAY_SIZE(iceland_golden_common_all));
        break;
    case CHIP_FIJI:
        amdgpu_device_program_register_sequence(adev,
                            fiji_mgcg_cgcg_init,
                            ARRAY_SIZE(fiji_mgcg_cgcg_init));
        amdgpu_device_program_register_sequence(adev,
                            golden_settings_fiji_a10,
                            ARRAY_SIZE(golden_settings_fiji_a10));
        amdgpu_device_program_register_sequence(adev,
                            fiji_golden_common_all,
                            ARRAY_SIZE(fiji_golden_common_all));
        break;

    case CHIP_TONGA:
        amdgpu_device_program_register_sequence(adev,
                            tonga_mgcg_cgcg_init,
                            ARRAY_SIZE(tonga_mgcg_cgcg_init));
        amdgpu_device_program_register_sequence(adev,
                            golden_settings_tonga_a11,
                            ARRAY_SIZE(golden_settings_tonga_a11));
        amdgpu_device_program_register_sequence(adev,
                            tonga_golden_common_all,
                            ARRAY_SIZE(tonga_golden_common_all));
        break;
    case CHIP_VEGAM:
        amdgpu_device_program_register_sequence(adev,
                            golden_settings_vegam_a11,
                            ARRAY_SIZE(golden_settings_vegam_a11));
        amdgpu_device_program_register_sequence(adev,
                            vegam_golden_common_all,
                            ARRAY_SIZE(vegam_golden_common_all));
        break;
    case CHIP_POLARIS11:
    case CHIP_POLARIS12:
        amdgpu_device_program_register_sequence(adev,
                            golden_settings_polaris11_a11,
                            ARRAY_SIZE(golden_settings_polaris11_a11));
        amdgpu_device_program_register_sequence(adev,
                            polaris11_golden_common_all,
                            ARRAY_SIZE(polaris11_golden_common_all));
        break;
    case CHIP_POLARIS10:
        amdgpu_device_program_register_sequence(adev,
                            golden_settings_polaris10_a11,
                            ARRAY_SIZE(golden_settings_polaris10_a11));
        amdgpu_device_program_register_sequence(adev,
                            polaris10_golden_common_all,
                            ARRAY_SIZE(polaris10_golden_common_all));
        data = RREG32_SMC(ixCG_ACLK_CNTL);
        data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
        data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
        WREG32_SMC(ixCG_ACLK_CNTL, data);
        if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
            ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
             (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
             (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
            amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
            amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
        }
        break;
    case CHIP_CARRIZO:
        amdgpu_device_program_register_sequence(adev,
                            cz_mgcg_cgcg_init,
                            ARRAY_SIZE(cz_mgcg_cgcg_init));
        amdgpu_device_program_register_sequence(adev,
                            cz_golden_settings_a11,
                            ARRAY_SIZE(cz_golden_settings_a11));
        amdgpu_device_program_register_sequence(adev,
                            cz_golden_common_all,
                            ARRAY_SIZE(cz_golden_common_all));
        break;
    case CHIP_STONEY:
        amdgpu_device_program_register_sequence(adev,
                            stoney_mgcg_cgcg_init,
                            ARRAY_SIZE(stoney_mgcg_cgcg_init));
        amdgpu_device_program_register_sequence(adev,
                            stoney_golden_settings_a11,
                            ARRAY_SIZE(stoney_golden_settings_a11));
        amdgpu_device_program_register_sequence(adev,
                            stoney_golden_common_all,
                            ARRAY_SIZE(stoney_golden_common_all));
        break;
    default:
        break;
    }
}

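/*
 * Basic ring test: write 0xCAFEDEAD to SCRATCH_REG0, submit a
 * SET_UCONFIG_REG packet that stores 0xDEADBEEF there, and poll for up to
 * adev->usec_timeout microseconds for the value to land.
 */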
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;
    uint32_t tmp = 0;
    unsigned i;
    int r;

    WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
    r = amdgpu_ring_alloc(ring, 3);
    if (r)
        return r;

    amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
    amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
    amdgpu_ring_write(ring, 0xDEADBEEF);
    amdgpu_ring_commit(ring);

    for (i = 0; i < adev->usec_timeout; i++) {
        tmp = RREG32(mmSCRATCH_REG0);
        if (tmp == 0xDEADBEEF)
            break;
        udelay(1);
    }

    if (i >= adev->usec_timeout)
        r = -ETIMEDOUT;

    return r;
}

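/*
 * Indirect buffer (IB) test: seed a writeback slot with 0xCAFEDEAD, submit
 * a small IB whose WRITE_DATA packet overwrites it with 0xDEADBEEF, then
 * wait on the fence and check that memory was updated.
 */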
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
    struct amdgpu_device *adev = ring->adev;
    struct amdgpu_ib ib;
    struct dma_fence *f = NULL;

    unsigned int index;
    uint64_t gpu_addr;
    uint32_t tmp;
    long r;

    r = amdgpu_device_wb_get(adev, &index);
    if (r)
        return r;

    gpu_addr = adev->wb.gpu_addr + (index * 4);
    adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
    memset(&ib, 0, sizeof(ib));
    r = amdgpu_ib_get(adev, NULL, 16,
                    AMDGPU_IB_POOL_DIRECT, &ib);
    if (r)
        goto err1;

    ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
    ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
    ib.ptr[2] = lower_32_bits(gpu_addr);
    ib.ptr[3] = upper_32_bits(gpu_addr);
    ib.ptr[4] = 0xDEADBEEF;
    ib.length_dw = 5;

    r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
    if (r)
        goto err2;

    r = dma_fence_wait_timeout(f, false, timeout);
    if (r == 0) {
        r = -ETIMEDOUT;
        goto err2;
    } else if (r < 0) {
        goto err2;
    }

    tmp = adev->wb.wb[index];
    if (tmp == 0xDEADBEEF)
        r = 0;
    else
        r = -EINVAL;

err2:
    amdgpu_ib_free(adev, &ib, NULL);
    dma_fence_put(f);
err1:
    amdgpu_device_wb_free(adev, index);
    return r;
}

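/*
 * Release all firmware references taken by gfx_v8_0_init_microcode().
 * Stoney and Topaz ship no MEC2 image, so mec2_fw is not released there.
 */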
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
    release_firmware(adev->gfx.pfp_fw);
    adev->gfx.pfp_fw = NULL;
    release_firmware(adev->gfx.me_fw);
    adev->gfx.me_fw = NULL;
    release_firmware(adev->gfx.ce_fw);
    adev->gfx.ce_fw = NULL;
    release_firmware(adev->gfx.rlc_fw);
    adev->gfx.rlc_fw = NULL;
    release_firmware(adev->gfx.mec_fw);
    adev->gfx.mec_fw = NULL;
    if ((adev->asic_type != CHIP_STONEY) &&
        (adev->asic_type != CHIP_TOPAZ))
        release_firmware(adev->gfx.mec2_fw);
    adev->gfx.mec2_fw = NULL;

    kfree(adev->gfx.rlc.register_list_format);
}

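/*
 * Fetch and validate all gfx microcode (PFP, ME, CE, RLC, MEC and, where
 * present, MEC2), cache version/feature numbers, copy the RLC register
 * save/restore lists out of the RLC header, and register each image in
 * adev->firmware.ucode[] for firmware loading.
 */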
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
    const char *chip_name;
    char fw_name[30];
    int err;
    struct amdgpu_firmware_info *info = NULL;
    const struct common_firmware_header *header = NULL;
    const struct gfx_firmware_header_v1_0 *cp_hdr;
    const struct rlc_firmware_header_v2_0 *rlc_hdr;
    unsigned int *tmp = NULL, i;

    DRM_DEBUG("\n");

    switch (adev->asic_type) {
    case CHIP_TOPAZ:
        chip_name = "topaz";
        break;
    case CHIP_TONGA:
        chip_name = "tonga";
        break;
    case CHIP_CARRIZO:
        chip_name = "carrizo";
        break;
    case CHIP_FIJI:
        chip_name = "fiji";
        break;
    case CHIP_STONEY:
        chip_name = "stoney";
        break;
    case CHIP_POLARIS10:
        chip_name = "polaris10";
        break;
    case CHIP_POLARIS11:
        chip_name = "polaris11";
        break;
    case CHIP_POLARIS12:
        chip_name = "polaris12";
        break;
    case CHIP_VEGAM:
        chip_name = "vegam";
        break;
    default:
        BUG();
    }

    if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err == -ENOENT) {
            snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
            err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        }
    } else {
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
    }
    if (err)
        goto out;
    err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
    if (err)
        goto out;
    cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
    adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
    adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

    if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err == -ENOENT) {
            snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
            err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        }
    } else {
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
    }
    if (err)
        goto out;
    err = amdgpu_ucode_validate(adev->gfx.me_fw);
    if (err)
        goto out;
    cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
    adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
    adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

    if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err == -ENOENT) {
            snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
            err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        }
    } else {
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
    }
    if (err)
        goto out;
    err = amdgpu_ucode_validate(adev->gfx.ce_fw);
    if (err)
        goto out;
    cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
    adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
    adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

    /*
     * Support for MCBP/Virtualization in combination with chained IBs was
     * formally released with feature version #46.
     */
    if (adev->gfx.ce_feature_version >= 46 &&
        adev->gfx.pfp_feature_version >= 46) {
        adev->virt.chained_ib_support = true;
        DRM_INFO("Chained IB support enabled!\n");
    } else
        adev->virt.chained_ib_support = false;

    snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
    err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
    if (err)
        goto out;
    err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
    if (err)
        goto out;
    rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
    adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
    adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

    adev->gfx.rlc.save_and_restore_offset =
            le32_to_cpu(rlc_hdr->save_and_restore_offset);
    adev->gfx.rlc.clear_state_descriptor_offset =
            le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
    adev->gfx.rlc.avail_scratch_ram_locations =
            le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
    adev->gfx.rlc.reg_restore_list_size =
            le32_to_cpu(rlc_hdr->reg_restore_list_size);
    adev->gfx.rlc.reg_list_format_start =
            le32_to_cpu(rlc_hdr->reg_list_format_start);
    adev->gfx.rlc.reg_list_format_separate_start =
            le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
    adev->gfx.rlc.starting_offsets_start =
            le32_to_cpu(rlc_hdr->starting_offsets_start);
    adev->gfx.rlc.reg_list_format_size_bytes =
            le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
    adev->gfx.rlc.reg_list_size_bytes =
            le32_to_cpu(rlc_hdr->reg_list_size_bytes);

    adev->gfx.rlc.register_list_format =
            kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                    adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

    if (!adev->gfx.rlc.register_list_format) {
        err = -ENOMEM;
        goto out;
    }

    tmp = (unsigned int *)((uintptr_t)rlc_hdr +
            le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
    for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
        adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

    adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

    tmp = (unsigned int *)((uintptr_t)rlc_hdr +
            le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
    for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
        adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

    if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err == -ENOENT) {
            snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
            err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        }
    } else {
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
    }
    if (err)
        goto out;
    err = amdgpu_ucode_validate(adev->gfx.mec_fw);
    if (err)
        goto out;
    cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
    adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
    adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

    if ((adev->asic_type != CHIP_STONEY) &&
        (adev->asic_type != CHIP_TOPAZ)) {
        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
            snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
            err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
            if (err == -ENOENT) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
            }
        } else {
            snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
            err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
        }
        if (!err) {
            err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
            if (err)
                goto out;
            cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                adev->gfx.mec2_fw->data;
            adev->gfx.mec2_fw_version =
                le32_to_cpu(cp_hdr->header.ucode_version);
            adev->gfx.mec2_feature_version =
                le32_to_cpu(cp_hdr->ucode_feature_version);
        } else {
            err = 0;
            adev->gfx.mec2_fw = NULL;
        }
    }

    info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
    info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
    info->fw = adev->gfx.pfp_fw;
    header = (const struct common_firmware_header *)info->fw->data;
    adev->firmware.fw_size +=
        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

    info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
    info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
    info->fw = adev->gfx.me_fw;
    header = (const struct common_firmware_header *)info->fw->data;
    adev->firmware.fw_size +=
        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

    info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
    info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
    info->fw = adev->gfx.ce_fw;
    header = (const struct common_firmware_header *)info->fw->data;
    adev->firmware.fw_size +=
        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

    info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
    info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
    info->fw = adev->gfx.rlc_fw;
    header = (const struct common_firmware_header *)info->fw->data;
    adev->firmware.fw_size +=
        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

    info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
    info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
    info->fw = adev->gfx.mec_fw;
    header = (const struct common_firmware_header *)info->fw->data;
    adev->firmware.fw_size +=
        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

    /* we also need to account for the MEC jump table (JT) */
    cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
    adev->firmware.fw_size +=
        ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

    if (amdgpu_sriov_vf(adev)) {
        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
        info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
        info->fw = adev->gfx.mec_fw;
        adev->firmware.fw_size +=
            ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
    }

    if (adev->gfx.mec2_fw) {
        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
        info->fw = adev->gfx.mec2_fw;
        header = (const struct common_firmware_header *)info->fw->data;
        adev->firmware.fw_size +=
            ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
    }

out:
    if (err) {
        dev_err(adev->dev,
            "gfx8: Failed to load firmware \"%s\"\n",
            fw_name);
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;
    }
    return err;
}

1238 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1239                     volatile u32 *buffer)
1240 {
1241     u32 count = 0, i;
1242     const struct cs_section_def *sect = NULL;
1243     const struct cs_extent_def *ext = NULL;
1244 
1245     if (adev->gfx.rlc.cs_data == NULL)
1246         return;
1247     if (buffer == NULL)
1248         return;
1249 
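     /*
      * Clear-state buffer layout emitted below:
      *   PREAMBLE(begin clear state) -> CONTEXT_CONTROL ->
      *   one SET_CONTEXT_REG burst per extent of rlc.cs_data ->
      *   PA_SC_RASTER_CONFIG/_1 -> PREAMBLE(end clear state) ->
      *   CLEAR_STATE.
      */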
1250     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1251     buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1252 
1253     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1254     buffer[count++] = cpu_to_le32(0x80000000);
1255     buffer[count++] = cpu_to_le32(0x80000000);
1256 
1257     for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1258         for (ext = sect->section; ext->extent != NULL; ++ext) {
1259             if (sect->id == SECT_CONTEXT) {
1260                 buffer[count++] =
1261                     cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1262                 buffer[count++] = cpu_to_le32(ext->reg_index -
1263                         PACKET3_SET_CONTEXT_REG_START);
1264                 for (i = 0; i < ext->reg_count; i++)
1265                     buffer[count++] = cpu_to_le32(ext->extent[i]);
1266             } else {
1267                 return;
1268             }
1269         }
1270     }
1271 
1272     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1273     buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1274             PACKET3_SET_CONTEXT_REG_START);
1275     buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1276     buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1277 
1278     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1279     buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1280 
1281     buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1282     buffer[count++] = cpu_to_le32(0);
1283 }
1284 
1285 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1286 {
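     /*
      * Carrizo gets one extra CP jump table slot, most likely for its
      * MEC2 (of the two APUs that use the RLC cp_table, Carrizo and
      * Stoney, only Carrizo carries MEC2 ucode); everyone else uses 4.
      */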
1287     if (adev->asic_type == CHIP_CARRIZO)
1288         return 5;
1289     else
1290         return 4;
1291 }
1292 
1293 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1294 {
1295     const struct cs_section_def *cs_data;
1296     int r;
1297 
1298     adev->gfx.rlc.cs_data = vi_cs_data;
1299 
1300     cs_data = adev->gfx.rlc.cs_data;
1301 
1302     if (cs_data) {
1303         /* init clear state block */
1304         r = amdgpu_gfx_rlc_init_csb(adev);
1305         if (r)
1306             return r;
1307     }
1308 
1309     if ((adev->asic_type == CHIP_CARRIZO) ||
1310         (adev->asic_type == CHIP_STONEY)) {
1311         adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1312         r = amdgpu_gfx_rlc_init_cpt(adev);
1313         if (r)
1314             return r;
1315     }
1316 
1317     /* init spm vmid with 0xf */
1318     if (adev->gfx.rlc.funcs->update_spm_vmid)
1319         adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1320 
1321     return 0;
1322 }
1323 
1324 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1325 {
1326     amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1327 }
1328 
1329 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1330 {
1331     int r;
1332     u32 *hpd;
1333     size_t mec_hpd_size;
1334 
1335     bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1336 
1337     /* take ownership of the relevant compute queues */
1338     amdgpu_gfx_compute_queue_acquire(adev);
1339 
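     /*
      * Reserve one GFX8_MEC_HPD_SIZE (4 KiB) EOP slot in VRAM per
      * acquired compute ring and zero the whole buffer up front.
      */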
1340     mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1341     if (mec_hpd_size) {
1342         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1343                           AMDGPU_GEM_DOMAIN_VRAM,
1344                           &adev->gfx.mec.hpd_eop_obj,
1345                           &adev->gfx.mec.hpd_eop_gpu_addr,
1346                           (void **)&hpd);
1347         if (r) {
1348             dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1349             return r;
1350         }
1351 
1352         memset(hpd, 0, mec_hpd_size);
1353 
1354         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1355         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1356     }
1357 
1358     return 0;
1359 }
1360 
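 /*
  * The two blobs below are raw GCN3 machine code.  A hedged decoding:
  * the VGPR shader is a run of v_mov_b32 vN, sM writes covering
  * v0..v63, and the SGPR shader a run of s_mov_b64/s_mov_b32 writes
  * across the SGPR file; both end with s_barrier (0xbf8a0000) followed
  * by s_endpgm (0xbf810000).
  */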
1361 static const u32 vgpr_init_compute_shader[] =
1362 {
1363     0x7e000209, 0x7e020208,
1364     0x7e040207, 0x7e060206,
1365     0x7e080205, 0x7e0a0204,
1366     0x7e0c0203, 0x7e0e0202,
1367     0x7e100201, 0x7e120200,
1368     0x7e140209, 0x7e160208,
1369     0x7e180207, 0x7e1a0206,
1370     0x7e1c0205, 0x7e1e0204,
1371     0x7e200203, 0x7e220202,
1372     0x7e240201, 0x7e260200,
1373     0x7e280209, 0x7e2a0208,
1374     0x7e2c0207, 0x7e2e0206,
1375     0x7e300205, 0x7e320204,
1376     0x7e340203, 0x7e360202,
1377     0x7e380201, 0x7e3a0200,
1378     0x7e3c0209, 0x7e3e0208,
1379     0x7e400207, 0x7e420206,
1380     0x7e440205, 0x7e460204,
1381     0x7e480203, 0x7e4a0202,
1382     0x7e4c0201, 0x7e4e0200,
1383     0x7e500209, 0x7e520208,
1384     0x7e540207, 0x7e560206,
1385     0x7e580205, 0x7e5a0204,
1386     0x7e5c0203, 0x7e5e0202,
1387     0x7e600201, 0x7e620200,
1388     0x7e640209, 0x7e660208,
1389     0x7e680207, 0x7e6a0206,
1390     0x7e6c0205, 0x7e6e0204,
1391     0x7e700203, 0x7e720202,
1392     0x7e740201, 0x7e760200,
1393     0x7e780209, 0x7e7a0208,
1394     0x7e7c0207, 0x7e7e0206,
1395     0xbf8a0000, 0xbf810000,
1396 };
1397 
1398 static const u32 sgpr_init_compute_shader[] =
1399 {
1400     0xbe8a0100, 0xbe8c0102,
1401     0xbe8e0104, 0xbe900106,
1402     0xbe920108, 0xbe940100,
1403     0xbe960102, 0xbe980104,
1404     0xbe9a0106, 0xbe9c0108,
1405     0xbe9e0100, 0xbea00102,
1406     0xbea20104, 0xbea40106,
1407     0xbea60108, 0xbea80100,
1408     0xbeaa0102, 0xbeac0104,
1409     0xbeae0106, 0xbeb00108,
1410     0xbeb20100, 0xbeb40102,
1411     0xbeb60104, 0xbeb80106,
1412     0xbeba0108, 0xbebc0100,
1413     0xbebe0102, 0xbec00104,
1414     0xbec20106, 0xbec40108,
1415     0xbec60100, 0xbec80102,
1416     0xbee60004, 0xbee70005,
1417     0xbeea0006, 0xbeeb0007,
1418     0xbee80008, 0xbee90009,
1419     0xbefc0000, 0xbf8a0000,
1420     0xbf810000, 0x00000000,
1421 };
1422 
1423 static const u32 vgpr_init_regs[] =
1424 {
1425     mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1426     mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1427     mmCOMPUTE_NUM_THREAD_X, 256*4,
1428     mmCOMPUTE_NUM_THREAD_Y, 1,
1429     mmCOMPUTE_NUM_THREAD_Z, 1,
1430     mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1431     mmCOMPUTE_PGM_RSRC2, 20,
1432     mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1433     mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1434     mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1435     mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1436     mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1437     mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1438     mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1439     mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1440     mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1441     mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1442 };
1443 
1444 static const u32 sgpr1_init_regs[] =
1445 {
1446     mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1447     mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1448     mmCOMPUTE_NUM_THREAD_X, 256*5,
1449     mmCOMPUTE_NUM_THREAD_Y, 1,
1450     mmCOMPUTE_NUM_THREAD_Z, 1,
1451     mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 SGPRs) */
1452     mmCOMPUTE_PGM_RSRC2, 20,
1453     mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1454     mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1455     mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1456     mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1457     mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1458     mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1459     mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1460     mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1461     mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1462     mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1463 };
1464 
1465 static const u32 sgpr2_init_regs[] =
1466 {
1467     mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1468     mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1469     mmCOMPUTE_NUM_THREAD_X, 256*5,
1470     mmCOMPUTE_NUM_THREAD_Y, 1,
1471     mmCOMPUTE_NUM_THREAD_Z, 1,
1472     mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 SGPRs) */
1473     mmCOMPUTE_PGM_RSRC2, 20,
1474     mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1475     mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1476     mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1477     mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1478     mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1479     mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1480     mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1481     mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1482     mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1483     mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1484 };
1485 
1486 static const u32 sec_ded_counter_registers[] =
1487 {
1488     mmCPC_EDC_ATC_CNT,
1489     mmCPC_EDC_SCRATCH_CNT,
1490     mmCPC_EDC_UCODE_CNT,
1491     mmCPF_EDC_ATC_CNT,
1492     mmCPF_EDC_ROQ_CNT,
1493     mmCPF_EDC_TAG_CNT,
1494     mmCPG_EDC_ATC_CNT,
1495     mmCPG_EDC_DMA_CNT,
1496     mmCPG_EDC_TAG_CNT,
1497     mmDC_EDC_CSINVOC_CNT,
1498     mmDC_EDC_RESTORE_CNT,
1499     mmDC_EDC_STATE_CNT,
1500     mmGDS_EDC_CNT,
1501     mmGDS_EDC_GRBM_CNT,
1502     mmGDS_EDC_OA_DED,
1503     mmSPI_EDC_CNT,
1504     mmSQC_ATC_EDC_GATCL1_CNT,
1505     mmSQC_EDC_CNT,
1506     mmSQ_EDC_DED_CNT,
1507     mmSQ_EDC_INFO,
1508     mmSQ_EDC_SEC_CNT,
1509     mmTCC_EDC_CNT,
1510     mmTCP_ATC_EDC_GATCL1_CNT,
1511     mmTCP_EDC_CNT,
1512     mmTD_EDC_CNT
1513 };
1514 
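 /*
  * EDC GPR workaround (Carrizo only): dispatch throwaway compute
  * shaders that write the whole VGPR and SGPR files so the EDC logic
  * starts from initialized register state, then enable DED_MODE /
  * PROP_FED and clear the error counters by reading them back.
  */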
1515 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1516 {
1517     struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1518     struct amdgpu_ib ib;
1519     struct dma_fence *f = NULL;
1520     int r, i;
1521     u32 tmp;
1522     unsigned total_size, vgpr_offset, sgpr_offset;
1523     u64 gpu_addr;
1524 
1525     /* only supported on CZ */
1526     if (adev->asic_type != CHIP_CARRIZO)
1527         return 0;
1528 
1529     /* bail if the compute ring is not ready */
1530     if (!ring->sched.ready)
1531         return 0;
1532 
1533     tmp = RREG32(mmGB_EDC_MODE);
1534     WREG32(mmGB_EDC_MODE, 0);
1535 
1536     total_size =
1537         (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1538     total_size +=
1539         (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1540     total_size +=
1541         (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
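     /*
      * Sizing sketch: each register pair costs a 3-dword SET_SH_REG
      * write; add 4 dwords for COMPUTE_PGM_LO/HI, 5 for DISPATCH_DIRECT
      * and 2 for the EVENT_WRITE flush, then multiply by 4 to convert
      * dwords to bytes.
      */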
1542     total_size = ALIGN(total_size, 256);
1543     vgpr_offset = total_size;
1544     total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1545     sgpr_offset = total_size;
1546     total_size += sizeof(sgpr_init_compute_shader);
1547 
1548     /* allocate an indirect buffer to put the commands in */
1549     memset(&ib, 0, sizeof(ib));
1550     r = amdgpu_ib_get(adev, NULL, total_size,
1551                     AMDGPU_IB_POOL_DIRECT, &ib);
1552     if (r) {
1553         DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1554         return r;
1555     }
1556 
1557     /* load the compute shaders */
1558     for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1559         ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1560 
1561     for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1562         ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1563 
1564     /* init the ib length to 0 */
1565     ib.length_dw = 0;
1566 
1567     /* VGPR */
1568     /* write the register state for the compute dispatch */
1569     for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1570         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1571         ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1572         ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1573     }
1574     /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1575     gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1576     ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1577     ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1578     ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1579     ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1580 
1581     /* write dispatch packet */
1582     ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1583     ib.ptr[ib.length_dw++] = 8; /* x */
1584     ib.ptr[ib.length_dw++] = 1; /* y */
1585     ib.ptr[ib.length_dw++] = 1; /* z */
1586     ib.ptr[ib.length_dw++] =
1587         REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1588 
1589     /* write CS partial flush packet */
1590     ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1591     ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1592 
1593     /* SGPR1 */
1594     /* write the register state for the compute dispatch */
1595     for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1596         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1597         ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1598         ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1599     }
1600     /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1601     gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1602     ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1603     ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1604     ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1605     ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1606 
1607     /* write dispatch packet */
1608     ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1609     ib.ptr[ib.length_dw++] = 8; /* x */
1610     ib.ptr[ib.length_dw++] = 1; /* y */
1611     ib.ptr[ib.length_dw++] = 1; /* z */
1612     ib.ptr[ib.length_dw++] =
1613         REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1614 
1615     /* write CS partial flush packet */
1616     ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1617     ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1618 
1619     /* SGPR2 */
1620     /* write the register state for the compute dispatch */
1621     for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1622         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1623         ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1624         ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1625     }
1626     /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1627     gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1628     ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1629     ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1630     ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1631     ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1632 
1633     /* write dispatch packet */
1634     ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1635     ib.ptr[ib.length_dw++] = 8; /* x */
1636     ib.ptr[ib.length_dw++] = 1; /* y */
1637     ib.ptr[ib.length_dw++] = 1; /* z */
1638     ib.ptr[ib.length_dw++] =
1639         REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1640 
1641     /* write CS partial flush packet */
1642     ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1643     ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1644 
1645     /* schedule the ib on the ring */
1646     r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1647     if (r) {
1648         DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1649         goto fail;
1650     }
1651 
1652     /* wait for the GPU to finish processing the IB */
1653     r = dma_fence_wait(f, false);
1654     if (r) {
1655         DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1656         goto fail;
1657     }
1658 
1659     tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1660     tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1661     WREG32(mmGB_EDC_MODE, tmp);
1662 
1663     tmp = RREG32(mmCC_GC_EDC_CONFIG);
1664     tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1665     WREG32(mmCC_GC_EDC_CONFIG, tmp);
1666 
1667 
1668     /* read back registers to clear the counters */
1669     for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1670         RREG32(sec_ded_counter_registers[i]);
1671 
1672 fail:
1673     amdgpu_ib_free(adev, &ib, NULL);
1674     dma_fence_put(f);
1675 
1676     return r;
1677 }
1678 
1679 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1680 {
1681     u32 gb_addr_config;
1682     u32 mc_arb_ramcfg;
1683     u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1684     u32 tmp;
1685     int ret;
1686 
1687     switch (adev->asic_type) {
1688     case CHIP_TOPAZ:
1689         adev->gfx.config.max_shader_engines = 1;
1690         adev->gfx.config.max_tile_pipes = 2;
1691         adev->gfx.config.max_cu_per_sh = 6;
1692         adev->gfx.config.max_sh_per_se = 1;
1693         adev->gfx.config.max_backends_per_se = 2;
1694         adev->gfx.config.max_texture_channel_caches = 2;
1695         adev->gfx.config.max_gprs = 256;
1696         adev->gfx.config.max_gs_threads = 32;
1697         adev->gfx.config.max_hw_contexts = 8;
1698 
1699         adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1700         adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1701         adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1702         adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1703         gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1704         break;
1705     case CHIP_FIJI:
1706         adev->gfx.config.max_shader_engines = 4;
1707         adev->gfx.config.max_tile_pipes = 16;
1708         adev->gfx.config.max_cu_per_sh = 16;
1709         adev->gfx.config.max_sh_per_se = 1;
1710         adev->gfx.config.max_backends_per_se = 4;
1711         adev->gfx.config.max_texture_channel_caches = 16;
1712         adev->gfx.config.max_gprs = 256;
1713         adev->gfx.config.max_gs_threads = 32;
1714         adev->gfx.config.max_hw_contexts = 8;
1715 
1716         adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717         adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718         adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719         adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1720         gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1721         break;
1722     case CHIP_POLARIS11:
1723     case CHIP_POLARIS12:
1724         ret = amdgpu_atombios_get_gfx_info(adev);
1725         if (ret)
1726             return ret;
1727         adev->gfx.config.max_gprs = 256;
1728         adev->gfx.config.max_gs_threads = 32;
1729         adev->gfx.config.max_hw_contexts = 8;
1730 
1731         adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732         adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733         adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734         adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735         gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1736         break;
1737     case CHIP_POLARIS10:
1738     case CHIP_VEGAM:
1739         ret = amdgpu_atombios_get_gfx_info(adev);
1740         if (ret)
1741             return ret;
1742         adev->gfx.config.max_gprs = 256;
1743         adev->gfx.config.max_gs_threads = 32;
1744         adev->gfx.config.max_hw_contexts = 8;
1745 
1746         adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1747         adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1748         adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1749         adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1750         gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1751         break;
1752     case CHIP_TONGA:
1753         adev->gfx.config.max_shader_engines = 4;
1754         adev->gfx.config.max_tile_pipes = 8;
1755         adev->gfx.config.max_cu_per_sh = 8;
1756         adev->gfx.config.max_sh_per_se = 1;
1757         adev->gfx.config.max_backends_per_se = 2;
1758         adev->gfx.config.max_texture_channel_caches = 8;
1759         adev->gfx.config.max_gprs = 256;
1760         adev->gfx.config.max_gs_threads = 32;
1761         adev->gfx.config.max_hw_contexts = 8;
1762 
1763         adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764         adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765         adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766         adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767         gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1768         break;
1769     case CHIP_CARRIZO:
1770         adev->gfx.config.max_shader_engines = 1;
1771         adev->gfx.config.max_tile_pipes = 2;
1772         adev->gfx.config.max_sh_per_se = 1;
1773         adev->gfx.config.max_backends_per_se = 2;
1774         adev->gfx.config.max_cu_per_sh = 8;
1775         adev->gfx.config.max_texture_channel_caches = 2;
1776         adev->gfx.config.max_gprs = 256;
1777         adev->gfx.config.max_gs_threads = 32;
1778         adev->gfx.config.max_hw_contexts = 8;
1779 
1780         adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1781         adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1782         adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1783         adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1784         gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1785         break;
1786     case CHIP_STONEY:
1787         adev->gfx.config.max_shader_engines = 1;
1788         adev->gfx.config.max_tile_pipes = 2;
1789         adev->gfx.config.max_sh_per_se = 1;
1790         adev->gfx.config.max_backends_per_se = 1;
1791         adev->gfx.config.max_cu_per_sh = 3;
1792         adev->gfx.config.max_texture_channel_caches = 2;
1793         adev->gfx.config.max_gprs = 256;
1794         adev->gfx.config.max_gs_threads = 16;
1795         adev->gfx.config.max_hw_contexts = 8;
1796 
1797         adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1798         adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1799         adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1800         adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1801         gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1802         break;
1803     default:
1804         adev->gfx.config.max_shader_engines = 2;
1805         adev->gfx.config.max_tile_pipes = 4;
1806         adev->gfx.config.max_cu_per_sh = 2;
1807         adev->gfx.config.max_sh_per_se = 1;
1808         adev->gfx.config.max_backends_per_se = 2;
1809         adev->gfx.config.max_texture_channel_caches = 4;
1810         adev->gfx.config.max_gprs = 256;
1811         adev->gfx.config.max_gs_threads = 32;
1812         adev->gfx.config.max_hw_contexts = 8;
1813 
1814         adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1815         adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1816         adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1817         adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1818         gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1819         break;
1820     }
1821 
1822     adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1823     mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1824 
1825     adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1826                 MC_ARB_RAMCFG, NOOFBANK);
1827     adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1828                 MC_ARB_RAMCFG, NOOFRANKS);
1829 
1830     adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1831     adev->gfx.config.mem_max_burst_length_bytes = 256;
1832     if (adev->flags & AMD_IS_APU) {
1833         /* Get memory bank mapping mode. */
1834         tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1835         dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1836         dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1837 
1838         tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1839         dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1840         dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1841 
1842         /* Validate settings in case only one DIMM is installed. */
1843         if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1844             dimm00_addr_map = 0;
1845         if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1846             dimm01_addr_map = 0;
1847         if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1848             dimm10_addr_map = 0;
1849         if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1850             dimm11_addr_map = 0;
1851 
1852         /* If the DIMM address map is 8GB, the ROW size should be 2KB; otherwise 1KB. */
1853         /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger ROW size. */
1854         if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1855             adev->gfx.config.mem_row_size_in_kb = 2;
1856         else
1857             adev->gfx.config.mem_row_size_in_kb = 1;
1858     } else {
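         /*
          * Discrete parts: row bytes = 4 bytes/column * 2^(8 + NOOFCOLS)
          * columns, expressed in KiB and clamped to the 4 KiB maximum
          * that GB_ADDR_CONFIG can describe.
          */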
1859         tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1860         adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1861         if (adev->gfx.config.mem_row_size_in_kb > 4)
1862             adev->gfx.config.mem_row_size_in_kb = 4;
1863     }
1864 
1865     adev->gfx.config.shader_engine_tile_size = 32;
1866     adev->gfx.config.num_gpus = 1;
1867     adev->gfx.config.multi_gpu_tile_size = 64;
1868 
1869     /* fix up row size */
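     /* GB_ADDR_CONFIG.ROW_SIZE encodes log2(row size in KiB): 1K->0, 2K->1, 4K->2 */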
1870     switch (adev->gfx.config.mem_row_size_in_kb) {
1871     case 1:
1872     default:
1873         gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1874         break;
1875     case 2:
1876         gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1877         break;
1878     case 4:
1879         gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1880         break;
1881     }
1882     adev->gfx.config.gb_addr_config = gb_addr_config;
1883 
1884     return 0;
1885 }
1886 
1887 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1888                     int mec, int pipe, int queue)
1889 {
1890     int r;
1891     unsigned irq_type;
1892     struct amdgpu_ring *ring;
1893     unsigned int hw_prio;
1894 
1895     ring = &adev->gfx.compute_ring[ring_id];
1896 
1897     /* mec0 is me1 */
1898     ring->me = mec + 1;
1899     ring->pipe = pipe;
1900     ring->queue = queue;
1901 
1902     ring->ring_obj = NULL;
1903     ring->use_doorbell = true;
1904     ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1905     ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1906                 + (ring_id * GFX8_MEC_HPD_SIZE);
1907     sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1908 
1909     irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1910         + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1911         + ring->pipe;
1912 
1913     hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1914             AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1915     /* type-2 packets are deprecated on MEC, use type-3 instead */
1916     r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1917                  hw_prio, NULL);
1918     if (r)
1919         return r;
1920 
1921 
1922     return 0;
1923 }
1924 
1925 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1926 
1927 static int gfx_v8_0_sw_init(void *handle)
1928 {
1929     int i, j, k, r, ring_id;
1930     struct amdgpu_ring *ring;
1931     struct amdgpu_kiq *kiq;
1932     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1933 
1934     switch (adev->asic_type) {
1935     case CHIP_TONGA:
1936     case CHIP_CARRIZO:
1937     case CHIP_FIJI:
1938     case CHIP_POLARIS10:
1939     case CHIP_POLARIS11:
1940     case CHIP_POLARIS12:
1941     case CHIP_VEGAM:
1942         adev->gfx.mec.num_mec = 2;
1943         break;
1944     case CHIP_TOPAZ:
1945     case CHIP_STONEY:
1946     default:
1947         adev->gfx.mec.num_mec = 1;
1948         break;
1949     }
1950 
1951     adev->gfx.mec.num_pipe_per_mec = 4;
1952     adev->gfx.mec.num_queue_per_pipe = 8;
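     /* i.e. at most num_mec * 4 pipes * 8 queues = 32 or 64 hardware compute queues */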
1953 
1954     /* EOP Event */
1955     r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1956     if (r)
1957         return r;
1958 
1959     /* Privileged reg */
1960     r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1961                   &adev->gfx.priv_reg_irq);
1962     if (r)
1963         return r;
1964 
1965     /* Privileged inst */
1966     r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1967                   &adev->gfx.priv_inst_irq);
1968     if (r)
1969         return r;
1970 
1971     /* Add CP EDC/ECC irq  */
1972     r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1973                   &adev->gfx.cp_ecc_error_irq);
1974     if (r)
1975         return r;
1976 
1977     /* SQ interrupts. */
1978     r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1979                   &adev->gfx.sq_irq);
1980     if (r) {
1981         DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
1982         return r;
1983     }
1984 
1985     INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1986 
1987     adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1988 
1989     r = gfx_v8_0_init_microcode(adev);
1990     if (r) {
1991         DRM_ERROR("Failed to load gfx firmware!\n");
1992         return r;
1993     }
1994 
1995     r = adev->gfx.rlc.funcs->init(adev);
1996     if (r) {
1997         DRM_ERROR("Failed to init rlc BOs!\n");
1998         return r;
1999     }
2000 
2001     r = gfx_v8_0_mec_init(adev);
2002     if (r) {
2003         DRM_ERROR("Failed to init MEC BOs!\n");
2004         return r;
2005     }
2006 
2007     /* set up the gfx ring */
2008     for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2009         ring = &adev->gfx.gfx_ring[i];
2010         ring->ring_obj = NULL;
2011         sprintf(ring->name, "gfx");
2012         /* no gfx doorbells on iceland */
2013         if (adev->asic_type != CHIP_TOPAZ) {
2014             ring->use_doorbell = true;
2015             ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2016         }
2017 
2018         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2019                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2020                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2021         if (r)
2022             return r;
2023     }
2024 
2025 
2026     /* set up the compute queues - allocate horizontally across pipes */
2027     ring_id = 0;
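     /*
      * The pipe index (k) is the innermost loop, so consecutive
      * ring_ids land on different pipes: queue 0 of every pipe is
      * taken first, then queue 1, and so on.
      */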
2028     for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2029         for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2030             for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2031                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2032                     continue;
2033 
2034                 r = gfx_v8_0_compute_ring_init(adev,
2035                                 ring_id,
2036                                 i, k, j);
2037                 if (r)
2038                     return r;
2039 
2040                 ring_id++;
2041             }
2042         }
2043     }
2044 
2045     r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2046     if (r) {
2047         DRM_ERROR("Failed to init KIQ BOs!\n");
2048         return r;
2049     }
2050 
2051     kiq = &adev->gfx.kiq;
2052     r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2053     if (r)
2054         return r;
2055 
2056     /* create MQD for all compute queues as well as KIQ for SRIOV case */
2057     r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2058     if (r)
2059         return r;
2060 
2061     adev->gfx.ce_ram_size = 0x8000; /* 32 KiB of CE (constant engine) RAM */
2062 
2063     r = gfx_v8_0_gpu_early_init(adev);
2064     if (r)
2065         return r;
2066 
2067     return 0;
2068 }
2069 
2070 static int gfx_v8_0_sw_fini(void *handle)
2071 {
2072     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2073     int i;
2074 
2075     for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2076         amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2077     for (i = 0; i < adev->gfx.num_compute_rings; i++)
2078         amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2079 
2080     amdgpu_gfx_mqd_sw_fini(adev);
2081     amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2082     amdgpu_gfx_kiq_fini(adev);
2083 
2084     gfx_v8_0_mec_fini(adev);
2085     amdgpu_gfx_rlc_fini(adev);
2086     amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2087                 &adev->gfx.rlc.clear_state_gpu_addr,
2088                 (void **)&adev->gfx.rlc.cs_ptr);
2089     if ((adev->asic_type == CHIP_CARRIZO) ||
2090         (adev->asic_type == CHIP_STONEY)) {
2091         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2092                 &adev->gfx.rlc.cp_table_gpu_addr,
2093                 (void **)&adev->gfx.rlc.cp_table_ptr);
2094     }
2095     gfx_v8_0_free_microcode(adev);
2096 
2097     return 0;
2098 }
2099 
2100 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2101 {
2102     uint32_t *modearray, *mod2array;
2103     const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2104     const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2105     u32 reg_offset;
2106 
2107     modearray = adev->gfx.config.tile_mode_array;
2108     mod2array = adev->gfx.config.macrotile_mode_array;
2109 
2110     for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2111         modearray[reg_offset] = 0;
2112 
2113     for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2114         mod2array[reg_offset] = 0;
2115 
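     /*
      * Each table entry below packs one GB_TILE_MODEn /
      * GB_MACROTILE_MODEn register value out of the ARRAY_MODE(),
      * PIPE_CONFIG(), TILE_SPLIT(), BANK_*() ... helpers; indices left
      * at zero and those skipped in the WREG32 loops are presumably
      * reserved on the given ASIC.
      */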
2116     switch (adev->asic_type) {
2117     case CHIP_TOPAZ:
2118         modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119                 PIPE_CONFIG(ADDR_SURF_P2) |
2120                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2121                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2122         modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123                 PIPE_CONFIG(ADDR_SURF_P2) |
2124                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2125                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2126         modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127                 PIPE_CONFIG(ADDR_SURF_P2) |
2128                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2129                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130         modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2131                 PIPE_CONFIG(ADDR_SURF_P2) |
2132                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2133                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2134         modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                 PIPE_CONFIG(ADDR_SURF_P2) |
2136                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2137                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138         modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2139                 PIPE_CONFIG(ADDR_SURF_P2) |
2140                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2141                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142         modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143                 PIPE_CONFIG(ADDR_SURF_P2) |
2144                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2145                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146         modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2147                 PIPE_CONFIG(ADDR_SURF_P2));
2148         modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2149                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2151                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2152         modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                  PIPE_CONFIG(ADDR_SURF_P2) |
2154                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2155                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156         modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2157                  PIPE_CONFIG(ADDR_SURF_P2) |
2158                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2159                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2160         modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2161                  PIPE_CONFIG(ADDR_SURF_P2) |
2162                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2164         modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                  PIPE_CONFIG(ADDR_SURF_P2) |
2166                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2167                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168         modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2169                  PIPE_CONFIG(ADDR_SURF_P2) |
2170                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172         modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173                  PIPE_CONFIG(ADDR_SURF_P2) |
2174                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2175                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176         modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2177                  PIPE_CONFIG(ADDR_SURF_P2) |
2178                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2180         modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2181                  PIPE_CONFIG(ADDR_SURF_P2) |
2182                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2183                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2184         modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2185                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2187                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2188         modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2189                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2191                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2192         modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2193                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2195                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196         modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2197                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200         modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2201                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204         modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2205                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208         modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2212         modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2215                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216         modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2217                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2219                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2220 
2221         mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2222                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2224                 NUM_BANKS(ADDR_SURF_8_BANK));
2225         mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2226                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2227                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2228                 NUM_BANKS(ADDR_SURF_8_BANK));
2229         mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2230                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2231                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2232                 NUM_BANKS(ADDR_SURF_8_BANK));
2233         mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2235                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236                 NUM_BANKS(ADDR_SURF_8_BANK));
2237         mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                 NUM_BANKS(ADDR_SURF_8_BANK));
2241         mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2243                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244                 NUM_BANKS(ADDR_SURF_8_BANK));
2245         mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2246                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2247                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248                 NUM_BANKS(ADDR_SURF_8_BANK));
2249         mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2250                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2251                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                 NUM_BANKS(ADDR_SURF_16_BANK));
2253         mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2256                 NUM_BANKS(ADDR_SURF_16_BANK));
2257         mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2258                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2260                  NUM_BANKS(ADDR_SURF_16_BANK));
2261         mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2263                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2264                  NUM_BANKS(ADDR_SURF_16_BANK));
2265         mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2267                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268                  NUM_BANKS(ADDR_SURF_16_BANK));
2269         mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2271                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272                  NUM_BANKS(ADDR_SURF_16_BANK));
2273         mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276                  NUM_BANKS(ADDR_SURF_8_BANK));
2277 
2278         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2279             if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2280                 reg_offset != 23)
2281                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2282 
2283         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2284             if (reg_offset != 7)
2285                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2286 
2287         break;
2288     case CHIP_FIJI:
2289     case CHIP_VEGAM:
2290         modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2293                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294         modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2297                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298         modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2301                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302         modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2305                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306         modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310         modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2313                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314         modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2315                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2317                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318         modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2320                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2321                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322         modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2323                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2324         modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2325                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328         modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332         modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2333                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2336         modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2337                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2338                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2339                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2340         modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344         modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348         modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2349                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352         modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356         modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2357                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2358                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2360         modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2361                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364         modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2365                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368         modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2369                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372         modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2373                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376         modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2377                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2379                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380         modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2381                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2382                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384         modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388         modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2389                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392         modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2393                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396         modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2397                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400         modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404         modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2405                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2407                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2408         modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2409                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2410                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2411                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2412 
2413         mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416                 NUM_BANKS(ADDR_SURF_8_BANK));
2417         mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2420                 NUM_BANKS(ADDR_SURF_8_BANK));
2421         mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2423                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2424                 NUM_BANKS(ADDR_SURF_8_BANK));
2425         mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2427                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2428                 NUM_BANKS(ADDR_SURF_8_BANK));
2429         mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2431                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2432                 NUM_BANKS(ADDR_SURF_8_BANK));
2433         mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2435                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2436                 NUM_BANKS(ADDR_SURF_8_BANK));
2437         mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440                 NUM_BANKS(ADDR_SURF_8_BANK));
2441         mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2443                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                 NUM_BANKS(ADDR_SURF_8_BANK));
2445         mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448                 NUM_BANKS(ADDR_SURF_8_BANK));
2449         mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452                  NUM_BANKS(ADDR_SURF_8_BANK));
2453         mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                  NUM_BANKS(ADDR_SURF_8_BANK));
2457         mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460                  NUM_BANKS(ADDR_SURF_8_BANK));
2461         mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464                  NUM_BANKS(ADDR_SURF_8_BANK));
2465         mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                  NUM_BANKS(ADDR_SURF_4_BANK));
2469 
2470         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2471             WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2472 
2473         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2474             if (reg_offset != 7)
2475                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2476 
2477         break;
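        /*
         * Every branch of this switch follows the same flush pattern: stage
         * the per-ASIC values in modearray[] (for mmGB_TILE_MODE0..) and
         * mod2array[] (for mmGB_MACROTILE_MODE0..), then copy them out one
         * register at a time.  Macrotile slot 7 is skipped by the write loop
         * on every ASIC, which is why the mod2array[] initialisers jump from
         * [6] to [8]; presumably that entry is reserved and left at its
         * hardware default.
         */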
2478     case CHIP_TONGA:
2479         modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2482                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483         modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2486                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487         modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2490                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491         modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2494                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495         modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499         modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2502                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503         modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2504                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2506                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507         modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2509                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2510                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511         modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2512                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2513         modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2514                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517         modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2520                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521         modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2522                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2524                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2525         modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2526                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2527                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2528                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2529         modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533         modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537         modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2538                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541         modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545         modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2546                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2547                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2549         modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2550                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553         modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2554                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557         modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2558                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561         modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2562                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565         modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2566                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2568                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569         modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2570                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2571                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573         modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577         modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2578                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581         modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2582                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585         modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589         modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2593         modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2594                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2597         modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2598                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2601 
2602         mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605                 NUM_BANKS(ADDR_SURF_16_BANK));
2606         mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2609                 NUM_BANKS(ADDR_SURF_16_BANK));
2610         mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2613                 NUM_BANKS(ADDR_SURF_16_BANK));
2614         mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2616                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2617                 NUM_BANKS(ADDR_SURF_16_BANK));
2618         mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2620                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2621                 NUM_BANKS(ADDR_SURF_16_BANK));
2622         mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2624                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2625                 NUM_BANKS(ADDR_SURF_16_BANK));
2626         mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2629                 NUM_BANKS(ADDR_SURF_16_BANK));
2630         mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2632                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                 NUM_BANKS(ADDR_SURF_16_BANK));
2634         mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637                 NUM_BANKS(ADDR_SURF_16_BANK));
2638         mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2641                  NUM_BANKS(ADDR_SURF_16_BANK));
2642         mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2645                  NUM_BANKS(ADDR_SURF_16_BANK));
2646         mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649                  NUM_BANKS(ADDR_SURF_8_BANK));
2650         mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653                  NUM_BANKS(ADDR_SURF_4_BANK));
2654         mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2657                  NUM_BANKS(ADDR_SURF_4_BANK));
2658 
2659         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2660             WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2661 
2662         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2663             if (reg_offset != 7)
2664                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2665 
2666         break;
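        /*
         * The Tonga tables above differ from the preceding branch mainly in
         * the bank parameters (mostly 16-bank macrotiles here versus 8-bank
         * before).  The ARRAY_PRT_* modes are, to the best of our knowledge,
         * the variants used for partially resident (sparse) textures, which
         * is why each appears in both an 8-pipe (ADDR_SURF_P8_32x32_16x16)
         * and a 4-pipe (ADDR_SURF_P4_16x16) flavour.
         */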
2667     case CHIP_POLARIS11:
2668     case CHIP_POLARIS12:
2669         modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2672                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673         modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2676                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677         modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2680                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681         modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2684                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685         modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2688                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689         modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2692                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693         modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2696                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697         modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2700                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701         modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2702                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2703         modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707         modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711         modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2715         modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2719         modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723         modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727         modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2728                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2730                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731         modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735         modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2736                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2739         modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2740                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743         modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2744                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747         modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2748                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2750                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751         modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2752                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755         modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2756                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2758                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759         modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2760                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763         modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767         modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2768                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771         modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2772                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775         modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2776                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2778                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779         modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2780                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2782                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783         modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2784                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2786                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2787         modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2788                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2790                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2791 
2792         mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2794                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2795                 NUM_BANKS(ADDR_SURF_16_BANK));
2796 
2797         mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2799                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2800                 NUM_BANKS(ADDR_SURF_16_BANK));
2801 
2802         mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2804                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2805                 NUM_BANKS(ADDR_SURF_16_BANK));
2806 
2807         mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810                 NUM_BANKS(ADDR_SURF_16_BANK));
2811 
2812         mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815                 NUM_BANKS(ADDR_SURF_16_BANK));
2816 
2817         mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2819                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2820                 NUM_BANKS(ADDR_SURF_16_BANK));
2821 
2822         mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2824                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2825                 NUM_BANKS(ADDR_SURF_16_BANK));
2826 
2827         mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2828                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2829                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830                 NUM_BANKS(ADDR_SURF_16_BANK));
2831 
2832         mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                 NUM_BANKS(ADDR_SURF_16_BANK));
2836 
2837         mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840                 NUM_BANKS(ADDR_SURF_16_BANK));
2841 
2842         mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2845                 NUM_BANKS(ADDR_SURF_16_BANK));
2846 
2847         mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850                 NUM_BANKS(ADDR_SURF_16_BANK));
2851 
2852         mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                 NUM_BANKS(ADDR_SURF_8_BANK));
2856 
2857         mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2860                 NUM_BANKS(ADDR_SURF_4_BANK));
2861 
2862         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2863             WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2864 
2865         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2866             if (reg_offset != 7)
2867                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2868 
2869         break;
2870     case CHIP_POLARIS10:
2871         modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2874                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875         modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2878                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879         modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2882                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883         modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2886                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887         modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2890                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891         modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2894                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895         modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2896                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2898                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899         modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2900                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2902                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903         modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2904                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2905         modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2906                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2908                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909         modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913         modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2917         modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2919                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2920                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2921         modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925         modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929         modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2930                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933         modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2936                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937         modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2938                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2939                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2941         modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2942                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945         modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2946                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949         modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2950                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2952                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953         modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2954                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957         modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2958                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2960                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961         modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2962                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2963                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965         modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969         modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2970                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973         modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2974                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977         modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981         modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2982                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2984                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2985         modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2986                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2989         modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2990                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2991                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2992                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2993 
2994         mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997                 NUM_BANKS(ADDR_SURF_16_BANK));
2998 
2999         mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3001                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002                 NUM_BANKS(ADDR_SURF_16_BANK));
3003 
3004         mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3006                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                 NUM_BANKS(ADDR_SURF_16_BANK));
3008 
3009         mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012                 NUM_BANKS(ADDR_SURF_16_BANK));
3013 
3014         mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3016                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3017                 NUM_BANKS(ADDR_SURF_16_BANK));
3018 
3019         mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3022                 NUM_BANKS(ADDR_SURF_16_BANK));
3023 
3024         mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3027                 NUM_BANKS(ADDR_SURF_16_BANK));
3028 
3029         mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3031                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                 NUM_BANKS(ADDR_SURF_16_BANK));
3033 
3034         mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037                 NUM_BANKS(ADDR_SURF_16_BANK));
3038 
3039         mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042                 NUM_BANKS(ADDR_SURF_16_BANK));
3043 
3044         mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047                 NUM_BANKS(ADDR_SURF_16_BANK));
3048 
3049         mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052                 NUM_BANKS(ADDR_SURF_8_BANK));
3053 
3054         mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057                 NUM_BANKS(ADDR_SURF_4_BANK));
3058 
3059         mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062                 NUM_BANKS(ADDR_SURF_4_BANK));
3063 
3064         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3065             WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3066 
3067         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3068             if (reg_offset != 7)
3069                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3070 
3071         break;
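        /*
         * Polaris10 mirrors the Tonga layout (8-pipe
         * ADDR_SURF_P8_32x32_16x16), while Polaris11/12 above use the
         * narrower 4-pipe ADDR_SURF_P4_16x16 configuration throughout; the
         * macrotile bank tables are otherwise nearly identical.
         */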
3072     case CHIP_STONEY:
3073         modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074                 PIPE_CONFIG(ADDR_SURF_P2) |
3075                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3076                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077         modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078                 PIPE_CONFIG(ADDR_SURF_P2) |
3079                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3080                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081         modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082                 PIPE_CONFIG(ADDR_SURF_P2) |
3083                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3084                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085         modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3086                 PIPE_CONFIG(ADDR_SURF_P2) |
3087                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3088                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3089         modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                 PIPE_CONFIG(ADDR_SURF_P2) |
3091                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3092                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093         modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3094                 PIPE_CONFIG(ADDR_SURF_P2) |
3095                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3096                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097         modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3098                 PIPE_CONFIG(ADDR_SURF_P2) |
3099                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3100                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101         modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3102                 PIPE_CONFIG(ADDR_SURF_P2));
3103         modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104                 PIPE_CONFIG(ADDR_SURF_P2) |
3105                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3106                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107         modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108                  PIPE_CONFIG(ADDR_SURF_P2) |
3109                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3110                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111         modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112                  PIPE_CONFIG(ADDR_SURF_P2) |
3113                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3114                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115         modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3116                  PIPE_CONFIG(ADDR_SURF_P2) |
3117                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3119         modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120                  PIPE_CONFIG(ADDR_SURF_P2) |
3121                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3122                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123         modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3124                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3126                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127         modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3130                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131         modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3132                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135         modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3136                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3138                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139         modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3140                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143         modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3144                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3146                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147         modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3148                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3150                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151         modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3152                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155         modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3156                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159         modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3160                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163         modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3164                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3166                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3167         modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3168                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3170                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3171         modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3172                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3174                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3175 
3176         mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3178                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3179                 NUM_BANKS(ADDR_SURF_8_BANK));
3180         mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3182                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3183                 NUM_BANKS(ADDR_SURF_8_BANK));
3184         mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187                 NUM_BANKS(ADDR_SURF_8_BANK));
3188         mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3191                 NUM_BANKS(ADDR_SURF_8_BANK));
3192         mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195                 NUM_BANKS(ADDR_SURF_8_BANK));
3196         mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3198                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3199                 NUM_BANKS(ADDR_SURF_8_BANK));
3200         mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203                 NUM_BANKS(ADDR_SURF_8_BANK));
3204         mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3205                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3206                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207                 NUM_BANKS(ADDR_SURF_16_BANK));
3208         mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3209                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3210                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211                 NUM_BANKS(ADDR_SURF_16_BANK));
3212         mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3213                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3214                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                  NUM_BANKS(ADDR_SURF_16_BANK));
3216         mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3217                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3218                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219                  NUM_BANKS(ADDR_SURF_16_BANK));
3220         mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3222                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223                  NUM_BANKS(ADDR_SURF_16_BANK));
3224         mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227                  NUM_BANKS(ADDR_SURF_16_BANK));
3228         mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231                  NUM_BANKS(ADDR_SURF_8_BANK));
3232 
3233         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3234             if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3235                 reg_offset != 23)
3236                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3237 
3238         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3239             if (reg_offset != 7)
3240                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3241 
3242         break;
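        /*
         * On this 2-pipe (ADDR_SURF_P2) part, tile-mode slots 7, 12, 17 and
         * 23 are skipped by the write loop in addition to macrotile slot 7 --
         * note the matching gaps in the modearray[] initialisers above
         * ([7], [12], [17] and [23] are never assigned).
         */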
3243     default:
3244         dev_warn(adev->dev,
3245              "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3246              adev->asic_type);
3247         fallthrough;
3248 
3249     case CHIP_CARRIZO:
3250         modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3251                 PIPE_CONFIG(ADDR_SURF_P2) |
3252                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3253                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3254         modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                 PIPE_CONFIG(ADDR_SURF_P2) |
3256                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3257                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3258         modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259                 PIPE_CONFIG(ADDR_SURF_P2) |
3260                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3261                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3262         modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3263                 PIPE_CONFIG(ADDR_SURF_P2) |
3264                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3265                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3266         modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267                 PIPE_CONFIG(ADDR_SURF_P2) |
3268                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3269                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270         modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3271                 PIPE_CONFIG(ADDR_SURF_P2) |
3272                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3273                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3274         modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275                 PIPE_CONFIG(ADDR_SURF_P2) |
3276                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3277                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278         modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3279                 PIPE_CONFIG(ADDR_SURF_P2));
3280         modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3281                 PIPE_CONFIG(ADDR_SURF_P2) |
3282                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3283                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3284         modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3285                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3287                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3288         modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3289                  PIPE_CONFIG(ADDR_SURF_P2) |
3290                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3291                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3292         modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3293                  PIPE_CONFIG(ADDR_SURF_P2) |
3294                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3296         modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3297                  PIPE_CONFIG(ADDR_SURF_P2) |
3298                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3299                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300         modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3301                  PIPE_CONFIG(ADDR_SURF_P2) |
3302                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3303                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304         modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305                  PIPE_CONFIG(ADDR_SURF_P2) |
3306                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3307                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308         modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3309                  PIPE_CONFIG(ADDR_SURF_P2) |
3310                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3311                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3312         modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3313                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3315                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3316         modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3317                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3319                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3320         modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3321                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3323                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3324         modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3325                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3327                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328         modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3329                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332         modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3333                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3335                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336         modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3337                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340         modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3341                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3343                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3344         modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3345                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3347                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3348         modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3349                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3351                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3352 
3353         mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3355                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356                 NUM_BANKS(ADDR_SURF_8_BANK));
3357         mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3359                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360                 NUM_BANKS(ADDR_SURF_8_BANK));
3361         mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364                 NUM_BANKS(ADDR_SURF_8_BANK));
3365         mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368                 NUM_BANKS(ADDR_SURF_8_BANK));
3369         mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3371                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3372                 NUM_BANKS(ADDR_SURF_8_BANK));
3373         mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3374                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3375                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3376                 NUM_BANKS(ADDR_SURF_8_BANK));
3377         mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3379                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3380                 NUM_BANKS(ADDR_SURF_8_BANK));
3381         mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3382                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3383                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                 NUM_BANKS(ADDR_SURF_16_BANK));
3385         mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3386                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3387                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                 NUM_BANKS(ADDR_SURF_16_BANK));
3389         mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3390                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3391                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3392                  NUM_BANKS(ADDR_SURF_16_BANK));
3393         mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3394                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3395                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3396                  NUM_BANKS(ADDR_SURF_16_BANK));
3397         mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3399                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3400                  NUM_BANKS(ADDR_SURF_16_BANK));
3401         mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3404                  NUM_BANKS(ADDR_SURF_16_BANK));
3405         mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408                  NUM_BANKS(ADDR_SURF_8_BANK));
3409 
3410         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3411             if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3412                 reg_offset != 23)
3413                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3414 
3415         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3416             if (reg_offset != 7)
3417                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3418 
3419         break;
3420     }
3421 }
3422 
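     /*
      * A sketch of what this helper does, inferred from its body: it steers
      * subsequent indexed register accesses by programming GRBM_GFX_INDEX.
      * Passing 0xffffffff for se_num, sh_num or instance selects broadcast
      * mode at that level, so following writes reach every shader
      * engine/array/instance at once.
      */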
3423 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3424                   u32 se_num, u32 sh_num, u32 instance)
3425 {
3426     u32 data;
3427 
3428     if (instance == 0xffffffff)
3429         data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3430     else
3431         data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3432 
3433     if (se_num == 0xffffffff)
3434         data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3435     else
3436         data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3437 
3438     if (sh_num == 0xffffffff)
3439         data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3440     else
3441         data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3442 
3443     WREG32(mmGRBM_GFX_INDEX, data);
3444 }
3445 
3446 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3447                   u32 me, u32 pipe, u32 q, u32 vm)
3448 {
3449     vi_srbm_select(adev, me, pipe, q, vm);
3450 }
3451 
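     /*
      * Derive the bitmap of active render backends (RBs) for the currently
      * selected SE/SH: OR the hardware and user disable masks, extract the
      * BACKEND_DISABLE field, then invert it and mask down to
      * max_backends_per_se / max_sh_per_se bits.
      */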
3452 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3453 {
3454     u32 data, mask;
3455 
3456     data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3457         RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3458 
3459     data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3460 
3461     mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3462                      adev->gfx.config.max_sh_per_se);
3463 
3464     return (~data) & mask;
3465 }
3466 
3467 static void
3468 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3469 {
3470     switch (adev->asic_type) {
3471     case CHIP_FIJI:
3472     case CHIP_VEGAM:
3473         *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3474               RB_XSEL2(1) | PKR_MAP(2) |
3475               PKR_XSEL(1) | PKR_YSEL(1) |
3476               SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3477         *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3478                SE_PAIR_YSEL(2);
3479         break;
3480     case CHIP_TONGA:
3481     case CHIP_POLARIS10:
3482         *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3483               SE_XSEL(1) | SE_YSEL(1);
3484         *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3485                SE_PAIR_YSEL(2);
3486         break;
3487     case CHIP_TOPAZ:
3488     case CHIP_CARRIZO:
3489         *rconf |= RB_MAP_PKR0(2);
3490         *rconf1 |= 0x0;
3491         break;
3492     case CHIP_POLARIS11:
3493     case CHIP_POLARIS12:
3494         *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3495               SE_XSEL(1) | SE_YSEL(1);
3496         *rconf1 |= 0x0;
3497         break;
3498     case CHIP_STONEY:
3499         *rconf |= 0x0;
3500         *rconf1 |= 0x0;
3501         break;
3502     default:
3503         DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3504         break;
3505     }
3506 }
3507 
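     /*
      * On harvested parts some RBs within an SE (or whole SEs) are disabled.
      * Rewrite the SE/PKR/RB mapping fields of PA_SC_RASTER_CONFIG(_1) per
      * SE so rasterizer output is only routed to the backends present in
      * rb_mask.
      */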
3508 static void
3509 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3510                     u32 raster_config, u32 raster_config_1,
3511                     unsigned rb_mask, unsigned num_rb)
3512 {
3513     unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3514     unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3515     unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3516     unsigned rb_per_se = num_rb / num_se;
3517     unsigned se_mask[4];
3518     unsigned se;
3519 
3520     se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3521     se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3522     se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3523     se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3524 
3525     WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3526     WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3527     WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3528 
3529     if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3530                  (!se_mask[2] && !se_mask[3]))) {
3531         raster_config_1 &= ~SE_PAIR_MAP_MASK;
3532 
3533         if (!se_mask[0] && !se_mask[1]) {
3534             raster_config_1 |=
3535                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3536         } else {
3537             raster_config_1 |=
3538                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3539         }
3540     }
3541 
3542     for (se = 0; se < num_se; se++) {
3543         unsigned raster_config_se = raster_config;
3544         unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3545         unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3546         int idx = (se / 2) * 2;
3547 
3548         if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3549             raster_config_se &= ~SE_MAP_MASK;
3550 
3551             if (!se_mask[idx]) {
3552                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3553             } else {
3554                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3555             }
3556         }
3557 
3558         pkr0_mask &= rb_mask;
3559         pkr1_mask &= rb_mask;
3560         if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3561             raster_config_se &= ~PKR_MAP_MASK;
3562 
3563             if (!pkr0_mask) {
3564                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3565             } else {
3566                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3567             }
3568         }
3569 
3570         if (rb_per_se >= 2) {
3571             unsigned rb0_mask = 1 << (se * rb_per_se);
3572             unsigned rb1_mask = rb0_mask << 1;
3573 
3574             rb0_mask &= rb_mask;
3575             rb1_mask &= rb_mask;
3576             if (!rb0_mask || !rb1_mask) {
3577                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3578 
3579                 if (!rb0_mask) {
3580                     raster_config_se |=
3581                         RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3582                 } else {
3583                     raster_config_se |=
3584                         RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3585                 }
3586             }
3587 
3588             if (rb_per_se > 2) {
3589                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3590                 rb1_mask = rb0_mask << 1;
3591                 rb0_mask &= rb_mask;
3592                 rb1_mask &= rb_mask;
3593                 if (!rb0_mask || !rb1_mask) {
3594                     raster_config_se &= ~RB_MAP_PKR1_MASK;
3595 
3596                     if (!rb0_mask) {
3597                         raster_config_se |=
3598                             RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3599                     } else {
3600                         raster_config_se |=
3601                             RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3602                     }
3603                 }
3604             }
3605         }
3606 
3607         /* GRBM_GFX_INDEX has a different offset on VI */
3608         gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3609         WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3610         WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3611     }
3612 
3613     /* GRBM_GFX_INDEX has a different offset on VI */
3614     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3615 }
3616 
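     /*
      * Walk every SE/SH, collect the active-RB bitmap, then either program
      * the default raster config (fully populated chips) or fall back to the
      * harvested path above.  The per-SE/SH register values are cached in
      * adev->gfx.config.rb_config[][] so userspace can query them later.
      */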
3617 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3618 {
3619     int i, j;
3620     u32 data;
3621     u32 raster_config = 0, raster_config_1 = 0;
3622     u32 active_rbs = 0;
3623     u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3624                     adev->gfx.config.max_sh_per_se;
3625     unsigned num_rb_pipes;
3626 
3627     mutex_lock(&adev->grbm_idx_mutex);
3628     for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3629         for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3630             gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3631             data = gfx_v8_0_get_rb_active_bitmap(adev);
3632             active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3633                            rb_bitmap_width_per_sh);
3634         }
3635     }
3636     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3637 
3638     adev->gfx.config.backend_enable_mask = active_rbs;
3639     adev->gfx.config.num_rbs = hweight32(active_rbs);
3640 
3641     num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3642                  adev->gfx.config.max_shader_engines, 16);
3643 
3644     gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3645 
3646     if (!adev->gfx.config.backend_enable_mask ||
3647             adev->gfx.config.num_rbs >= num_rb_pipes) {
3648         WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3649         WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3650     } else {
3651         gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3652                             adev->gfx.config.backend_enable_mask,
3653                             num_rb_pipes);
3654     }
3655 
3656     /* cache the values for userspace */
3657     for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3658         for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3659             gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3660             adev->gfx.config.rb_config[i][j].rb_backend_disable =
3661                 RREG32(mmCC_RB_BACKEND_DISABLE);
3662             adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3663                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3664             adev->gfx.config.rb_config[i][j].raster_config =
3665                 RREG32(mmPA_SC_RASTER_CONFIG);
3666             adev->gfx.config.rb_config[i][j].raster_config_1 =
3667                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3668         }
3669     }
3670     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3671     mutex_unlock(&adev->grbm_idx_mutex);
3672 }
3673 
3674 #define DEFAULT_SH_MEM_BASES    (0x6000)
3675 /**
3676  * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
3677  *
3678  * @adev: amdgpu_device pointer
3679  *
3680  * Initialize compute vmid sh_mem registers
3681  *
3682  */
3683 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3684 {
3685     int i;
3686     uint32_t sh_mem_config;
3687     uint32_t sh_mem_bases;
3688 
3689     /*
3690      * Configure apertures:
3691      * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3692      * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3693      * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3694      */
3695     sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3696 
3697     sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3698             SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3699             SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3700             SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3701             MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3702             SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3703 
3704     mutex_lock(&adev->srbm_mutex);
3705     for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3706         vi_srbm_select(adev, 0, 0, 0, i);
3707         /* CP and shaders */
3708         WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3709         WREG32(mmSH_MEM_APE1_BASE, 1);
3710         WREG32(mmSH_MEM_APE1_LIMIT, 0);
3711         WREG32(mmSH_MEM_BASES, sh_mem_bases);
3712     }
3713     vi_srbm_select(adev, 0, 0, 0, 0);
3714     mutex_unlock(&adev->srbm_mutex);
3715 
3716     /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3717        access. These should be enabled by FW for target VMIDs. */
3718     for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3719         WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3720         WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3721         WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3722         WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3723     }
3724 }
3725 
3726 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3727 {
3728     int vmid;
3729 
3730     /*
3731      * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3732      * access. Compute VMIDs should be enabled by FW for target VMIDs,
3733      * the driver can enable them for graphics. VMID0 should maintain
3734      * access so that HWS firmware can save/restore entries.
3735      */
3736     for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3737         WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3738         WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3739         WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3740         WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3741     }
3742 }
3743 
3744 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3745 {
3746     switch (adev->asic_type) {
3747     default:
3748         adev->gfx.config.double_offchip_lds_buf = 1;
3749         break;
3750     case CHIP_CARRIZO:
3751     case CHIP_STONEY:
3752         adev->gfx.config.double_offchip_lds_buf = 0;
3753         break;
3754     }
3755 }
3756 
3757 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3758 {
3759     u32 tmp, sh_static_mem_cfg;
3760     int i;
3761 
3762     WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3763     WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3764     WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3765     WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3766 
3767     gfx_v8_0_tiling_mode_table_init(adev);
3768     gfx_v8_0_setup_rb(adev);
3769     gfx_v8_0_get_cu_info(adev);
3770     gfx_v8_0_config_init(adev);
3771 
3772     /* XXX SH_MEM regs */
3773     /* where to put LDS, scratch, GPUVM in FSA64 space */
3774     sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3775                    SWIZZLE_ENABLE, 1);
3776     sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3777                    ELEMENT_SIZE, 1);
3778     sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3779                    INDEX_STRIDE, 3);
3780     WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3781 
3782     mutex_lock(&adev->srbm_mutex);
3783     for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3784         vi_srbm_select(adev, 0, 0, 0, i);
3785         /* CP and shaders */
3786         if (i == 0) {
3787             tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3788             tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3789             tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3790                         SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3791             WREG32(mmSH_MEM_CONFIG, tmp);
3792             WREG32(mmSH_MEM_BASES, 0);
3793         } else {
3794             tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3795             tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3796             tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3797                         SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3798             WREG32(mmSH_MEM_CONFIG, tmp);
3799             tmp = adev->gmc.shared_aperture_start >> 48;
3800             WREG32(mmSH_MEM_BASES, tmp);
3801         }
3802 
3803         WREG32(mmSH_MEM_APE1_BASE, 1);
3804         WREG32(mmSH_MEM_APE1_LIMIT, 0);
3805     }
3806     vi_srbm_select(adev, 0, 0, 0, 0);
3807     mutex_unlock(&adev->srbm_mutex);
3808 
3809     gfx_v8_0_init_compute_vmid(adev);
3810     gfx_v8_0_init_gds_vmid(adev);
3811 
3812     mutex_lock(&adev->grbm_idx_mutex);
3813     /*
3814      * make sure that the following register writes are broadcast
3815      * to all the shader engines and arrays
3816      */
3817     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3818 
3819     WREG32(mmPA_SC_FIFO_SIZE,
3820            (adev->gfx.config.sc_prim_fifo_size_frontend <<
3821             PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3822            (adev->gfx.config.sc_prim_fifo_size_backend <<
3823             PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3824            (adev->gfx.config.sc_hiz_tile_fifo_size <<
3825             PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3826            (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3827             PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3828 
3829     tmp = RREG32(mmSPI_ARB_PRIORITY);
3830     tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3831     tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3832     tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3833     tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3834     WREG32(mmSPI_ARB_PRIORITY, tmp);
3835 
3836     mutex_unlock(&adev->grbm_idx_mutex);
3837 
3838 }
3839 
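     /*
      * Poll the RLC serdes busy registers, first the CU masters per SE/SH
      * and then the non-CU masters, waiting up to adev->usec_timeout
      * microseconds for each to go idle.
      */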
3840 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3841 {
3842     u32 i, j, k;
3843     u32 mask;
3844 
3845     mutex_lock(&adev->grbm_idx_mutex);
3846     for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3847         for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3848             gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3849             for (k = 0; k < adev->usec_timeout; k++) {
3850                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3851                     break;
3852                 udelay(1);
3853             }
3854             if (k == adev->usec_timeout) {
3855                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3856                               0xffffffff, 0xffffffff);
3857                 mutex_unlock(&adev->grbm_idx_mutex);
3858                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3859                      i, j);
3860                 return;
3861             }
3862         }
3863     }
3864     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3865     mutex_unlock(&adev->grbm_idx_mutex);
3866 
3867     mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3868         RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3869         RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3870         RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3871     for (k = 0; k < adev->usec_timeout; k++) {
3872         if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3873             break;
3874         udelay(1);
3875     }
3876 }
3877 
3878 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3879                            bool enable)
3880 {
3881     u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3882 
3883     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3884     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3885     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3886     tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3887 
3888     WREG32(mmCP_INT_CNTL_RING0, tmp);
3889 }
3890 
3891 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3892 {
3893     adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3894     /* csib */
3895     WREG32(mmRLC_CSIB_ADDR_HI,
3896             adev->gfx.rlc.clear_state_gpu_addr >> 32);
3897     WREG32(mmRLC_CSIB_ADDR_LO,
3898             adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3899     WREG32(mmRLC_CSIB_LENGTH,
3900             adev->gfx.rlc.clear_state_size);
3901 }
3902 
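     /*
      * Parse the RLC indirect register list.  Entries are terminated by a
      * 0xFFFFFFFF sentinel and consist of three-word records whose third
      * word is a register index; each index is deduplicated into
      * unique_indices[] and the record is rewritten in place to reference
      * the deduplicated slot.  The start offset of every entry is recorded
      * in ind_start_offsets[].
      */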
3903 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3904                 int ind_offset,
3905                 int list_size,
3906                 int *unique_indices,
3907                 int *indices_count,
3908                 int max_indices,
3909                 int *ind_start_offsets,
3910                 int *offset_count,
3911                 int max_offset)
3912 {
3913     int indices;
3914     bool new_entry = true;
3915 
3916     for (; ind_offset < list_size; ind_offset++) {
3917 
3918         if (new_entry) {
3919             new_entry = false;
3920             ind_start_offsets[*offset_count] = ind_offset;
3921             *offset_count = *offset_count + 1;
3922             BUG_ON(*offset_count >= max_offset);
3923         }
3924 
3925         if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3926             new_entry = true;
3927             continue;
3928         }
3929 
3930         ind_offset += 2;
3931 
3932         /* look for a matching index */
3933         for (indices = 0;
3934             indices < *indices_count;
3935             indices++) {
3936             if (unique_indices[indices] ==
3937                 register_list_format[ind_offset])
3938                 break;
3939         }
3940 
3941         if (indices >= *indices_count) {
3942             unique_indices[*indices_count] =
3943                 register_list_format[ind_offset];
3944             indices = *indices_count;
3945             *indices_count = *indices_count + 1;
3946             BUG_ON(*indices_count >= max_indices);
3947         }
3948 
3949         register_list_format[ind_offset] = indices;
3950     }
3951 }
3952 
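     /*
      * Upload the RLC save/restore lists: the direct register-restore list
      * goes into ARAM, the (deduplicated) indirect format list, its size and
      * its start offsets into GPM scratch, and the unique indices into the
      * SRM index control address/data registers.
      */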
3953 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3954 {
3955     int i, temp, data;
3956     int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3957     int indices_count = 0;
3958     int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3959     int offset_count = 0;
3960 
3961     int list_size;
3962     unsigned int *register_list_format =
3963         kmemdup(adev->gfx.rlc.register_list_format,
3964             adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3965     if (!register_list_format)
3966         return -ENOMEM;
3967 
3968     gfx_v8_0_parse_ind_reg_list(register_list_format,
3969                 RLC_FormatDirectRegListLength,
3970                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3971                 unique_indices,
3972                 &indices_count,
3973                 ARRAY_SIZE(unique_indices),
3974                 indirect_start_offsets,
3975                 &offset_count,
3976                 ARRAY_SIZE(indirect_start_offsets));
3977 
3978     /* save and restore list */
3979     WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3980 
3981     WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3982     for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3983         WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3984 
3985     /* indirect list */
3986     WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3987     for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3988         WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3989 
3990     list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3991     list_size = list_size >> 1;
3992     WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3993     WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3994 
3995     /* starting offsets of the indirect-list entries */
3996     WREG32(mmRLC_GPM_SCRATCH_ADDR,
3997         adev->gfx.rlc.starting_offsets_start);
3998     for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3999         WREG32(mmRLC_GPM_SCRATCH_DATA,
4000                 indirect_start_offsets[i]);
4001 
4002     /* unique indices */
4003     temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4004     data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4005     for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4006         if (unique_indices[i] != 0) {
4007             WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4008             WREG32(data + i, unique_indices[i] >> 20);
4009         }
4010     }
4011     kfree(register_list_format);
4012 
4013     return 0;
4014 }
4015 
4016 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4017 {
4018     WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4019 }
4020 
4021 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4022 {
4023     uint32_t data;
4024 
4025     WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4026 
4027     data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4028     data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4029     data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4030     data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4031     WREG32(mmRLC_PG_DELAY, data);
4032 
4033     WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4034     WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4035 
4036 }
4037 
4038 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4039                         bool enable)
4040 {
4041     WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4042 }
4043 
4044 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4045                           bool enable)
4046 {
4047     WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4048 }
4049 
4050 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4051 {
4052     WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4053 }
4054 
4055 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4056 {
4057     if ((adev->asic_type == CHIP_CARRIZO) ||
4058         (adev->asic_type == CHIP_STONEY)) {
4059         gfx_v8_0_init_csb(adev);
4060         gfx_v8_0_init_save_restore_list(adev);
4061         gfx_v8_0_enable_save_restore_machine(adev);
4062         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4063         gfx_v8_0_init_power_gating(adev);
4064         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4065     } else if ((adev->asic_type == CHIP_POLARIS11) ||
4066            (adev->asic_type == CHIP_POLARIS12) ||
4067            (adev->asic_type == CHIP_VEGAM)) {
4068         gfx_v8_0_init_csb(adev);
4069         gfx_v8_0_init_save_restore_list(adev);
4070         gfx_v8_0_enable_save_restore_machine(adev);
4071         gfx_v8_0_init_power_gating(adev);
4072     }
4073 
4074 }
4075 
4076 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4077 {
4078     WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4079 
4080     gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4081     gfx_v8_0_wait_for_rlc_serdes(adev);
4082 }
4083 
4084 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4085 {
4086     WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4087     udelay(50);
4088 
4089     WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4090     udelay(50);
4091 }
4092 
4093 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4094 {
4095     WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4096 
4097     /* on APUs such as Carrizo, the CP interrupt is enabled after the CP is initialized */
4098     if (!(adev->flags & AMD_IS_APU))
4099         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4100 
4101     udelay(50);
4102 }
4103 
4104 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4105 {
4106     if (amdgpu_sriov_vf(adev)) {
4107         gfx_v8_0_init_csb(adev);
4108         return 0;
4109     }
4110 
4111     adev->gfx.rlc.funcs->stop(adev);
4112     adev->gfx.rlc.funcs->reset(adev);
4113     gfx_v8_0_init_pg(adev);
4114     adev->gfx.rlc.funcs->start(adev);
4115 
4116     return 0;
4117 }
4118 
4119 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4120 {
4121     u32 tmp = RREG32(mmCP_ME_CNTL);
4122 
4123     if (enable) {
4124         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4125         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4126         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4127     } else {
4128         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4129         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4130         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4131     }
4132     WREG32(mmCP_ME_CNTL, tmp);
4133     udelay(50);
4134 }
4135 
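     /*
      * Compute the clear-state buffer size in dwords: the preamble begin/end
      * packets, context control, one SET_CONTEXT_REG header per extent plus
      * its registers, the two raster config registers, and the final
      * CLEAR_STATE packet.
      */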
4136 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4137 {
4138     u32 count = 0;
4139     const struct cs_section_def *sect = NULL;
4140     const struct cs_extent_def *ext = NULL;
4141 
4142     /* begin clear state */
4143     count += 2;
4144     /* context control state */
4145     count += 3;
4146 
4147     for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4148         for (ext = sect->section; ext->extent != NULL; ++ext) {
4149             if (sect->id == SECT_CONTEXT)
4150                 count += 2 + ext->reg_count;
4151             else
4152                 return 0;
4153         }
4154     }
4155     /* pa_sc_raster_config/pa_sc_raster_config1 */
4156     count += 4;
4157     /* end clear state */
4158     count += 2;
4159     /* clear state */
4160     count += 2;
4161 
4162     return count;
4163 }
4164 
4165 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4166 {
4167     struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4168     const struct cs_section_def *sect = NULL;
4169     const struct cs_extent_def *ext = NULL;
4170     int r, i;
4171 
4172     /* init the CP */
4173     WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4174     WREG32(mmCP_ENDIAN_SWAP, 0);
4175     WREG32(mmCP_DEVICE_ID, 1);
4176 
4177     gfx_v8_0_cp_gfx_enable(adev, true);
4178 
4179     r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4180     if (r) {
4181         DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4182         return r;
4183     }
4184 
4185     /* clear state buffer */
4186     amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4187     amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4188 
4189     amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4190     amdgpu_ring_write(ring, 0x80000000);
4191     amdgpu_ring_write(ring, 0x80000000);
4192 
4193     for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4194         for (ext = sect->section; ext->extent != NULL; ++ext) {
4195             if (sect->id == SECT_CONTEXT) {
4196                 amdgpu_ring_write(ring,
4197                        PACKET3(PACKET3_SET_CONTEXT_REG,
4198                            ext->reg_count));
4199                 amdgpu_ring_write(ring,
4200                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4201                 for (i = 0; i < ext->reg_count; i++)
4202                     amdgpu_ring_write(ring, ext->extent[i]);
4203             }
4204         }
4205     }
4206 
4207     amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4208     amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4209     amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4210     amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4211 
4212     amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4213     amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4214 
4215     amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4216     amdgpu_ring_write(ring, 0);
4217 
4218     /* init the CE partitions */
4219     amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4220     amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4221     amdgpu_ring_write(ring, 0x8000);
4222     amdgpu_ring_write(ring, 0x8000);
4223 
4224     amdgpu_ring_commit(ring);
4225 
4226     return 0;
4227 }

4228 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4229 {
4230     u32 tmp;
4231     /* no gfx doorbells on iceland */
4232     if (adev->asic_type == CHIP_TOPAZ)
4233         return;
4234 
4235     tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4236 
4237     if (ring->use_doorbell) {
4238         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4239                 DOORBELL_OFFSET, ring->doorbell_index);
4240         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4241                         DOORBELL_HIT, 0);
4242         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4243                         DOORBELL_EN, 1);
4244     } else {
4245         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4246     }
4247 
4248     WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4249 
4250     if (adev->flags & AMD_IS_APU)
4251         return;
4252 
4253     tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4254                     DOORBELL_RANGE_LOWER,
4255                     adev->doorbell_index.gfx_ring0);
4256     WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4257 
4258     WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4259         CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4260 }
4261 
4262 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4263 {
4264     struct amdgpu_ring *ring;
4265     u32 tmp;
4266     u32 rb_bufsz;
4267     u64 rb_addr, rptr_addr, wptr_gpu_addr;
4268 
4269     /* Set the write pointer delay */
4270     WREG32(mmCP_RB_WPTR_DELAY, 0);
4271 
4272     /* set the RB to use vmid 0 */
4273     WREG32(mmCP_RB_VMID, 0);
4274 
4275     /* Set ring buffer size */
4276     ring = &adev->gfx.gfx_ring[0];
4277     rb_bufsz = order_base_2(ring->ring_size / 8);
4278     tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4279     tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4280     tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4281     tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4282 #ifdef __BIG_ENDIAN
4283     tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4284 #endif
4285     WREG32(mmCP_RB0_CNTL, tmp);
4286 
4287     /* Initialize the ring buffer's read and write pointers */
4288     WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4289     ring->wptr = 0;
4290     WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4291 
4292     /* set the writeback address whether it's enabled or not */
4293     rptr_addr = ring->rptr_gpu_addr;
4294     WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4295     WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4296 
4297     wptr_gpu_addr = ring->wptr_gpu_addr;
4298     WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4299     WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4300     mdelay(1);
4301     WREG32(mmCP_RB0_CNTL, tmp);
4302 
4303     rb_addr = ring->gpu_addr >> 8;
4304     WREG32(mmCP_RB0_BASE, rb_addr);
4305     WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4306 
4307     gfx_v8_0_set_cpg_door_bell(adev, ring);
4308     /* start the ring */
4309     amdgpu_ring_clear_ring(ring);
4310     gfx_v8_0_cp_gfx_start(adev);
4311     ring->sched.ready = true;
4312 
4313     return 0;
4314 }
4315 
4316 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4317 {
4318     if (enable) {
4319         WREG32(mmCP_MEC_CNTL, 0);
4320     } else {
4321         WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4322         adev->gfx.kiq.ring.sched.ready = false;
4323     }
4324     udelay(50);
4325 }
4326 
4327 /* KIQ functions */
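     /*
      * The KIQ (kernel interface queue) is a privileged compute queue the
      * driver uses to manage the other compute queues: SET_RESOURCES hands
      * it the usable queue mask, and MAP_QUEUES/UNMAP_QUEUES packets (below
      * and in gfx_v8_0_kcq_disable()) attach and detach the KCQs.
      */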
4328 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4329 {
4330     uint32_t tmp;
4331     struct amdgpu_device *adev = ring->adev;
4332 
4333     /* tell the RLC which queue is the KIQ */
4334     tmp = RREG32(mmRLC_CP_SCHEDULERS);
4335     tmp &= 0xffffff00;
4336     tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4337     WREG32(mmRLC_CP_SCHEDULERS, tmp);
4338     tmp |= 0x80;
4339     WREG32(mmRLC_CP_SCHEDULERS, tmp);
4340 }
4341 
4342 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4343 {
4344     struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4345     uint64_t queue_mask = 0;
4346     int r, i;
4347 
4348     for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4349         if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4350             continue;
4351 
4352         /* This situation may be hit in the future if a new HW
4353          * generation exposes more than 64 queues. If so, the
4354          * definition of queue_mask needs updating */
4355         if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4356             DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4357             break;
4358         }
4359 
4360         queue_mask |= (1ull << i);
4361     }
4362 
4363     r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4364     if (r) {
4365         DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4366         return r;
4367     }
4368     /* set resources */
4369     amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4370     amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4371     amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4372     amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4373     amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4374     amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4375     amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4376     amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4377     for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4378         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4379         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4380         uint64_t wptr_addr = ring->wptr_gpu_addr;
4381 
4382         /* map queues */
4383         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4384         /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
4385         amdgpu_ring_write(kiq_ring,
4386                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4387         amdgpu_ring_write(kiq_ring,
4388                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4389                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4390                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4391                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4392         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4393         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4394         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4395         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4396     }
4397 
4398     amdgpu_ring_commit(kiq_ring);
4399 
4400     return 0;
4401 }
4402 
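     /*
      * If the currently selected HQD is active, issue a dequeue request of
      * the given type and poll CP_HQD_ACTIVE until it clears or the usec
      * timeout expires, then zero the request and the PQ read/write
      * pointers.
      */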
4403 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4404 {
4405     int i, r = 0;
4406 
4407     if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4408         WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4409         for (i = 0; i < adev->usec_timeout; i++) {
4410             if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4411                 break;
4412             udelay(1);
4413         }
4414         if (i == adev->usec_timeout)
4415             r = -ETIMEDOUT;
4416     }
4417     WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4418     WREG32(mmCP_HQD_PQ_RPTR, 0);
4419     WREG32(mmCP_HQD_PQ_WPTR, 0);
4420 
4421     return r;
4422 }
4423 
4424 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4425 {
4426     struct amdgpu_device *adev = ring->adev;
4427 
4428     if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4429         if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4430             mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4431             mqd->cp_hqd_queue_priority =
4432                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4433         }
4434     }
4435 }
4436 
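     /*
      * Fill the MQD (memory queue descriptor) for this ring.  The MQD
      * mirrors the CP_HQD_* register block and is consumed either by the
      * KIQ MAP_QUEUES path or by a direct register commit via
      * gfx_v8_0_mqd_commit().
      */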
4437 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4438 {
4439     struct amdgpu_device *adev = ring->adev;
4440     struct vi_mqd *mqd = ring->mqd_ptr;
4441     uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4442     uint32_t tmp;
4443 
4444     mqd->header = 0xC0310800;
4445     mqd->compute_pipelinestat_enable = 0x00000001;
4446     mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4447     mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4448     mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4449     mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4450     mqd->compute_misc_reserved = 0x00000003;
4451     mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4452                              + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4453     mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4454                              + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4455     eop_base_addr = ring->eop_gpu_addr >> 8;
4456     mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4457     mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4458 
4459     /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4460     tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4461     tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4462             (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4463 
4464     mqd->cp_hqd_eop_control = tmp;
4465 
4466     /* enable doorbell? */
4467     tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4468                 CP_HQD_PQ_DOORBELL_CONTROL,
4469                 DOORBELL_EN,
4470                 ring->use_doorbell ? 1 : 0);
4471 
4472     mqd->cp_hqd_pq_doorbell_control = tmp;
4473 
4474     /* set the pointer to the MQD */
4475     mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4476     mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4477 
4478     /* set MQD vmid to 0 */
4479     tmp = RREG32(mmCP_MQD_CONTROL);
4480     tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4481     mqd->cp_mqd_control = tmp;
4482 
4483     /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
4484     hqd_gpu_addr = ring->gpu_addr >> 8;
4485     mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4486     mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4487 
4488     /* set up the HQD, this is similar to CP_RB0_CNTL */
4489     tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4490     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4491                 (order_base_2(ring->ring_size / 4) - 1));
4492     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4493             (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4494 #ifdef __BIG_ENDIAN
4495     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4496 #endif
4497     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4498     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4499     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4500     tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4501     mqd->cp_hqd_pq_control = tmp;
4502 
4503     /* set the wb address whether it's enabled or not */
4504     wb_gpu_addr = ring->rptr_gpu_addr;
4505     mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4506     mqd->cp_hqd_pq_rptr_report_addr_hi =
4507         upper_32_bits(wb_gpu_addr) & 0xffff;
4508 
4509     /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4510     wb_gpu_addr = ring->wptr_gpu_addr;
4511     mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4512     mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4513 
4514     tmp = 0;
4515     /* enable the doorbell if requested */
4516     if (ring->use_doorbell) {
4517         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4518         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4519                 DOORBELL_OFFSET, ring->doorbell_index);
4520 
4521         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4522                      DOORBELL_EN, 1);
4523         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4524                      DOORBELL_SOURCE, 0);
4525         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4526                      DOORBELL_HIT, 0);
4527     }
4528 
4529     mqd->cp_hqd_pq_doorbell_control = tmp;
4530 
4531     /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4532     ring->wptr = 0;
4533     mqd->cp_hqd_pq_wptr = ring->wptr;
4534     mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4535 
4536     /* set the vmid for the queue */
4537     mqd->cp_hqd_vmid = 0;
4538 
4539     tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4540     tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4541     mqd->cp_hqd_persistent_state = tmp;
4542 
4543     /* set MTYPE */
4544     tmp = RREG32(mmCP_HQD_IB_CONTROL);
4545     tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4546     tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4547     mqd->cp_hqd_ib_control = tmp;
4548 
4549     tmp = RREG32(mmCP_HQD_IQ_TIMER);
4550     tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4551     mqd->cp_hqd_iq_timer = tmp;
4552 
4553     tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4554     tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4555     mqd->cp_hqd_ctx_save_control = tmp;
4556 
4557     /* defaults */
4558     mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4559     mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4560     mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4561     mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4562     mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4563     mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4564     mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4565     mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4566     mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4567     mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4568     mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4569     mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4570 
4571     /* set static priority for a queue/ring */
4572     gfx_v8_0_mqd_set_priority(ring, mqd);
4573     mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4574 
4575     /* the map_queues packet doesn't need to activate the queue,
4576      * so only the KIQ needs to set this field.
4577      */
4578     if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4579         mqd->cp_hqd_active = 1;
4580 
4581     return 0;
4582 }
4583 
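     /*
      * Program the HQD registers directly from the MQD image for the
      * currently selected me/pipe/queue.  Callers hold srbm_mutex and have
      * already selected the queue via vi_srbm_select().
      */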
4584 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4585             struct vi_mqd *mqd)
4586 {
4587     uint32_t mqd_reg;
4588     uint32_t *mqd_data;
4589 
4590     /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4591     mqd_data = &mqd->cp_mqd_base_addr_lo;
4592 
4593     /* disable wptr polling */
4594     WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4595 
4596     /* program all HQD registers */
4597     for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4598         WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4599 
4600     /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4601      * This is safe since EOP RPTR==WPTR for any inactive HQD
4602      * on ASICs that do not support context-save.
4603      * EOP writes/reads can start anywhere in the ring.
4604      */
4605     if (adev->asic_type != CHIP_TONGA) {
4606         WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4607         WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4608         WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4609     }
4610 
4611     for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4612         WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4613 
4614     /* activate the HQD */
4615     for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4616         WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4617 
4618     return 0;
4619 }
4620 
4621 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4622 {
4623     struct amdgpu_device *adev = ring->adev;
4624     struct vi_mqd *mqd = ring->mqd_ptr;
4625     int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4626 
4627     gfx_v8_0_kiq_setting(ring);
4628 
4629     if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4630         /* reset MQD to a clean status */
4631         if (adev->gfx.mec.mqd_backup[mqd_idx])
4632             memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4633 
4634         /* reset ring buffer */
4635         ring->wptr = 0;
4636         amdgpu_ring_clear_ring(ring);
4637         mutex_lock(&adev->srbm_mutex);
4638         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4639         gfx_v8_0_mqd_commit(adev, mqd);
4640         vi_srbm_select(adev, 0, 0, 0, 0);
4641         mutex_unlock(&adev->srbm_mutex);
4642     } else {
4643         memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4644         ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4645         ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4646         mutex_lock(&adev->srbm_mutex);
4647         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4648         gfx_v8_0_mqd_init(ring);
4649         gfx_v8_0_mqd_commit(adev, mqd);
4650         vi_srbm_select(adev, 0, 0, 0, 0);
4651         mutex_unlock(&adev->srbm_mutex);
4652 
4653         if (adev->gfx.mec.mqd_backup[mqd_idx])
4654             memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4655     }
4656 
4657     return 0;
4658 }
4659 
4660 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4661 {
4662     struct amdgpu_device *adev = ring->adev;
4663     struct vi_mqd *mqd = ring->mqd_ptr;
4664     int mqd_idx = ring - &adev->gfx.compute_ring[0];
4665 
4666     if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4667         memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4668         ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4669         ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4670         mutex_lock(&adev->srbm_mutex);
4671         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4672         gfx_v8_0_mqd_init(ring);
4673         vi_srbm_select(adev, 0, 0, 0, 0);
4674         mutex_unlock(&adev->srbm_mutex);
4675 
4676         if (adev->gfx.mec.mqd_backup[mqd_idx])
4677             memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4678     } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4679         /* reset MQD to a clean status */
4680         if (adev->gfx.mec.mqd_backup[mqd_idx])
4681             memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4682         /* reset ring buffer */
4683         ring->wptr = 0;
4684         amdgpu_ring_clear_ring(ring);
4685     } else {
4686         amdgpu_ring_clear_ring(ring);
4687     }
4688     return 0;
4689 }
4690 
4691 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4692 {
4693     if (adev->asic_type > CHIP_TONGA) {
4694         WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4695         WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4696     }
4697     /* enable doorbells */
4698     WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4699 }
4700 
4701 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4702 {
4703     struct amdgpu_ring *ring;
4704     int r;
4705 
4706     ring = &adev->gfx.kiq.ring;
4707 
4708     r = amdgpu_bo_reserve(ring->mqd_obj, false);
4709     if (unlikely(r != 0))
4710         return r;
4711 
4712     r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4713     if (unlikely(r != 0))
4714         return r;
4715 
4716     gfx_v8_0_kiq_init_queue(ring);
4717     amdgpu_bo_kunmap(ring->mqd_obj);
4718     ring->mqd_ptr = NULL;
4719     amdgpu_bo_unreserve(ring->mqd_obj);
4720     ring->sched.ready = true;
4721     return 0;
4722 }
4723 
4724 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4725 {
4726     struct amdgpu_ring *ring = NULL;
4727     int r = 0, i;
4728 
4729     gfx_v8_0_cp_compute_enable(adev, true);
4730 
4731     for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4732         ring = &adev->gfx.compute_ring[i];
4733 
4734         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4735         if (unlikely(r != 0))
4736             goto done;
4737         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4738         if (!r) {
4739             r = gfx_v8_0_kcq_init_queue(ring);
4740             amdgpu_bo_kunmap(ring->mqd_obj);
4741             ring->mqd_ptr = NULL;
4742         }
4743         amdgpu_bo_unreserve(ring->mqd_obj);
4744         if (r)
4745             goto done;
4746     }
4747 
4748     gfx_v8_0_set_mec_doorbell_range(adev);
4749 
4750     r = gfx_v8_0_kiq_kcq_enable(adev);
4751     if (r)
4752         goto done;
4753 
4754 done:
4755     return r;
4756 }
4757 
4758 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4759 {
4760     int r, i;
4761     struct amdgpu_ring *ring;
4762 
4763     /* collect all the ring_tests here, gfx, kiq, compute */
4764     ring = &adev->gfx.gfx_ring[0];
4765     r = amdgpu_ring_test_helper(ring);
4766     if (r)
4767         return r;
4768 
4769     ring = &adev->gfx.kiq.ring;
4770     r = amdgpu_ring_test_helper(ring);
4771     if (r)
4772         return r;
4773 
4774     for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4775         ring = &adev->gfx.compute_ring[i];
4776         amdgpu_ring_test_helper(ring);
4777     }
4778 
4779     return 0;
4780 }
4781 
4782 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4783 {
4784     int r;
4785 
4786     if (!(adev->flags & AMD_IS_APU))
4787         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4788 
4789     r = gfx_v8_0_kiq_resume(adev);
4790     if (r)
4791         return r;
4792 
4793     r = gfx_v8_0_cp_gfx_resume(adev);
4794     if (r)
4795         return r;
4796 
4797     r = gfx_v8_0_kcq_resume(adev);
4798     if (r)
4799         return r;
4800 
4801     r = gfx_v8_0_cp_test_all_rings(adev);
4802     if (r)
4803         return r;
4804 
4805     gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4806 
4807     return 0;
4808 }
4809 
4810 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4811 {
4812     gfx_v8_0_cp_gfx_enable(adev, enable);
4813     gfx_v8_0_cp_compute_enable(adev, enable);
4814 }
4815 
4816 static int gfx_v8_0_hw_init(void *handle)
4817 {
4818     int r;
4819     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4820 
4821     gfx_v8_0_init_golden_registers(adev);
4822     gfx_v8_0_constants_init(adev);
4823 
4824     r = adev->gfx.rlc.funcs->resume(adev);
4825     if (r)
4826         return r;
4827 
4828     r = gfx_v8_0_cp_resume(adev);
4829 
4830     return r;
4831 }
4832 
4833 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4834 {
4835     int r, i;
4836     struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4837 
4838     r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4839     if (r)
4840         DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4841 
4842     for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4843         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4844 
4845         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4846         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4847                         PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4848                         PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4849                         PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4850                         PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4851         amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4852         amdgpu_ring_write(kiq_ring, 0);
4853         amdgpu_ring_write(kiq_ring, 0);
4854         amdgpu_ring_write(kiq_ring, 0);
4855     }
4856     r = amdgpu_ring_test_helper(kiq_ring);
4857     if (r)
4858         DRM_ERROR("KCQ disable failed\n");
4859 
4860     return r;
4861 }
4862 
4863 static bool gfx_v8_0_is_idle(void *handle)
4864 {
4865     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4866 
4867     if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4868         || RREG32(mmGRBM_STATUS2) != 0x8)
4869         return false;
4870     else
4871         return true;
4872 }
4873 
4874 static bool gfx_v8_0_rlc_is_idle(void *handle)
4875 {
4876     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4877 
4878     if (RREG32(mmGRBM_STATUS2) != 0x8)
4879         return false;
4880     else
4881         return true;
4882 }
4883 
4884 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4885 {
4886     unsigned int i;
4887     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4888 
4889     for (i = 0; i < adev->usec_timeout; i++) {
4890         if (gfx_v8_0_rlc_is_idle(handle))
4891             return 0;
4892 
4893         udelay(1);
4894     }
4895     return -ETIMEDOUT;
4896 }
4897 
4898 static int gfx_v8_0_wait_for_idle(void *handle)
4899 {
4900     unsigned int i;
4901     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4902 
4903     for (i = 0; i < adev->usec_timeout; i++) {
4904         if (gfx_v8_0_is_idle(handle))
4905             return 0;
4906 
4907         udelay(1);
4908     }
4909     return -ETIMEDOUT;
4910 }
4911 
4912 static int gfx_v8_0_hw_fini(void *handle)
4913 {
4914     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4915 
4916     amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4917     amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4918 
4919     amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4920 
4921     amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4922 
4923     /* Disable the KCQs so the CPC stops touching memory that is about to become invalid */
4924     gfx_v8_0_kcq_disable(adev);
4925 
4926     if (amdgpu_sriov_vf(adev)) {
4927         pr_debug("For SRIOV client, shouldn't do anything.\n");
4928         return 0;
4929     }
4930     amdgpu_gfx_rlc_enter_safe_mode(adev);
4931     if (!gfx_v8_0_wait_for_idle(adev))
4932         gfx_v8_0_cp_enable(adev, false);
4933     else
4934         pr_err("cp is busy, skip halt cp\n");
4935     if (!gfx_v8_0_wait_for_rlc_idle(adev))
4936         adev->gfx.rlc.funcs->stop(adev);
4937     else
4938         pr_err("rlc is busy, skip halt rlc\n");
4939     amdgpu_gfx_rlc_exit_safe_mode(adev);
4940 
4941     return 0;
4942 }
4943 
4944 static int gfx_v8_0_suspend(void *handle)
4945 {
4946     return gfx_v8_0_hw_fini(handle);
4947 }
4948 
4949 static int gfx_v8_0_resume(void *handle)
4950 {
4951     return gfx_v8_0_hw_init(handle);
4952 }
4953 
4954 static bool gfx_v8_0_check_soft_reset(void *handle)
4955 {
4956     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4957     u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4958     u32 tmp;
4959 
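    /*
     * REG_GET_FIELD()/REG_SET_FIELD() are mask-and-shift helpers;
     * roughly, REG_SET_FIELD(v, REG, F, x) expands to
     *   (v & ~REG__F_MASK) | ((x << REG__F__SHIFT) & REG__F_MASK)
     * so the checks below simply accumulate the individual soft-reset
     * request bits into grbm_soft_reset/srbm_soft_reset.
     */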
4960     /* GRBM_STATUS */
4961     tmp = RREG32(mmGRBM_STATUS);
4962     if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4963            GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4964            GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4965            GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4966            GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4967            GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4968            GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4969         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4970                         GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4971         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4972                         GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4973         srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4974                         SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4975     }
4976 
4977     /* GRBM_STATUS2 */
4978     tmp = RREG32(mmGRBM_STATUS2);
4979     if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4980         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4981                         GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4982 
4983     if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4984         REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4985         REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4986         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4987                         SOFT_RESET_CPF, 1);
4988         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4989                         SOFT_RESET_CPC, 1);
4990         grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4991                         SOFT_RESET_CPG, 1);
4992         srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4993                         SOFT_RESET_GRBM, 1);
4994     }
4995 
4996     /* SRBM_STATUS */
4997     tmp = RREG32(mmSRBM_STATUS);
4998     if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4999         srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5000                         SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5001     if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5002         srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5003                         SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5004 
5005     if (grbm_soft_reset || srbm_soft_reset) {
5006         adev->gfx.grbm_soft_reset = grbm_soft_reset;
5007         adev->gfx.srbm_soft_reset = srbm_soft_reset;
5008         return true;
5009     } else {
5010         adev->gfx.grbm_soft_reset = 0;
5011         adev->gfx.srbm_soft_reset = 0;
5012         return false;
5013     }
5014 }
5015 
5016 static int gfx_v8_0_pre_soft_reset(void *handle)
5017 {
5018     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5019     u32 grbm_soft_reset = 0;
5020 
5021     if ((!adev->gfx.grbm_soft_reset) &&
5022         (!adev->gfx.srbm_soft_reset))
5023         return 0;
5024 
5025     grbm_soft_reset = adev->gfx.grbm_soft_reset;
5026 
5027     /* stop the rlc */
5028     adev->gfx.rlc.funcs->stop(adev);
5029 
5030     if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5031         REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5032         /* Disable GFX parsing/prefetching */
5033         gfx_v8_0_cp_gfx_enable(adev, false);
5034 
5035     if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5036         REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5037         REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5038         REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5039         int i;
5040 
5041         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5042             struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5043 
5044             mutex_lock(&adev->srbm_mutex);
5045             vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5046             gfx_v8_0_deactivate_hqd(adev, 2);
5047             vi_srbm_select(adev, 0, 0, 0, 0);
5048             mutex_unlock(&adev->srbm_mutex);
5049         }
5050         /* Disable MEC parsing/prefetching */
5051         gfx_v8_0_cp_compute_enable(adev, false);
5052     }
5053 
5054     return 0;
5055 }
5056 
5057 static int gfx_v8_0_soft_reset(void *handle)
5058 {
5059     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5060     u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5061     u32 tmp;
5062 
5063     if ((!adev->gfx.grbm_soft_reset) &&
5064         (!adev->gfx.srbm_soft_reset))
5065         return 0;
5066 
5067     grbm_soft_reset = adev->gfx.grbm_soft_reset;
5068     srbm_soft_reset = adev->gfx.srbm_soft_reset;
5069 
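    /*
     * Sequence implemented below: stall the GFX memory-controller
     * interface, pulse the GRBM/SRBM soft-reset bits (assert, wait
     * ~50us, de-assert, reading the register back after each write to
     * make sure it has posted), then release the stall.
     */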
5070     if (grbm_soft_reset || srbm_soft_reset) {
5071         tmp = RREG32(mmGMCON_DEBUG);
5072         tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5073         tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5074         WREG32(mmGMCON_DEBUG, tmp);
5075         udelay(50);
5076     }
5077 
5078     if (grbm_soft_reset) {
5079         tmp = RREG32(mmGRBM_SOFT_RESET);
5080         tmp |= grbm_soft_reset;
5081         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5082         WREG32(mmGRBM_SOFT_RESET, tmp);
5083         tmp = RREG32(mmGRBM_SOFT_RESET);
5084 
5085         udelay(50);
5086 
5087         tmp &= ~grbm_soft_reset;
5088         WREG32(mmGRBM_SOFT_RESET, tmp);
5089         tmp = RREG32(mmGRBM_SOFT_RESET);
5090     }
5091 
5092     if (srbm_soft_reset) {
5093         tmp = RREG32(mmSRBM_SOFT_RESET);
5094         tmp |= srbm_soft_reset;
5095         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5096         WREG32(mmSRBM_SOFT_RESET, tmp);
5097         tmp = RREG32(mmSRBM_SOFT_RESET);
5098 
5099         udelay(50);
5100 
5101         tmp &= ~srbm_soft_reset;
5102         WREG32(mmSRBM_SOFT_RESET, tmp);
5103         tmp = RREG32(mmSRBM_SOFT_RESET);
5104     }
5105 
5106     if (grbm_soft_reset || srbm_soft_reset) {
5107         tmp = RREG32(mmGMCON_DEBUG);
5108         tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5109         tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5110         WREG32(mmGMCON_DEBUG, tmp);
5111     }
5112 
5113     /* Wait a little for things to settle down */
5114     udelay(50);
5115 
5116     return 0;
5117 }
5118 
5119 static int gfx_v8_0_post_soft_reset(void *handle)
5120 {
5121     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5122     u32 grbm_soft_reset = 0;
5123 
5124     if ((!adev->gfx.grbm_soft_reset) &&
5125         (!adev->gfx.srbm_soft_reset))
5126         return 0;
5127 
5128     grbm_soft_reset = adev->gfx.grbm_soft_reset;
5129 
5130     if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5131         REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5132         REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5133         REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5134         int i;
5135 
5136         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5137             struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5138 
5139             mutex_lock(&adev->srbm_mutex);
5140             vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5141             gfx_v8_0_deactivate_hqd(adev, 2);
5142             vi_srbm_select(adev, 0, 0, 0, 0);
5143             mutex_unlock(&adev->srbm_mutex);
5144         }
5145         gfx_v8_0_kiq_resume(adev);
5146         gfx_v8_0_kcq_resume(adev);
5147     }
5148 
5149     if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5150         REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5151         gfx_v8_0_cp_gfx_resume(adev);
5152 
5153     gfx_v8_0_cp_test_all_rings(adev);
5154 
5155     adev->gfx.rlc.funcs->start(adev);
5156 
5157     return 0;
5158 }
5159 
5160 /**
5161  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5162  *
5163  * @adev: amdgpu_device pointer
5164  *
5165  * Fetches a GPU clock counter snapshot.
5166  * Returns the 64-bit clock counter snapshot.
5167  */
5168 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5169 {
5170     uint64_t clock;
5171 
5172     mutex_lock(&adev->gfx.gpu_clock_mutex);
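    /* Writing 1 here appears to latch the free-running counter so that
     * the LSB/MSB reads below form one coherent 64-bit sample. */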
5173     WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5174     clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5175         ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5176     mutex_unlock(&adev->gfx.gpu_clock_mutex);
5177     return clock;
5178 }
5179 
5180 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5181                       uint32_t vmid,
5182                       uint32_t gds_base, uint32_t gds_size,
5183                       uint32_t gws_base, uint32_t gws_size,
5184                       uint32_t oa_base, uint32_t oa_size)
5185 {
5186     /* GDS Base */
5187     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5188     amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5189                 WRITE_DATA_DST_SEL(0)));
5190     amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5191     amdgpu_ring_write(ring, 0);
5192     amdgpu_ring_write(ring, gds_base);
5193 
5194     /* GDS Size */
5195     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5196     amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5197                 WRITE_DATA_DST_SEL(0)));
5198     amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5199     amdgpu_ring_write(ring, 0);
5200     amdgpu_ring_write(ring, gds_size);
5201 
5202     /* GWS */
5203     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5204     amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5205                 WRITE_DATA_DST_SEL(0)));
5206     amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5207     amdgpu_ring_write(ring, 0);
5208     amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5209 
5210     /* OA */
5211     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5212     amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5213                 WRITE_DATA_DST_SEL(0)));
5214     amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5215     amdgpu_ring_write(ring, 0);
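    /* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a contiguous
     * mask of oa_size bits starting at bit oa_base, e.g. base 4,
     * size 2 -> 0x30. */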
5216     amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5217 }
5218 
5219 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5220 {
5221     WREG32(mmSQ_IND_INDEX,
5222         (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5223         (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5224         (address << SQ_IND_INDEX__INDEX__SHIFT) |
5225         (SQ_IND_INDEX__FORCE_READ_MASK));
5226     return RREG32(mmSQ_IND_DATA);
5227 }
5228 
5229 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5230                uint32_t wave, uint32_t thread,
5231                uint32_t regno, uint32_t num, uint32_t *out)
5232 {
5233     WREG32(mmSQ_IND_INDEX,
5234         (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5235         (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5236         (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5237         (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5238         (SQ_IND_INDEX__FORCE_READ_MASK) |
5239         (SQ_IND_INDEX__AUTO_INCR_MASK));
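    /* With AUTO_INCR set, the hardware bumps the index after every
     * SQ_IND_DATA read, so this loop streams out consecutive regs. */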
5240     while (num--)
5241         *(out++) = RREG32(mmSQ_IND_DATA);
5242 }
5243 
5244 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5245 {
5246     /* type 0 wave data */
5247     dst[(*no_fields)++] = 0;
5248     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5249     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5250     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5251     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5252     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5253     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5254     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5255     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5256     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5257     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5258     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5259     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5260     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5261     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5262     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5263     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5264     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5265     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5266     dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5267 }
5268 
5269 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5270                      uint32_t wave, uint32_t start,
5271                      uint32_t size, uint32_t *dst)
5272 {
5273     wave_read_regs(
5274         adev, simd, wave, 0,
5275         start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5276 }
5277 
5279 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5280     .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5281     .select_se_sh = &gfx_v8_0_select_se_sh,
5282     .read_wave_data = &gfx_v8_0_read_wave_data,
5283     .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5284     .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5285 };
5286 
5287 static int gfx_v8_0_early_init(void *handle)
5288 {
5289     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5290 
5291     adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5292     adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5293                       AMDGPU_MAX_COMPUTE_RINGS);
5294     adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5295     gfx_v8_0_set_ring_funcs(adev);
5296     gfx_v8_0_set_irq_funcs(adev);
5297     gfx_v8_0_set_gds_init(adev);
5298     gfx_v8_0_set_rlc_funcs(adev);
5299 
5300     return 0;
5301 }
5302 
5303 static int gfx_v8_0_late_init(void *handle)
5304 {
5305     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5306     int r;
5307 
5308     r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5309     if (r)
5310         return r;
5311 
5312     r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5313     if (r)
5314         return r;
5315 
5316     /* requires IBs so do in late init after IB pool is initialized */
5317     r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5318     if (r)
5319         return r;
5320 
5321     r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5322     if (r) {
5323         DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5324         return r;
5325     }
5326 
5327     r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5328     if (r) {
5329         DRM_ERROR(
5330             "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5331             r);
5332         return r;
5333     }
5334 
5335     return 0;
5336 }
5337 
5338 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5339                                bool enable)
5340 {
5341     if ((adev->asic_type == CHIP_POLARIS11) ||
5342         (adev->asic_type == CHIP_POLARIS12) ||
5343         (adev->asic_type == CHIP_VEGAM))
5344         /* Send msg to SMU via Powerplay */
5345         amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5346 
5347     WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5348 }
5349 
5350 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5351                             bool enable)
5352 {
5353     WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5354 }
5355 
5356 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5357         bool enable)
5358 {
5359     WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5360 }
5361 
5362 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5363                       bool enable)
5364 {
5365     WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5366 }
5367 
5368 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5369                         bool enable)
5370 {
5371     WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5372 
5373     /* Read any GFX register to wake up GFX. */
5374     if (!enable)
5375         RREG32(mmDB_RENDER_CONTROL);
5376 }
5377 
5378 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5379                       bool enable)
5380 {
5381     if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5382         cz_enable_gfx_cg_power_gating(adev, true);
5383         if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5384             cz_enable_gfx_pipeline_power_gating(adev, true);
5385     } else {
5386         cz_enable_gfx_cg_power_gating(adev, false);
5387         cz_enable_gfx_pipeline_power_gating(adev, false);
5388     }
5389 }
5390 
5391 static int gfx_v8_0_set_powergating_state(void *handle,
5392                       enum amd_powergating_state state)
5393 {
5394     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5395     bool enable = (state == AMD_PG_STATE_GATE);
5396 
5397     if (amdgpu_sriov_vf(adev))
5398         return 0;
5399 
5400     if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5401                 AMD_PG_SUPPORT_RLC_SMU_HS |
5402                 AMD_PG_SUPPORT_CP |
5403                 AMD_PG_SUPPORT_GFX_DMG))
5404         amdgpu_gfx_rlc_enter_safe_mode(adev);
5405     switch (adev->asic_type) {
5406     case CHIP_CARRIZO:
5407     case CHIP_STONEY:
5408 
5409         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5410             cz_enable_sck_slow_down_on_power_up(adev, true);
5411             cz_enable_sck_slow_down_on_power_down(adev, true);
5412         } else {
5413             cz_enable_sck_slow_down_on_power_up(adev, false);
5414             cz_enable_sck_slow_down_on_power_down(adev, false);
5415         }
5416         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5417             cz_enable_cp_power_gating(adev, true);
5418         else
5419             cz_enable_cp_power_gating(adev, false);
5420 
5421         cz_update_gfx_cg_power_gating(adev, enable);
5422 
5423         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5424             gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5425         else
5426             gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5427 
5428         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5429             gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5430         else
5431             gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5432         break;
5433     case CHIP_POLARIS11:
5434     case CHIP_POLARIS12:
5435     case CHIP_VEGAM:
5436         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5437             gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5438         else
5439             gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5440 
5441         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5442             gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5443         else
5444             gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5445 
5446         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5447             polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5448         else
5449             polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5450         break;
5451     default:
5452         break;
5453     }
5454     if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5455                 AMD_PG_SUPPORT_RLC_SMU_HS |
5456                 AMD_PG_SUPPORT_CP |
5457                 AMD_PG_SUPPORT_GFX_DMG))
5458         amdgpu_gfx_rlc_exit_safe_mode(adev);
5459     return 0;
5460 }
5461 
5462 static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5463 {
5464     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5465     int data;
5466 
5467     if (amdgpu_sriov_vf(adev))
5468         *flags = 0;
5469 
5470     /* AMD_CG_SUPPORT_GFX_MGCG */
5471     data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5472     if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5473         *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5474 
5475     /* AMD_CG_SUPPORT_GFX_CGCG */
5476     data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5477     if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5478         *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5479 
5480     /* AMD_CG_SUPPORT_GFX_CGLS */
5481     if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5482         *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5483 
5484     /* AMD_CG_SUPPORT_GFX_CGTS */
5485     data = RREG32(mmCGTS_SM_CTRL_REG);
5486     if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5487         *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5488 
5489     /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5490     if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5491         *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5492 
5493     /* AMD_CG_SUPPORT_GFX_RLC_LS */
5494     data = RREG32(mmRLC_MEM_SLP_CNTL);
5495     if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5496         *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5497 
5498     /* AMD_CG_SUPPORT_GFX_CP_LS */
5499     data = RREG32(mmCP_MEM_SLP_CNTL);
5500     if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5501         *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5502 }
5503 
5504 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5505                      uint32_t reg_addr, uint32_t cmd)
5506 {
5507     uint32_t data;
5508 
5509     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5510 
5511     WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5512     WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5513 
5514     data = RREG32(mmRLC_SERDES_WR_CTRL);
5515     if (adev->asic_type == CHIP_STONEY)
5516         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5517               RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5518               RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5519               RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5520               RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5521               RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5522               RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5523               RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5524               RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5525     else
5526         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5527               RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5528               RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5529               RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5530               RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5531               RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5532               RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5533               RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5534               RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5535               RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5536               RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5537     data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5538          (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5539          (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5540          (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5541 
5542     WREG32(mmRLC_SERDES_WR_CTRL, data);
5543 }
5544 
5545 #define MSG_ENTER_RLC_SAFE_MODE     1
5546 #define MSG_EXIT_RLC_SAFE_MODE      0
5547 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5548 #define RLC_GPR_REG2__REQ__SHIFT 0
5549 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5550 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5551 
5552 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5553 {
5554     uint32_t rlc_setting;
5555 
5556     rlc_setting = RREG32(mmRLC_CNTL);
5557     if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5558         return false;
5559 
5560     return true;
5561 }
5562 
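/*
 * Safe-mode handshake as implemented below: set_safe_mode writes CMD
 * with MESSAGE = 1 to request entry, polls RLC_GPM_STAT until both the
 * GFX clock and power status bits read back as on, then waits for the
 * RLC to acknowledge by clearing CMD. unset_safe_mode sends the same
 * request with MESSAGE = 0 and only waits for the CMD acknowledge.
 */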
5563 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5564 {
5565     uint32_t data;
5566     unsigned i;
5567     data = RREG32(mmRLC_CNTL);
5568     data |= RLC_SAFE_MODE__CMD_MASK;
5569     data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5570     data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5571     WREG32(mmRLC_SAFE_MODE, data);
5572 
5573     /* wait for RLC_SAFE_MODE */
5574     for (i = 0; i < adev->usec_timeout; i++) {
5575         if ((RREG32(mmRLC_GPM_STAT) &
5576              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5577               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5578             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5579              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5580             break;
5581         udelay(1);
5582     }
5583     for (i = 0; i < adev->usec_timeout; i++) {
5584         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5585             break;
5586         udelay(1);
5587     }
5588 }
5589 
5590 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5591 {
5592     uint32_t data;
5593     unsigned i;
5594 
5595     data = RREG32(mmRLC_CNTL);
5596     data |= RLC_SAFE_MODE__CMD_MASK;
5597     data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5598     WREG32(mmRLC_SAFE_MODE, data);
5599 
5600     for (i = 0; i < adev->usec_timeout; i++) {
5601         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5602             break;
5603         udelay(1);
5604     }
5605 }
5606 
5607 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5608 {
5609     u32 data;
5610 
5611     amdgpu_gfx_off_ctrl(adev, false);
5612 
5613     if (amdgpu_sriov_is_pp_one_vf(adev))
5614         data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5615     else
5616         data = RREG32(mmRLC_SPM_VMID);
5617 
5618     data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5619     data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5620 
5621     if (amdgpu_sriov_is_pp_one_vf(adev))
5622         WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5623     else
5624         WREG32(mmRLC_SPM_VMID, data);
5625 
5626     amdgpu_gfx_off_ctrl(adev, true);
5627 }
5628 
5629 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5630     .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5631     .set_safe_mode = gfx_v8_0_set_safe_mode,
5632     .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5633     .init = gfx_v8_0_rlc_init,
5634     .get_csb_size = gfx_v8_0_get_csb_size,
5635     .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5636     .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5637     .resume = gfx_v8_0_rlc_resume,
5638     .stop = gfx_v8_0_rlc_stop,
5639     .reset = gfx_v8_0_rlc_reset,
5640     .start = gfx_v8_0_rlc_start,
5641     .update_spm_vmid = gfx_v8_0_update_spm_vmid
5642 };
5643 
5644 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5645                               bool enable)
5646 {
5647     uint32_t temp, data;
5648 
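    /*
     * Throughout this function "temp" holds the original register
     * value so that a WREG32 is only issued when the computed value
     * actually differs, avoiding redundant register writes.
     */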
5649     amdgpu_gfx_rlc_enter_safe_mode(adev);
5650 
5651     /* It is disabled by HW by default */
5652     if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5653         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5654             if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5655                 /* 1 - RLC memory Light sleep */
5656                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5657 
5658             if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5659                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5660         }
5661 
5662         /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5663         temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5664         if (adev->flags & AMD_IS_APU)
5665             data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5666                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5667                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5668         else
5669             data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5670                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5671                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5672                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5673 
5674         if (temp != data)
5675             WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5676 
5677         /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5678         gfx_v8_0_wait_for_rlc_serdes(adev);
5679 
5680         /* 5 - clear mgcg override */
5681         gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5682 
5683         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5684             /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
5685             temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5686             data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5687             data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5688             data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5689             data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5690             if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5691                 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5692                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5693             data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5694             data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5695             if (temp != data)
5696                 WREG32(mmCGTS_SM_CTRL_REG, data);
5697         }
5698         udelay(50);
5699 
5700         /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5701         gfx_v8_0_wait_for_rlc_serdes(adev);
5702     } else {
5703         /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5704         temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5705         data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5706                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5707                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5708                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5709         if (temp != data)
5710             WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5711 
5712         /* 2 - disable MGLS in RLC */
5713         data = RREG32(mmRLC_MEM_SLP_CNTL);
5714         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5715             data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5716             WREG32(mmRLC_MEM_SLP_CNTL, data);
5717         }
5718 
5719         /* 3 - disable MGLS in CP */
5720         data = RREG32(mmCP_MEM_SLP_CNTL);
5721         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5722             data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5723             WREG32(mmCP_MEM_SLP_CNTL, data);
5724         }
5725 
5726         /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5727         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5728         data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5729                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5730         if (temp != data)
5731             WREG32(mmCGTS_SM_CTRL_REG, data);
5732 
5733         /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5734         gfx_v8_0_wait_for_rlc_serdes(adev);
5735 
5736         /* 6 - set mgcg override */
5737         gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5738 
5739         udelay(50);
5740 
5741         /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5742         gfx_v8_0_wait_for_rlc_serdes(adev);
5743     }
5744 
5745     amdgpu_gfx_rlc_exit_safe_mode(adev);
5746 }
5747 
5748 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5749                               bool enable)
5750 {
5751     uint32_t temp, temp1, data, data1;
5752 
5753     temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5754 
5755     amdgpu_gfx_rlc_enter_safe_mode(adev);
5756 
5757     if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5758         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5759         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5760         if (temp1 != data1)
5761             WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5762 
5763         /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5764         gfx_v8_0_wait_for_rlc_serdes(adev);
5765 
5766         /* 2 - clear cgcg override */
5767         gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5768 
5769         /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5770         gfx_v8_0_wait_for_rlc_serdes(adev);
5771 
5772         /* 3 - write cmd to set CGLS */
5773         gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5774 
5775         /* 4 - enable cgcg */
5776         data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5777 
5778         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5779             /* enable cgls*/
5780             data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5781 
5782             temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5783             data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5784 
5785             if (temp1 != data1)
5786                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5787         } else {
5788             data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5789         }
5790 
5791         if (temp != data)
5792             WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5793 
5794         /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5795          * Cmp_busy/GFX_Idle interrupts
5796          */
5797         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5798     } else {
5799         /* disable cntx_empty_int_enable & GFX Idle interrupt */
5800         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5801 
5802         /* TEST CGCG */
5803         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5804         data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5805                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5806         if (temp1 != data1)
5807             WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5808 
5809         /* read gfx register to wake up cgcg */
5810         RREG32(mmCB_CGTT_SCLK_CTRL);
5811         RREG32(mmCB_CGTT_SCLK_CTRL);
5812         RREG32(mmCB_CGTT_SCLK_CTRL);
5813         RREG32(mmCB_CGTT_SCLK_CTRL);
5814 
5815         /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5816         gfx_v8_0_wait_for_rlc_serdes(adev);
5817 
5818         /* write cmd to Set CGCG Override */
5819         gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5820 
5821         /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5822         gfx_v8_0_wait_for_rlc_serdes(adev);
5823 
5824         /* write cmd to Clear CGLS */
5825         gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5826 
5827         /* disable cgcg, cgls should be disabled too. */
5828         data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5829               RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5830         if (temp != data)
5831             WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5832         /* enable interrupts again for PG */
5833         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5834     }
5835 
5836     gfx_v8_0_wait_for_rlc_serdes(adev);
5837 
5838     amdgpu_gfx_rlc_exit_safe_mode(adev);
5839 }

5840 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5841                         bool enable)
5842 {
5843     if (enable) {
5844         /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5845          * ===  MGCG + MGLS + TS(CG/LS) ===
5846          */
5847         gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5848         gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5849     } else {
5850         /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5851          * ===  CGCG + CGLS ===
5852          */
5853         gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5854         gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5855     }
5856     return 0;
5857 }
5858 
5859 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5860                       enum amd_clockgating_state state)
5861 {
5862     uint32_t msg_id, pp_state = 0;
5863     uint32_t pp_support_state = 0;
5864 
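    /*
     * Each PP_CG_MSG_ID() request below packs the target group/block
     * together with which of CG and LS are supported and which should
     * currently be enabled into a single clockgating message for the
     * SMU.
     */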
5865     if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5866         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5867             pp_support_state = PP_STATE_SUPPORT_LS;
5868             pp_state = PP_STATE_LS;
5869         }
5870         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5871             pp_support_state |= PP_STATE_SUPPORT_CG;
5872             pp_state |= PP_STATE_CG;
5873         }
5874         if (state == AMD_CG_STATE_UNGATE)
5875             pp_state = 0;
5876 
5877         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5878                 PP_BLOCK_GFX_CG,
5879                 pp_support_state,
5880                 pp_state);
5881         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5882     }
5883 
5884     if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5885         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5886             pp_support_state = PP_STATE_SUPPORT_LS;
5887             pp_state = PP_STATE_LS;
5888         }
5889 
5890         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5891             pp_support_state |= PP_STATE_SUPPORT_CG;
5892             pp_state |= PP_STATE_CG;
5893         }
5894 
5895         if (state == AMD_CG_STATE_UNGATE)
5896             pp_state = 0;
5897 
5898         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5899                 PP_BLOCK_GFX_MG,
5900                 pp_support_state,
5901                 pp_state);
5902         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5903     }
5904 
5905     return 0;
5906 }
5907 
5908 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5909                       enum amd_clockgating_state state)
5910 {
5911 
5912     uint32_t msg_id, pp_state = 0;
5913     uint32_t pp_support_state = 0;
5914 
5915     if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5916         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5917             pp_support_state = PP_STATE_SUPPORT_LS;
5918             pp_state = PP_STATE_LS;
5919         }
5920         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5921             pp_support_state |= PP_STATE_SUPPORT_CG;
5922             pp_state |= PP_STATE_CG;
5923         }
5924         if (state == AMD_CG_STATE_UNGATE)
5925             pp_state = 0;
5926 
5927         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5928                 PP_BLOCK_GFX_CG,
5929                 pp_support_state,
5930                 pp_state);
5931         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5932     }
5933 
5934     if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5935         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5936             pp_support_state = PP_STATE_SUPPORT_LS;
5937             pp_state = PP_STATE_LS;
5938         }
5939         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5940             pp_support_state |= PP_STATE_SUPPORT_CG;
5941             pp_state |= PP_STATE_CG;
5942         }
5943         if (state == AMD_CG_STATE_UNGATE)
5944             pp_state = 0;
5945 
5946         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5947                 PP_BLOCK_GFX_3D,
5948                 pp_support_state,
5949                 pp_state);
5950         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5951     }
5952 
5953     if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5954         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5955             pp_support_state = PP_STATE_SUPPORT_LS;
5956             pp_state = PP_STATE_LS;
5957         }
5958 
5959         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5960             pp_support_state |= PP_STATE_SUPPORT_CG;
5961             pp_state |= PP_STATE_CG;
5962         }
5963 
5964         if (state == AMD_CG_STATE_UNGATE)
5965             pp_state = 0;
5966 
5967         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5968                 PP_BLOCK_GFX_MG,
5969                 pp_support_state,
5970                 pp_state);
5971         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5972     }
5973 
5974     if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5975         pp_support_state = PP_STATE_SUPPORT_LS;
5976 
5977         if (state == AMD_CG_STATE_UNGATE)
5978             pp_state = 0;
5979         else
5980             pp_state = PP_STATE_LS;
5981 
5982         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5983                 PP_BLOCK_GFX_RLC,
5984                 pp_support_state,
5985                 pp_state);
5986         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5987     }
5988 
5989     if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5990         pp_support_state = PP_STATE_SUPPORT_LS;
5991 
5992         if (state == AMD_CG_STATE_UNGATE)
5993             pp_state = 0;
5994         else
5995             pp_state = PP_STATE_LS;
5996         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5997             PP_BLOCK_GFX_CP,
5998             pp_support_state,
5999             pp_state);
6000         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6001     }
6002 
6003     return 0;
6004 }
6005 
6006 static int gfx_v8_0_set_clockgating_state(void *handle,
6007                       enum amd_clockgating_state state)
6008 {
6009     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6010 
6011     if (amdgpu_sriov_vf(adev))
6012         return 0;
6013 
6014     switch (adev->asic_type) {
6015     case CHIP_FIJI:
6016     case CHIP_CARRIZO:
6017     case CHIP_STONEY:
6018         gfx_v8_0_update_gfx_clock_gating(adev,
6019                          state == AMD_CG_STATE_GATE);
6020         break;
6021     case CHIP_TONGA:
6022         gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6023         break;
6024     case CHIP_POLARIS10:
6025     case CHIP_POLARIS11:
6026     case CHIP_POLARIS12:
6027     case CHIP_VEGAM:
6028         gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6029         break;
6030     default:
6031         break;
6032     }
6033     return 0;
6034 }
6035 
6036 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6037 {
6038     return *ring->rptr_cpu_addr;
6039 }
6040 
6041 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6042 {
6043     struct amdgpu_device *adev = ring->adev;
6044 
6045     if (ring->use_doorbell)
6046         /* XXX check if swapping is necessary on BE */
6047         return *ring->wptr_cpu_addr;
6048     else
6049         return RREG32(mmCP_RB0_WPTR);
6050 }
6051 
6052 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6053 {
6054     struct amdgpu_device *adev = ring->adev;
6055 
6056     if (ring->use_doorbell) {
6057         /* XXX check if swapping is necessary on BE */
6058         *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6059         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6060     } else {
6061         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6062         (void)RREG32(mmCP_RB0_WPTR);
6063     }
6064 }
6065 
6066 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6067 {
6068     u32 ref_and_mask, reg_mem_engine;
6069 
6070     if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6071         (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6072         switch (ring->me) {
6073         case 1:
6074             ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6075             break;
6076         case 2:
6077             ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6078             break;
6079         default:
6080             return;
6081         }
6082         reg_mem_engine = 0;
6083     } else {
6084         ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6085         reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6086     }
6087 
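    /* This WAIT_REG_MEM writes the HDP flush request register and then
     * polls GPU_HDP_FLUSH_DONE until the per-client bit selected above
     * matches the reference value, i.e. the flush has completed. */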
6088     amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6089     amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6090                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6091                  reg_mem_engine));
6092     amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6093     amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6094     amdgpu_ring_write(ring, ref_and_mask);
6095     amdgpu_ring_write(ring, ref_and_mask);
6096     amdgpu_ring_write(ring, 0x20); /* poll interval */
6097 }
6098 
6099 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6100 {
6101     amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6102     amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6103         EVENT_INDEX(4));
6104 
6105     amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6106     amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6107         EVENT_INDEX(0));
6108 }
6109 
6110 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6111                     struct amdgpu_job *job,
6112                     struct amdgpu_ib *ib,
6113                     uint32_t flags)
6114 {
6115     unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6116     u32 header, control = 0;
6117 
6118     if (ib->flags & AMDGPU_IB_FLAG_CE)
6119         header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6120     else
6121         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6122 
6123     control |= ib->length_dw | (vmid << 24);
6124 
6125     if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6126         control |= INDIRECT_BUFFER_PRE_ENB(1);
6127 
6128         if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6129             gfx_v8_0_ring_emit_de_meta(ring);
6130     }
6131 
6132     amdgpu_ring_write(ring, header);
6133     amdgpu_ring_write(ring,
6134 #ifdef __BIG_ENDIAN
6135               (2 << 0) |
6136 #endif
6137               (ib->gpu_addr & 0xFFFFFFFC));
6138     amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6139     amdgpu_ring_write(ring, control);
6140 }
6141 
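/*
 * In both IB-emit paths below and above, the IB address is dword-aligned
 * via the 0xFFFFFFFC mask, and the freed-up low bits of that dword appear
 * to carry the CP's endian-swap mode, hence the (2 << 0) OR'ed in on
 * big-endian builds.
 */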
6142 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6143                       struct amdgpu_job *job,
6144                       struct amdgpu_ib *ib,
6145                       uint32_t flags)
6146 {
6147     unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6148     u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6149 
6150     /* Currently, there is a high possibility to get wave ID mismatch
6151      * between ME and GDS, leading to a hw deadlock, because ME generates
6152      * different wave IDs than the GDS expects. This situation happens
6153      * randomly when at least 5 compute pipes use GDS ordered append.
6154      * The wave IDs generated by ME are also wrong after suspend/resume.
6155      * Those are probably bugs somewhere else in the kernel driver.
6156      *
6157      * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6158      * GDS to 0 for this ring (me/pipe).
6159      */
6160     if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6161         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6162         amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6163         amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6164     }
6165 
6166     amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6167     amdgpu_ring_write(ring,
6168 #ifdef __BIG_ENDIAN
6169                 (2 << 0) |
6170 #endif
6171                 (ib->gpu_addr & 0xFFFFFFFC));
6172     amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6173     amdgpu_ring_write(ring, control);
6174 }
6175 
6176 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6177                      u64 seq, unsigned flags)
6178 {
6179     bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6180     bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6181 
6182     /* Workaround for cache flush problems. First send a dummy EOP
6183      * event down the pipe with seq one below.
6184      */
6185     amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6186     amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6187                  EOP_TC_ACTION_EN |
6188                  EOP_TC_WB_ACTION_EN |
6189                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6190                  EVENT_INDEX(5)));
6191     amdgpu_ring_write(ring, addr & 0xfffffffc);
6192     amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6193                 DATA_SEL(1) | INT_SEL(0));
6194     amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6195     amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6196 
6197     /* Then send the real EOP event down the pipe:
6198      * EVENT_WRITE_EOP - flush caches, send int */
6199     amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6200     amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6201                  EOP_TC_ACTION_EN |
6202                  EOP_TC_WB_ACTION_EN |
6203                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6204                  EVENT_INDEX(5)));
6205     amdgpu_ring_write(ring, addr & 0xfffffffc);
6206     amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6207               DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6208     amdgpu_ring_write(ring, lower_32_bits(seq));
6209     amdgpu_ring_write(ring, upper_32_bits(seq));
6211 }
6212 
6213 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6214 {
6215     int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6216     uint32_t seq = ring->fence_drv.sync_seq;
6217     uint64_t addr = ring->fence_drv.gpu_addr;
6218 
6219     amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6220     amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6221                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6222                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6223     amdgpu_ring_write(ring, addr & 0xfffffffc);
6224     amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6225     amdgpu_ring_write(ring, seq);
6226     amdgpu_ring_write(ring, 0xffffffff);
6227     amdgpu_ring_write(ring, 4); /* poll interval */
6228 }
6229 
6230 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6231                     unsigned vmid, uint64_t pd_addr)
6232 {
6233     int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6234 
6235     amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6236 
6237     /* wait for the invalidate to complete */
6238     amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6239     amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6240                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6241                  WAIT_REG_MEM_ENGINE(0))); /* me */
6242     amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6243     amdgpu_ring_write(ring, 0);
6244     amdgpu_ring_write(ring, 0); /* ref */
6245     amdgpu_ring_write(ring, 0); /* mask */
6246     amdgpu_ring_write(ring, 0x20); /* poll interval */
6247 
6248     /* compute doesn't have PFP */
6249     if (usepfp) {
6250         /* sync PFP to ME, otherwise we might get invalid PFP reads */
6251         amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6252         amdgpu_ring_write(ring, 0x0);
6253     }
6254 }
6255 
6256 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6257 {
6258     return *ring->wptr_cpu_addr;
6259 }
6260 
6261 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6262 {
6263     struct amdgpu_device *adev = ring->adev;
6264 
6265     /* XXX check if swapping is necessary on BE */
6266     *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6267     WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6268 }
6269 
6270 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6271                          u64 addr, u64 seq,
6272                          unsigned flags)
6273 {
6274     bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6275     bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6276 
6277     /* RELEASE_MEM - flush caches, send int */
6278     amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6279     amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6280                  EOP_TC_ACTION_EN |
6281                  EOP_TC_WB_ACTION_EN |
6282                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6283                  EVENT_INDEX(5)));
6284     amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6285     amdgpu_ring_write(ring, addr & 0xfffffffc);
6286     amdgpu_ring_write(ring, upper_32_bits(addr));
6287     amdgpu_ring_write(ring, lower_32_bits(seq));
6288     amdgpu_ring_write(ring, upper_32_bits(seq));
6289 }
6290 
6291 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6292                      u64 seq, unsigned int flags)
6293 {
6294     /* we only allocate 32bit for each seq wb address */
6295     BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6296 
6297     /* write fence seq to the "addr" */
6298     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6299     amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6300                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6301     amdgpu_ring_write(ring, lower_32_bits(addr));
6302     amdgpu_ring_write(ring, upper_32_bits(addr));
6303     amdgpu_ring_write(ring, lower_32_bits(seq));
6304 
6305     if (flags & AMDGPU_FENCE_FLAG_INT) {
6306         /* set register to trigger INT */
6307         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6308         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6309                      WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6310         amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6311         amdgpu_ring_write(ring, 0);
6312         amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6313     }
6314 }
6315 
6316 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6317 {
6318     amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6319     amdgpu_ring_write(ring, 0);
6320 }
6321 
6322 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6323 {
6324     uint32_t dw2 = 0;
6325 
6326     if (amdgpu_sriov_vf(ring->adev))
6327         gfx_v8_0_ring_emit_ce_meta(ring);
6328 
6329     dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6330     if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6331         gfx_v8_0_ring_emit_vgt_flush(ring);
6332         /* set load_global_config & load_global_uconfig */
6333         dw2 |= 0x8001;
6334         /* set load_cs_sh_regs */
6335         dw2 |= 0x01000000;
6336         /* set load_per_context_state & load_gfx_sh_regs for GFX */
6337         dw2 |= 0x10002;
6338 
6339         /* set load_ce_ram if a preamble is present */
6340         if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6341             dw2 |= 0x10000000;
6342     } else {
6343         /* still load_ce_ram if this is the first time a preamble is
6344          * presented, even though no context switch happens.
6345          */
6346         if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6347             dw2 |= 0x10000000;
6348     }
6349 
6350     amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6351     amdgpu_ring_write(ring, dw2);
6352     amdgpu_ring_write(ring, 0);
6353 }
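/*
 * Editor's note: dw2 assembled above becomes the first payload DW of
 * CONTEXT_CONTROL. Summarizing the masks this function uses:
 *
 *   0x80000000  load_enable (without it the packet is just NOPs)
 *   0x00008001  load_global_config | load_global_uconfig
 *   0x01000000  load_cs_sh_regs
 *   0x00010002  load_per_context_state | load_gfx_sh_regs
 *   0x10000000  load_ce_ram (only when a preamble IB is present)
 *
 * The bit names are taken from the in-line comments above; the exact
 * field positions come from the PM4 CONTEXT_CONTROL definition.
 */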
6354 
6355 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6356 {
6357     unsigned ret;
6358 
6359     amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6360     amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6361     amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6362     amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6363     ret = ring->wptr & ring->buf_mask;
6364     amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6365     return ret;
6366 }
6367 
6368 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6369 {
6370     unsigned cur;
6371 
6372     BUG_ON(offset > ring->buf_mask);
6373     BUG_ON(ring->ring[offset] != 0x55aa55aa);
6374 
6375     cur = (ring->wptr & ring->buf_mask) - 1;
6376     if (likely(cur > offset))
6377         ring->ring[offset] = cur - offset;
6378     else
6379         ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6380 }
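/*
 * Editor's note: a worked example of the patch arithmetic, assuming a
 * ring of 1024 DWs (buf_mask == 1023). init_cond_exec() left the dummy
 * 0x55aa55aa at `offset`; the COND_EXEC count patched in is the number
 * of DWs between that slot and the current write position:
 *
 *   no wrap:  offset = 100,  cur = 200  ->  ring[100]  = 200 - 100 = 100
 *   wrapped:  offset = 1020, cur = 4    ->  ring[1020] = 1024 - 1020 + 4 = 8
 *
 * (ring_size is in bytes, so ring_size >> 2 is the ring size in DWs.)
 */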
6381 
6382 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6383                     uint32_t reg_val_offs)
6384 {
6385     struct amdgpu_device *adev = ring->adev;
6386 
6387     amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6388     amdgpu_ring_write(ring, 0 | /* src: register */
6389                 (5 << 8) |  /* dst: memory */
6390                 (1 << 20)); /* write confirm */
6391     amdgpu_ring_write(ring, reg);
6392     amdgpu_ring_write(ring, 0);
6393     amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6394                 reg_val_offs * 4));
6395     amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6396                 reg_val_offs * 4));
6397 }
6398 
6399 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6400                   uint32_t val)
6401 {
6402     uint32_t cmd;
6403 
6404     switch (ring->funcs->type) {
6405     case AMDGPU_RING_TYPE_GFX:
6406         cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6407         break;
6408     case AMDGPU_RING_TYPE_KIQ:
6409         cmd = 1 << 16; /* no inc addr */
6410         break;
6411     default:
6412         cmd = WR_CONFIRM;
6413         break;
6414     }
6415 
6416     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6417     amdgpu_ring_write(ring, cmd);
6418     amdgpu_ring_write(ring, reg);
6419     amdgpu_ring_write(ring, 0);
6420     amdgpu_ring_write(ring, val);
6421 }
6422 
6423 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6424 {
6425     struct amdgpu_device *adev = ring->adev;
6426     uint32_t value = 0;
6427 
6428     value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6429     value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6430     value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6431     value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6432     WREG32(mmSQ_CMD, value);
6433 }
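/*
 * Editor's note: soft recovery tries to unwedge a hung ring without a
 * full GPU reset. The SQ_CMD write above broadcasts a command to the
 * shader SQs: CMD 0x03 with MODE 0x01 asks the SQs to kill in-flight
 * waves, and CHECK_VMID/VM_ID restrict the kill to waves belonging to
 * the offending job's VMID so unrelated work keeps running. The field
 * meanings are inferred from the values used in this function, not from
 * an independent register reference.
 */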
6434 
6435 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6436                          enum amdgpu_interrupt_state state)
6437 {
6438     WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6439              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6440 }
6441 
6442 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6443                              int me, int pipe,
6444                              enum amdgpu_interrupt_state state)
6445 {
6446     u32 mec_int_cntl, mec_int_cntl_reg;
6447 
6448     /*
6449      * amdgpu controls only the first MEC. That's why this function only
6450      * handles the setting of interrupts for this specific MEC. All other
6451      * pipes' interrupts are set by amdkfd.
6452      */
6453 
6454     if (me == 1) {
6455         switch (pipe) {
6456         case 0:
6457             mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6458             break;
6459         case 1:
6460             mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6461             break;
6462         case 2:
6463             mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6464             break;
6465         case 3:
6466             mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6467             break;
6468         default:
6469             DRM_DEBUG("invalid pipe %d\n", pipe);
6470             return;
6471         }
6472     } else {
6473         DRM_DEBUG("invalid me %d\n", me);
6474         return;
6475     }
6476 
6477     switch (state) {
6478     case AMDGPU_IRQ_STATE_DISABLE:
6479         mec_int_cntl = RREG32(mec_int_cntl_reg);
6480         mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6481         WREG32(mec_int_cntl_reg, mec_int_cntl);
6482         break;
6483     case AMDGPU_IRQ_STATE_ENABLE:
6484         mec_int_cntl = RREG32(mec_int_cntl_reg);
6485         mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6486         WREG32(mec_int_cntl_reg, mec_int_cntl);
6487         break;
6488     default:
6489         break;
6490     }
6491 }
6492 
6493 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6494                          struct amdgpu_irq_src *source,
6495                          unsigned type,
6496                          enum amdgpu_interrupt_state state)
6497 {
6498     WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6499              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6500 
6501     return 0;
6502 }
6503 
6504 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6505                           struct amdgpu_irq_src *source,
6506                           unsigned type,
6507                           enum amdgpu_interrupt_state state)
6508 {
6509     WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6510              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6511 
6512     return 0;
6513 }
6514 
6515 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6516                         struct amdgpu_irq_src *src,
6517                         unsigned type,
6518                         enum amdgpu_interrupt_state state)
6519 {
6520     switch (type) {
6521     case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6522         gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6523         break;
6524     case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6525         gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6526         break;
6527     case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6528         gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6529         break;
6530     case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6531         gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6532         break;
6533     case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6534         gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6535         break;
6536     case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6537         gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6538         break;
6539     case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6540         gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6541         break;
6542     case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6543         gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6544         break;
6545     case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6546         gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6547         break;
6548     default:
6549         break;
6550     }
6551     return 0;
6552 }
6553 
6554 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6555                      struct amdgpu_irq_src *source,
6556                      unsigned int type,
6557                      enum amdgpu_interrupt_state state)
6558 {
6559     int enable_flag;
6560 
6561     switch (state) {
6562     case AMDGPU_IRQ_STATE_DISABLE:
6563         enable_flag = 0;
6564         break;
6565 
6566     case AMDGPU_IRQ_STATE_ENABLE:
6567         enable_flag = 1;
6568         break;
6569 
6570     default:
6571         return -EINVAL;
6572     }
6573 
6574     WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6575     WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6576     WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6577     WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6578     WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6579     WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6580              enable_flag);
6581     WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6582              enable_flag);
6583     WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6584              enable_flag);
6585     WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6586              enable_flag);
6587     WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6588              enable_flag);
6589     WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590              enable_flag);
6591     WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592              enable_flag);
6593     WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594              enable_flag);
6595 
6596     return 0;
6597 }
6598 
6599 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6600                      struct amdgpu_irq_src *source,
6601                      unsigned int type,
6602                      enum amdgpu_interrupt_state state)
6603 {
6604     int enable_flag;
6605 
6606     switch (state) {
6607     case AMDGPU_IRQ_STATE_DISABLE:
6608         enable_flag = 1;
6609         break;
6610 
6611     case AMDGPU_IRQ_STATE_ENABLE:
6612         enable_flag = 0;
6613         break;
6614 
6615     default:
6616         return -EINVAL;
6617     }
6618 
6619     WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6620              enable_flag);
6621 
6622     return 0;
6623 }
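/*
 * Editor's note: the state-to-flag mapping above is intentionally
 * inverted. The field being written is SQ_INTERRUPT_MSG_CTRL.STALL:
 * enabling the SQ interrupt source means *unstalling* message delivery
 * (STALL = 0), while disabling it stalls the messages (STALL = 1).
 */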
6624 
6625 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6626                 struct amdgpu_irq_src *source,
6627                 struct amdgpu_iv_entry *entry)
6628 {
6629     int i;
6630     u8 me_id, pipe_id, queue_id;
6631     struct amdgpu_ring *ring;
6632 
6633     DRM_DEBUG("IH: CP EOP\n");
6634     me_id = (entry->ring_id & 0x0c) >> 2;
6635     pipe_id = (entry->ring_id & 0x03) >> 0;
6636     queue_id = (entry->ring_id & 0x70) >> 4;
6637 
6638     switch (me_id) {
6639     case 0:
6640         amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6641         break;
6642     case 1:
6643     case 2:
6644         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6645             ring = &adev->gfx.compute_ring[i];
6646             /* Per-queue interrupts are supported for MEC starting from VI,
6647              * but they can only be enabled/disabled per pipe, not per queue.
6648              */
6649             if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6650                 amdgpu_fence_process(ring);
6651         }
6652         break;
6653     }
6654     return 0;
6655 }
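/*
 * Editor's note: the ring_id decode above treats the IV ring_id byte as
 *
 *   bits [6:4] queue_id, bits [3:2] me_id, bits [1:0] pipe_id
 *
 * e.g. ring_id 0x25 -> me 1, pipe 1, queue 2, which is then matched
 * against each compute ring's (me, pipe, queue) triple. me 0 is the gfx
 * engine, so its EOP always maps to gfx_ring[0].
 */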
6656 
6657 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6658                struct amdgpu_iv_entry *entry)
6659 {
6660     u8 me_id, pipe_id, queue_id;
6661     struct amdgpu_ring *ring;
6662     int i;
6663 
6664     me_id = (entry->ring_id & 0x0c) >> 2;
6665     pipe_id = (entry->ring_id & 0x03) >> 0;
6666     queue_id = (entry->ring_id & 0x70) >> 4;
6667 
6668     switch (me_id) {
6669     case 0:
6670         drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6671         break;
6672     case 1:
6673     case 2:
6674         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6675             ring = &adev->gfx.compute_ring[i];
6676             if (ring->me == me_id && ring->pipe == pipe_id &&
6677                 ring->queue == queue_id)
6678                 drm_sched_fault(&ring->sched);
6679         }
6680         break;
6681     }
6682 }
6683 
6684 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6685                  struct amdgpu_irq_src *source,
6686                  struct amdgpu_iv_entry *entry)
6687 {
6688     DRM_ERROR("Illegal register access in command stream\n");
6689     gfx_v8_0_fault(adev, entry);
6690     return 0;
6691 }
6692 
6693 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6694                   struct amdgpu_irq_src *source,
6695                   struct amdgpu_iv_entry *entry)
6696 {
6697     DRM_ERROR("Illegal instruction in command stream\n");
6698     gfx_v8_0_fault(adev, entry);
6699     return 0;
6700 }
6701 
6702 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6703                      struct amdgpu_irq_src *source,
6704                      struct amdgpu_iv_entry *entry)
6705 {
6706     DRM_ERROR("CP EDC/ECC error detected.\n");
6707     return 0;
6708 }
6709 
6710 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6711                   bool from_wq)
6712 {
6713     u32 enc, se_id, sh_id, cu_id;
6714     char type[20];
6715     int sq_edc_source = -1;
6716 
6717     enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6718     se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6719 
6720     switch (enc) {
6721         case 0:
6722             DRM_INFO("SQ general purpose intr detected: "
6723                     "se_id %d, immed_overflow %d, host_reg_overflow %d, "
6724                     "host_cmd_overflow %d, cmd_timestamp %d, "
6725                     "reg_timestamp %d, thread_trace_buff_full %d, "
6726                     "wlt %d, thread_trace %d.\n",
6727                     se_id,
6728                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6729                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6730                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6731                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6732                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6733                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6734                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6735                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6736                     );
6737             break;
6738         case 1:
6739         case 2:
6740 
6741             cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6742             sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6743 
6744             /*
6745              * This function can be called either directly from the ISR
6746              * or from the bottom half (work handler), in which case we
6747              * can safely read the SQ_EDC_INFO register.
6748              */
6749             if (from_wq) {
6750                 mutex_lock(&adev->grbm_idx_mutex);
6751                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6752 
6753                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6754 
6755                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6756                 mutex_unlock(&adev->grbm_idx_mutex);
6757             }
6758 
6759             if (enc == 1)
6760                 sprintf(type, "instruction intr");
6761             else
6762                 sprintf(type, "EDC/ECC error");
6763 
6764             DRM_INFO(
6765                 "SQ %s detected: "
6766                     "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6767                     "trap %s, sq_edc_info.source %s.\n",
6768                     type, se_id, sh_id, cu_id,
6769                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6770                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6771                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6772                     REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6773                     (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6774                 );
6775             break;
6776         default:
6777             DRM_ERROR("SQ invalid encoding type.\n");
6778     }
6779 }
6780 
6781 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6782 {
6783 
6784     struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6785     struct sq_work *sq_work = container_of(work, struct sq_work, work);
6786 
6787     gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6788 }
6789 
6790 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6791                struct amdgpu_irq_src *source,
6792                struct amdgpu_iv_entry *entry)
6793 {
6794     unsigned ih_data = entry->src_data[0];
6795 
6796     /*
6797      * Try to submit work so SQ_EDC_INFO can be accessed from
6798      * BH. If previous work submission hasn't finished yet
6799      * just print whatever info is possible directly from the ISR.
6800      */
6801     if (work_pending(&adev->gfx.sq_work.work)) {
6802         gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6803     } else {
6804         adev->gfx.sq_work.ih_data = ih_data;
6805         schedule_work(&adev->gfx.sq_work.work);
6806     }
6807 
6808     return 0;
6809 }
6810 
6811 static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6812 {
6813     amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6814     amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6815               PACKET3_TC_ACTION_ENA |
6816               PACKET3_SH_KCACHE_ACTION_ENA |
6817               PACKET3_SH_ICACHE_ACTION_ENA |
6818               PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6819     amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6820     amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6821     amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6822 }
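/*
 * Editor's note: sketch of the SURFACE_SYNC payload above. CP_COHER_CNTL
 * selects which caches to flush/invalidate (TCL1, TC, SH K$/I$, plus TC
 * writeback); CP_COHER_SIZE = 0xffffffff with CP_COHER_BASE = 0 makes
 * the operation cover the whole address space; the final DW is the poll
 * interval the CP uses while waiting for coherency to complete.
 */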
6823 
6824 static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6825 {
6826     amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6827     amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6828               PACKET3_TC_ACTION_ENA |
6829               PACKET3_SH_KCACHE_ACTION_ENA |
6830               PACKET3_SH_ICACHE_ACTION_ENA |
6831               PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6832     amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
6833     amdgpu_ring_write(ring, 0xff);      /* CP_COHER_SIZE_HI */
6834     amdgpu_ring_write(ring, 0);     /* CP_COHER_BASE */
6835     amdgpu_ring_write(ring, 0);     /* CP_COHER_BASE_HI */
6836     amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
6837 }
6838 
6839 
6840 /* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
6841 #define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT   0x0000007f
6842 static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6843                     uint32_t pipe, bool enable)
6844 {
6845     uint32_t val;
6846     uint32_t wcl_cs_reg;
6847 
6848     val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6849 
6850     switch (pipe) {
6851     case 0:
6852         wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6853         break;
6854     case 1:
6855         wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6856         break;
6857     case 2:
6858         wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6859         break;
6860     case 3:
6861         wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6862         break;
6863     default:
6864         DRM_DEBUG("invalid pipe %d\n", pipe);
6865         return;
6866     }
6867 
6868     amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6869 
6870 }
6871 
6872 #define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT  0x07ffffff
6873 static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6874 {
6875     struct amdgpu_device *adev = ring->adev;
6876     uint32_t val;
6877     int i;
6878 
6879     /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
6880      * limit the number of gfx waves. Setting it to 0x1f (5 bits) makes
6881      * sure gfx only gets around 25% of GPU resources.
6882      */
6883     val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6884     amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6885 
6886     /* Restrict waves for normal/low priority compute queues as well
6887      * to get best QoS for high priority compute jobs.
6888      *
6889      * amdgpu controls only the 1st ME (CS pipes 0-3).
6890      */
6891     for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6892         if (i != ring->pipe)
6893             gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6894 
6895     }
6896 
6897 }
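/*
 * Editor's note: the ~25% figure follows from the register arithmetic:
 * the limit field is a 7-bit fraction whose default 0x7f allows all
 * waves, so writing 0x1f caps gfx at 31/127 ~= 24.4% of wave slots. The
 * per-CS-pipe loop then applies the same throttle to the other compute
 * pipes of MEC1, skipping the high-priority ring's own pipe. The
 * fraction interpretation is inferred from the comment above, not from
 * a register manual.
 */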
6898 
6899 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6900     .name = "gfx_v8_0",
6901     .early_init = gfx_v8_0_early_init,
6902     .late_init = gfx_v8_0_late_init,
6903     .sw_init = gfx_v8_0_sw_init,
6904     .sw_fini = gfx_v8_0_sw_fini,
6905     .hw_init = gfx_v8_0_hw_init,
6906     .hw_fini = gfx_v8_0_hw_fini,
6907     .suspend = gfx_v8_0_suspend,
6908     .resume = gfx_v8_0_resume,
6909     .is_idle = gfx_v8_0_is_idle,
6910     .wait_for_idle = gfx_v8_0_wait_for_idle,
6911     .check_soft_reset = gfx_v8_0_check_soft_reset,
6912     .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6913     .soft_reset = gfx_v8_0_soft_reset,
6914     .post_soft_reset = gfx_v8_0_post_soft_reset,
6915     .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6916     .set_powergating_state = gfx_v8_0_set_powergating_state,
6917     .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6918 };
6919 
6920 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6921     .type = AMDGPU_RING_TYPE_GFX,
6922     .align_mask = 0xff,
6923     .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6924     .support_64bit_ptrs = false,
6925     .get_rptr = gfx_v8_0_ring_get_rptr,
6926     .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6927     .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6928     .emit_frame_size = /* maximum 215 DWs when counting 16 IBs in */
6929         5 +  /* COND_EXEC */
6930         7 +  /* PIPELINE_SYNC */
6931         VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6932         12 +  /* FENCE for VM_FLUSH */
6933         20 + /* GDS switch */
6934         4 + /* double SWITCH_BUFFER,
6935                the first COND_EXEC jumps to the place just
6936                prior to this double SWITCH_BUFFER  */
6937         5 + /* COND_EXEC */
6938         7 +  /* HDP_flush */
6939         4 +  /* VGT_flush */
6940         14 + /* CE_META */
6941         31 + /* DE_META */
6942         3 + /* CNTX_CTRL */
6943         5 + /* HDP_INVL */
6944         12 + 12 + /* FENCE x2 */
6945         2 + /* SWITCH_BUFFER */
6946         5, /* SURFACE_SYNC */
6947     .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6948     .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6949     .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6950     .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6951     .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6952     .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6953     .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6954     .test_ring = gfx_v8_0_ring_test_ring,
6955     .test_ib = gfx_v8_0_ring_test_ib,
6956     .insert_nop = amdgpu_ring_insert_nop,
6957     .pad_ib = amdgpu_ring_generic_pad_ib,
6958     .emit_switch_buffer = gfx_v8_ring_emit_sb,
6959     .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6960     .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6961     .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6962     .emit_wreg = gfx_v8_0_ring_emit_wreg,
6963     .soft_recovery = gfx_v8_0_ring_soft_recovery,
6964     .emit_mem_sync = gfx_v8_0_emit_mem_sync,
6965 };
6966 
6967 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6968     .type = AMDGPU_RING_TYPE_COMPUTE,
6969     .align_mask = 0xff,
6970     .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6971     .support_64bit_ptrs = false,
6972     .get_rptr = gfx_v8_0_ring_get_rptr,
6973     .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6974     .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6975     .emit_frame_size =
6976         20 + /* gfx_v8_0_ring_emit_gds_switch */
6977         7 + /* gfx_v8_0_ring_emit_hdp_flush */
6978         5 + /* hdp_invalidate */
6979         7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6980         VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6981         7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6982         7 + /* gfx_v8_0_emit_mem_sync_compute */
6983         5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6984         15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6985     .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
6986     .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6987     .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6988     .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6989     .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6990     .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6991     .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6992     .test_ring = gfx_v8_0_ring_test_ring,
6993     .test_ib = gfx_v8_0_ring_test_ib,
6994     .insert_nop = amdgpu_ring_insert_nop,
6995     .pad_ib = amdgpu_ring_generic_pad_ib,
6996     .emit_wreg = gfx_v8_0_ring_emit_wreg,
6997     .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
6998     .emit_wave_limit = gfx_v8_0_emit_wave_limit,
6999 };
7000 
7001 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7002     .type = AMDGPU_RING_TYPE_KIQ,
7003     .align_mask = 0xff,
7004     .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7005     .support_64bit_ptrs = false,
7006     .get_rptr = gfx_v8_0_ring_get_rptr,
7007     .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7008     .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7009     .emit_frame_size =
7010         20 + /* gfx_v8_0_ring_emit_gds_switch */
7011         7 + /* gfx_v8_0_ring_emit_hdp_flush */
7012         5 + /* hdp_invalidate */
7013         7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7014         17 + /* gfx_v8_0_ring_emit_vm_flush */
7015         7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7016     .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
7017     .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7018     .test_ring = gfx_v8_0_ring_test_ring,
7019     .insert_nop = amdgpu_ring_insert_nop,
7020     .pad_ib = amdgpu_ring_generic_pad_ib,
7021     .emit_rreg = gfx_v8_0_ring_emit_rreg,
7022     .emit_wreg = gfx_v8_0_ring_emit_wreg,
7023 };
7024 
7025 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7026 {
7027     int i;
7028 
7029     adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7030 
7031     for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7032         adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7033 
7034     for (i = 0; i < adev->gfx.num_compute_rings; i++)
7035         adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7036 }
7037 
7038 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7039     .set = gfx_v8_0_set_eop_interrupt_state,
7040     .process = gfx_v8_0_eop_irq,
7041 };
7042 
7043 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7044     .set = gfx_v8_0_set_priv_reg_fault_state,
7045     .process = gfx_v8_0_priv_reg_irq,
7046 };
7047 
7048 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7049     .set = gfx_v8_0_set_priv_inst_fault_state,
7050     .process = gfx_v8_0_priv_inst_irq,
7051 };
7052 
7053 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7054     .set = gfx_v8_0_set_cp_ecc_int_state,
7055     .process = gfx_v8_0_cp_ecc_error_irq,
7056 };
7057 
7058 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7059     .set = gfx_v8_0_set_sq_int_state,
7060     .process = gfx_v8_0_sq_irq,
7061 };
7062 
7063 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7064 {
7065     adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7066     adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7067 
7068     adev->gfx.priv_reg_irq.num_types = 1;
7069     adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7070 
7071     adev->gfx.priv_inst_irq.num_types = 1;
7072     adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7073 
7074     adev->gfx.cp_ecc_error_irq.num_types = 1;
7075     adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7076 
7077     adev->gfx.sq_irq.num_types = 1;
7078     adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7079 }
7080 
7081 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7082 {
7083     adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7084 }
7085 
7086 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7087 {
7088     /* init ASIC GDS info */
7089     adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7090     adev->gds.gws_size = 64;
7091     adev->gds.oa_size = 16;
7092     adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7093 }
7094 
7095 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7096                          u32 bitmap)
7097 {
7098     u32 data;
7099 
7100     if (!bitmap)
7101         return;
7102 
7103     data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7104     data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7105 
7106     WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7107 }
7108 
7109 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7110 {
7111     u32 data, mask;
7112 
7113     data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7114         RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7115 
7116     mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7117 
7118     return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7119 }
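/*
 * Editor's note: CC_GC_SHADER_ARRAY_CONFIG carries the fuse-disabled
 * (harvested) CUs and GC_USER_SHADER_ARRAY_CONFIG the user/driver
 * disabled ones; OR-ing them gives all inactive CUs. Inverting and
 * masking to max_cu_per_sh yields the active bitmap, e.g. with 8 CUs
 * per SH and an inactive mask of 0x05 the result is ~0x05 & 0xff = 0xfa.
 */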
7120 
7121 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7122 {
7123     int i, j, k, counter, active_cu_number = 0;
7124     u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7125     struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7126     unsigned disable_masks[4 * 2];
7127     u32 ao_cu_num;
7128 
7129     memset(cu_info, 0, sizeof(*cu_info));
7130 
7131     if (adev->flags & AMD_IS_APU)
7132         ao_cu_num = 2;
7133     else
7134         ao_cu_num = adev->gfx.config.max_cu_per_sh;
7135 
7136     amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7137 
7138     mutex_lock(&adev->grbm_idx_mutex);
7139     for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7140         for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7141             mask = 1;
7142             ao_bitmap = 0;
7143             counter = 0;
7144             gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7145             if (i < 4 && j < 2)
7146                 gfx_v8_0_set_user_cu_inactive_bitmap(
7147                     adev, disable_masks[i * 2 + j]);
7148             bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7149             cu_info->bitmap[i][j] = bitmap;
7150 
7151             for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7152                 if (bitmap & mask) {
7153                     if (counter < ao_cu_num)
7154                         ao_bitmap |= mask;
7155                     counter++;
7156                 }
7157                 mask <<= 1;
7158             }
7159             active_cu_number += counter;
7160             if (i < 2 && j < 2)
7161                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7162             cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7163         }
7164     }
7165     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7166     mutex_unlock(&adev->grbm_idx_mutex);
7167 
7168     cu_info->number = active_cu_number;
7169     cu_info->ao_cu_mask = ao_cu_mask;
7170     cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7171     cu_info->max_waves_per_simd = 10;
7172     cu_info->max_scratch_slots_per_cu = 32;
7173     cu_info->wave_front_size = 64;
7174     cu_info->lds_size = 64;
7175 }
7176 
7177 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7178 {
7179     .type = AMD_IP_BLOCK_TYPE_GFX,
7180     .major = 8,
7181     .minor = 0,
7182     .rev = 0,
7183     .funcs = &gfx_v8_0_ip_funcs,
7184 };
7185 
7186 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7187 {
7188     .type = AMD_IP_BLOCK_TYPE_GFX,
7189     .major = 8,
7190     .minor = 1,
7191     .rev = 0,
7192     .funcs = &gfx_v8_0_ip_funcs,
7193 };
7194 
7195 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7196 {
7197     uint64_t ce_payload_addr;
7198     int cnt_ce;
7199     union {
7200         struct vi_ce_ib_state regular;
7201         struct vi_ce_ib_state_chained_ib chained;
7202     } ce_payload = {};
7203 
7204     if (ring->adev->virt.chained_ib_support) {
7205         ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7206             offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7207         cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7208     } else {
7209         ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7210             offsetof(struct vi_gfx_meta_data, ce_payload);
7211         cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7212     }
7213 
7214     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7215     amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7216                 WRITE_DATA_DST_SEL(8) |
7217                 WR_CONFIRM) |
7218                 WRITE_DATA_CACHE_POLICY(0));
7219     amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7220     amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7221     amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7222 }
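/*
 * Editor's note: the cnt_ce arithmetic sizes the WRITE_DATA packet:
 * (sizeof(payload) >> 2) converts the CE metadata struct to DWs, and
 * "+ 4 - 2" accounts for the control and address DWs relative to the
 * PM4 count convention (count = total packet DWs - 2). The same pattern
 * is used for the DE metadata below. ENGINE_SEL selects which CP engine
 * performs the write (2 for the CE payload here, 1 for the DE payload
 * below); the DST_SEL(8) destination encoding is taken as-is from this
 * driver.
 */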
7223 
7224 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7225 {
7226     uint64_t de_payload_addr, gds_addr, csa_addr;
7227     int cnt_de;
7228     union {
7229         struct vi_de_ib_state regular;
7230         struct vi_de_ib_state_chained_ib chained;
7231     } de_payload = {};
7232 
7233     csa_addr = amdgpu_csa_vaddr(ring->adev);
7234     gds_addr = csa_addr + 4096;
7235     if (ring->adev->virt.chained_ib_support) {
7236         de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7237         de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7238         de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7239         cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7240     } else {
7241         de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7242         de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7243         de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7244         cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7245     }
7246 
7247     amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7248     amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7249                 WRITE_DATA_DST_SEL(8) |
7250                 WR_CONFIRM) |
7251                 WRITE_DATA_CACHE_POLICY(0));
7252     amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7253     amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7254     amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7255 }