/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "umc_v8_7.h"
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h"

#include "rsmu/rsmu_0_0_2_offset.h"
#include "rsmu/rsmu_0_0_2_sh_mask.h"
#include "umc/umc_8_7_0_offset.h"
#include "umc/umc_8_7_0_sh_mask.h"

#define UMC_8_INST_DIST         0x40000

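/* per-die channel index for each {UMC instance, channel instance} pair,
 * used when translating a UMC channel address to a SOC physical address
 */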
const uint32_t
    umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM] = {
        {2, 11},  {4, 13},
        {1, 8},   {7, 14},
        {10, 3},  {12, 5},
        {9, 0},   {15, 6}
};

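/* register offset of a channel instance within a UMC instance */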
static inline uint32_t get_umc_v8_7_reg_offset(struct amdgpu_device *adev,
                        uint32_t umc_inst,
                        uint32_t ch_inst)
{
    return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst;
}

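/* count one correctable error when the cached MCA status word for this
 * channel reports a valid CECC error
 */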
static void umc_v8_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
                        uint32_t umc_inst, uint32_t ch_inst,
                        unsigned long *error_count)
{
    uint64_t mc_umc_status;
    uint32_t eccinfo_table_idx;
    struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

    eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;

    /* check for SRAM correctable error
     * MCUMC_STATUS is a 64 bit register
     */
    mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
    if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
        *error_count += 1;
}

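/* count one uncorrectable error when the cached MCA status word for this
 * channel reports a valid Deferred, UECC, PCC, UC or TCC error
 */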
static void umc_v8_7_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
                            uint32_t umc_inst, uint32_t ch_inst,
                            unsigned long *error_count)
{
    uint64_t mc_umc_status;
    uint32_t eccinfo_table_idx;
    struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

    eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;

    /* check the MCUMC_STATUS */
    mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
    if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
        (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
        *error_count += 1;
}

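/* accumulate per-channel CE and UE counts from the ECC status cached in
 * the RAS context (ras->umc_ecc) instead of reading UMC registers
 */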
static void umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
                    void *ras_error_status)
{
    struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

    uint32_t umc_inst        = 0;
    uint32_t ch_inst         = 0;

    /* TODO: driver needs to toggle DF Cstate to ensure
     * safe access of UMC registers. Will add the protection
     * when firmware interface is ready
     */
    LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
        umc_v8_7_ecc_info_query_correctable_error_count(adev,
                            umc_inst, ch_inst,
                            &(err_data->ce_count));
        umc_v8_7_ecc_info_query_uncorrectable_error_count(adev,
                            umc_inst, ch_inst,
                            &(err_data->ue_count));
    }
}

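/* translate the cached error address of a channel to a retired page and
 * record it; only uncorrectable errors are saved
 */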
static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
                    struct ras_err_data *err_data,
                    uint32_t ch_inst,
                    uint32_t umc_inst)
{
    uint64_t mc_umc_status, err_addr, retired_page;
    uint32_t channel_index;
    uint32_t eccinfo_table_idx;
    struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

    eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
    channel_index =
        adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

    mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;

    if (mc_umc_status == 0)
        return;

    if (!err_data->err_addr)
        return;

    /* calculate error address if ue/ce error is detected */
    if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
        (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {

        err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
        err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

        /* translate umc channel address to soc pa, 3 parts are included */
        retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
                ADDR_OF_256B_BLOCK(channel_index) |
                OFFSET_IN_256B_BLOCK(err_addr);

        /* we only save ue error information currently, ce is skipped */
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
                == 1)
            amdgpu_umc_fill_error_record(err_data, err_addr,
                    retired_page, channel_index, umc_inst);
    }
}

static void umc_v8_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
                    void *ras_error_status)
{
    struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

    uint32_t umc_inst        = 0;
    uint32_t ch_inst         = 0;

    /* TODO: driver needs to toggle DF Cstate to ensure
     * safe access of UMC registers. Will add the protection
     * when firmware interface is ready
     */
    LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
        umc_v8_7_ecc_info_query_error_address(adev,
                        err_data,
                        ch_inst,
                        umc_inst);
    }
}

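/* reset the GECC error counters of both chip selects on one channel to
 * their initial value
 */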
static void umc_v8_7_clear_error_count_per_channel(struct amdgpu_device *adev,
                    uint32_t umc_reg_offset)
{
    uint32_t ecc_err_cnt_addr;
    uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;

    ecc_err_cnt_sel_addr =
        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCntSel);
    ecc_err_cnt_addr =
        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCnt);

    /* select the lower chip */
    ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
                    umc_reg_offset) * 4);
    ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
                    UMCCH0_0_GeccErrCntSel,
                    GeccErrCntCsSel, 0);
    WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
            ecc_err_cnt_sel);

    /* clear lower chip error count */
    WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
            UMC_V8_7_CE_CNT_INIT);

    /* select the higher chip */
    ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
                    umc_reg_offset) * 4);
    ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
                    UMCCH0_0_GeccErrCntSel,
                    GeccErrCntCsSel, 1);
    WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
            ecc_err_cnt_sel);

    /* clear higher chip error count */
    WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
            UMC_V8_7_CE_CNT_INIT);
}

static void umc_v8_7_clear_error_count(struct amdgpu_device *adev)
{
    uint32_t umc_inst        = 0;
    uint32_t ch_inst         = 0;
    uint32_t umc_reg_offset  = 0;

    LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
        umc_reg_offset = get_umc_v8_7_reg_offset(adev,
                        umc_inst,
                        ch_inst);

        umc_v8_7_clear_error_count_per_channel(adev,
                        umc_reg_offset);
    }
}

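/* read the GECC error counters of both chip selects on one channel and
 * add the delta above UMC_V8_7_CE_CNT_INIT; an SRAM correctable error
 * flagged in MCUMC_STATUS counts as one more error
 */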
static void umc_v8_7_query_correctable_error_count(struct amdgpu_device *adev,
                           uint32_t umc_reg_offset,
                           unsigned long *error_count)
{
    uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
    uint32_t ecc_err_cnt, ecc_err_cnt_addr;
    uint64_t mc_umc_status;
    uint32_t mc_umc_status_addr;

    /* UMC 8_7_2 registers */
    ecc_err_cnt_sel_addr =
        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCntSel);
    ecc_err_cnt_addr =
        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCnt);
    mc_umc_status_addr =
        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);

    /* select the lower chip and check the error count */
    ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
    ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
                    GeccErrCntCsSel, 0);
    WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

    ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
    *error_count +=
        (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) -
         UMC_V8_7_CE_CNT_INIT);

    /* select the higher chip and check the err counter */
    ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
                    GeccErrCntCsSel, 1);
    WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

    ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
    *error_count +=
        (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) -
         UMC_V8_7_CE_CNT_INIT);

    /* check for SRAM correctable error
     * MCUMC_STATUS is a 64 bit register
     */
    mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
    if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
        *error_count += 1;
}

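/* count one uncorrectable error when MCUMC_STATUS reports a valid
 * Deferred, UECC, PCC, UC or TCC error
 */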
static void umc_v8_7_query_uncorrectable_error_count(struct amdgpu_device *adev,
                              uint32_t umc_reg_offset,
                              unsigned long *error_count)
{
    uint64_t mc_umc_status;
    uint32_t mc_umc_status_addr;

    mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);

    /* check the MCUMC_STATUS */
    mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
    if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
        (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
        *error_count += 1;
}

static void umc_v8_7_query_ras_error_count(struct amdgpu_device *adev,
                       void *ras_error_status)
{
    struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

    uint32_t umc_inst        = 0;
    uint32_t ch_inst         = 0;
    uint32_t umc_reg_offset  = 0;

    LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
        umc_reg_offset = get_umc_v8_7_reg_offset(adev,
                              umc_inst,
                              ch_inst);

        umc_v8_7_query_correctable_error_count(adev,
                               umc_reg_offset,
                               &(err_data->ce_count));
        umc_v8_7_query_uncorrectable_error_count(adev,
                              umc_reg_offset,
                              &(err_data->ue_count));
    }

    umc_v8_7_clear_error_count(adev);
}

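/* read the error address of a channel from MCUMC_ADDRT0, drop the invalid
 * low bits, translate it to a retired page and record uncorrectable
 * errors; MCUMC_STATUS is cleared on exit
 */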
static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
                     struct ras_err_data *err_data,
                     uint32_t umc_reg_offset,
                     uint32_t ch_inst,
                     uint32_t umc_inst)
{
    uint32_t lsb, mc_umc_status_addr;
    uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
    uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

    mc_umc_status_addr =
        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
    mc_umc_addrt0 =
        SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);

    mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);

    if (mc_umc_status == 0)
        return;

    if (!err_data->err_addr) {
        /* clear umc status */
        WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
        return;
    }

    /* calculate error address if ue/ce error is detected */
    if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
        (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
        REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {

        err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
        /* the lowest lsb bits should be ignored */
        lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
        err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
        err_addr &= ~((0x1ULL << lsb) - 1);

        /* translate umc channel address to soc pa, 3 parts are included */
        retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
                ADDR_OF_256B_BLOCK(channel_index) |
                OFFSET_IN_256B_BLOCK(err_addr);

        /* we only save ue error information currently, ce is skipped */
        if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
                == 1)
            amdgpu_umc_fill_error_record(err_data, err_addr,
                    retired_page, channel_index, umc_inst);
    }

    /* clear umc status */
    WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
}

static void umc_v8_7_query_ras_error_address(struct amdgpu_device *adev,
                         void *ras_error_status)
{
    struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

    uint32_t umc_inst        = 0;
    uint32_t ch_inst         = 0;
    uint32_t umc_reg_offset  = 0;

    LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
        umc_reg_offset = get_umc_v8_7_reg_offset(adev,
                              umc_inst,
                              ch_inst);

        umc_v8_7_query_error_address(adev,
                         err_data,
                         umc_reg_offset,
                         ch_inst,
                         umc_inst);
    }
}

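/* route correctable-error interrupts of both chip selects to the APIC and
 * preset their GECC error counters to UMC_V8_7_CE_CNT_INIT
 */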
static void umc_v8_7_err_cnt_init_per_channel(struct amdgpu_device *adev,
                          uint32_t umc_reg_offset)
{
    uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
    uint32_t ecc_err_cnt_addr;

    ecc_err_cnt_sel_addr =
        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCntSel);
    ecc_err_cnt_addr =
        SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_GeccErrCnt);

    /* select the lower chip and check the error count */
    ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
    ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
                    GeccErrCntCsSel, 0);
    /* set ce error interrupt type to APIC based interrupt */
    ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
                    GeccErrInt, 0x1);
    WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
    /* set error count to initial value */
    WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_7_CE_CNT_INIT);

    /* select the higher chip and check the err counter */
    ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
                    GeccErrCntCsSel, 1);
    WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
    WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_7_CE_CNT_INIT);
}

static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev)
{
    uint32_t umc_inst        = 0;
    uint32_t ch_inst         = 0;
    uint32_t umc_reg_offset  = 0;

    LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
        umc_reg_offset = get_umc_v8_7_reg_offset(adev,
                              umc_inst,
                              ch_inst);

        umc_v8_7_err_cnt_init_per_channel(adev, umc_reg_offset);
    }
}

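/* entry points exported to the amdgpu RAS framework */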
const struct amdgpu_ras_block_hw_ops umc_v8_7_ras_hw_ops = {
    .query_ras_error_count = umc_v8_7_query_ras_error_count,
    .query_ras_error_address = umc_v8_7_query_ras_error_address,
};

struct amdgpu_umc_ras umc_v8_7_ras = {
    .ras_block = {
        .hw_ops = &umc_v8_7_ras_hw_ops,
    },
    .err_cnt_init = umc_v8_7_err_cnt_init,
    .ecc_info_query_ras_error_count = umc_v8_7_ecc_info_query_ras_error_count,
    .ecc_info_query_ras_error_address = umc_v8_7_ecc_info_query_ras_error_address,
};