/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "umc_v6_7.h"
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h"

#include "umc/umc_6_7_0_offset.h"
#include "umc/umc_6_7_0_sh_mask.h"

const uint32_t
	umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
		{28, 20, 24, 16, 12, 4, 8, 0},
		{6, 30, 2, 26, 22, 14, 18, 10},
		{19, 11, 15, 7, 3, 27, 31, 23},
		{9, 1, 5, 29, 25, 17, 21, 13}
};
const uint32_t
	umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
		{19, 11, 15, 7, 3, 27, 31, 23},
		{9, 1, 5, 29, 25, 17, 21, 13},
		{28, 20, 24, 16, 12, 4, 8, 0},
		{6, 30, 2, 26, 22, 14, 18, 10},
};
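
/*
 * Reading aid: both tables map (umc instance, channel instance) to the global
 * channel index used for address translation. They contain the same four
 * groups of eight channel indices, assigned to different umc instances; which
 * table is installed in adev->umc.channel_idx_tbl is decided at init time
 * (assumption: based on the board's memory configuration, hence the "first"
 * and "second" naming).
 */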

static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev,
					      uint32_t umc_inst,
					      uint32_t ch_inst)
{
	uint32_t index = umc_inst * adev->umc.channel_inst_num + ch_inst;

	/* adjust umc and channel index offset,
	 * the register address is not linear on each umc instance */
	umc_inst = index / 4;
	ch_inst = index % 4;

	return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst;
}
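
/*
 * Illustrative example of the remapping above (inputs assumed for the sake of
 * the arithmetic; channel_inst_num == 8 matches UMC_V6_7_CHANNEL_INSTANCE_NUM
 * per the tables above): umc_inst == 1 and ch_inst == 3 give
 * index = 1 * 8 + 3 = 11, which remaps to register instance 11 / 4 = 2 and
 * register channel 11 % 4 = 3, i.e. an offset of
 * adev->umc.channel_offs * 3 + UMC_V6_7_INST_DIST * 2.
 */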

static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev,
					      uint32_t umc_inst,
					      uint32_t ch_inst)
{
	return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
}

static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
					       uint64_t mc_umc_status, uint32_t umc_reg_offset)
{
	uint32_t mc_umc_addr;
	uint64_t reg_value;

	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
		dev_info(adev->dev, "Deferred error, no user action is needed.\n");

	if (mc_umc_status)
		dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);

	/* print the IPID register value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);

	/* print the SYND register value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);

	/* print the MISC0 register value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
}

static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
						   uint32_t umc_inst, uint32_t ch_inst,
						   unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t eccinfo_table_idx;
	uint32_t umc_reg_offset;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	umc_reg_offset = get_umc_v6_7_reg_offset(adev,
						umc_inst, ch_inst);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;

	/* check for SRAM correctable error;
	 * MCUMC_STATUS is a 64-bit register */
	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);

		if (ras->umc_ecc.record_ce_addr_supported) {
			uint64_t err_addr, soc_pa;
			uint32_t channel_index =
				adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

			err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr;
			err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

			/* translate the umc channel address to a soc
			 * physical address, composed of three parts */
			soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
					ADDR_OF_256B_BLOCK(channel_index) |
					OFFSET_IN_256B_BLOCK(err_addr);

			/* the umc channel bits are not original values, they are hashed */
			SET_CHANNEL_HASH(channel_index, soc_pa);

			dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
		}
	}
}
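
/*
 * Note: the ecc_info_* functions in this file read the error data cached in
 * ras->umc_ecc (assumption: pre-populated for configurations where the MCA
 * registers cannot be read directly) instead of reading the registers over
 * PCIE as the register-based query paths further below do.
 */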

static void umc_v6_7_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
						      uint32_t umc_inst, uint32_t ch_inst,
						      unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t eccinfo_table_idx;
	uint32_t umc_reg_offset;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	umc_reg_offset = get_umc_v6_7_reg_offset(adev,
						umc_inst, ch_inst);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;

	/* check the MCUMC_STATUS */
	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
	}
}

static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst = 0;
	uint32_t ch_inst = 0;

	/* sum up the correctable and uncorrectable error counts
	 * across all umc instances and channels */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_v6_7_ecc_info_query_correctable_error_count(adev,
							umc_inst, ch_inst,
							&(err_data->ce_count));
		umc_v6_7_ecc_info_query_uncorrectable_error_count(adev,
							umc_inst, ch_inst,
							&(err_data->ue_count));
	}
}

static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
					 struct ras_err_data *err_data,
					 uint32_t ch_inst,
					 uint32_t umc_inst)
{
	uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column;
	uint32_t channel_index;
	uint32_t eccinfo_table_idx;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
	channel_index =
		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;

	if (mc_umc_status == 0)
		return;

	if (!err_data->err_addr)
		return;

	/* calculate the error address if a ue/ce error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {

		err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

		/* translate the umc channel address to a soc
		 * physical address, composed of three parts */
		soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
				ADDR_OF_256B_BLOCK(channel_index) |
				OFFSET_IN_256B_BLOCK(err_addr);

		/* the umc channel bits are not original values, they are hashed */
		SET_CHANNEL_HASH(channel_index, soc_pa);

		/* clear [C4 C3 C2] in the soc physical address */
		soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);

		/* only uncorrectable errors are recorded currently, ce is skipped */
		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
				== 1) {
			/* loop over all possibilities of [C4 C3 C2] */
			for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
				retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
				dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
				amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);

				/* shift the R14 bit */
				retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
				dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
				amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);
			}
		}
	}
}
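
/*
 * Sketch of the translation above, as a reading aid (the exact bit positions
 * are whatever the ADDR_OF_8KB_BLOCK, ADDR_OF_256B_BLOCK, OFFSET_IN_256B_BLOCK
 * and SET_CHANNEL_HASH macros define; only the structure is shown here):
 *
 *	soc_pa = (8KB-block bits of the normalized error address)
 *	       | (256B block selected by the hashed channel index)
 *	       | (byte offset within the 256B block);
 *
 * Because one normalized address covers several physical pages, a ue retires
 * UMC_V6_7_NA_MAP_PA_NUM * 2 pages: one per [C4 C3 C2] column value, plus the
 * sibling page of each with bit R14 flipped.
 */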

static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
					     void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst = 0;
	uint32_t ch_inst = 0;

	/* collect the error addresses of all umc instances
	 * and channels into the error record table */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_v6_7_ecc_info_query_error_address(adev,
					     err_data,
					     ch_inst,
					     umc_inst);
	}
}

static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
						   uint32_t umc_reg_offset,
						   unsigned long *error_count,
						   uint32_t ch_inst,
						   uint32_t umc_inst)
{
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t ecc_err_cnt, ecc_err_cnt_addr;
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	ecc_err_cnt_sel_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCntSel);
	ecc_err_cnt_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCnt);
	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);

	/* select the lower chip and check the error count */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_7_CE_CNT_INIT);

	/* select the higher chip and check the error count */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_7_CE_CNT_INIT);

	/* check for SRAM correctable error;
	 * MCUMC_STATUS is a 64-bit register */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);

		{
			uint64_t err_addr, soc_pa;
			uint32_t mc_umc_addrt0;
			uint32_t channel_index;

			mc_umc_addrt0 =
				SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);

			channel_index =
				adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

			err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
			err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

			/* translate the umc channel address to a soc
			 * physical address, composed of three parts */
			soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
					ADDR_OF_256B_BLOCK(channel_index) |
					OFFSET_IN_256B_BLOCK(err_addr);

			/* the umc channel bits are not original values, they are hashed */
			SET_CHANNEL_HASH(channel_index, soc_pa);

			dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
		}
	}
}

static void umc_v6_7_query_uncorrectable_error_count(struct amdgpu_device *adev,
						      uint32_t umc_reg_offset,
						      unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);

	/* check the MCUMC_STATUS */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
	}
}

static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
					uint32_t umc_reg_offset)
{
	uint32_t ecc_err_cnt_addr;
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;

	ecc_err_cnt_sel_addr =
		SOC15_REG_OFFSET(UMC, 0,
				regUMCCH0_0_EccErrCntSel);
	ecc_err_cnt_addr =
		SOC15_REG_OFFSET(UMC, 0,
				regUMCCH0_0_EccErrCnt);

	/* select the lower chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear the lower chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_7_CE_CNT_INIT);

	/* select the higher chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear the higher chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_7_CE_CNT_INIT);
}

static void umc_v6_7_reset_error_count(struct amdgpu_device *adev)
{
	uint32_t umc_inst = 0;
	uint32_t ch_inst = 0;
	uint32_t umc_reg_offset = 0;

	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
							umc_inst,
							ch_inst);

		umc_v6_7_reset_error_count_per_channel(adev,
						umc_reg_offset);
	}
}

static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst = 0;
	uint32_t ch_inst = 0;
	uint32_t umc_reg_offset = 0;

	/* sum up the correctable and uncorrectable error counts
	 * across all umc instances and channels */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
							 umc_inst,
							 ch_inst);
		umc_v6_7_query_correctable_error_count(adev,
						       umc_reg_offset,
						       &(err_data->ce_count),
						       ch_inst, umc_inst);
		umc_v6_7_query_uncorrectable_error_count(adev,
							  umc_reg_offset,
							  &(err_data->ue_count));
	}

	umc_v6_7_reset_error_count(adev);
}

static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
					 struct ras_err_data *err_data,
					 uint32_t umc_reg_offset,
					 uint32_t ch_inst,
					 uint32_t umc_inst)
{
	uint32_t mc_umc_status_addr;
	uint32_t channel_index;
	uint64_t mc_umc_status, mc_umc_addrt0;
	uint64_t err_addr, soc_pa, retired_page, column;

	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
	mc_umc_addrt0 =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);

	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);

	if (mc_umc_status == 0)
		return;

	if (!err_data->err_addr) {
		/* clear umc status */
		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
		return;
	}

	channel_index =
		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

	/* calculate the error address if a ue/ce error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {

		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

		/* translate the umc channel address to a soc
		 * physical address, composed of three parts */
		soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
				ADDR_OF_256B_BLOCK(channel_index) |
				OFFSET_IN_256B_BLOCK(err_addr);

		/* the umc channel bits are not original values, they are hashed */
		SET_CHANNEL_HASH(channel_index, soc_pa);

		/* clear [C4 C3 C2] in the soc physical address */
		soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);

		/* only uncorrectable errors are recorded currently, ce is skipped */
		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
				== 1) {
			/* loop over all possibilities of [C4 C3 C2] */
			for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
				retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
				dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
				amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);

				/* shift the R14 bit */
				retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
				dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
				amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);
			}
		}
	}

	/* clear umc status */
	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
}

static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
					     void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst = 0;
	uint32_t ch_inst = 0;
	uint32_t umc_reg_offset = 0;

	/* collect the error addresses of all umc instances
	 * and channels into the error record table */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
							 umc_inst,
							 ch_inst);
		umc_v6_7_query_error_address(adev,
					     err_data,
					     umc_reg_offset,
					     ch_inst,
					     umc_inst);
	}
}

static uint32_t umc_v6_7_query_ras_poison_mode_per_channel(
						struct amdgpu_device *adev,
						uint32_t umc_reg_offset)
{
	uint32_t ecc_ctrl_addr, ecc_ctrl;

	ecc_ctrl_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccCtrl);
	ecc_ctrl = RREG32_PCIE((ecc_ctrl_addr +
					umc_reg_offset) * 4);

	return REG_GET_FIELD(ecc_ctrl, UMCCH0_0_EccCtrl, UCFatalEn);
}

static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev)
{
	uint32_t umc_reg_offset = 0;

	/* fatal error enabled in umc instance 0 channel 0 is taken
	 * as fatal error mode, so checking one channel is sufficient */
	umc_reg_offset = get_umc_v6_7_reg_offset(adev, 0, 0);
	return !umc_v6_7_query_ras_poison_mode_per_channel(adev, umc_reg_offset);
}
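
/*
 * Interpretation note (inferred from the code above, not from hardware docs):
 * a true return means UCFatalEn is clear, i.e. uncorrectable errors surface
 * as poison (consumed-data) events rather than immediately-fatal errors.
 */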

const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = {
	.query_ras_error_count = umc_v6_7_query_ras_error_count,
	.query_ras_error_address = umc_v6_7_query_ras_error_address,
};

struct amdgpu_umc_ras umc_v6_7_ras = {
	.ras_block = {
		.hw_ops = &umc_v6_7_ras_hw_ops,
	},
	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
	.ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
	.ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
};
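
/*
 * Usage sketch, for orientation only (the real call sites live in the amdgpu
 * RAS core, not in this file; error handling is simplified and err_data.err_addr
 * must be pre-allocated if address records are wanted):
 *
 *	struct ras_err_data err_data = { 0 };
 *
 *	// counts CE/UE across all channels, then resets the hw counters
 *	adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, &err_data);
 *	// fills err_data.err_addr with the pages to retire
 *	adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, &err_data);
 *
 *	dev_info(adev->dev, "ce %lu, ue %lu\n",
 *		 err_data.ce_count, err_data.ue_count);
 */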