0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023 #include "amdgpu.h"
0024 #include "sdma/sdma_4_4_0_offset.h"
0025 #include "sdma/sdma_4_4_0_sh_mask.h"
0026 #include "soc15.h"
0027 #include "amdgpu_ras.h"
0028
/*
 * Per-instance register block offsets.  sdma_v4_4_get_reg_offset() adds one
 * of these to the SDMA0 register base to reach the registers of SDMA
 * instances 1-4; instance 0 uses the base unmodified.
 * NOTE(review): units (dwords vs. bytes) are not visible here - confirm
 * against the register headers before reusing these values elsewhere.
 */
#define SDMA1_REG_OFFSET 0x600
#define SDMA2_REG_OFFSET 0x1cda0
#define SDMA3_REG_OFFSET 0x1d1a0
#define SDMA4_REG_OFFSET 0x1d5a0
0033
0034
0035
0036 static uint32_t sdma_v4_4_get_reg_offset(struct amdgpu_device *adev,
0037 uint32_t instance,
0038 uint32_t offset)
0039 {
0040 uint32_t sdma_base = adev->reg_offset[SDMA0_HWIP][0][0];
0041
0042 switch (instance) {
0043 case 0:
0044 return (sdma_base + offset);
0045 case 1:
0046 return (sdma_base + SDMA1_REG_OFFSET + offset);
0047 case 2:
0048 return (sdma_base + SDMA2_REG_OFFSET + offset);
0049 case 3:
0050 return (sdma_base + SDMA3_REG_OFFSET + offset);
0051 case 4:
0052 return (sdma_base + SDMA4_REG_OFFSET + offset);
0053 default:
0054 break;
0055 }
0056 return 0;
0057 }
0058
0059 static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = {
0060 { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0061 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
0062 0, 0,
0063 },
0064 { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0065 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
0066 0, 0,
0067 },
0068 { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0069 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
0070 0, 0,
0071 },
0072 { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0073 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
0074 0, 0,
0075 },
0076 { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0077 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
0078 0, 0,
0079 },
0080 { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0081 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
0082 0, 0,
0083 },
0084 { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0085 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
0086 0, 0,
0087 },
0088 { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0089 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
0090 0, 0,
0091 },
0092 { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0093 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
0094 0, 0,
0095 },
0096 { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0097 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
0098 0, 0,
0099 },
0100 { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0101 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
0102 0, 0,
0103 },
0104 { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0105 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
0106 0, 0,
0107 },
0108 { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0109 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
0110 0, 0,
0111 },
0112 { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0113 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
0114 0, 0,
0115 },
0116 { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0117 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
0118 0, 0,
0119 },
0120 { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
0121 SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
0122 0, 0,
0123 },
0124 { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
0125 SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UCODE_BUF_SED),
0126 0, 0,
0127 },
0128 { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
0129 SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_RB_CMD_BUF_SED),
0130 0, 0,
0131 },
0132 { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
0133 SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_IB_CMD_BUF_SED),
0134 0, 0,
0135 },
0136 { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
0137 SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RD_FIFO_SED),
0138 0, 0,
0139 },
0140 { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
0141 SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RDBST_FIFO_SED),
0142 0, 0,
0143 },
0144 { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
0145 SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_DATA_LUT_FIFO_SED),
0146 0, 0,
0147 },
0148 { "SDMA_SPLIT_DATA_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
0149 SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_SPLIT_DATA_BUF_SED),
0150 0, 0,
0151 },
0152 { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
0153 SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
0154 0, 0,
0155 },
0156 { "SDMA_MC_RDRET_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
0157 SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
0158 0, 0,
0159 },
0160 };
0161
0162 static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
0163 uint32_t reg_offset,
0164 uint32_t value,
0165 uint32_t instance,
0166 uint32_t *sec_count)
0167 {
0168 uint32_t i;
0169 uint32_t sec_cnt;
0170
0171
0172 for (i = 0; i < ARRAY_SIZE(sdma_v4_4_ras_fields); i++) {
0173 if (sdma_v4_4_ras_fields[i].reg_offset != reg_offset)
0174 continue;
0175
0176
0177
0178
0179 sec_cnt = (value &
0180 sdma_v4_4_ras_fields[i].sec_count_mask) >>
0181 sdma_v4_4_ras_fields[i].sec_count_shift;
0182 if (sec_cnt) {
0183 dev_info(adev->dev, "Detected %s in SDMA%d, SED %d\n",
0184 sdma_v4_4_ras_fields[i].name,
0185 instance, sec_cnt);
0186 *sec_count += sec_cnt;
0187 }
0188 }
0189 }
0190
0191 static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
0192 uint32_t instance,
0193 void *ras_error_status)
0194 {
0195 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
0196 uint32_t sec_count = 0;
0197 uint32_t reg_value = 0;
0198 uint32_t reg_offset = 0;
0199
0200 reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER);
0201 reg_value = RREG32(reg_offset);
0202
0203 if (reg_value)
0204 sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER, reg_value,
0205 instance, &sec_count);
0206
0207 reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER2);
0208 reg_value = RREG32(reg_offset);
0209
0210 if (reg_value)
0211 sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER2, reg_value,
0212 instance, &sec_count);
0213
0214
0215
0216
0217
0218
0219
0220
0221 err_data->ue_count += sec_count;
0222
0223
0224
0225
0226
0227 err_data->ce_count = 0;
0228
0229 return 0;
0230 };
0231
0232 static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
0233 {
0234 int i;
0235 uint32_t reg_offset;
0236
0237
0238 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
0239 for (i = 0; i < adev->sdma.num_instances; i++) {
0240 reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER);
0241 WREG32(reg_offset, 0);
0242 reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER2);
0243 WREG32(reg_offset, 0);
0244 }
0245 }
0246 }
0247
0248 static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
0249 {
0250 int i = 0;
0251
0252 for (i = 0; i < adev->sdma.num_instances; i++) {
0253 if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status)) {
0254 dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i);
0255 return;
0256 }
0257 }
0258
0259 }
0260
0261 const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = {
0262 .query_ras_error_count = sdma_v4_4_query_ras_error_count,
0263 .reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
0264 };
0265
0266 struct amdgpu_sdma_ras sdma_v4_4_ras = {
0267 .ras_block = {
0268 .hw_ops = &sdma_v4_4_ras_hw_ops,
0269 },
0270 };