0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #include <linux/ctype.h>
0011 #include <linux/edac.h>
0012 #include <linux/interrupt.h>
0013 #include <linux/mfd/syscon.h>
0014 #include <linux/module.h>
0015 #include <linux/of.h>
0016 #include <linux/of_address.h>
0017 #include <linux/regmap.h>
0018
0019 #include "edac_module.h"
0020
#define EDAC_MOD_STR			"xgene_edac"

/*
 * Register offsets relative to the PCP CSR base (edac->pcp_csr), together
 * with the per-source bits that are tested in the status registers and
 * set/cleared in the matching mask registers.
 */

/* Status/mask pair read into "pcp_hp_stat" by the check routines */
#define PCPHPERRINTSTS			0x0000
#define PCPHPERRINTMSK			0x0004
#define  MCU_CTL_ERR_MASK		BIT(12)
#define  IOB_PA_ERR_MASK		BIT(11)
#define  IOB_BA_ERR_MASK		BIT(10)
#define  IOB_XGIC_ERR_MASK		BIT(9)
#define  IOB_RB_ERR_MASK		BIT(8)
#define  L3C_UNCORR_ERR_MASK		BIT(5)
#define  MCU_UNCORR_ERR_MASK		BIT(4)
#define  PMD3_MERR_MASK			BIT(3)
#define  PMD2_MERR_MASK			BIT(2)
#define  PMD1_MERR_MASK			BIT(1)
#define  PMD0_MERR_MASK			BIT(0)

/* Status/mask pair read into "pcp_lp_stat" by the check routines */
#define PCPLPERRINTSTS			0x0008
#define PCPLPERRINTMSK			0x000C
#define  CSW_SWITCH_TRACE_ERR_MASK	BIT(2)
#define  L3C_CORR_ERR_MASK		BIT(1)
#define  MCU_CORR_ERR_MASK		BIT(0)

#define MEMERRINTSTS			0x0010
#define MEMERRINTMSK			0x0014
0044
0045 struct xgene_edac {
0046 struct device *dev;
0047 struct regmap *csw_map;
0048 struct regmap *mcba_map;
0049 struct regmap *mcbb_map;
0050 struct regmap *efuse_map;
0051 struct regmap *rb_map;
0052 void __iomem *pcp_csr;
0053 spinlock_t lock;
0054 struct dentry *dfs;
0055
0056 struct list_head mcus;
0057 struct list_head pmds;
0058 struct list_head l3s;
0059 struct list_head socs;
0060
0061 struct mutex mc_lock;
0062 int mc_active_mask;
0063 int mc_registered_mask;
0064 };
0065
0066 static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
0067 {
0068 *val = readl(edac->pcp_csr + reg);
0069 }
0070
0071 static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
0072 u32 bits_mask)
0073 {
0074 u32 val;
0075
0076 spin_lock(&edac->lock);
0077 val = readl(edac->pcp_csr + reg);
0078 val &= ~bits_mask;
0079 writel(val, edac->pcp_csr + reg);
0080 spin_unlock(&edac->lock);
0081 }
0082
0083 static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
0084 u32 bits_mask)
0085 {
0086 u32 val;
0087
0088 spin_lock(&edac->lock);
0089 val = readl(edac->pcp_csr + reg);
0090 val |= bits_mask;
0091 writel(val, edac->pcp_csr + reg);
0092 spin_unlock(&edac->lock);
0093 }
0094
0095
/* Memory controller unit (MCU): per-rank registers repeat every stride */
#define MCU_MAX_RANK			8
#define MCU_RANK_STRIDE			0x40

/* MCU-wide control register and the interrupt enables toggled in it */
#define MCUGECR				0x0110
#define  MCU_GECR_DEMANDUCINTREN_MASK	BIT(0)
#define  MCU_GECR_BACKUCINTREN_MASK	BIT(1)
#define  MCU_GECR_CINTREN_MASK		BIT(2)
#define  MUC_GECR_MCUADDRERREN_MASK	BIT(9)

/* MCU-wide status register */
#define MCUGESR				0x0114
#define  MCU_GESR_ADDRNOMATCH_ERR_MASK	BIT(7)
#define  MCU_GESR_ADDRMULTIMATCH_ERR_MASK	BIT(6)
#define  MCU_GESR_PHYP_ERR_MASK		BIT(3)

/* Rank 0 error status register; rank N is at + N * MCU_RANK_STRIDE */
#define MCUESRR0			0x0314
#define  MCU_ESRR_MULTUCERR_MASK	BIT(3)
#define  MCU_ESRR_BACKUCERR_MASK	BIT(2)
#define  MCU_ESRR_DEMANDUCERR_MASK	BIT(1)
#define  MCU_ESRR_CERR_MASK		BIT(0)
/* Written by the debugfs injection hook with the MCU_ESRR_* bits */
#define MCUESRRA0			0x0318

/* Rank 0 error location / count registers and their field extractors */
#define MCUEBLRR0			0x031c
#define  MCU_EBLRR_ERRBANK_RD(src)	(((src) & 0x00000007) >> 0)
#define MCUERCRR0			0x0320
#define  MCU_ERCRR_ERRROW_RD(src)	(((src) & 0xFFFF0000) >> 16)
#define  MCU_ERCRR_ERRCOL_RD(src)	((src) & 0x00000FFF)
#define MCUSBECNT0			0x0324
#define  MCU_SBECNT_COUNT(src)		((src) & 0xFFFF)

/* CSW register (via csw_map) telling whether both MCBs are in use */
#define CSW_CSWCR			0x0000
#define  CSW_CSWCR_DUALMCB_MASK		BIT(0)

/* MCB address mode register (via mcba_map / mcbb_map) */
#define MCBADDRMR			0x0000
#define  MCBADDRMR_MCU_INTLV_MODE_MASK	BIT(3)
#define  MCBADDRMR_DUALMCU_MODE_MASK	BIT(2)
#define  MCBADDRMR_MCB_INTLV_MODE_MASK	BIT(1)
#define  MCBADDRMR_ADDRESS_MODE_MASK	BIT(0)
0130
0131 struct xgene_edac_mc_ctx {
0132 struct list_head next;
0133 char *name;
0134 struct mem_ctl_info *mci;
0135 struct xgene_edac *edac;
0136 void __iomem *mcu_csr;
0137 u32 mcu_id;
0138 };
0139
0140 static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
0141 const char __user *data,
0142 size_t count, loff_t *ppos)
0143 {
0144 struct mem_ctl_info *mci = file->private_data;
0145 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
0146 int i;
0147
0148 for (i = 0; i < MCU_MAX_RANK; i++) {
0149 writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
0150 MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
0151 ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
0152 }
0153 return count;
0154 }
0155
0156 static const struct file_operations xgene_edac_mc_debug_inject_fops = {
0157 .open = simple_open,
0158 .write = xgene_edac_mc_err_inject_write,
0159 .llseek = generic_file_llseek,
0160 };
0161
0162 static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
0163 {
0164 if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
0165 return;
0166
0167 if (!mci->debugfs)
0168 return;
0169
0170 edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
0171 &xgene_edac_mc_debug_inject_fops);
0172 }
0173
0174 static void xgene_edac_mc_check(struct mem_ctl_info *mci)
0175 {
0176 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
0177 unsigned int pcp_hp_stat;
0178 unsigned int pcp_lp_stat;
0179 u32 reg;
0180 u32 rank;
0181 u32 bank;
0182 u32 count;
0183 u32 col_row;
0184
0185 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
0186 xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
0187 if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
0188 (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
0189 (MCU_CORR_ERR_MASK & pcp_lp_stat)))
0190 return;
0191
0192 for (rank = 0; rank < MCU_MAX_RANK; rank++) {
0193 reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
0194
0195
0196 if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
0197 MCU_ESRR_BACKUCERR_MASK)) {
0198
0199 edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
0200 "MCU uncorrectable error at rank %d\n", rank);
0201
0202 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
0203 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
0204 }
0205
0206
0207 if (reg & MCU_ESRR_CERR_MASK) {
0208 bank = readl(ctx->mcu_csr + MCUEBLRR0 +
0209 rank * MCU_RANK_STRIDE);
0210 col_row = readl(ctx->mcu_csr + MCUERCRR0 +
0211 rank * MCU_RANK_STRIDE);
0212 count = readl(ctx->mcu_csr + MCUSBECNT0 +
0213 rank * MCU_RANK_STRIDE);
0214 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
0215 "MCU correctable error at rank %d bank %d column %d row %d count %d\n",
0216 rank, MCU_EBLRR_ERRBANK_RD(bank),
0217 MCU_ERCRR_ERRCOL_RD(col_row),
0218 MCU_ERCRR_ERRROW_RD(col_row),
0219 MCU_SBECNT_COUNT(count));
0220
0221 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
0222 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
0223 }
0224
0225
0226 writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
0227 writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
0228 writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
0229 rank * MCU_RANK_STRIDE);
0230 writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
0231 }
0232
0233
0234 reg = readl(ctx->mcu_csr + MCUGESR);
0235 if (reg) {
0236 if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
0237 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
0238 "MCU address miss-match error\n");
0239 if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
0240 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
0241 "MCU address multi-match error\n");
0242
0243 writel(reg, ctx->mcu_csr + MCUGESR);
0244 }
0245 }
0246
0247 static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
0248 {
0249 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
0250 unsigned int val;
0251
0252 if (edac_op_state != EDAC_OPSTATE_INT)
0253 return;
0254
0255 mutex_lock(&ctx->edac->mc_lock);
0256
0257
0258
0259
0260
0261
0262
0263
0264
0265 if (enable) {
0266
0267 ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;
0268
0269
0270 if (ctx->edac->mc_registered_mask ==
0271 ctx->edac->mc_active_mask) {
0272
0273 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
0274 MCU_UNCORR_ERR_MASK |
0275 MCU_CTL_ERR_MASK);
0276 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
0277 MCU_CORR_ERR_MASK);
0278 }
0279
0280
0281 val = readl(ctx->mcu_csr + MCUGECR);
0282 val |= MCU_GECR_DEMANDUCINTREN_MASK |
0283 MCU_GECR_BACKUCINTREN_MASK |
0284 MCU_GECR_CINTREN_MASK |
0285 MUC_GECR_MCUADDRERREN_MASK;
0286 writel(val, ctx->mcu_csr + MCUGECR);
0287 } else {
0288
0289 val = readl(ctx->mcu_csr + MCUGECR);
0290 val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
0291 MCU_GECR_BACKUCINTREN_MASK |
0292 MCU_GECR_CINTREN_MASK |
0293 MUC_GECR_MCUADDRERREN_MASK);
0294 writel(val, ctx->mcu_csr + MCUGECR);
0295
0296
0297 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
0298 MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
0299 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
0300 MCU_CORR_ERR_MASK);
0301
0302
0303 ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
0304 }
0305
0306 mutex_unlock(&ctx->edac->mc_lock);
0307 }
0308
0309 static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
0310 {
0311 unsigned int reg;
0312 u32 mcu_mask;
0313
0314 if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, ®))
0315 return 0;
0316
0317 if (reg & CSW_CSWCR_DUALMCB_MASK) {
0318
0319
0320
0321
0322 if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, ®))
0323 return 0;
0324 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
0325 } else {
0326
0327
0328
0329
0330 if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, ®))
0331 return 0;
0332 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
0333 }
0334
0335
0336 if (!ctx->edac->mc_active_mask)
0337 ctx->edac->mc_active_mask = mcu_mask;
0338
0339 return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
0340 }
0341
0342 static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
0343 {
0344 struct mem_ctl_info *mci;
0345 struct edac_mc_layer layers[2];
0346 struct xgene_edac_mc_ctx tmp_ctx;
0347 struct xgene_edac_mc_ctx *ctx;
0348 struct resource res;
0349 int rc;
0350
0351 memset(&tmp_ctx, 0, sizeof(tmp_ctx));
0352 tmp_ctx.edac = edac;
0353
0354 if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
0355 return -ENOMEM;
0356
0357 rc = of_address_to_resource(np, 0, &res);
0358 if (rc < 0) {
0359 dev_err(edac->dev, "no MCU resource address\n");
0360 goto err_group;
0361 }
0362 tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
0363 if (IS_ERR(tmp_ctx.mcu_csr)) {
0364 dev_err(edac->dev, "unable to map MCU resource\n");
0365 rc = PTR_ERR(tmp_ctx.mcu_csr);
0366 goto err_group;
0367 }
0368
0369
0370 if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
0371 dev_err(edac->dev, "no memory-controller property\n");
0372 rc = -ENODEV;
0373 goto err_group;
0374 }
0375 if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
0376 rc = -ENODEV;
0377 goto err_group;
0378 }
0379
0380 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
0381 layers[0].size = 4;
0382 layers[0].is_virt_csrow = true;
0383 layers[1].type = EDAC_MC_LAYER_CHANNEL;
0384 layers[1].size = 2;
0385 layers[1].is_virt_csrow = false;
0386 mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
0387 sizeof(*ctx));
0388 if (!mci) {
0389 rc = -ENOMEM;
0390 goto err_group;
0391 }
0392
0393 ctx = mci->pvt_info;
0394 *ctx = tmp_ctx;
0395 ctx->name = "xgene_edac_mc_err";
0396 ctx->mci = mci;
0397 mci->pdev = &mci->dev;
0398 mci->ctl_name = ctx->name;
0399 mci->dev_name = ctx->name;
0400
0401 mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
0402 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
0403 mci->edac_ctl_cap = EDAC_FLAG_SECDED;
0404 mci->edac_cap = EDAC_FLAG_SECDED;
0405 mci->mod_name = EDAC_MOD_STR;
0406 mci->ctl_page_to_phys = NULL;
0407 mci->scrub_cap = SCRUB_FLAG_HW_SRC;
0408 mci->scrub_mode = SCRUB_HW_SRC;
0409
0410 if (edac_op_state == EDAC_OPSTATE_POLL)
0411 mci->edac_check = xgene_edac_mc_check;
0412
0413 if (edac_mc_add_mc(mci)) {
0414 dev_err(edac->dev, "edac_mc_add_mc failed\n");
0415 rc = -EINVAL;
0416 goto err_free;
0417 }
0418
0419 xgene_edac_mc_create_debugfs_node(mci);
0420
0421 list_add(&ctx->next, &edac->mcus);
0422
0423 xgene_edac_mc_irq_ctl(mci, true);
0424
0425 devres_remove_group(edac->dev, xgene_edac_mc_add);
0426
0427 dev_info(edac->dev, "X-Gene EDAC MC registered\n");
0428 return 0;
0429
0430 err_free:
0431 edac_mc_free(mci);
0432 err_group:
0433 devres_release_group(edac->dev, xgene_edac_mc_add);
0434 return rc;
0435 }
0436
0437 static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
0438 {
0439 xgene_edac_mc_irq_ctl(mcu->mci, false);
0440 edac_mc_del_mc(&mcu->mci->dev);
0441 edac_mc_free(mcu->mci);
0442 return 0;
0443 }
0444
0445
/* PMD register layout: one CSR window per CPU, split into pages */
#define MAX_CPU_PER_PMD				2
#define CPU_CSR_STRIDE				0x00100000
#define CPU_L2C_PAGE				0x000D0000
#define CPU_MEMERR_L2C_PAGE			0x000E0000
#define CPU_MEMERR_CPU_PAGE			0x000F0000

/* Offsets inside CPU_MEMERR_CPU_PAGE: ICF error control / status */
#define MEMERR_CPU_ICFECR_PAGE_OFFSET		0x0000
#define MEMERR_CPU_ICFESR_PAGE_OFFSET		0x0004
#define  MEMERR_CPU_ICFESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_ICFESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_ICFESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_ICFESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_ICFESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_ICFESR_CERR_MASK		BIT(0)

/* LSU error control / status */
#define MEMERR_CPU_LSUECR_PAGE_OFFSET		0x0008
#define MEMERR_CPU_LSUESR_PAGE_OFFSET		0x000c
#define  MEMERR_CPU_LSUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_LSUESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_LSUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_LSUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_LSUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_LSUESR_CERR_MASK		BIT(0)

/* MMU error control / status */
#define MEMERR_CPU_MMUECR_PAGE_OFFSET		0x0010
#define MEMERR_CPU_MMUESR_PAGE_OFFSET		0x0014
#define  MEMERR_CPU_MMUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_MMUESR_ERRINDEX_RD(src)	(((src) & 0x007F0000) >> 16)
#define  MEMERR_CPU_MMUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK	BIT(7)
#define  MEMERR_CPU_MMUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_MMUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_MMUESR_CERR_MASK		BIT(0)

/* Registers written by the L1 debugfs injection hook */
#define MEMERR_CPU_ICFESRA_PAGE_OFFSET		0x0804
#define MEMERR_CPU_LSUESRA_PAGE_OFFSET		0x080c
#define MEMERR_CPU_MMUESRA_PAGE_OFFSET		0x0814

/* Offsets inside CPU_MEMERR_L2C_PAGE: L2 error control / status / address */
#define MEMERR_L2C_L2ECR_PAGE_OFFSET		0x0000
#define MEMERR_L2C_L2ESR_PAGE_OFFSET		0x0004
#define  MEMERR_L2C_L2ESR_ERRSYN_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_L2C_L2ESR_ERRWAY_RD(src)	(((src) & 0x00FC0000) >> 18)
#define  MEMERR_L2C_L2ESR_ERRCPU_RD(src)	(((src) & 0x00020000) >> 17)
#define  MEMERR_L2C_L2ESR_ERRGROUP_RD(src)	(((src) & 0x0000E000) >> 13)
#define  MEMERR_L2C_L2ESR_ERRACTION_RD(src)	(((src) & 0x00001C00) >> 10)
#define  MEMERR_L2C_L2ESR_ERRTYPE_RD(src)	(((src) & 0x00000300) >> 8)
#define  MEMERR_L2C_L2ESR_MULTUCERR_MASK	BIT(3)
#define  MEMERR_L2C_L2ESR_MULTICERR_MASK	BIT(2)
#define  MEMERR_L2C_L2ESR_UCERR_MASK		BIT(1)
#define  MEMERR_L2C_L2ESR_ERR_MASK		BIT(0)
#define MEMERR_L2C_L2EALR_PAGE_OFFSET		0x0008
#define MEMERR_L2C_L2EAHR_PAGE_OFFSET		0x000c
/* Register written by the L2 debugfs injection hook */
#define MEMERR_L2C_L2ESRA_PAGE_OFFSET		0x0804

/* Offsets inside CPU_L2C_PAGE: L2 "RTO" control / status / address */
#define CPUX_L2C_L2RTOCR_PAGE_OFFSET		0x0010
#define CPUX_L2C_L2RTOSR_PAGE_OFFSET		0x0014
#define  MEMERR_L2C_L2RTOSR_MULTERR_MASK	BIT(1)
#define  MEMERR_L2C_L2RTOSR_ERR_MASK		BIT(0)
#define CPUX_L2C_L2RTOALR_PAGE_OFFSET		0x0018
#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET		0x001c
0502
0503
0504
0505
0506
0507
0508 struct xgene_edac_pmd_ctx {
0509 struct list_head next;
0510 struct device ddev;
0511 char *name;
0512 struct xgene_edac *edac;
0513 struct edac_device_ctl_info *edac_dev;
0514 void __iomem *pmd_csr;
0515 u32 pmd;
0516 int version;
0517 };
0518
0519 static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
0520 int cpu_idx)
0521 {
0522 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
0523 void __iomem *pg_f;
0524 u32 val;
0525
0526 pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;
0527
0528 val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
0529 if (!val)
0530 goto chk_lsu;
0531 dev_err(edac_dev->dev,
0532 "CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
0533 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
0534 MEMERR_CPU_ICFESR_ERRWAY_RD(val),
0535 MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
0536 MEMERR_CPU_ICFESR_ERRINFO_RD(val));
0537 if (val & MEMERR_CPU_ICFESR_CERR_MASK)
0538 dev_err(edac_dev->dev, "One or more correctable error\n");
0539 if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
0540 dev_err(edac_dev->dev, "Multiple correctable error\n");
0541 switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
0542 case 1:
0543 dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
0544 break;
0545 case 2:
0546 dev_err(edac_dev->dev, "Way select multiple hit\n");
0547 break;
0548 case 3:
0549 dev_err(edac_dev->dev, "Physical tag parity error\n");
0550 break;
0551 case 4:
0552 case 5:
0553 dev_err(edac_dev->dev, "L1 data parity error\n");
0554 break;
0555 case 6:
0556 dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
0557 break;
0558 }
0559
0560
0561 writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
0562
0563 if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
0564 MEMERR_CPU_ICFESR_MULTCERR_MASK))
0565 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
0566
0567 chk_lsu:
0568 val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
0569 if (!val)
0570 goto chk_mmu;
0571 dev_err(edac_dev->dev,
0572 "CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
0573 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
0574 MEMERR_CPU_LSUESR_ERRWAY_RD(val),
0575 MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
0576 MEMERR_CPU_LSUESR_ERRINFO_RD(val));
0577 if (val & MEMERR_CPU_LSUESR_CERR_MASK)
0578 dev_err(edac_dev->dev, "One or more correctable error\n");
0579 if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
0580 dev_err(edac_dev->dev, "Multiple correctable error\n");
0581 switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
0582 case 0:
0583 dev_err(edac_dev->dev, "Load tag error\n");
0584 break;
0585 case 1:
0586 dev_err(edac_dev->dev, "Load data error\n");
0587 break;
0588 case 2:
0589 dev_err(edac_dev->dev, "WSL multihit error\n");
0590 break;
0591 case 3:
0592 dev_err(edac_dev->dev, "Store tag error\n");
0593 break;
0594 case 4:
0595 dev_err(edac_dev->dev,
0596 "DTB multihit from load pipeline error\n");
0597 break;
0598 case 5:
0599 dev_err(edac_dev->dev,
0600 "DTB multihit from store pipeline error\n");
0601 break;
0602 }
0603
0604
0605 writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
0606
0607 if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
0608 MEMERR_CPU_LSUESR_MULTCERR_MASK))
0609 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
0610
0611 chk_mmu:
0612 val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
0613 if (!val)
0614 return;
0615 dev_err(edac_dev->dev,
0616 "CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
0617 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
0618 MEMERR_CPU_MMUESR_ERRWAY_RD(val),
0619 MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
0620 MEMERR_CPU_MMUESR_ERRINFO_RD(val),
0621 val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
0622 if (val & MEMERR_CPU_MMUESR_CERR_MASK)
0623 dev_err(edac_dev->dev, "One or more correctable error\n");
0624 if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
0625 dev_err(edac_dev->dev, "Multiple correctable error\n");
0626 switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
0627 case 0:
0628 dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
0629 break;
0630 case 1:
0631 dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
0632 break;
0633 case 2:
0634 dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
0635 break;
0636 case 3:
0637 dev_err(edac_dev->dev, "TMO operation single bank error\n");
0638 break;
0639 case 4:
0640 dev_err(edac_dev->dev, "Stage 2 UTB error\n");
0641 break;
0642 case 5:
0643 dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
0644 break;
0645 case 6:
0646 dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
0647 break;
0648 case 7:
0649 dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
0650 break;
0651 }
0652
0653
0654 writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
0655
0656 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
0657 }
0658
0659 static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
0660 {
0661 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
0662 void __iomem *pg_d;
0663 void __iomem *pg_e;
0664 u32 val_hi;
0665 u32 val_lo;
0666 u32 val;
0667
0668
0669 pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
0670 val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
0671 if (!val)
0672 goto chk_l2c;
0673 val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
0674 val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
0675 dev_err(edac_dev->dev,
0676 "PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
0677 ctx->pmd, val, val_hi, val_lo);
0678 dev_err(edac_dev->dev,
0679 "ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
0680 MEMERR_L2C_L2ESR_ERRSYN_RD(val),
0681 MEMERR_L2C_L2ESR_ERRWAY_RD(val),
0682 MEMERR_L2C_L2ESR_ERRCPU_RD(val),
0683 MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
0684 MEMERR_L2C_L2ESR_ERRACTION_RD(val));
0685
0686 if (val & MEMERR_L2C_L2ESR_ERR_MASK)
0687 dev_err(edac_dev->dev, "One or more correctable error\n");
0688 if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
0689 dev_err(edac_dev->dev, "Multiple correctable error\n");
0690 if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
0691 dev_err(edac_dev->dev, "One or more uncorrectable error\n");
0692 if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
0693 dev_err(edac_dev->dev, "Multiple uncorrectable error\n");
0694
0695 switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
0696 case 0:
0697 dev_err(edac_dev->dev, "Outbound SDB parity error\n");
0698 break;
0699 case 1:
0700 dev_err(edac_dev->dev, "Inbound SDB parity error\n");
0701 break;
0702 case 2:
0703 dev_err(edac_dev->dev, "Tag ECC error\n");
0704 break;
0705 case 3:
0706 dev_err(edac_dev->dev, "Data ECC error\n");
0707 break;
0708 }
0709
0710
0711 writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
0712
0713 if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
0714 MEMERR_L2C_L2ESR_MULTICERR_MASK))
0715 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
0716 if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
0717 MEMERR_L2C_L2ESR_MULTUCERR_MASK))
0718 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
0719
0720 chk_l2c:
0721
0722 pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
0723 val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
0724 if (val) {
0725 val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
0726 val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
0727 dev_err(edac_dev->dev,
0728 "PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
0729 ctx->pmd, val, val_hi, val_lo);
0730 writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
0731 }
0732 }
0733
0734 static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
0735 {
0736 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
0737 unsigned int pcp_hp_stat;
0738 int i;
0739
0740 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
0741 if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
0742 return;
0743
0744
0745 for (i = 0; i < MAX_CPU_PER_PMD; i++)
0746 xgene_edac_pmd_l1_check(edac_dev, i);
0747
0748
0749 xgene_edac_pmd_l2_check(edac_dev);
0750 }
0751
0752 static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
0753 int cpu)
0754 {
0755 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
0756 void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
0757 CPU_MEMERR_CPU_PAGE;
0758
0759
0760
0761
0762
0763 writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
0764 writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
0765 writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
0766 }
0767
0768 static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
0769 {
0770 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
0771 void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
0772 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
0773
0774
0775 writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
0776
0777 if (ctx->version > 1)
0778 writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
0779 }
0780
0781 static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
0782 bool enable)
0783 {
0784 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
0785 int i;
0786
0787
0788 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
0789 if (enable)
0790 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
0791 PMD0_MERR_MASK << ctx->pmd);
0792 else
0793 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
0794 PMD0_MERR_MASK << ctx->pmd);
0795 }
0796
0797 if (enable) {
0798 xgene_edac_pmd_hw_cfg(edac_dev);
0799
0800
0801 for (i = 0; i < MAX_CPU_PER_PMD; i++)
0802 xgene_edac_pmd_cpu_hw_cfg(edac_dev, i);
0803 }
0804 }
0805
0806 static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
0807 const char __user *data,
0808 size_t count, loff_t *ppos)
0809 {
0810 struct edac_device_ctl_info *edac_dev = file->private_data;
0811 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
0812 void __iomem *cpux_pg_f;
0813 int i;
0814
0815 for (i = 0; i < MAX_CPU_PER_PMD; i++) {
0816 cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
0817 CPU_MEMERR_CPU_PAGE;
0818
0819 writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
0820 MEMERR_CPU_ICFESR_CERR_MASK,
0821 cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
0822 writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
0823 MEMERR_CPU_LSUESR_CERR_MASK,
0824 cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
0825 writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
0826 MEMERR_CPU_MMUESR_CERR_MASK,
0827 cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
0828 }
0829 return count;
0830 }
0831
0832 static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
0833 const char __user *data,
0834 size_t count, loff_t *ppos)
0835 {
0836 struct edac_device_ctl_info *edac_dev = file->private_data;
0837 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
0838 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
0839
0840 writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
0841 MEMERR_L2C_L2ESR_MULTICERR_MASK |
0842 MEMERR_L2C_L2ESR_UCERR_MASK |
0843 MEMERR_L2C_L2ESR_ERR_MASK,
0844 pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
0845 return count;
0846 }
0847
0848 static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
0849 {
0850 .open = simple_open,
0851 .write = xgene_edac_pmd_l1_inject_ctrl_write,
0852 .llseek = generic_file_llseek, },
0853 {
0854 .open = simple_open,
0855 .write = xgene_edac_pmd_l2_inject_ctrl_write,
0856 .llseek = generic_file_llseek, },
0857 { }
0858 };
0859
0860 static void
0861 xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
0862 {
0863 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
0864 struct dentry *dbgfs_dir;
0865 char name[10];
0866
0867 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
0868 return;
0869
0870 snprintf(name, sizeof(name), "PMD%d", ctx->pmd);
0871 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
0872 if (!dbgfs_dir)
0873 return;
0874
0875 edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
0876 &xgene_edac_pmd_debug_inject_fops[0]);
0877 edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
0878 &xgene_edac_pmd_debug_inject_fops[1]);
0879 }
0880
/*
 * Tell whether PMD number @pmd is available according to @efuse.
 *
 * A set bit @pmd in @efuse marks the PMD as not available. The index
 * comes from the device tree, so values that do not fit in @efuse are
 * rejected instead of being used as a shift count (shifting by a negative
 * amount or by >= the width of the type is undefined behaviour).
 *
 * Returns 1 if the PMD is available, 0 otherwise.
 */
static int xgene_edac_pmd_available(u32 efuse, int pmd)
{
	const int efuse_bits = (int)(sizeof(efuse) * 8);

	if (pmd < 0 || pmd >= efuse_bits)
		return 0;

	/* Unsigned constant: 1 << 31 would overflow a signed int */
	return (efuse & (1U << pmd)) ? 0 : 1;
}
0885
0886 static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
0887 int version)
0888 {
0889 struct edac_device_ctl_info *edac_dev;
0890 struct xgene_edac_pmd_ctx *ctx;
0891 struct resource res;
0892 char edac_name[10];
0893 u32 pmd;
0894 int rc;
0895 u32 val;
0896
0897 if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
0898 return -ENOMEM;
0899
0900
0901 if (of_property_read_u32(np, "pmd-controller", &pmd)) {
0902 dev_err(edac->dev, "no pmd-controller property\n");
0903 rc = -ENODEV;
0904 goto err_group;
0905 }
0906 rc = regmap_read(edac->efuse_map, 0, &val);
0907 if (rc)
0908 goto err_group;
0909 if (!xgene_edac_pmd_available(val, pmd)) {
0910 rc = -ENODEV;
0911 goto err_group;
0912 }
0913
0914 snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
0915 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
0916 edac_name, 1, "l2c", 1, 2, NULL,
0917 0, edac_device_alloc_index());
0918 if (!edac_dev) {
0919 rc = -ENOMEM;
0920 goto err_group;
0921 }
0922
0923 ctx = edac_dev->pvt_info;
0924 ctx->name = "xgene_pmd_err";
0925 ctx->pmd = pmd;
0926 ctx->edac = edac;
0927 ctx->edac_dev = edac_dev;
0928 ctx->ddev = *edac->dev;
0929 ctx->version = version;
0930 edac_dev->dev = &ctx->ddev;
0931 edac_dev->ctl_name = ctx->name;
0932 edac_dev->dev_name = ctx->name;
0933 edac_dev->mod_name = EDAC_MOD_STR;
0934
0935 rc = of_address_to_resource(np, 0, &res);
0936 if (rc < 0) {
0937 dev_err(edac->dev, "no PMD resource address\n");
0938 goto err_free;
0939 }
0940 ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
0941 if (IS_ERR(ctx->pmd_csr)) {
0942 dev_err(edac->dev,
0943 "devm_ioremap_resource failed for PMD resource address\n");
0944 rc = PTR_ERR(ctx->pmd_csr);
0945 goto err_free;
0946 }
0947
0948 if (edac_op_state == EDAC_OPSTATE_POLL)
0949 edac_dev->edac_check = xgene_edac_pmd_check;
0950
0951 xgene_edac_pmd_create_debugfs_nodes(edac_dev);
0952
0953 rc = edac_device_add_device(edac_dev);
0954 if (rc > 0) {
0955 dev_err(edac->dev, "edac_device_add_device failed\n");
0956 rc = -ENOMEM;
0957 goto err_free;
0958 }
0959
0960 if (edac_op_state == EDAC_OPSTATE_INT)
0961 edac_dev->op_state = OP_RUNNING_INTERRUPT;
0962
0963 list_add(&ctx->next, &edac->pmds);
0964
0965 xgene_edac_pmd_hw_ctl(edac_dev, 1);
0966
0967 devres_remove_group(edac->dev, xgene_edac_pmd_add);
0968
0969 dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
0970 return 0;
0971
0972 err_free:
0973 edac_device_free_ctl_info(edac_dev);
0974 err_group:
0975 devres_release_group(edac->dev, xgene_edac_pmd_add);
0976 return rc;
0977 }
0978
0979 static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
0980 {
0981 struct edac_device_ctl_info *edac_dev = pmd->edac_dev;
0982
0983 xgene_edac_pmd_hw_ctl(edac_dev, 0);
0984 edac_device_del_device(edac_dev->dev);
0985 edac_device_free_ctl_info(edac_dev);
0986 return 0;
0987 }
0988
0989
/* L3 cache error status register (byte offset) and its bits */
#define L3C_ESR				(0x0A * 4)
#define  L3C_ESR_DATATAG_MASK		BIT(9)
#define  L3C_ESR_MULTIHIT_MASK		BIT(8)
#define  L3C_ESR_UCEVICT_MASK		BIT(6)
#define  L3C_ESR_MULTIUCERR_MASK	BIT(5)
#define  L3C_ESR_MULTICERR_MASK		BIT(4)
#define  L3C_ESR_UCERR_MASK		BIT(3)
#define  L3C_ESR_CERR_MASK		BIT(2)
#define  L3C_ESR_UCERRINTR_MASK		BIT(1)
#define  L3C_ESR_CERRINTR_MASK		BIT(0)

/* L3 cache error control register */
#define L3C_ECR				(0x0B * 4)
#define  L3C_ECR_UCINTREN		BIT(3)
#define  L3C_ECR_CINTREN		BIT(2)
#define  L3C_UCERREN			BIT(1)
#define  L3C_CERREN			BIT(0)

/*
 * L3 cache error log registers and field extractors.
 *
 * The macro argument is parenthesised so that a compound expression such
 * as "a | b" is masked as a whole; without the parentheses "&" would bind
 * to the last operand only.
 */
#define L3C_ELR				(0x0C * 4)
#define  L3C_ELR_ERRSYN(src)		(((src) & 0xFF800000) >> 23)
#define  L3C_ELR_ERRWAY(src)		(((src) & 0x007E0000) >> 17)
#define  L3C_ELR_AGENTID(src)		(((src) & 0x0001E000) >> 13)
#define  L3C_ELR_ERRGRP(src)		(((src) & 0x00000F00) >> 8)
#define  L3C_ELR_OPTYPE(src)		(((src) & 0x000000F0) >> 4)
#define  L3C_ELR_PADDRHIGH(src)		((src) & 0x0000000F)
#define L3C_AELR			(0x0D * 4)
#define L3C_BELR			(0x0E * 4)
#define  L3C_BELR_BANK(src)		((src) & 0x0000000F)
1015
1016 struct xgene_edac_dev_ctx {
1017 struct list_head next;
1018 struct device ddev;
1019 char *name;
1020 struct xgene_edac *edac;
1021 struct edac_device_ctl_info *edac_dev;
1022 int edac_idx;
1023 void __iomem *dev_csr;
1024 int version;
1025 };
1026
1027
1028
1029
1030
1031 static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
1032 {
1033 if (l3cesr & L3C_ESR_DATATAG_MASK) {
1034 switch (L3C_ELR_ERRSYN(l3celr)) {
1035 case 0x13C:
1036 case 0x0B4:
1037 case 0x007:
1038 case 0x00D:
1039 case 0x00E:
1040 case 0x019:
1041 case 0x01A:
1042 case 0x01C:
1043 case 0x04E:
1044 case 0x041:
1045 return true;
1046 }
1047 } else if (L3C_ELR_ERRWAY(l3celr) == 9)
1048 return true;
1049
1050 return false;
1051 }
1052
1053 static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
1054 {
1055 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1056 u32 l3cesr;
1057 u32 l3celr;
1058 u32 l3caelr;
1059 u32 l3cbelr;
1060
1061 l3cesr = readl(ctx->dev_csr + L3C_ESR);
1062 if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
1063 return;
1064
1065 if (l3cesr & L3C_ESR_UCERR_MASK)
1066 dev_err(edac_dev->dev, "L3C uncorrectable error\n");
1067 if (l3cesr & L3C_ESR_CERR_MASK)
1068 dev_warn(edac_dev->dev, "L3C correctable error\n");
1069
1070 l3celr = readl(ctx->dev_csr + L3C_ELR);
1071 l3caelr = readl(ctx->dev_csr + L3C_AELR);
1072 l3cbelr = readl(ctx->dev_csr + L3C_BELR);
1073 if (l3cesr & L3C_ESR_MULTIHIT_MASK)
1074 dev_err(edac_dev->dev, "L3C multiple hit error\n");
1075 if (l3cesr & L3C_ESR_UCEVICT_MASK)
1076 dev_err(edac_dev->dev,
1077 "L3C dropped eviction of line with error\n");
1078 if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
1079 dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
1080 if (l3cesr & L3C_ESR_DATATAG_MASK)
1081 dev_err(edac_dev->dev,
1082 "L3C data error syndrome 0x%X group 0x%X\n",
1083 L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
1084 else
1085 dev_err(edac_dev->dev,
1086 "L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
1087 L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
1088 L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
1089
1090
1091
1092
1093 dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
1094 L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
1095 (l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
1096 dev_err(edac_dev->dev,
1097 "L3C error status register value 0x%X\n", l3cesr);
1098
1099
1100 writel(0, ctx->dev_csr + L3C_ESR);
1101
1102 if (ctx->version <= 1 &&
1103 xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
1104 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1105 return;
1106 }
1107 if (l3cesr & L3C_ESR_CERR_MASK)
1108 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1109 if (l3cesr & L3C_ESR_UCERR_MASK)
1110 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1111 }
1112
1113 static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
1114 bool enable)
1115 {
1116 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1117 u32 val;
1118
1119 val = readl(ctx->dev_csr + L3C_ECR);
1120 val |= L3C_UCERREN | L3C_CERREN;
1121
1122 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1123 if (enable)
1124 val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
1125 else
1126 val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN);
1127 }
1128 writel(val, ctx->dev_csr + L3C_ECR);
1129
1130 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1131
1132 if (enable) {
1133 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
1134 L3C_UNCORR_ERR_MASK);
1135 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
1136 L3C_CORR_ERR_MASK);
1137 } else {
1138 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
1139 L3C_UNCORR_ERR_MASK);
1140 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
1141 L3C_CORR_ERR_MASK);
1142 }
1143 }
1144 }
1145
1146 static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
1147 const char __user *data,
1148 size_t count, loff_t *ppos)
1149 {
1150 struct edac_device_ctl_info *edac_dev = file->private_data;
1151 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1152
1153
1154 writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
1155 return count;
1156 }
1157
1158 static const struct file_operations xgene_edac_l3_debug_inject_fops = {
1159 .open = simple_open,
1160 .write = xgene_edac_l3_inject_ctrl_write,
1161 .llseek = generic_file_llseek
1162 };
1163
1164 static void
1165 xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
1166 {
1167 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1168 struct dentry *dbgfs_dir;
1169 char name[10];
1170
1171 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
1172 return;
1173
1174 snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
1175 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
1176 if (!dbgfs_dir)
1177 return;
1178
1179 debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
1180 &xgene_edac_l3_debug_inject_fops);
1181 }
1182
1183 static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
1184 int version)
1185 {
1186 struct edac_device_ctl_info *edac_dev;
1187 struct xgene_edac_dev_ctx *ctx;
1188 struct resource res;
1189 void __iomem *dev_csr;
1190 int edac_idx;
1191 int rc = 0;
1192
1193 if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
1194 return -ENOMEM;
1195
1196 rc = of_address_to_resource(np, 0, &res);
1197 if (rc < 0) {
1198 dev_err(edac->dev, "no L3 resource address\n");
1199 goto err_release_group;
1200 }
1201 dev_csr = devm_ioremap_resource(edac->dev, &res);
1202 if (IS_ERR(dev_csr)) {
1203 dev_err(edac->dev,
1204 "devm_ioremap_resource failed for L3 resource address\n");
1205 rc = PTR_ERR(dev_csr);
1206 goto err_release_group;
1207 }
1208
1209 edac_idx = edac_device_alloc_index();
1210 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
1211 "l3c", 1, "l3c", 1, 0, NULL, 0,
1212 edac_idx);
1213 if (!edac_dev) {
1214 rc = -ENOMEM;
1215 goto err_release_group;
1216 }
1217
1218 ctx = edac_dev->pvt_info;
1219 ctx->dev_csr = dev_csr;
1220 ctx->name = "xgene_l3_err";
1221 ctx->edac_idx = edac_idx;
1222 ctx->edac = edac;
1223 ctx->edac_dev = edac_dev;
1224 ctx->ddev = *edac->dev;
1225 ctx->version = version;
1226 edac_dev->dev = &ctx->ddev;
1227 edac_dev->ctl_name = ctx->name;
1228 edac_dev->dev_name = ctx->name;
1229 edac_dev->mod_name = EDAC_MOD_STR;
1230
1231 if (edac_op_state == EDAC_OPSTATE_POLL)
1232 edac_dev->edac_check = xgene_edac_l3_check;
1233
1234 xgene_edac_l3_create_debugfs_nodes(edac_dev);
1235
1236 rc = edac_device_add_device(edac_dev);
1237 if (rc > 0) {
1238 dev_err(edac->dev, "failed edac_device_add_device()\n");
1239 rc = -ENOMEM;
1240 goto err_ctl_free;
1241 }
1242
1243 if (edac_op_state == EDAC_OPSTATE_INT)
1244 edac_dev->op_state = OP_RUNNING_INTERRUPT;
1245
1246 list_add(&ctx->next, &edac->l3s);
1247
1248 xgene_edac_l3_hw_init(edac_dev, 1);
1249
1250 devres_remove_group(edac->dev, xgene_edac_l3_add);
1251
1252 dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
1253 return 0;
1254
1255 err_ctl_free:
1256 edac_device_free_ctl_info(edac_dev);
1257 err_release_group:
1258 devres_release_group(edac->dev, xgene_edac_l3_add);
1259 return rc;
1260 }
1261
1262 static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
1263 {
1264 struct edac_device_ctl_info *edac_dev = l3->edac_dev;
1265
1266 xgene_edac_l3_hw_init(edac_dev, 0);
1267 edac_device_del_device(l3->edac->dev);
1268 edac_device_free_ctl_info(edac_dev);
1269 return 0;
1270 }
1271
1272
/*
 * SoC (IOB) error register offsets, relative to the SoC dev_csr mapping,
 * with the bit fields of each register indented beneath it.
 */

/* IOB AXI slave 0 transaction error */
#define IOBAXIS0TRANSERRINTSTS		0x0000
#define  IOBAXIS0_M_ILLEGAL_ACCESS_MASK	BIT(1)
#define  IOBAXIS0_ILLEGAL_ACCESS_MASK	BIT(0)
#define IOBAXIS0TRANSERRINTMSK		0x0004
#define IOBAXIS0TRANSERRREQINFOL	0x0008
#define IOBAXIS0TRANSERRREQINFOH	0x000c
#define  REQTYPE_RD(src)		((src) & BIT(0))
#define  ERRADDRH_RD(src)		(((src) & 0xffc00000) >> 22)

/* IOB AXI slave 1 transaction error */
#define IOBAXIS1TRANSERRINTSTS		0x0010
#define IOBAXIS1TRANSERRINTMSK		0x0014
#define IOBAXIS1TRANSERRREQINFOL	0x0018
#define IOBAXIS1TRANSERRREQINFOH	0x001c

/* IOB processing agent (PA) transaction error */
#define IOBPATRANSERRINTSTS		0x0020
#define  IOBPA_M_REQIDRAM_CORRUPT_MASK	BIT(7)
#define  IOBPA_REQIDRAM_CORRUPT_MASK	BIT(6)
#define  IOBPA_M_TRANS_CORRUPT_MASK	BIT(5)
#define  IOBPA_TRANS_CORRUPT_MASK	BIT(4)
#define  IOBPA_M_WDATA_CORRUPT_MASK	BIT(3)
#define  IOBPA_WDATA_CORRUPT_MASK	BIT(2)
#define  IOBPA_M_RDATA_CORRUPT_MASK	BIT(1)
#define  IOBPA_RDATA_CORRUPT_MASK	BIT(0)

/* IOB bridge agent (BA) transaction error */
#define IOBBATRANSERRINTSTS		0x0030
#define  M_ILLEGAL_ACCESS_MASK		BIT(15)
#define  ILLEGAL_ACCESS_MASK		BIT(14)
#define  M_WIDRAM_CORRUPT_MASK		BIT(13)
#define  WIDRAM_CORRUPT_MASK		BIT(12)
#define  M_RIDRAM_CORRUPT_MASK		BIT(11)
#define  RIDRAM_CORRUPT_MASK		BIT(10)
#define  M_TRANS_CORRUPT_MASK		BIT(9)
#define  TRANS_CORRUPT_MASK		BIT(8)
#define  M_WDATA_CORRUPT_MASK		BIT(7)
#define  WDATA_CORRUPT_MASK		BIT(6)
#define  M_RBM_POISONED_REQ_MASK	BIT(5)
#define  RBM_POISONED_REQ_MASK		BIT(4)
#define  M_XGIC_POISONED_REQ_MASK	BIT(3)
#define  XGIC_POISONED_REQ_MASK		BIT(2)
#define  M_WRERR_RESP_MASK		BIT(1)
#define  WRERR_RESP_MASK		BIT(0)
#define IOBBATRANSERRREQINFOL		0x0038
#define IOBBATRANSERRREQINFOH		0x003c
#define  REQTYPE_F2_RD(src)		((src) & BIT(0))
#define  ERRADDRH_F2_RD(src)		(((src) & 0xffc00000) >> 22)
#define IOBBATRANSERRCSWREQID		0x0040

/* XGIC transaction error */
#define XGICTRANSERRINTSTS		0x0050
#define  M_WR_ACCESS_ERR_MASK		BIT(3)
#define  WR_ACCESS_ERR_MASK		BIT(2)
#define  M_RD_ACCESS_ERR_MASK		BIT(1)
#define  RD_ACCESS_ERR_MASK		BIT(0)
#define XGICTRANSERRINTMSK		0x0054
#define XGICTRANSERRREQINFO		0x0058
#define  REQTYPE_MASK			BIT(26)
#define  ERRADDR_RD(src)		((src) & 0x03ffffff)

/* IOB global memory error status and logged addresses */
#define GLBL_ERR_STS			0x0800
#define  MDED_ERR_MASK			BIT(3)
#define  DED_ERR_MASK			BIT(2)
#define  MSEC_ERR_MASK			BIT(1)
#define  SEC_ERR_MASK			BIT(0)
#define GLBL_SEC_ERRL			0x0810
#define GLBL_SEC_ERRH			0x0818
#define GLBL_MSEC_ERRL			0x0820
#define GLBL_MSEC_ERRH			0x0828
#define GLBL_DED_ERRL			0x0830
#define GLBL_DED_ERRLMASK		0x0834
#define GLBL_DED_ERRH			0x0838
#define GLBL_DED_ERRHMASK		0x083c
#define GLBL_MDED_ERRL			0x0840
#define GLBL_MDED_ERRLMASK		0x0844
#define GLBL_MDED_ERRH			0x0848
#define GLBL_MDED_ERRHMASK		0x084c

/* Register bus (RB) registers, accessed through the "regmap-rb" regmap */
#define RBCSR				0x0000
#define  STICKYERR_MASK			BIT(0)
#define RBEIR				0x0008
#define  AGENT_OFFLINE_ERR_MASK		BIT(30)
#define  UNIMPL_RBPAGE_ERR_MASK		BIT(29)
#define  WORD_ALIGNED_ERR_MASK		BIT(28)
#define  PAGE_ACCESS_ERR_MASK		BIT(27)
#define  WRITE_ACCESS_MASK		BIT(26)
1351 #define WRITE_ACCESS_MASK BIT(26)
1352
/*
 * Names of the SoC memory blocks for version 1 hardware. The array index is
 * the bit number in the MEMERRINTSTS status word that flags a parity error
 * for that block.
 */
static const char * const soc_mem_err_v1[] = {
	[0]  = "10GbE0",
	[1]  = "10GbE1",
	[2]  = "Security",
	[3]  = "SATA45",
	[4]  = "SATA23/ETH23",
	[5]  = "SATA01/ETH01",
	[6]  = "USB1",
	[7]  = "USB0",
	[8]  = "QML",
	[9]  = "QM0",
	[10] = "QM1 (XGbE01)",
	[11] = "PCIE4",
	[12] = "PCIE3",
	[13] = "PCIE2",
	[14] = "PCIE1",
	[15] = "PCIE0",
	[16] = "CTX Manager",
	[17] = "OCM",
	[18] = "1GbE",
	[19] = "CLE",
	[20] = "AHBC",
	[21] = "PktDMA",
	[22] = "GFC",
	[23] = "MSLIM",
	[24] = "10GbE2",
	[25] = "10GbE3",
	[26] = "QM2 (XGbE23)",
	[27] = "IOB",
	[28] = "unknown",
	[29] = "unknown",
	[30] = "unknown",
	[31] = "unknown",
};
1387
1388 static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
1389 {
1390 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1391 u32 err_addr_lo;
1392 u32 err_addr_hi;
1393 u32 reg;
1394 u32 info;
1395
1396
1397 reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS);
1398 if (!reg)
1399 goto chk_iob_err;
1400 dev_err(edac_dev->dev, "XGIC transaction error\n");
1401 if (reg & RD_ACCESS_ERR_MASK)
1402 dev_err(edac_dev->dev, "XGIC read size error\n");
1403 if (reg & M_RD_ACCESS_ERR_MASK)
1404 dev_err(edac_dev->dev, "Multiple XGIC read size error\n");
1405 if (reg & WR_ACCESS_ERR_MASK)
1406 dev_err(edac_dev->dev, "XGIC write size error\n");
1407 if (reg & M_WR_ACCESS_ERR_MASK)
1408 dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
1409 info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
1410 dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
1411 info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
1412 info);
1413 writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);
1414
1415 chk_iob_err:
1416
1417 reg = readl(ctx->dev_csr + GLBL_ERR_STS);
1418 if (!reg)
1419 return;
1420 if (reg & SEC_ERR_MASK) {
1421 err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL);
1422 err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH);
1423 dev_err(edac_dev->dev,
1424 "IOB single-bit correctable memory at 0x%08X.%08X error\n",
1425 err_addr_lo, err_addr_hi);
1426 writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL);
1427 writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH);
1428 }
1429 if (reg & MSEC_ERR_MASK) {
1430 err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL);
1431 err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH);
1432 dev_err(edac_dev->dev,
1433 "IOB multiple single-bit correctable memory at 0x%08X.%08X error\n",
1434 err_addr_lo, err_addr_hi);
1435 writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL);
1436 writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH);
1437 }
1438 if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK))
1439 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1440
1441 if (reg & DED_ERR_MASK) {
1442 err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL);
1443 err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH);
1444 dev_err(edac_dev->dev,
1445 "IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
1446 err_addr_lo, err_addr_hi);
1447 writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL);
1448 writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH);
1449 }
1450 if (reg & MDED_ERR_MASK) {
1451 err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL);
1452 err_addr_hi = readl(ctx->dev_csr + GLBL_MDED_ERRH);
1453 dev_err(edac_dev->dev,
1454 "Multiple IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
1455 err_addr_lo, err_addr_hi);
1456 writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL);
1457 writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH);
1458 }
1459 if (reg & (DED_ERR_MASK | MDED_ERR_MASK))
1460 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1461 }
1462
1463 static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
1464 {
1465 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1466 u32 err_addr_lo;
1467 u32 err_addr_hi;
1468 u32 reg;
1469
1470
1471 if (!ctx->edac->rb_map)
1472 goto rb_skip;
1473
1474
1475
1476
1477
1478
1479
1480
1481 if (regmap_read(ctx->edac->rb_map, RBCSR, ®))
1482 return;
1483 if (reg & STICKYERR_MASK) {
1484 bool write;
1485
1486 dev_err(edac_dev->dev, "IOB bus access error(s)\n");
1487 if (regmap_read(ctx->edac->rb_map, RBEIR, ®))
1488 return;
1489 write = reg & WRITE_ACCESS_MASK ? 1 : 0;
1490 if (reg & AGENT_OFFLINE_ERR_MASK)
1491 dev_err(edac_dev->dev,
1492 "IOB bus %s access to offline agent error\n",
1493 write ? "write" : "read");
1494 if (reg & UNIMPL_RBPAGE_ERR_MASK)
1495 dev_err(edac_dev->dev,
1496 "IOB bus %s access to unimplemented page error\n",
1497 write ? "write" : "read");
1498 if (reg & WORD_ALIGNED_ERR_MASK)
1499 dev_err(edac_dev->dev,
1500 "IOB bus %s word aligned access error\n",
1501 write ? "write" : "read");
1502 if (reg & PAGE_ACCESS_ERR_MASK)
1503 dev_err(edac_dev->dev,
1504 "IOB bus %s to page out of range access error\n",
1505 write ? "write" : "read");
1506 if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
1507 return;
1508 if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
1509 return;
1510 }
1511 rb_skip:
1512
1513
1514 reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
1515 if (!reg)
1516 return;
1517
1518 dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n");
1519 if (reg & WRERR_RESP_MASK)
1520 dev_err(edac_dev->dev, "IOB BA write response error\n");
1521 if (reg & M_WRERR_RESP_MASK)
1522 dev_err(edac_dev->dev,
1523 "Multiple IOB BA write response error\n");
1524 if (reg & XGIC_POISONED_REQ_MASK)
1525 dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n");
1526 if (reg & M_XGIC_POISONED_REQ_MASK)
1527 dev_err(edac_dev->dev,
1528 "Multiple IOB BA XGIC poisoned write error\n");
1529 if (reg & RBM_POISONED_REQ_MASK)
1530 dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n");
1531 if (reg & M_RBM_POISONED_REQ_MASK)
1532 dev_err(edac_dev->dev,
1533 "Multiple IOB BA RBM poisoned write error\n");
1534 if (reg & WDATA_CORRUPT_MASK)
1535 dev_err(edac_dev->dev, "IOB BA write error\n");
1536 if (reg & M_WDATA_CORRUPT_MASK)
1537 dev_err(edac_dev->dev, "Multiple IOB BA write error\n");
1538 if (reg & TRANS_CORRUPT_MASK)
1539 dev_err(edac_dev->dev, "IOB BA transaction error\n");
1540 if (reg & M_TRANS_CORRUPT_MASK)
1541 dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n");
1542 if (reg & RIDRAM_CORRUPT_MASK)
1543 dev_err(edac_dev->dev,
1544 "IOB BA RDIDRAM read transaction ID error\n");
1545 if (reg & M_RIDRAM_CORRUPT_MASK)
1546 dev_err(edac_dev->dev,
1547 "Multiple IOB BA RDIDRAM read transaction ID error\n");
1548 if (reg & WIDRAM_CORRUPT_MASK)
1549 dev_err(edac_dev->dev,
1550 "IOB BA RDIDRAM write transaction ID error\n");
1551 if (reg & M_WIDRAM_CORRUPT_MASK)
1552 dev_err(edac_dev->dev,
1553 "Multiple IOB BA RDIDRAM write transaction ID error\n");
1554 if (reg & ILLEGAL_ACCESS_MASK)
1555 dev_err(edac_dev->dev,
1556 "IOB BA XGIC/RB illegal access error\n");
1557 if (reg & M_ILLEGAL_ACCESS_MASK)
1558 dev_err(edac_dev->dev,
1559 "Multiple IOB BA XGIC/RB illegal access error\n");
1560
1561 err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
1562 err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
1563 dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
1564 REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
1565 ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1566 if (reg & WRERR_RESP_MASK)
1567 dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
1568 readl(ctx->dev_csr + IOBBATRANSERRCSWREQID));
1569 writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS);
1570 }
1571
1572 static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
1573 {
1574 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1575 u32 err_addr_lo;
1576 u32 err_addr_hi;
1577 u32 reg;
1578
1579
1580 reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
1581 if (!reg)
1582 goto chk_iob_axi0;
1583 dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n");
1584 if (reg & IOBPA_RDATA_CORRUPT_MASK)
1585 dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
1586 if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
1587 dev_err(edac_dev->dev,
1588 "Multiple IOB PA read data RAM error\n");
1589 if (reg & IOBPA_WDATA_CORRUPT_MASK)
1590 dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
1591 if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
1592 dev_err(edac_dev->dev,
1593 "Multiple IOB PA write data RAM error\n");
1594 if (reg & IOBPA_TRANS_CORRUPT_MASK)
1595 dev_err(edac_dev->dev, "IOB PA transaction error\n");
1596 if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
1597 dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n");
1598 if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
1599 dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
1600 if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
1601 dev_err(edac_dev->dev,
1602 "Multiple IOB PA transaction ID RAM error\n");
1603 writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS);
1604
1605 chk_iob_axi0:
1606
1607 reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
1608 if (!reg)
1609 goto chk_iob_axi1;
1610 err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL);
1611 err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH);
1612 dev_err(edac_dev->dev,
1613 "%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
1614 reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
1615 REQTYPE_RD(err_addr_hi) ? "read" : "write",
1616 ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1617 writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
1618
1619 chk_iob_axi1:
1620
1621 reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
1622 if (!reg)
1623 return;
1624 err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL);
1625 err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH);
1626 dev_err(edac_dev->dev,
1627 "%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
1628 reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
1629 REQTYPE_RD(err_addr_hi) ? "read" : "write",
1630 ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1631 writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
1632 }
1633
1634 static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev)
1635 {
1636 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1637 const char * const *soc_mem_err = NULL;
1638 u32 pcp_hp_stat;
1639 u32 pcp_lp_stat;
1640 u32 reg;
1641 int i;
1642
1643 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
1644 xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
1645 xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, ®);
1646 if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
1647 IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) ||
1648 (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg))
1649 return;
1650
1651 if (pcp_hp_stat & IOB_XGIC_ERR_MASK)
1652 xgene_edac_iob_gic_report(edac_dev);
1653
1654 if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK))
1655 xgene_edac_rb_report(edac_dev);
1656
1657 if (pcp_hp_stat & IOB_PA_ERR_MASK)
1658 xgene_edac_pa_report(edac_dev);
1659
1660 if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) {
1661 dev_info(edac_dev->dev,
1662 "CSW switch trace correctable memory parity error\n");
1663 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1664 }
1665
1666 if (!reg)
1667 return;
1668 if (ctx->version == 1)
1669 soc_mem_err = soc_mem_err_v1;
1670 if (!soc_mem_err) {
1671 dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n",
1672 reg);
1673 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1674 return;
1675 }
1676 for (i = 0; i < 31; i++) {
1677 if (reg & (1 << i)) {
1678 dev_err(edac_dev->dev, "%s memory parity error\n",
1679 soc_mem_err[i]);
1680 edac_device_handle_ue(edac_dev, 0, 0,
1681 edac_dev->ctl_name);
1682 }
1683 }
1684 }
1685
1686 static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev,
1687 bool enable)
1688 {
1689 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1690
1691
1692 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1693 if (enable) {
1694 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
1695 IOB_PA_ERR_MASK |
1696 IOB_BA_ERR_MASK |
1697 IOB_XGIC_ERR_MASK |
1698 IOB_RB_ERR_MASK);
1699 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
1700 CSW_SWITCH_TRACE_ERR_MASK);
1701 } else {
1702 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
1703 IOB_PA_ERR_MASK |
1704 IOB_BA_ERR_MASK |
1705 IOB_XGIC_ERR_MASK |
1706 IOB_RB_ERR_MASK);
1707 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
1708 CSW_SWITCH_TRACE_ERR_MASK);
1709 }
1710
1711 writel(enable ? 0x0 : 0xFFFFFFFF,
1712 ctx->dev_csr + IOBAXIS0TRANSERRINTMSK);
1713 writel(enable ? 0x0 : 0xFFFFFFFF,
1714 ctx->dev_csr + IOBAXIS1TRANSERRINTMSK);
1715 writel(enable ? 0x0 : 0xFFFFFFFF,
1716 ctx->dev_csr + XGICTRANSERRINTMSK);
1717
1718 xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK,
1719 enable ? 0x0 : 0xFFFFFFFF);
1720 }
1721 }
1722
1723 static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
1724 int version)
1725 {
1726 struct edac_device_ctl_info *edac_dev;
1727 struct xgene_edac_dev_ctx *ctx;
1728 void __iomem *dev_csr;
1729 struct resource res;
1730 int edac_idx;
1731 int rc;
1732
1733 if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL))
1734 return -ENOMEM;
1735
1736 rc = of_address_to_resource(np, 0, &res);
1737 if (rc < 0) {
1738 dev_err(edac->dev, "no SoC resource address\n");
1739 goto err_release_group;
1740 }
1741 dev_csr = devm_ioremap_resource(edac->dev, &res);
1742 if (IS_ERR(dev_csr)) {
1743 dev_err(edac->dev,
1744 "devm_ioremap_resource failed for soc resource address\n");
1745 rc = PTR_ERR(dev_csr);
1746 goto err_release_group;
1747 }
1748
1749 edac_idx = edac_device_alloc_index();
1750 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
1751 "SOC", 1, "SOC", 1, 2, NULL, 0,
1752 edac_idx);
1753 if (!edac_dev) {
1754 rc = -ENOMEM;
1755 goto err_release_group;
1756 }
1757
1758 ctx = edac_dev->pvt_info;
1759 ctx->dev_csr = dev_csr;
1760 ctx->name = "xgene_soc_err";
1761 ctx->edac_idx = edac_idx;
1762 ctx->edac = edac;
1763 ctx->edac_dev = edac_dev;
1764 ctx->ddev = *edac->dev;
1765 ctx->version = version;
1766 edac_dev->dev = &ctx->ddev;
1767 edac_dev->ctl_name = ctx->name;
1768 edac_dev->dev_name = ctx->name;
1769 edac_dev->mod_name = EDAC_MOD_STR;
1770
1771 if (edac_op_state == EDAC_OPSTATE_POLL)
1772 edac_dev->edac_check = xgene_edac_soc_check;
1773
1774 rc = edac_device_add_device(edac_dev);
1775 if (rc > 0) {
1776 dev_err(edac->dev, "failed edac_device_add_device()\n");
1777 rc = -ENOMEM;
1778 goto err_ctl_free;
1779 }
1780
1781 if (edac_op_state == EDAC_OPSTATE_INT)
1782 edac_dev->op_state = OP_RUNNING_INTERRUPT;
1783
1784 list_add(&ctx->next, &edac->socs);
1785
1786 xgene_edac_soc_hw_init(edac_dev, 1);
1787
1788 devres_remove_group(edac->dev, xgene_edac_soc_add);
1789
1790 dev_info(edac->dev, "X-Gene EDAC SoC registered\n");
1791
1792 return 0;
1793
1794 err_ctl_free:
1795 edac_device_free_ctl_info(edac_dev);
1796 err_release_group:
1797 devres_release_group(edac->dev, xgene_edac_soc_add);
1798 return rc;
1799 }
1800
1801 static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc)
1802 {
1803 struct edac_device_ctl_info *edac_dev = soc->edac_dev;
1804
1805 xgene_edac_soc_hw_init(edac_dev, 0);
1806 edac_device_del_device(soc->edac->dev);
1807 edac_device_free_ctl_info(edac_dev);
1808 return 0;
1809 }
1810
1811 static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
1812 {
1813 struct xgene_edac *ctx = dev_id;
1814 struct xgene_edac_pmd_ctx *pmd;
1815 struct xgene_edac_dev_ctx *node;
1816 unsigned int pcp_hp_stat;
1817 unsigned int pcp_lp_stat;
1818
1819 xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
1820 xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
1821 if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
1822 (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
1823 (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
1824 struct xgene_edac_mc_ctx *mcu;
1825
1826 list_for_each_entry(mcu, &ctx->mcus, next)
1827 xgene_edac_mc_check(mcu->mci);
1828 }
1829
1830 list_for_each_entry(pmd, &ctx->pmds, next) {
1831 if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
1832 xgene_edac_pmd_check(pmd->edac_dev);
1833 }
1834
1835 list_for_each_entry(node, &ctx->l3s, next)
1836 xgene_edac_l3_check(node->edac_dev);
1837
1838 list_for_each_entry(node, &ctx->socs, next)
1839 xgene_edac_soc_check(node->edac_dev);
1840
1841 return IRQ_HANDLED;
1842 }
1843
1844 static int xgene_edac_probe(struct platform_device *pdev)
1845 {
1846 struct xgene_edac *edac;
1847 struct device_node *child;
1848 struct resource *res;
1849 int rc;
1850
1851 edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
1852 if (!edac)
1853 return -ENOMEM;
1854
1855 edac->dev = &pdev->dev;
1856 platform_set_drvdata(pdev, edac);
1857 INIT_LIST_HEAD(&edac->mcus);
1858 INIT_LIST_HEAD(&edac->pmds);
1859 INIT_LIST_HEAD(&edac->l3s);
1860 INIT_LIST_HEAD(&edac->socs);
1861 spin_lock_init(&edac->lock);
1862 mutex_init(&edac->mc_lock);
1863
1864 edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1865 "regmap-csw");
1866 if (IS_ERR(edac->csw_map)) {
1867 dev_err(edac->dev, "unable to get syscon regmap csw\n");
1868 rc = PTR_ERR(edac->csw_map);
1869 goto out_err;
1870 }
1871
1872 edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1873 "regmap-mcba");
1874 if (IS_ERR(edac->mcba_map)) {
1875 dev_err(edac->dev, "unable to get syscon regmap mcba\n");
1876 rc = PTR_ERR(edac->mcba_map);
1877 goto out_err;
1878 }
1879
1880 edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1881 "regmap-mcbb");
1882 if (IS_ERR(edac->mcbb_map)) {
1883 dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
1884 rc = PTR_ERR(edac->mcbb_map);
1885 goto out_err;
1886 }
1887 edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1888 "regmap-efuse");
1889 if (IS_ERR(edac->efuse_map)) {
1890 dev_err(edac->dev, "unable to get syscon regmap efuse\n");
1891 rc = PTR_ERR(edac->efuse_map);
1892 goto out_err;
1893 }
1894
1895
1896
1897
1898
1899 edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1900 "regmap-rb");
1901 if (IS_ERR(edac->rb_map)) {
1902 dev_warn(edac->dev, "missing syscon regmap rb\n");
1903 edac->rb_map = NULL;
1904 }
1905
1906 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1907 edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
1908 if (IS_ERR(edac->pcp_csr)) {
1909 dev_err(&pdev->dev, "no PCP resource address\n");
1910 rc = PTR_ERR(edac->pcp_csr);
1911 goto out_err;
1912 }
1913
1914 if (edac_op_state == EDAC_OPSTATE_INT) {
1915 int irq;
1916 int i;
1917
1918 for (i = 0; i < 3; i++) {
1919 irq = platform_get_irq_optional(pdev, i);
1920 if (irq < 0) {
1921 dev_err(&pdev->dev, "No IRQ resource\n");
1922 rc = irq;
1923 goto out_err;
1924 }
1925 rc = devm_request_irq(&pdev->dev, irq,
1926 xgene_edac_isr, IRQF_SHARED,
1927 dev_name(&pdev->dev), edac);
1928 if (rc) {
1929 dev_err(&pdev->dev,
1930 "Could not request IRQ %d\n", irq);
1931 goto out_err;
1932 }
1933 }
1934 }
1935
1936 edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
1937
1938 for_each_child_of_node(pdev->dev.of_node, child) {
1939 if (!of_device_is_available(child))
1940 continue;
1941 if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
1942 xgene_edac_mc_add(edac, child);
1943 if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
1944 xgene_edac_pmd_add(edac, child, 1);
1945 if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
1946 xgene_edac_pmd_add(edac, child, 2);
1947 if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
1948 xgene_edac_l3_add(edac, child, 1);
1949 if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
1950 xgene_edac_l3_add(edac, child, 2);
1951 if (of_device_is_compatible(child, "apm,xgene-edac-soc"))
1952 xgene_edac_soc_add(edac, child, 0);
1953 if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1"))
1954 xgene_edac_soc_add(edac, child, 1);
1955 }
1956
1957 return 0;
1958
1959 out_err:
1960 return rc;
1961 }
1962
1963 static int xgene_edac_remove(struct platform_device *pdev)
1964 {
1965 struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
1966 struct xgene_edac_mc_ctx *mcu;
1967 struct xgene_edac_mc_ctx *temp_mcu;
1968 struct xgene_edac_pmd_ctx *pmd;
1969 struct xgene_edac_pmd_ctx *temp_pmd;
1970 struct xgene_edac_dev_ctx *node;
1971 struct xgene_edac_dev_ctx *temp_node;
1972
1973 list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
1974 xgene_edac_mc_remove(mcu);
1975
1976 list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
1977 xgene_edac_pmd_remove(pmd);
1978
1979 list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
1980 xgene_edac_l3_remove(node);
1981
1982 list_for_each_entry_safe(node, temp_node, &edac->socs, next)
1983 xgene_edac_soc_remove(node);
1984
1985 return 0;
1986 }
1987
1988 static const struct of_device_id xgene_edac_of_match[] = {
1989 { .compatible = "apm,xgene-edac" },
1990 {},
1991 };
1992 MODULE_DEVICE_TABLE(of, xgene_edac_of_match);
1993
1994 static struct platform_driver xgene_edac_driver = {
1995 .probe = xgene_edac_probe,
1996 .remove = xgene_edac_remove,
1997 .driver = {
1998 .name = "xgene-edac",
1999 .of_match_table = xgene_edac_of_match,
2000 },
2001 };
2002
2003 static int __init xgene_edac_init(void)
2004 {
2005 int rc;
2006
2007
2008 switch (edac_op_state) {
2009 case EDAC_OPSTATE_POLL:
2010 case EDAC_OPSTATE_INT:
2011 break;
2012 default:
2013 edac_op_state = EDAC_OPSTATE_INT;
2014 break;
2015 }
2016
2017 rc = platform_driver_register(&xgene_edac_driver);
2018 if (rc) {
2019 edac_printk(KERN_ERR, EDAC_MOD_STR,
2020 "EDAC fails to register\n");
2021 goto reg_failed;
2022 }
2023
2024 return 0;
2025
2026 reg_failed:
2027 return rc;
2028 }
2029 module_init(xgene_edac_init);
2030
2031 static void __exit xgene_edac_exit(void)
2032 {
2033 platform_driver_unregister(&xgene_edac_driver);
2034 }
2035 module_exit(xgene_edac_exit);
2036
2037 MODULE_LICENSE("GPL");
2038 MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
2039 MODULE_DESCRIPTION("APM X-Gene EDAC driver");
2040 module_param(edac_op_state, int, 0444);
2041 MODULE_PARM_DESC(edac_op_state,
2042 "EDAC error reporting state: 0=Poll, 2=Interrupt");