Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
0004  */
0005 #include <linux/bitfield.h>
0006 #include <linux/bitops.h>
0007 #include <linux/edac.h>
0008 #include <linux/of_irq.h>
0009 #include <linux/platform_device.h>
0010 #include <linux/spinlock.h>
0011 #include "edac_module.h"
0012 
/*
 * Register offsets, in bytes. Every access in this file adds one of these
 * to the ioremapped controller base (mmio_base).
 */
#define AL_MC_ECC_CFG       0x70
#define AL_MC_ECC_CLEAR     0x7c
#define AL_MC_ECC_ERR_COUNT 0x80
#define AL_MC_ECC_CE_ADDR0  0x84
#define AL_MC_ECC_CE_ADDR1  0x88
#define AL_MC_ECC_UE_ADDR0  0xa4
#define AL_MC_ECC_UE_ADDR1  0xa8
#define AL_MC_ECC_CE_SYND0  0x8c
#define AL_MC_ECC_CE_SYND1  0x90
#define AL_MC_ECC_CE_SYND2  0x94
#define AL_MC_ECC_UE_SYND0  0xac
#define AL_MC_ECC_UE_SYND1  0xb0
#define AL_MC_ECC_UE_SYND2  0xb4

/* Register fields (bit masks within the registers above) */

/* AL_MC_ECC_CFG: when set, get_scrub_mode() reports SCRUB_NONE */
#define AL_MC_ECC_CFG_SCRUB_DISABLED    BIT(4)

/*
 * AL_MC_ECC_CLEAR: written by handle_ce()/handle_ue() once the matching
 * address and syndrome registers have been read.
 */
#define AL_MC_ECC_CLEAR_UE_COUNT    BIT(3)
#define AL_MC_ECC_CLEAR_CE_COUNT    BIT(2)
#define AL_MC_ECC_CLEAR_UE_ERR      BIT(1)
#define AL_MC_ECC_CLEAR_CE_ERR      BIT(0)

/* AL_MC_ECC_ERR_COUNT: UE counter in bits 31:16, CE counter in bits 15:0 */
#define AL_MC_ECC_ERR_COUNT_UE      GENMASK(31, 16)
#define AL_MC_ECC_ERR_COUNT_CE      GENMASK(15, 0)

/* AL_MC_ECC_CE_ADDR0: rank and row of the reported correctable error */
#define AL_MC_ECC_CE_ADDR0_RANK     GENMASK(25, 24)
#define AL_MC_ECC_CE_ADDR0_ROW      GENMASK(17, 0)

/* AL_MC_ECC_CE_ADDR1: bank group, bank and column of the correctable error */
#define AL_MC_ECC_CE_ADDR1_BG       GENMASK(25, 24)
#define AL_MC_ECC_CE_ADDR1_BANK     GENMASK(18, 16)
#define AL_MC_ECC_CE_ADDR1_COLUMN   GENMASK(11, 0)

/* AL_MC_ECC_UE_ADDR0: rank and row of the reported uncorrectable error */
#define AL_MC_ECC_UE_ADDR0_RANK     GENMASK(25, 24)
#define AL_MC_ECC_UE_ADDR0_ROW      GENMASK(17, 0)

/* AL_MC_ECC_UE_ADDR1: bank group, bank and column of the uncorrectable error */
#define AL_MC_ECC_UE_ADDR1_BG       GENMASK(25, 24)
#define AL_MC_ECC_UE_ADDR1_BANK     GENMASK(18, 16)
#define AL_MC_ECC_UE_ADDR1_COLUMN   GENMASK(11, 0)

#define DRV_NAME "al_mc_edac"
/* Size of the on-stack message buffer used by handle_ce()/handle_ue() */
#define AL_MC_EDAC_MSG_MAX 256
0055 
/*
 * Driver-private state; lives in mci->pvt_info (allocated by
 * edac_mc_alloc() in al_mc_edac_probe()).
 */
struct al_mc_edac {
    /* ioremapped base of the memory controller registers */
    void __iomem *mmio_base;
    /* held around the edac_mc_handle_error() calls */
    spinlock_t lock;
    /* "ce" interrupt number; <= 0 means CEs are polled instead */
    int irq_ce;
    /* "ue" interrupt number; <= 0 means UEs are polled instead */
    int irq_ue;
};
0062 
0063 static void prepare_msg(char *message, size_t buffer_size,
0064             enum hw_event_mc_err_type type,
0065             u8 rank, u32 row, u8 bg, u8 bank, u16 column,
0066             u32 syn0, u32 syn1, u32 syn2)
0067 {
0068     snprintf(message, buffer_size,
0069          "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x",
0070          type == HW_EVENT_ERR_UNCORRECTED ? "UE" : "CE",
0071          rank, row, bg, bank, column, syn0, syn1, syn2);
0072 }
0073 
0074 static int handle_ce(struct mem_ctl_info *mci)
0075 {
0076     u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row;
0077     struct al_mc_edac *al_mc = mci->pvt_info;
0078     char msg[AL_MC_EDAC_MSG_MAX];
0079     u16 ce_count, column;
0080     unsigned long flags;
0081     u8 rank, bg, bank;
0082 
0083     eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
0084     ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt);
0085     if (!ce_count)
0086         return 0;
0087 
0088     ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0);
0089     ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1);
0090     ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0);
0091     ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1);
0092     ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2);
0093 
0094     writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR,
0095                al_mc->mmio_base + AL_MC_ECC_CLEAR);
0096 
0097     dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
0098         ecccaddr0, ecccaddr1);
0099 
0100     rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0);
0101     row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0);
0102 
0103     bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1);
0104     bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1);
0105     column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1);
0106 
0107     prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED,
0108             rank, row, bg, bank, column,
0109             ecccsyn0, ecccsyn1, ecccsyn2);
0110 
0111     spin_lock_irqsave(&al_mc->lock, flags);
0112     edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
0113                  ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
0114     spin_unlock_irqrestore(&al_mc->lock, flags);
0115 
0116     return ce_count;
0117 }
0118 
0119 static int handle_ue(struct mem_ctl_info *mci)
0120 {
0121     u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row;
0122     struct al_mc_edac *al_mc = mci->pvt_info;
0123     char msg[AL_MC_EDAC_MSG_MAX];
0124     u16 ue_count, column;
0125     unsigned long flags;
0126     u8 rank, bg, bank;
0127 
0128     eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
0129     ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt);
0130     if (!ue_count)
0131         return 0;
0132 
0133     eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0);
0134     eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1);
0135     eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0);
0136     eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1);
0137     eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2);
0138 
0139     writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR,
0140                al_mc->mmio_base + AL_MC_ECC_CLEAR);
0141 
0142     dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
0143         eccuaddr0, eccuaddr1);
0144 
0145     rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0);
0146     row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0);
0147 
0148     bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1);
0149     bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1);
0150     column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1);
0151 
0152     prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED,
0153             rank, row, bg, bank, column,
0154             eccusyn0, eccusyn1, eccusyn2);
0155 
0156     spin_lock_irqsave(&al_mc->lock, flags);
0157     edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
0158                  ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
0159     spin_unlock_irqrestore(&al_mc->lock, flags);
0160 
0161     return ue_count;
0162 }
0163 
0164 static void al_mc_edac_check(struct mem_ctl_info *mci)
0165 {
0166     struct al_mc_edac *al_mc = mci->pvt_info;
0167 
0168     if (al_mc->irq_ue <= 0)
0169         handle_ue(mci);
0170 
0171     if (al_mc->irq_ce <= 0)
0172         handle_ce(mci);
0173 }
0174 
0175 static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info)
0176 {
0177     struct platform_device *pdev = info;
0178     struct mem_ctl_info *mci = platform_get_drvdata(pdev);
0179 
0180     if (handle_ue(mci))
0181         return IRQ_HANDLED;
0182     return IRQ_NONE;
0183 }
0184 
0185 static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info)
0186 {
0187     struct platform_device *pdev = info;
0188     struct mem_ctl_info *mci = platform_get_drvdata(pdev);
0189 
0190     if (handle_ce(mci))
0191         return IRQ_HANDLED;
0192     return IRQ_NONE;
0193 }
0194 
0195 static enum scrub_type get_scrub_mode(void __iomem *mmio_base)
0196 {
0197     u32 ecccfg0;
0198 
0199     ecccfg0 = readl(mmio_base + AL_MC_ECC_CFG);
0200 
0201     if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0))
0202         return SCRUB_NONE;
0203     else
0204         return SCRUB_HW_SRC;
0205 }
0206 
/* devm action: release the mem_ctl_info obtained from edac_mc_alloc(). */
static void devm_al_mc_edac_free(void *data)
{
    struct mem_ctl_info *mci = data;

    edac_mc_free(mci);
}
0211 
/*
 * devm action: unregister the memory controller that edac_mc_add_mc()
 * registered; @data is the struct device it was registered for.
 */
static void devm_al_mc_edac_del(void *data)
{
    struct device *dev = data;

    edac_mc_del_mc(dev);
}
0216 
0217 static int al_mc_edac_probe(struct platform_device *pdev)
0218 {
0219     struct edac_mc_layer layers[1];
0220     struct mem_ctl_info *mci;
0221     struct al_mc_edac *al_mc;
0222     void __iomem *mmio_base;
0223     struct dimm_info *dimm;
0224     int ret;
0225 
0226     mmio_base = devm_platform_ioremap_resource(pdev, 0);
0227     if (IS_ERR(mmio_base)) {
0228         dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
0229             PTR_ERR(mmio_base));
0230         return PTR_ERR(mmio_base);
0231     }
0232 
0233     layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
0234     layers[0].size = 1;
0235     layers[0].is_virt_csrow = false;
0236     mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
0237                 sizeof(struct al_mc_edac));
0238     if (!mci)
0239         return -ENOMEM;
0240 
0241     ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_free, mci);
0242     if (ret)
0243         return ret;
0244 
0245     platform_set_drvdata(pdev, mci);
0246     al_mc = mci->pvt_info;
0247 
0248     al_mc->mmio_base = mmio_base;
0249 
0250     al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue");
0251     if (al_mc->irq_ue <= 0)
0252         dev_dbg(&pdev->dev,
0253             "no IRQ defined for UE - falling back to polling\n");
0254 
0255     al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce");
0256     if (al_mc->irq_ce <= 0)
0257         dev_dbg(&pdev->dev,
0258             "no IRQ defined for CE - falling back to polling\n");
0259 
0260     /*
0261      * In case both interrupts (ue/ce) are to be found, use interrupt mode.
0262      * In case none of the interrupt are foud, use polling mode.
0263      * In case only one interrupt is found, use interrupt mode for it but
0264      * keep polling mode enable for the other.
0265      */
0266     if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) {
0267         edac_op_state = EDAC_OPSTATE_POLL;
0268         mci->edac_check = al_mc_edac_check;
0269     } else {
0270         edac_op_state = EDAC_OPSTATE_INT;
0271     }
0272 
0273     spin_lock_init(&al_mc->lock);
0274 
0275     mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
0276     mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
0277     mci->edac_cap = EDAC_FLAG_SECDED;
0278     mci->mod_name = DRV_NAME;
0279     mci->ctl_name = "al_mc";
0280     mci->pdev = &pdev->dev;
0281     mci->scrub_mode = get_scrub_mode(mmio_base);
0282 
0283     dimm = *mci->dimms;
0284     dimm->grain = 1;
0285 
0286     ret = edac_mc_add_mc(mci);
0287     if (ret < 0) {
0288         dev_err(&pdev->dev,
0289             "fail to add memory controller device (%d)\n",
0290             ret);
0291         return ret;
0292     }
0293 
0294     ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_del, &pdev->dev);
0295     if (ret)
0296         return ret;
0297 
0298     if (al_mc->irq_ue > 0) {
0299         ret = devm_request_irq(&pdev->dev,
0300                        al_mc->irq_ue,
0301                        al_mc_edac_irq_handler_ue,
0302                        IRQF_SHARED,
0303                        pdev->name,
0304                        pdev);
0305         if (ret != 0) {
0306             dev_err(&pdev->dev,
0307                 "failed to request UE IRQ %d (%d)\n",
0308                 al_mc->irq_ue, ret);
0309             return ret;
0310         }
0311     }
0312 
0313     if (al_mc->irq_ce > 0) {
0314         ret = devm_request_irq(&pdev->dev,
0315                        al_mc->irq_ce,
0316                        al_mc_edac_irq_handler_ce,
0317                        IRQF_SHARED,
0318                        pdev->name,
0319                        pdev);
0320         if (ret != 0) {
0321             dev_err(&pdev->dev,
0322                 "failed to request CE IRQ %d (%d)\n",
0323                 al_mc->irq_ce, ret);
0324             return ret;
0325         }
0326     }
0327 
0328     return 0;
0329 }
0330 
/* Device-tree compatible strings this driver binds to. */
static const struct of_device_id al_mc_edac_of_match[] = {
    { .compatible = "amazon,al-mc-edac", },
    {},    /* sentinel */
};

MODULE_DEVICE_TABLE(of, al_mc_edac_of_match);
0337 
/*
 * Platform driver definition. There is no remove callback: everything
 * al_mc_edac_probe() sets up is registered through devm_* helpers.
 */
static struct platform_driver al_mc_edac_driver = {
    .probe = al_mc_edac_probe,
    .driver = {
        .name = DRV_NAME,
        .of_match_table = al_mc_edac_of_match,
    },
};

module_platform_driver(al_mc_edac_driver);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Talel Shenhar");
MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver");