Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Intel D82875P Memory Controller kernel module
0003  * (C) 2003 Linux Networx (http://lnxi.com)
0004  * This file may be distributed under the terms of the
0005  * GNU General Public License.
0006  *
0007  * Written by Thayne Harbaugh
0008  * Contributors:
0009  *  Wang Zhenyu at intel.com
0010  *
0011  * $Id: edac_i82875p.c,v 1.5.2.11 2005/10/05 00:43:44 dsp_llnl Exp $
0012  *
0013  * Note: E7210 appears same as D82875P - zhenyu.z.wang at intel.com
0014  */
0015 
0016 #include <linux/module.h>
0017 #include <linux/init.h>
0018 #include <linux/pci.h>
0019 #include <linux/pci_ids.h>
0020 #include <linux/edac.h>
0021 #include "edac_module.h"
0022 
/* Module name: used for mci->mod_name and as the pci_driver name. */
#define EDAC_MOD_STR        "i82875p_edac"

/* Logging wrappers that tag every message with the "i82875p" chipset name. */
#define i82875p_printk(level, fmt, arg...) \
    edac_printk(level, "i82875p", fmt, ##arg)

#define i82875p_mc_printk(mci, level, fmt, arg...) \
    edac_mc_chipset_printk(mci, level, "i82875p", fmt, ##arg)

/* Fallback device IDs, used only when the included headers lack them. */
#ifndef PCI_DEVICE_ID_INTEL_82875_0
#define PCI_DEVICE_ID_INTEL_82875_0 0x2578
#endif              /* PCI_DEVICE_ID_INTEL_82875_0 */

#ifndef PCI_DEVICE_ID_INTEL_82875_6
#define PCI_DEVICE_ID_INTEL_82875_6 0x257e
#endif              /* PCI_DEVICE_ID_INTEL_82875_6 */

/*
 * Eight DIMM slots in total: four csrows in dual channel mode,
 * eight csrows in single channel mode.
 */
#define I82875P_NR_DIMMS        8
#define I82875P_NR_CSROWS(nr_chans) (I82875P_NR_DIMMS / (nr_chans))
0042 
/* Intel 82875p register addresses - device 0 function 0 - DRAM Controller */

/*
 * Error Address Pointer (32b)
 *
 * 31:12 block address
 * 11:0  reserved
 */
#define I82875P_EAP     0x58

/*
 * DRAM Error Syndrome (8b)
 *
 *  7:0  DRAM ECC Syndrome
 */
#define I82875P_DERRSYN     0x5c

/*
 * DRAM Error Status (8b)
 *
 *  7:1  reserved
 *  0    Error channel 0/1
 */
#define I82875P_DES     0x5d

/*
 * Error Status Register (16b)
 *
 * 15:10 reserved
 *  9    non-DRAM lock error (ndlock)
 *  8    Sftwr Generated SMI
 *  7    ECC UE
 *  6    reserved
 *  5    MCH detects unimplemented cycle
 *  4    AGP access outside GA
 *  3    Invalid AGP access
 *  2    Invalid GA translation table
 *  1    Unsupported AGP command
 *  0    ECC CE
 */
#define I82875P_ERRSTS      0xc8

/*
 * Error Command (16b)
 *
 * 15:10 reserved
 *  9    SERR on non-DRAM lock
 *  8    SERR on ECC UE
 *  7    SERR on ECC CE
 *  6    target abort on high exception
 *  5    detect unimplemented cyc
 *  4    AGP access outside of GA
 *  3    SERR on invalid AGP access
 *  2    invalid translation table
 *  1    SERR on unsupported AGP command
 *  0    reserved
 */
#define I82875P_ERRCMD      0xca

/* Intel 82875p register addresses - device 6 function 0 - DRAM Controller */

/*
 * PCI Command Register (16b)
 *
 * 15:10 reserved
 *  9    fast back-to-back - ro 0
 *  8    SERR enable - ro 0
 *  7    addr/data stepping - ro 0
 *  6    parity err enable - ro 0
 *  5    VGA palette snoop - ro 0
 *  4    mem wr & invalidate - ro 0
 *  3    special cycle - ro 0
 *  2    bus master - ro 0
 *  1    mem access dev6 - 0(dis),1(en)
 *  0    IO access dev3 - 0(dis),1(en)
 */
#define I82875P_PCICMD6     0x04

/*
 * Mem Delays Base ADDR Reg (32b)
 *
 * 31:12 mem base addr [31:12]
 * 11:4  address mask - ro 0
 *  3    prefetchable - ro 0(non),1(pre)
 *  2:1  mem type - ro 0
 *  0    mem space - ro 0
 */
#define I82875P_BAR6        0x10
0115 
/*
 * Intel 82875p MMIO register space (MMR space).
 *
 * The offsets below are relative to the window this driver ioremaps from
 * BAR 0 of the overflow device (PCI_DEVICE_ID_INTEL_82875_6); they are not
 * PCI configuration-space offsets.
 */

#define I82875P_DRB_SHIFT 26    /* 64MiB grain */

/*
 * DRAM Row Boundary (8b x 8)
 *
 *  7    reserved
 *  6:0  64MiB row boundary addr
 */
#define I82875P_DRB     0x00

/*
 * DRAM Row Attribute (4b x 8)
 *
 *  7    reserved
 *  6:4  row attr row 1
 *  3    reserved
 *  2:0  row attr row 0
 *
 * 000 =  4KiB
 * 001 =  8KiB
 * 010 = 16KiB
 * 011 = 32KiB
 */
#define I82875P_DRA     0x10

/*
 * DRAM Controller Mode (32b)
 *
 * 31:30 reserved
 * 29    init complete
 * 28:23 reserved
 * 22:21 nr chan 00=1,01=2
 * 20    reserved
 * 19:18 Data Integ Mode 00=none,01=ecc
 * 17:11 reserved
 * 10:8  refresh mode
 *  7    reserved
 *  6:4  mode select
 *  3:2  reserved
 *  1:0  DRAM type 01=DDR
 */
#define I82875P_DRC     0x68
0153 
/* Chip variants handled by this driver; values index i82875p_devs[]. */
enum i82875p_chips {
    I82875P = 0,
};
0157 
0158 struct i82875p_pvt {
0159     struct pci_dev *ovrfl_pdev;
0160     void __iomem *ovrfl_window;
0161 };
0162 
/* Static description of one supported chip variant. */
struct i82875p_dev_info {
    const char *ctl_name;   /* copied into mci->ctl_name at probe time */
};
0166 
0167 struct i82875p_error_info {
0168     u16 errsts;
0169     u32 eap;
0170     u8 des;
0171     u8 derrsyn;
0172     u16 errsts2;
0173 };
0174 
0175 static const struct i82875p_dev_info i82875p_devs[] = {
0176     [I82875P] = {
0177         .ctl_name = "i82875p"},
0178 };
0179 
/*
 * init dev: in case that AGP code has already registered driver.
 * Holds a counted reference that is dropped in i82875p_exit().
 */
static struct pci_dev *mci_pdev;

/* Generic EDAC PCI control created in i82875p_probe1(); may be NULL. */
static struct edac_pci_ctl_info *i82875p_pci;
0185 
0186 static void i82875p_get_error_info(struct mem_ctl_info *mci,
0187                 struct i82875p_error_info *info)
0188 {
0189     struct pci_dev *pdev;
0190 
0191     pdev = to_pci_dev(mci->pdev);
0192 
0193     /*
0194      * This is a mess because there is no atomic way to read all the
0195      * registers at once and the registers can transition from CE being
0196      * overwritten by UE.
0197      */
0198     pci_read_config_word(pdev, I82875P_ERRSTS, &info->errsts);
0199 
0200     if (!(info->errsts & 0x0081))
0201         return;
0202 
0203     pci_read_config_dword(pdev, I82875P_EAP, &info->eap);
0204     pci_read_config_byte(pdev, I82875P_DES, &info->des);
0205     pci_read_config_byte(pdev, I82875P_DERRSYN, &info->derrsyn);
0206     pci_read_config_word(pdev, I82875P_ERRSTS, &info->errsts2);
0207 
0208     /*
0209      * If the error is the same then we can for both reads then
0210      * the first set of reads is valid.  If there is a change then
0211      * there is a CE no info and the second set of reads is valid
0212      * and should be UE info.
0213      */
0214     if ((info->errsts ^ info->errsts2) & 0x0081) {
0215         pci_read_config_dword(pdev, I82875P_EAP, &info->eap);
0216         pci_read_config_byte(pdev, I82875P_DES, &info->des);
0217         pci_read_config_byte(pdev, I82875P_DERRSYN, &info->derrsyn);
0218     }
0219 
0220     pci_write_bits16(pdev, I82875P_ERRSTS, 0x0081, 0x0081);
0221 }
0222 
0223 static int i82875p_process_error_info(struct mem_ctl_info *mci,
0224                 struct i82875p_error_info *info,
0225                 int handle_errors)
0226 {
0227     int row, multi_chan;
0228 
0229     multi_chan = mci->csrows[0]->nr_channels - 1;
0230 
0231     if (!(info->errsts & 0x0081))
0232         return 0;
0233 
0234     if (!handle_errors)
0235         return 1;
0236 
0237     if ((info->errsts ^ info->errsts2) & 0x0081) {
0238         edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
0239                      -1, -1, -1,
0240                      "UE overwrote CE", "");
0241         info->errsts = info->errsts2;
0242     }
0243 
0244     info->eap >>= PAGE_SHIFT;
0245     row = edac_mc_find_csrow_by_page(mci, info->eap);
0246 
0247     if (info->errsts & 0x0080)
0248         edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
0249                      info->eap, 0, 0,
0250                      row, -1, -1,
0251                      "i82875p UE", "");
0252     else
0253         edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0254                      info->eap, 0, info->derrsyn,
0255                      row, multi_chan ? (info->des & 0x1) : 0,
0256                      -1, "i82875p CE", "");
0257 
0258     return 1;
0259 }
0260 
0261 static void i82875p_check(struct mem_ctl_info *mci)
0262 {
0263     struct i82875p_error_info info;
0264 
0265     i82875p_get_error_info(mci, &info);
0266     i82875p_process_error_info(mci, &info, 1);
0267 }
0268 
0269 /* Return 0 on success or 1 on failure. */
0270 static int i82875p_setup_overfl_dev(struct pci_dev *pdev,
0271                 struct pci_dev **ovrfl_pdev,
0272                 void __iomem **ovrfl_window)
0273 {
0274     struct pci_dev *dev;
0275     void __iomem *window;
0276 
0277     *ovrfl_pdev = NULL;
0278     *ovrfl_window = NULL;
0279     dev = pci_get_device(PCI_VEND_DEV(INTEL, 82875_6), NULL);
0280 
0281     if (dev == NULL) {
0282         /* Intel tells BIOS developers to hide device 6 which
0283          * configures the overflow device access containing
0284          * the DRBs - this is where we expose device 6.
0285          * http://www.x86-secret.com/articles/tweak/pat/patsecrets-2.htm
0286          */
0287         pci_write_bits8(pdev, 0xf4, 0x2, 0x2);
0288         dev = pci_scan_single_device(pdev->bus, PCI_DEVFN(6, 0));
0289 
0290         if (dev == NULL)
0291             return 1;
0292 
0293         pci_bus_assign_resources(dev->bus);
0294         pci_bus_add_device(dev);
0295     }
0296 
0297     *ovrfl_pdev = dev;
0298 
0299     if (pci_enable_device(dev)) {
0300         i82875p_printk(KERN_ERR, "%s(): Failed to enable overflow "
0301             "device\n", __func__);
0302         return 1;
0303     }
0304 
0305     if (pci_request_regions(dev, pci_name(dev))) {
0306 #ifdef CORRECT_BIOS
0307         goto fail0;
0308 #endif
0309     }
0310 
0311     /* cache is irrelevant for PCI bus reads/writes */
0312     window = pci_ioremap_bar(dev, 0);
0313     if (window == NULL) {
0314         i82875p_printk(KERN_ERR, "%s(): Failed to ioremap bar6\n",
0315             __func__);
0316         goto fail1;
0317     }
0318 
0319     *ovrfl_window = window;
0320     return 0;
0321 
0322 fail1:
0323     pci_release_regions(dev);
0324 
0325 #ifdef CORRECT_BIOS
0326 fail0:
0327     pci_disable_device(dev);
0328 #endif
0329     /* NOTE: the ovrfl proc entry and pci_dev are intentionally left */
0330     return 1;
0331 }
0332 
0333 /* Return 1 if dual channel mode is active.  Else return 0. */
0334 static inline int dual_channel_active(u32 drc)
0335 {
0336     return (drc >> 21) & 0x1;
0337 }
0338 
0339 static void i82875p_init_csrows(struct mem_ctl_info *mci,
0340                 struct pci_dev *pdev,
0341                 void __iomem * ovrfl_window, u32 drc)
0342 {
0343     struct csrow_info *csrow;
0344     struct dimm_info *dimm;
0345     unsigned nr_chans = dual_channel_active(drc) + 1;
0346     unsigned long last_cumul_size;
0347     u8 value;
0348     u32 drc_ddim;       /* DRAM Data Integrity Mode 0=none,2=edac */
0349     u32 cumul_size, nr_pages;
0350     int index, j;
0351 
0352     drc_ddim = (drc >> 18) & 0x1;
0353     last_cumul_size = 0;
0354 
0355     /* The dram row boundary (DRB) reg values are boundary address
0356      * for each DRAM row with a granularity of 32 or 64MB (single/dual
0357      * channel operation).  DRB regs are cumulative; therefore DRB7 will
0358      * contain the total memory contained in all eight rows.
0359      */
0360 
0361     for (index = 0; index < mci->nr_csrows; index++) {
0362         csrow = mci->csrows[index];
0363 
0364         value = readb(ovrfl_window + I82875P_DRB + index);
0365         cumul_size = value << (I82875P_DRB_SHIFT - PAGE_SHIFT);
0366         edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
0367         if (cumul_size == last_cumul_size)
0368             continue;   /* not populated */
0369 
0370         csrow->first_page = last_cumul_size;
0371         csrow->last_page = cumul_size - 1;
0372         nr_pages = cumul_size - last_cumul_size;
0373         last_cumul_size = cumul_size;
0374 
0375         for (j = 0; j < nr_chans; j++) {
0376             dimm = csrow->channels[j]->dimm;
0377 
0378             dimm->nr_pages = nr_pages / nr_chans;
0379             dimm->grain = 1 << 12;  /* I82875P_EAP has 4KiB reolution */
0380             dimm->mtype = MEM_DDR;
0381             dimm->dtype = DEV_UNKNOWN;
0382             dimm->edac_mode = drc_ddim ? EDAC_SECDED : EDAC_NONE;
0383         }
0384     }
0385 }
0386 
0387 static int i82875p_probe1(struct pci_dev *pdev, int dev_idx)
0388 {
0389     int rc = -ENODEV;
0390     struct mem_ctl_info *mci;
0391     struct edac_mc_layer layers[2];
0392     struct i82875p_pvt *pvt;
0393     struct pci_dev *ovrfl_pdev;
0394     void __iomem *ovrfl_window;
0395     u32 drc;
0396     u32 nr_chans;
0397     struct i82875p_error_info discard;
0398 
0399     edac_dbg(0, "\n");
0400 
0401     if (i82875p_setup_overfl_dev(pdev, &ovrfl_pdev, &ovrfl_window))
0402         return -ENODEV;
0403     drc = readl(ovrfl_window + I82875P_DRC);
0404     nr_chans = dual_channel_active(drc) + 1;
0405 
0406     layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
0407     layers[0].size = I82875P_NR_CSROWS(nr_chans);
0408     layers[0].is_virt_csrow = true;
0409     layers[1].type = EDAC_MC_LAYER_CHANNEL;
0410     layers[1].size = nr_chans;
0411     layers[1].is_virt_csrow = false;
0412     mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
0413     if (!mci) {
0414         rc = -ENOMEM;
0415         goto fail0;
0416     }
0417 
0418     edac_dbg(3, "init mci\n");
0419     mci->pdev = &pdev->dev;
0420     mci->mtype_cap = MEM_FLAG_DDR;
0421     mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
0422     mci->edac_cap = EDAC_FLAG_UNKNOWN;
0423     mci->mod_name = EDAC_MOD_STR;
0424     mci->ctl_name = i82875p_devs[dev_idx].ctl_name;
0425     mci->dev_name = pci_name(pdev);
0426     mci->edac_check = i82875p_check;
0427     mci->ctl_page_to_phys = NULL;
0428     edac_dbg(3, "init pvt\n");
0429     pvt = (struct i82875p_pvt *)mci->pvt_info;
0430     pvt->ovrfl_pdev = ovrfl_pdev;
0431     pvt->ovrfl_window = ovrfl_window;
0432     i82875p_init_csrows(mci, pdev, ovrfl_window, drc);
0433     i82875p_get_error_info(mci, &discard);  /* clear counters */
0434 
0435     /* Here we assume that we will never see multiple instances of this
0436      * type of memory controller.  The ID is therefore hardcoded to 0.
0437      */
0438     if (edac_mc_add_mc(mci)) {
0439         edac_dbg(3, "failed edac_mc_add_mc()\n");
0440         goto fail1;
0441     }
0442 
0443     /* allocating generic PCI control info */
0444     i82875p_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
0445     if (!i82875p_pci) {
0446         printk(KERN_WARNING
0447             "%s(): Unable to create PCI control\n",
0448             __func__);
0449         printk(KERN_WARNING
0450             "%s(): PCI error report via EDAC not setup\n",
0451             __func__);
0452     }
0453 
0454     /* get this far and it's successful */
0455     edac_dbg(3, "success\n");
0456     return 0;
0457 
0458 fail1:
0459     edac_mc_free(mci);
0460 
0461 fail0:
0462     iounmap(ovrfl_window);
0463     pci_release_regions(ovrfl_pdev);
0464 
0465     pci_disable_device(ovrfl_pdev);
0466     /* NOTE: the ovrfl proc entry and pci_dev are intentionally left */
0467     return rc;
0468 }
0469 
0470 /* returns count (>= 0), or negative on error */
0471 static int i82875p_init_one(struct pci_dev *pdev,
0472                 const struct pci_device_id *ent)
0473 {
0474     int rc;
0475 
0476     edac_dbg(0, "\n");
0477     i82875p_printk(KERN_INFO, "i82875p init one\n");
0478 
0479     if (pci_enable_device(pdev) < 0)
0480         return -EIO;
0481 
0482     rc = i82875p_probe1(pdev, ent->driver_data);
0483 
0484     if (mci_pdev == NULL)
0485         mci_pdev = pci_dev_get(pdev);
0486 
0487     return rc;
0488 }
0489 
0490 static void i82875p_remove_one(struct pci_dev *pdev)
0491 {
0492     struct mem_ctl_info *mci;
0493     struct i82875p_pvt *pvt = NULL;
0494 
0495     edac_dbg(0, "\n");
0496 
0497     if (i82875p_pci)
0498         edac_pci_release_generic_ctl(i82875p_pci);
0499 
0500     if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
0501         return;
0502 
0503     pvt = (struct i82875p_pvt *)mci->pvt_info;
0504 
0505     if (pvt->ovrfl_window)
0506         iounmap(pvt->ovrfl_window);
0507 
0508     if (pvt->ovrfl_pdev) {
0509 #ifdef CORRECT_BIOS
0510         pci_release_regions(pvt->ovrfl_pdev);
0511 #endif              /*CORRECT_BIOS */
0512         pci_disable_device(pvt->ovrfl_pdev);
0513         pci_dev_put(pvt->ovrfl_pdev);
0514     }
0515 
0516     edac_mc_free(mci);
0517 }
0518 
0519 static const struct pci_device_id i82875p_pci_tbl[] = {
0520     {
0521      PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
0522      I82875P},
0523     {
0524      0,
0525      }          /* 0 terminated list. */
0526 };
0527 
0528 MODULE_DEVICE_TABLE(pci, i82875p_pci_tbl);
0529 
0530 static struct pci_driver i82875p_driver = {
0531     .name = EDAC_MOD_STR,
0532     .probe = i82875p_init_one,
0533     .remove = i82875p_remove_one,
0534     .id_table = i82875p_pci_tbl,
0535 };
0536 
0537 static int __init i82875p_init(void)
0538 {
0539     int pci_rc;
0540 
0541     edac_dbg(3, "\n");
0542 
0543        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
0544        opstate_init();
0545 
0546     pci_rc = pci_register_driver(&i82875p_driver);
0547 
0548     if (pci_rc < 0)
0549         goto fail0;
0550 
0551     if (mci_pdev == NULL) {
0552         mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
0553                     PCI_DEVICE_ID_INTEL_82875_0, NULL);
0554 
0555         if (!mci_pdev) {
0556             edac_dbg(0, "875p pci_get_device fail\n");
0557             pci_rc = -ENODEV;
0558             goto fail1;
0559         }
0560 
0561         pci_rc = i82875p_init_one(mci_pdev, i82875p_pci_tbl);
0562 
0563         if (pci_rc < 0) {
0564             edac_dbg(0, "875p init fail\n");
0565             pci_rc = -ENODEV;
0566             goto fail1;
0567         }
0568     }
0569 
0570     return 0;
0571 
0572 fail1:
0573     pci_unregister_driver(&i82875p_driver);
0574 
0575 fail0:
0576     pci_dev_put(mci_pdev);
0577     return pci_rc;
0578 }
0579 
0580 static void __exit i82875p_exit(void)
0581 {
0582     edac_dbg(3, "\n");
0583 
0584     i82875p_remove_one(mci_pdev);
0585     pci_dev_put(mci_pdev);
0586 
0587     pci_unregister_driver(&i82875p_driver);
0588 
0589 }
0590 
0591 module_init(i82875p_init);
0592 module_exit(i82875p_exit);
0593 
0594 MODULE_LICENSE("GPL");
0595 MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh");
0596 MODULE_DESCRIPTION("MC support for Intel 82875 memory hub controllers");
0597 
0598 module_param(edac_op_state, int, 0444);
0599 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");