Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *  Bus error event handling code for systems equipped with ECC
0004  *  handling logic, i.e. DECstation/DECsystem 5000/200 (KN02),
0005  *  5000/240 (KN03), 5000/260 (KN05) and DECsystem 5900 (KN03),
0006  *  5900/260 (KN05) systems.
0007  *
0008  *  Copyright (c) 2003, 2005  Maciej W. Rozycki
0009  */
0010 
0011 #include <linux/init.h>
0012 #include <linux/interrupt.h>
0013 #include <linux/kernel.h>
0014 #include <linux/sched.h>
0015 #include <linux/types.h>
0016 
0017 #include <asm/addrspace.h>
0018 #include <asm/bootinfo.h>
0019 #include <asm/cpu.h>
0020 #include <asm/cpu-type.h>
0021 #include <asm/irq_regs.h>
0022 #include <asm/processor.h>
0023 #include <asm/ptrace.h>
0024 #include <asm/traps.h>
0025 
0026 #include <asm/dec/ecc.h>
0027 #include <asm/dec/kn02.h>
0028 #include <asm/dec/kn03.h>
0029 #include <asm/dec/kn05.h>
0030 
0031 static volatile u32 *kn0x_erraddr;
0032 static volatile u32 *kn0x_chksyn;
0033 
0034 static inline void dec_ecc_be_ack(void)
0035 {
0036     *kn0x_erraddr = 0;          /* any write clears the IRQ */
0037     iob();
0038 }
0039 
0040 static int dec_ecc_be_backend(struct pt_regs *regs, int is_fixup, int invoker)
0041 {
0042     static const char excstr[] = "exception";
0043     static const char intstr[] = "interrupt";
0044     static const char cpustr[] = "CPU";
0045     static const char dmastr[] = "DMA";
0046     static const char readstr[] = "read";
0047     static const char mreadstr[] = "memory read";
0048     static const char writestr[] = "write";
0049     static const char mwritstr[] = "partial memory write";
0050     static const char timestr[] = "timeout";
0051     static const char overstr[] = "overrun";
0052     static const char eccstr[] = "ECC error";
0053 
0054     const char *kind, *agent, *cycle, *event;
0055     const char *status = "", *xbit = "", *fmt = "";
0056     unsigned long address;
0057     u16 syn = 0, sngl;
0058 
0059     int i = 0;
0060 
0061     u32 erraddr = *kn0x_erraddr;
0062     u32 chksyn = *kn0x_chksyn;
0063     int action = MIPS_BE_FATAL;
0064 
0065     /* For non-ECC ack ASAP, so that any subsequent errors get caught. */
0066     if ((erraddr & (KN0X_EAR_VALID | KN0X_EAR_ECCERR)) == KN0X_EAR_VALID)
0067         dec_ecc_be_ack();
0068 
0069     kind = invoker ? intstr : excstr;
0070 
0071     if (!(erraddr & KN0X_EAR_VALID)) {
0072         /* No idea what happened. */
0073         printk(KERN_ALERT "Unidentified bus error %s\n", kind);
0074         return action;
0075     }
0076 
0077     agent = (erraddr & KN0X_EAR_CPU) ? cpustr : dmastr;
0078 
0079     if (erraddr & KN0X_EAR_ECCERR) {
0080         /* An ECC error on a CPU or DMA transaction. */
0081         cycle = (erraddr & KN0X_EAR_WRITE) ? mwritstr : mreadstr;
0082         event = eccstr;
0083     } else {
0084         /* A CPU timeout or a DMA overrun. */
0085         cycle = (erraddr & KN0X_EAR_WRITE) ? writestr : readstr;
0086         event = (erraddr & KN0X_EAR_CPU) ? timestr : overstr;
0087     }
0088 
0089     address = erraddr & KN0X_EAR_ADDRESS;
0090     /* For ECC errors on reads adjust for MT pipelining. */
0091     if ((erraddr & (KN0X_EAR_WRITE | KN0X_EAR_ECCERR)) == KN0X_EAR_ECCERR)
0092         address = (address & ~0xfffLL) | ((address - 5) & 0xfffLL);
0093     address <<= 2;
0094 
0095     /* Only CPU errors are fixable. */
0096     if (erraddr & KN0X_EAR_CPU && is_fixup)
0097         action = MIPS_BE_FIXUP;
0098 
0099     if (erraddr & KN0X_EAR_ECCERR) {
0100         static const u8 data_sbit[32] = {
0101             0x4f, 0x4a, 0x52, 0x54, 0x57, 0x58, 0x5b, 0x5d,
0102             0x23, 0x25, 0x26, 0x29, 0x2a, 0x2c, 0x31, 0x34,
0103             0x0e, 0x0b, 0x13, 0x15, 0x16, 0x19, 0x1a, 0x1c,
0104             0x62, 0x64, 0x67, 0x68, 0x6b, 0x6d, 0x70, 0x75,
0105         };
0106         static const u8 data_mbit[25] = {
0107             0x07, 0x0d, 0x1f,
0108             0x2f, 0x32, 0x37, 0x38, 0x3b, 0x3d, 0x3e,
0109             0x43, 0x45, 0x46, 0x49, 0x4c, 0x51, 0x5e,
0110             0x61, 0x6e, 0x73, 0x76, 0x79, 0x7a, 0x7c, 0x7f,
0111         };
0112         static const char sbestr[] = "corrected single";
0113         static const char dbestr[] = "uncorrectable double";
0114         static const char mbestr[] = "uncorrectable multiple";
0115 
0116         if (!(address & 0x4))
0117             syn = chksyn;           /* Low bank. */
0118         else
0119             syn = chksyn >> 16;     /* High bank. */
0120 
0121         if (!(syn & KN0X_ESR_VLDLO)) {
0122             /* Ack now, no rewrite will happen. */
0123             dec_ecc_be_ack();
0124 
0125             fmt = KERN_ALERT "%s" "invalid\n";
0126         } else {
0127             sngl = syn & KN0X_ESR_SNGLO;
0128             syn &= KN0X_ESR_SYNLO;
0129 
0130             /*
0131              * Multibit errors may be tagged incorrectly;
0132              * check the syndrome explicitly.
0133              */
0134             for (i = 0; i < 25; i++)
0135                 if (syn == data_mbit[i])
0136                     break;
0137 
0138             if (i < 25) {
0139                 status = mbestr;
0140             } else if (!sngl) {
0141                 status = dbestr;
0142             } else {
0143                 volatile u32 *ptr =
0144                     (void *)CKSEG1ADDR(address);
0145 
0146                 *ptr = *ptr;        /* Rewrite. */
0147                 iob();
0148 
0149                 status = sbestr;
0150                 action = MIPS_BE_DISCARD;
0151             }
0152 
0153             /* Ack now, now we've rewritten (or not). */
0154             dec_ecc_be_ack();
0155 
0156             if (syn && syn == (syn & -syn)) {
0157                 if (syn == 0x01) {
0158                     fmt = KERN_ALERT "%s"
0159                           "%#04x -- %s bit error "
0160                           "at check bit C%s\n";
0161                     xbit = "X";
0162                 } else {
0163                     fmt = KERN_ALERT "%s"
0164                           "%#04x -- %s bit error "
0165                           "at check bit C%s%u\n";
0166                 }
0167                 i = syn >> 2;
0168             } else {
0169                 for (i = 0; i < 32; i++)
0170                     if (syn == data_sbit[i])
0171                         break;
0172                 if (i < 32)
0173                     fmt = KERN_ALERT "%s"
0174                           "%#04x -- %s bit error "
0175                           "at data bit D%s%u\n";
0176                 else
0177                     fmt = KERN_ALERT "%s"
0178                           "%#04x -- %s bit error\n";
0179             }
0180         }
0181     }
0182 
0183     if (action != MIPS_BE_FIXUP)
0184         printk(KERN_ALERT "Bus error %s: %s %s %s at %#010lx\n",
0185             kind, agent, cycle, event, address);
0186 
0187     if (action != MIPS_BE_FIXUP && erraddr & KN0X_EAR_ECCERR)
0188         printk(fmt, "  ECC syndrome ", syn, status, xbit, i);
0189 
0190     return action;
0191 }
0192 
/*
 * Bus error exception entry point: hand the event to the common
 * backend, tagged as an exception, and return its verdict unchanged.
 */
int dec_ecc_be_handler(struct pt_regs *regs, int is_fixup)
{
    const int from_interrupt = 0;

    return dec_ecc_be_backend(regs, is_fixup, from_interrupt);
}
0197 
0198 irqreturn_t dec_ecc_be_interrupt(int irq, void *dev_id)
0199 {
0200     struct pt_regs *regs = get_irq_regs();
0201 
0202     int action = dec_ecc_be_backend(regs, 0, 1);
0203 
0204     if (action == MIPS_BE_DISCARD)
0205         return IRQ_HANDLED;
0206 
0207     /*
0208      * FIXME: Find the affected processes and kill them, otherwise
0209      * we must die.
0210      *
0211      * The interrupt is asynchronously delivered thus EPC and RA
0212      * may be irrelevant, but are printed for a reference.
0213      */
0214     printk(KERN_ALERT "Fatal bus interrupt, epc == %08lx, ra == %08lx\n",
0215            regs->cp0_epc, regs->regs[31]);
0216     die("Unrecoverable bus error", regs);
0217 }
0218 
0219 
0220 /*
0221  * Initialization differs a bit between KN02 and KN03/KN05, so we
0222  * need two variants.  Once set up, all systems can be handled the
0223  * same way.
0224  */
0225 static inline void dec_kn02_be_init(void)
0226 {
0227     volatile u32 *csr = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_CSR);
0228 
0229     kn0x_erraddr = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_ERRADDR);
0230     kn0x_chksyn = (void *)CKSEG1ADDR(KN02_SLOT_BASE + KN02_CHKSYN);
0231 
0232     /* Preset write-only bits of the Control Register cache. */
0233     cached_kn02_csr = *csr | KN02_CSR_LEDS;
0234 
0235     /* Set normal ECC detection and generation. */
0236     cached_kn02_csr &= ~(KN02_CSR_DIAGCHK | KN02_CSR_DIAGGEN);
0237     /* Enable ECC correction. */
0238     cached_kn02_csr |= KN02_CSR_CORRECT;
0239     *csr = cached_kn02_csr;
0240     iob();
0241 }
0242 
0243 static inline void dec_kn03_be_init(void)
0244 {
0245     volatile u32 *mcr = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_MCR);
0246     volatile u32 *mbcs = (void *)CKSEG1ADDR(KN4K_SLOT_BASE + KN4K_MB_CSR);
0247 
0248     kn0x_erraddr = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_ERRADDR);
0249     kn0x_chksyn = (void *)CKSEG1ADDR(KN03_SLOT_BASE + IOASIC_CHKSYN);
0250 
0251     /*
0252      * Set normal ECC detection and generation, enable ECC correction.
0253      * For KN05 we also need to make sure EE (?) is enabled in the MB.
0254      * Otherwise DBE/IBE exceptions would be masked but bus error
0255      * interrupts would still arrive, resulting in an inevitable crash
0256      * if get_dbe() triggers one.
0257      */
0258     *mcr = (*mcr & ~(KN03_MCR_DIAGCHK | KN03_MCR_DIAGGEN)) |
0259            KN03_MCR_CORRECT;
0260     if (current_cpu_type() == CPU_R4400SC)
0261         *mbcs |= KN4K_MB_CSR_EE;
0262     fast_iob();
0263 }
0264 
0265 void __init dec_ecc_be_init(void)
0266 {
0267     if (mips_machtype == MACH_DS5000_200)
0268         dec_kn02_be_init();
0269     else
0270         dec_kn03_be_init();
0271 
0272     /* Clear any leftover errors from the firmware. */
0273     dec_ecc_be_ack();
0274 }