Back to home page

LXR

 
 

    


0001 /*
0002  * MCE grading rules.
0003  * Copyright 2008, 2009 Intel Corporation.
0004  *
0005  * This program is free software; you can redistribute it and/or
0006  * modify it under the terms of the GNU General Public License
0007  * as published by the Free Software Foundation; version 2
0008  * of the License.
0009  *
0010  * Author: Andi Kleen
0011  */
0012 #include <linux/kernel.h>
0013 #include <linux/seq_file.h>
0014 #include <linux/init.h>
0015 #include <linux/debugfs.h>
0016 #include <asm/mce.h>
0017 #include <linux/uaccess.h>
0018 
0019 #include "mce-internal.h"
0020 
/*
 * Grade an mce by severity. In general the most severe ones are processed
 * first. Since there are quite a lot of combinations, test the bits in a
 * table-driven way. The rules are simply processed in order; the first
 * match wins.
 *
 * Note this is only used for machine check exceptions; the corrected
 * errors use much simpler rules. The exceptions still check for the corrected
 * errors, but only to leave them alone for the CMCI handler (except for
 * panic situations).
 */
0032 
/*
 * Selector values used by the rule table. All of them start at 1, leaving
 * 0 free so that a zero-initialized rule field matches nothing here.
 */

/* Context the error was taken in. */
enum context {
    IN_KERNEL       = 1,
    IN_USER         = 2,
    IN_KERNEL_RECOV = 3,
};

/* Whether a rule applies with or without software error recovery (SER). */
enum ser {
    SER_REQUIRED = 1,
    NO_SER       = 2,
};

/* Whether a rule applies inside or outside of exception context. */
enum exception {
    EXCP_CONTEXT = 1,
    NO_EXCP      = 2,
};
0036 
0037 static struct severity {
0038     u64 mask;
0039     u64 result;
0040     unsigned char sev;
0041     unsigned char mcgmask;
0042     unsigned char mcgres;
0043     unsigned char ser;
0044     unsigned char context;
0045     unsigned char excp;
0046     unsigned char covered;
0047     char *msg;
0048 } severities[] = {
0049 #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
0050 #define  KERNEL     .context = IN_KERNEL
0051 #define  USER       .context = IN_USER
0052 #define  KERNEL_RECOV   .context = IN_KERNEL_RECOV
0053 #define  SER        .ser = SER_REQUIRED
0054 #define  NOSER      .ser = NO_SER
0055 #define  EXCP       .excp = EXCP_CONTEXT
0056 #define  NOEXCP     .excp = NO_EXCP
0057 #define  BITCLR(x)  .mask = x, .result = 0
0058 #define  BITSET(x)  .mask = x, .result = x
0059 #define  MCGMASK(x, y)  .mcgmask = x, .mcgres = y
0060 #define  MASK(x, y) .mask = x, .result = y
0061 #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
0062 #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
0063 #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
0064 
0065     MCESEV(
0066         NO, "Invalid",
0067         BITCLR(MCI_STATUS_VAL)
0068         ),
0069     MCESEV(
0070         NO, "Not enabled",
0071         EXCP, BITCLR(MCI_STATUS_EN)
0072         ),
0073     MCESEV(
0074         PANIC, "Processor context corrupt",
0075         BITSET(MCI_STATUS_PCC)
0076         ),
0077     /* When MCIP is not set something is very confused */
0078     MCESEV(
0079         PANIC, "MCIP not set in MCA handler",
0080         EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
0081         ),
0082     /* Neither return not error IP -- no chance to recover -> PANIC */
0083     MCESEV(
0084         PANIC, "Neither restart nor error IP",
0085         EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
0086         ),
0087     MCESEV(
0088         PANIC, "In kernel and no restart IP",
0089         EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
0090         ),
0091     MCESEV(
0092         PANIC, "In kernel and no restart IP",
0093         EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
0094         ),
0095     MCESEV(
0096         DEFERRED, "Deferred error",
0097         NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
0098         ),
0099     MCESEV(
0100         KEEP, "Corrected error",
0101         NOSER, BITCLR(MCI_STATUS_UC)
0102         ),
0103 
0104     /* ignore OVER for UCNA */
0105     MCESEV(
0106         UCNA, "Uncorrected no action required",
0107         SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
0108         ),
0109     MCESEV(
0110         PANIC, "Illegal combination (UCNA with AR=1)",
0111         SER,
0112         MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
0113         ),
0114     MCESEV(
0115         KEEP, "Non signalled machine check",
0116         SER, BITCLR(MCI_STATUS_S)
0117         ),
0118 
0119     MCESEV(
0120         PANIC, "Action required with lost events",
0121         SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
0122         ),
0123 
0124     /* known AR MCACODs: */
0125 #ifdef  CONFIG_MEMORY_FAILURE
0126     MCESEV(
0127         KEEP, "Action required but unaffected thread is continuable",
0128         SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
0129         MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
0130         ),
0131     MCESEV(
0132         AR, "Action required: data load in error recoverable area of kernel",
0133         SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
0134         KERNEL_RECOV
0135         ),
0136     MCESEV(
0137         AR, "Action required: data load error in a user process",
0138         SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
0139         USER
0140         ),
0141     MCESEV(
0142         AR, "Action required: instruction fetch error in a user process",
0143         SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
0144         USER
0145         ),
0146 #endif
0147     MCESEV(
0148         PANIC, "Action required: unknown MCACOD",
0149         SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
0150         ),
0151 
0152     /* known AO MCACODs: */
0153     MCESEV(
0154         AO, "Action optional: memory scrubbing error",
0155         SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
0156         ),
0157     MCESEV(
0158         AO, "Action optional: last level cache writeback error",
0159         SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
0160         ),
0161     MCESEV(
0162         SOME, "Action optional: unknown MCACOD",
0163         SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
0164         ),
0165     MCESEV(
0166         SOME, "Action optional with lost events",
0167         SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
0168         ),
0169 
0170     MCESEV(
0171         PANIC, "Overflowed uncorrected",
0172         BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
0173         ),
0174     MCESEV(
0175         UC, "Uncorrected",
0176         BITSET(MCI_STATUS_UC)
0177         ),
0178     MCESEV(
0179         SOME, "No match",
0180         BITSET(0)
0181         )   /* always matches. keep at end */
0182 };
0183 
0184 #define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
0185                 (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
0186 
0187 /*
0188  * If mcgstatus indicated that ip/cs on the stack were
0189  * no good, then "m->cs" will be zero and we will have
0190  * to assume the worst case (IN_KERNEL) as we actually
0191  * have no idea what we were executing when the machine
0192  * check hit.
0193  * If we do have a good "m->cs" (or a faked one in the
0194  * case we were executing in VM86 mode) we can use it to
0195  * distinguish an exception taken in user from from one
0196  * taken in the kernel.
0197  */
0198 static int error_context(struct mce *m)
0199 {
0200     if ((m->cs & 3) == 3)
0201         return IN_USER;
0202     if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
0203         return IN_KERNEL_RECOV;
0204     return IN_KERNEL;
0205 }
0206 
0207 static int mce_severity_amd_smca(struct mce *m, int err_ctx)
0208 {
0209     u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
0210     u32 low, high;
0211 
0212     /*
0213      * We need to look at the following bits:
0214      * - "succor" bit (data poisoning support), and
0215      * - TCC bit (Task Context Corrupt)
0216      * in MCi_STATUS to determine error severity.
0217      */
0218     if (!mce_flags.succor)
0219         return MCE_PANIC_SEVERITY;
0220 
0221     if (rdmsr_safe(addr, &low, &high))
0222         return MCE_PANIC_SEVERITY;
0223 
0224     /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
0225     if ((low & MCI_CONFIG_MCAX) &&
0226         (m->status & MCI_STATUS_TCC) &&
0227         (err_ctx == IN_KERNEL))
0228         return MCE_PANIC_SEVERITY;
0229 
0230      /* ...otherwise invoke hwpoison handler. */
0231     return MCE_AR_SEVERITY;
0232 }
0233 
0234 /*
0235  * See AMD Error Scope Hierarchy table in a newer BKDG. For example
0236  * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
0237  */
0238 static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
0239 {
0240     enum context ctx = error_context(m);
0241 
0242     /* Processor Context Corrupt, no need to fumble too much, die! */
0243     if (m->status & MCI_STATUS_PCC)
0244         return MCE_PANIC_SEVERITY;
0245 
0246     if (m->status & MCI_STATUS_UC) {
0247 
0248         /*
0249          * On older systems where overflow_recov flag is not present, we
0250          * should simply panic if an error overflow occurs. If
0251          * overflow_recov flag is present and set, then software can try
0252          * to at least kill process to prolong system operation.
0253          */
0254         if (mce_flags.overflow_recov) {
0255             if (mce_flags.smca)
0256                 return mce_severity_amd_smca(m, ctx);
0257 
0258             /* software can try to contain */
0259             if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
0260                 return MCE_PANIC_SEVERITY;
0261 
0262             /* kill current process */
0263             return MCE_AR_SEVERITY;
0264         } else {
0265             /* at least one error was not logged */
0266             if (m->status & MCI_STATUS_OVER)
0267                 return MCE_PANIC_SEVERITY;
0268         }
0269 
0270         /*
0271          * For any other case, return MCE_UC_SEVERITY so that we log the
0272          * error and exit #MC handler.
0273          */
0274         return MCE_UC_SEVERITY;
0275     }
0276 
0277     /*
0278      * deferred error: poll handler catches these and adds to mce_ring so
0279      * memory-failure can take recovery actions.
0280      */
0281     if (m->status & MCI_STATUS_DEFERRED)
0282         return MCE_DEFERRED_SEVERITY;
0283 
0284     /*
0285      * corrected error: poll handler catches these and passes responsibility
0286      * of decoding the error to EDAC
0287      */
0288     return MCE_KEEP_SEVERITY;
0289 }
0290 
0291 static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
0292 {
0293     enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
0294     enum context ctx = error_context(m);
0295     struct severity *s;
0296 
0297     for (s = severities;; s++) {
0298         if ((m->status & s->mask) != s->result)
0299             continue;
0300         if ((m->mcgstatus & s->mcgmask) != s->mcgres)
0301             continue;
0302         if (s->ser == SER_REQUIRED && !mca_cfg.ser)
0303             continue;
0304         if (s->ser == NO_SER && mca_cfg.ser)
0305             continue;
0306         if (s->context && ctx != s->context)
0307             continue;
0308         if (s->excp && excp != s->excp)
0309             continue;
0310         if (msg)
0311             *msg = s->msg;
0312         s->covered = 1;
0313         if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
0314             if (tolerant < 1)
0315                 return MCE_PANIC_SEVERITY;
0316         }
0317         return s->sev;
0318     }
0319 }
0320 
0321 /* Default to mce_severity_intel */
0322 int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
0323             mce_severity_intel;
0324 
0325 void __init mcheck_vendor_init_severity(void)
0326 {
0327     if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
0328         mce_severity = mce_severity_amd;
0329 }
0330 
0331 #ifdef CONFIG_DEBUG_FS
0332 static void *s_start(struct seq_file *f, loff_t *pos)
0333 {
0334     if (*pos >= ARRAY_SIZE(severities))
0335         return NULL;
0336     return &severities[*pos];
0337 }
0338 
0339 static void *s_next(struct seq_file *f, void *data, loff_t *pos)
0340 {
0341     if (++(*pos) >= ARRAY_SIZE(severities))
0342         return NULL;
0343     return &severities[*pos];
0344 }
0345 
/* seq_file ->stop: nothing to do here. */
static void s_stop(struct seq_file *f, void *data)
{
}
0349 
0350 static int s_show(struct seq_file *f, void *data)
0351 {
0352     struct severity *ser = data;
0353     seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
0354     return 0;
0355 }
0356 
0357 static const struct seq_operations severities_seq_ops = {
0358     .start  = s_start,
0359     .next   = s_next,
0360     .stop   = s_stop,
0361     .show   = s_show,
0362 };
0363 
0364 static int severities_coverage_open(struct inode *inode, struct file *file)
0365 {
0366     return seq_open(file, &severities_seq_ops);
0367 }
0368 
0369 static ssize_t severities_coverage_write(struct file *file,
0370                      const char __user *ubuf,
0371                      size_t count, loff_t *ppos)
0372 {
0373     int i;
0374     for (i = 0; i < ARRAY_SIZE(severities); i++)
0375         severities[i].covered = 0;
0376     return count;
0377 }
0378 
0379 static const struct file_operations severities_coverage_fops = {
0380     .open       = severities_coverage_open,
0381     .release    = seq_release,
0382     .read       = seq_read,
0383     .write      = severities_coverage_write,
0384     .llseek     = seq_lseek,
0385 };
0386 
0387 static int __init severities_debugfs_init(void)
0388 {
0389     struct dentry *dmce, *fsev;
0390 
0391     dmce = mce_get_debugfs_dir();
0392     if (!dmce)
0393         goto err_out;
0394 
0395     fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
0396                    &severities_coverage_fops);
0397     if (!fsev)
0398         goto err_out;
0399 
0400     return 0;
0401 
0402 err_out:
0403     return -ENOMEM;
0404 }
0405 late_initcall(severities_debugfs_init);
0406 #endif /* CONFIG_DEBUG_FS */