0001
0002
0003
0004
0005
0006
0007
0008 #include <linux/kernel.h>
0009 #include <linux/seq_file.h>
0010 #include <linux/init.h>
0011 #include <linux/debugfs.h>
0012 #include <linux/uaccess.h>
0013
0014 #include <asm/mce.h>
0015 #include <asm/intel-family.h>
0016 #include <asm/traps.h>
0017 #include <asm/insn.h>
0018 #include <asm/insn-eval.h>
0019
0020 #include "internal.h"
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
/*
 * Qualifiers a severity rule can demand. Every enumerator is non-zero on
 * purpose: a rule field left at 0 means "don't care" and is skipped when
 * the table is matched.
 */

/* Where the CPU was executing when the error was reported. */
enum context {
	IN_KERNEL = 1,
	IN_USER,		/* 2 */
	IN_KERNEL_RECOV,	/* 3 */
};

/* Whether a rule applies only with, or only without, mca_cfg.ser. */
enum ser {
	SER_REQUIRED = 1,
	NO_SER,			/* 2 */
};

/* Whether a rule applies only inside, or only outside, exception context. */
enum exception {
	EXCP_CONTEXT = 1,
	NO_EXCP,		/* 2 */
};
0037
0038 static struct severity {
0039 u64 mask;
0040 u64 result;
0041 unsigned char sev;
0042 unsigned char mcgmask;
0043 unsigned char mcgres;
0044 unsigned char ser;
0045 unsigned char context;
0046 unsigned char excp;
0047 unsigned char covered;
0048 unsigned char cpu_model;
0049 unsigned char cpu_minstepping;
0050 unsigned char bank_lo, bank_hi;
0051 char *msg;
0052 } severities[] = {
0053 #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
0054 #define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
0055 #define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s
0056 #define KERNEL .context = IN_KERNEL
0057 #define USER .context = IN_USER
0058 #define KERNEL_RECOV .context = IN_KERNEL_RECOV
0059 #define SER .ser = SER_REQUIRED
0060 #define NOSER .ser = NO_SER
0061 #define EXCP .excp = EXCP_CONTEXT
0062 #define NOEXCP .excp = NO_EXCP
0063 #define BITCLR(x) .mask = x, .result = 0
0064 #define BITSET(x) .mask = x, .result = x
0065 #define MCGMASK(x, y) .mcgmask = x, .mcgres = y
0066 #define MASK(x, y) .mask = x, .result = y
0067 #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
0068 #define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR)
0069 #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
0070 #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
0071
0072 MCESEV(
0073 NO, "Invalid",
0074 BITCLR(MCI_STATUS_VAL)
0075 ),
0076 MCESEV(
0077 NO, "Not enabled",
0078 EXCP, BITCLR(MCI_STATUS_EN)
0079 ),
0080 MCESEV(
0081 PANIC, "Processor context corrupt",
0082 BITSET(MCI_STATUS_PCC)
0083 ),
0084
0085 MCESEV(
0086 PANIC, "MCIP not set in MCA handler",
0087 EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
0088 ),
0089
0090 MCESEV(
0091 PANIC, "Neither restart nor error IP",
0092 EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
0093 ),
0094 MCESEV(
0095 PANIC, "In kernel and no restart IP",
0096 EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
0097 ),
0098 MCESEV(
0099 PANIC, "In kernel and no restart IP",
0100 EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
0101 ),
0102 MCESEV(
0103 KEEP, "Corrected error",
0104 NOSER, BITCLR(MCI_STATUS_UC)
0105 ),
0106
0107
0108
0109
0110
0111
0112
0113 MCESEV(
0114 AO, "Action optional: memory scrubbing error",
0115 SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
0116 ),
0117 MCESEV(
0118 AO, "Action optional: last level cache writeback error",
0119 SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
0120 ),
0121
0122
0123
0124
0125
0126
0127
0128 MCESEV(
0129 AO, "Uncorrected Patrol Scrub Error",
0130 SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
0131 MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18)
0132 ),
0133
0134
0135 MCESEV(
0136 UCNA, "Uncorrected no action required",
0137 SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
0138 ),
0139 MCESEV(
0140 PANIC, "Illegal combination (UCNA with AR=1)",
0141 SER,
0142 MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
0143 ),
0144 MCESEV(
0145 KEEP, "Non signaled machine check",
0146 SER, BITCLR(MCI_STATUS_S)
0147 ),
0148
0149 MCESEV(
0150 PANIC, "Action required with lost events",
0151 SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
0152 ),
0153
0154
0155 #ifdef CONFIG_MEMORY_FAILURE
0156 MCESEV(
0157 KEEP, "Action required but unaffected thread is continuable",
0158 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
0159 MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
0160 ),
0161 MCESEV(
0162 AR, "Action required: data load in error recoverable area of kernel",
0163 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
0164 KERNEL_RECOV
0165 ),
0166 MCESEV(
0167 AR, "Action required: data load error in a user process",
0168 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
0169 USER
0170 ),
0171 MCESEV(
0172 AR, "Action required: instruction fetch error in a user process",
0173 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
0174 USER
0175 ),
0176 MCESEV(
0177 PANIC, "Data load in unrecoverable area of kernel",
0178 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
0179 KERNEL
0180 ),
0181 MCESEV(
0182 PANIC, "Instruction fetch error in kernel",
0183 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
0184 KERNEL
0185 ),
0186 #endif
0187 MCESEV(
0188 PANIC, "Action required: unknown MCACOD",
0189 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
0190 ),
0191
0192 MCESEV(
0193 SOME, "Action optional: unknown MCACOD",
0194 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
0195 ),
0196 MCESEV(
0197 SOME, "Action optional with lost events",
0198 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
0199 ),
0200
0201 MCESEV(
0202 PANIC, "Overflowed uncorrected",
0203 BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
0204 ),
0205 MCESEV(
0206 UC, "Uncorrected",
0207 BITSET(MCI_STATUS_UC)
0208 ),
0209 MCESEV(
0210 SOME, "No match",
0211 BITSET(0)
0212 )
0213 };
0214
/*
 * True only when BOTH MCG_STATUS_RIPV and MCG_STATUS_EIPV are set in @mcg,
 * i.e. when neither of the two bits is missing. @mcg is evaluated once.
 */
#define mc_recoverable(mcg) \
	((~(mcg) & (MCG_STATUS_RIPV | MCG_STATUS_EIPV)) == 0)
0217
0218 static bool is_copy_from_user(struct pt_regs *regs)
0219 {
0220 u8 insn_buf[MAX_INSN_SIZE];
0221 unsigned long addr;
0222 struct insn insn;
0223 int ret;
0224
0225 if (!regs)
0226 return false;
0227
0228 if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
0229 return false;
0230
0231 ret = insn_decode_kernel(&insn, insn_buf);
0232 if (ret < 0)
0233 return false;
0234
0235 switch (insn.opcode.value) {
0236
0237 case 0x8A: case 0x8B:
0238
0239 case 0xB60F: case 0xB70F:
0240 addr = (unsigned long)insn_get_addr_ref(&insn, regs);
0241 break;
0242
0243 case 0xA4: case 0xA5:
0244 addr = regs->si;
0245 break;
0246 default:
0247 return false;
0248 }
0249
0250 if (fault_in_kernel_space(addr))
0251 return false;
0252
0253 current->mce_vaddr = (void __user *)addr;
0254
0255 return true;
0256 }
0257
0258
0259
0260
0261
0262
0263
0264
0265
0266
0267
0268
0269 static noinstr int error_context(struct mce *m, struct pt_regs *regs)
0270 {
0271 int fixup_type;
0272 bool copy_user;
0273
0274 if ((m->cs & 3) == 3)
0275 return IN_USER;
0276
0277 if (!mc_recoverable(m->mcgstatus))
0278 return IN_KERNEL;
0279
0280
0281 instrumentation_begin();
0282 fixup_type = ex_get_fixup_type(m->ip);
0283 copy_user = is_copy_from_user(regs);
0284 instrumentation_end();
0285
0286 switch (fixup_type) {
0287 case EX_TYPE_UACCESS:
0288 case EX_TYPE_COPY:
0289 if (!copy_user)
0290 return IN_KERNEL;
0291 m->kflags |= MCE_IN_KERNEL_COPYIN;
0292 fallthrough;
0293
0294 case EX_TYPE_FAULT_MCE_SAFE:
0295 case EX_TYPE_DEFAULT_MCE_SAFE:
0296 m->kflags |= MCE_IN_KERNEL_RECOV;
0297 return IN_KERNEL_RECOV;
0298
0299 default:
0300 return IN_KERNEL;
0301 }
0302 }
0303
0304
0305 static noinstr int mce_severity_amd(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
0306 {
0307 char *panic_msg = NULL;
0308 int ret;
0309
0310
0311
0312
0313
0314 ret = MCE_AR_SEVERITY;
0315
0316
0317 if (m->status & MCI_STATUS_PCC) {
0318 panic_msg = "Processor Context Corrupt";
0319 ret = MCE_PANIC_SEVERITY;
0320 goto out;
0321 }
0322
0323 if (m->status & MCI_STATUS_DEFERRED) {
0324 ret = MCE_DEFERRED_SEVERITY;
0325 goto out;
0326 }
0327
0328
0329
0330
0331
0332 if (!(m->status & MCI_STATUS_UC)) {
0333 ret = MCE_KEEP_SEVERITY;
0334 goto out;
0335 }
0336
0337
0338
0339
0340
0341 if ((m->status & MCI_STATUS_OVER) && !mce_flags.overflow_recov) {
0342 panic_msg = "Overflowed uncorrected error without MCA Overflow Recovery";
0343 ret = MCE_PANIC_SEVERITY;
0344 goto out;
0345 }
0346
0347 if (!mce_flags.succor) {
0348 panic_msg = "Uncorrected error without MCA Recovery";
0349 ret = MCE_PANIC_SEVERITY;
0350 goto out;
0351 }
0352
0353 if (error_context(m, regs) == IN_KERNEL) {
0354 panic_msg = "Uncorrected unrecoverable error in kernel context";
0355 ret = MCE_PANIC_SEVERITY;
0356 }
0357
0358 out:
0359 if (msg && panic_msg)
0360 *msg = panic_msg;
0361
0362 return ret;
0363 }
0364
0365 static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
0366 {
0367 enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
0368 enum context ctx = error_context(m, regs);
0369 struct severity *s;
0370
0371 for (s = severities;; s++) {
0372 if ((m->status & s->mask) != s->result)
0373 continue;
0374 if ((m->mcgstatus & s->mcgmask) != s->mcgres)
0375 continue;
0376 if (s->ser == SER_REQUIRED && !mca_cfg.ser)
0377 continue;
0378 if (s->ser == NO_SER && mca_cfg.ser)
0379 continue;
0380 if (s->context && ctx != s->context)
0381 continue;
0382 if (s->excp && excp != s->excp)
0383 continue;
0384 if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model)
0385 continue;
0386 if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
0387 continue;
0388 if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
0389 continue;
0390 if (msg)
0391 *msg = s->msg;
0392 s->covered = 1;
0393
0394 if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL)
0395 return MCE_PANIC_SEVERITY;
0396
0397 return s->sev;
0398 }
0399 }
0400
0401 int noinstr mce_severity(struct mce *m, struct pt_regs *regs, char **msg, bool is_excp)
0402 {
0403 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
0404 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
0405 return mce_severity_amd(m, regs, msg, is_excp);
0406 else
0407 return mce_severity_intel(m, regs, msg, is_excp);
0408 }
0409
0410 #ifdef CONFIG_DEBUG_FS
0411 static void *s_start(struct seq_file *f, loff_t *pos)
0412 {
0413 if (*pos >= ARRAY_SIZE(severities))
0414 return NULL;
0415 return &severities[*pos];
0416 }
0417
0418 static void *s_next(struct seq_file *f, void *data, loff_t *pos)
0419 {
0420 if (++(*pos) >= ARRAY_SIZE(severities))
0421 return NULL;
0422 return &severities[*pos];
0423 }
0424
/*
 * seq_file ->stop: intentionally empty. s_start()/s_next() only hand out
 * pointers into the static severities[] array, so there is nothing to
 * release here.
 */
static void s_stop(struct seq_file *f, void *data)
{
}
0428
0429 static int s_show(struct seq_file *f, void *data)
0430 {
0431 struct severity *ser = data;
0432 seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
0433 return 0;
0434 }
0435
0436 static const struct seq_operations severities_seq_ops = {
0437 .start = s_start,
0438 .next = s_next,
0439 .stop = s_stop,
0440 .show = s_show,
0441 };
0442
/*
 * ->open for the coverage file: attach the severities[] iterator to @file.
 * Returns whatever seq_open() returns.
 */
static int severities_coverage_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &severities_seq_ops);
}
0447
0448 static ssize_t severities_coverage_write(struct file *file,
0449 const char __user *ubuf,
0450 size_t count, loff_t *ppos)
0451 {
0452 int i;
0453 for (i = 0; i < ARRAY_SIZE(severities); i++)
0454 severities[i].covered = 0;
0455 return count;
0456 }
0457
0458 static const struct file_operations severities_coverage_fops = {
0459 .open = severities_coverage_open,
0460 .release = seq_release,
0461 .read = seq_read,
0462 .write = severities_coverage_write,
0463 .llseek = seq_lseek,
0464 };
0465
0466 static int __init severities_debugfs_init(void)
0467 {
0468 struct dentry *dmce;
0469
0470 dmce = mce_get_debugfs_dir();
0471
0472 debugfs_create_file("severities-coverage", 0444, dmce, NULL,
0473 &severities_coverage_fops);
0474 return 0;
0475 }
0476 late_initcall(severities_debugfs_init);
0477 #endif