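/*
 * x86 NMI handling: dispatch non-maskable interrupts to the handlers
 * registered for each NMI type (local, unknown, SERR, IO_CHECK) and
 * deal with unknown and back-to-back NMIs.
 */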
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/sched/debug.h>
#include <linux/nmi.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/atomic.h>
#include <linux/sched/clock.h>

#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
#include <asm/cache.h>
#include <asm/nospec-branch.h>
#include <asm/sev.h>

#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>

struct nmi_desc {
        raw_spinlock_t lock;
        struct list_head head;
};

static struct nmi_desc nmi_desc[NMI_MAX] =
{
        {
                .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
                .head = LIST_HEAD_INIT(nmi_desc[0].head),
        },
        {
                .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
                .head = LIST_HEAD_INIT(nmi_desc[1].head),
        },
        {
                .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
                .head = LIST_HEAD_INIT(nmi_desc[2].head),
        },
        {
                .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
                .head = LIST_HEAD_INIT(nmi_desc[3].head),
        },
};

struct nmi_stats {
        unsigned int normal;
        unsigned int unknown;
        unsigned int external;
        unsigned int swallow;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);

static int ignore_nmis __read_mostly;

int unknown_nmi_panic;
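/*
 * Prevent the NMI reason port (0x61) from being accessed simultaneously;
 * this lock may only be taken from NMI context.
 */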
static DEFINE_RAW_SPINLOCK(nmi_reason_lock);

static int __init setup_unknown_nmi_panic(char *str)
{
        unknown_nmi_panic = 1;
        return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

#define nmi_to_desc(type) (&nmi_desc[type])

static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;

static int __init nmi_warning_debugfs(void)
{
        debugfs_create_u64("nmi_longest_ns", 0644,
                           arch_debugfs_dir, &nmi_longest_ns);
        return 0;
}
fs_initcall(nmi_warning_debugfs);

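/*
 * Report (ratelimited) any NMI handler that runs longer than both
 * nmi_longest_ns and its own previous worst case.
 */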
static void nmi_check_duration(struct nmiaction *action, u64 duration)
{
        int remainder_ns, decimal_msecs;

        if (duration < nmi_longest_ns || duration < action->max_duration)
                return;

        action->max_duration = duration;

        remainder_ns = do_div(duration, (1000 * 1000));
        decimal_msecs = remainder_ns / 1000;

        printk_ratelimited(KERN_INFO
                "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
                action->handler, duration, decimal_msecs);
}

static int nmi_handle(unsigned int type, struct pt_regs *regs)
{
        struct nmi_desc *desc = nmi_to_desc(type);
        struct nmiaction *a;
        int handled = 0;

        rcu_read_lock();

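        /*
         * NMIs are edge-triggered, which means that if enough of them
         * arrive concurrently some can be lost, because only one can be
         * latched at any given time.  Walk the whole list to handle
         * those situations.
         */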
        list_for_each_entry_rcu(a, &desc->head, list) {
                int thishandled;
                u64 delta;

                delta = sched_clock();
                thishandled = a->handler(type, regs);
                handled += thishandled;
                delta = sched_clock() - delta;
                trace_nmi_handler(a->handler, (int)delta, thishandled);

                nmi_check_duration(a, delta);
        }

        rcu_read_unlock();

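        /* return the total number of NMI events handled */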
        return handled;
}
NOKPROBE_SYMBOL(nmi_handle);

int __register_nmi_handler(unsigned int type, struct nmiaction *action)
{
        struct nmi_desc *desc = nmi_to_desc(type);
        unsigned long flags;

        if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list)))
                return -EINVAL;

        raw_spin_lock_irqsave(&desc->lock, flags);

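        /*
         * Indicate if there are multiple registrations on the
         * internal NMI handler call chains (SERR and IO_CHECK).
         */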
        WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
        WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));

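        /*
         * Some handlers need to be executed first, otherwise a fake
         * event confuses other handlers (kdump uses this flag).
         */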
        if (action->flags & NMI_FLAG_FIRST)
                list_add_rcu(&action->list, &desc->head);
        else
                list_add_tail_rcu(&action->list, &desc->head);

        raw_spin_unlock_irqrestore(&desc->lock, flags);
        return 0;
}
EXPORT_SYMBOL(__register_nmi_handler);

void unregister_nmi_handler(unsigned int type, const char *name)
{
        struct nmi_desc *desc = nmi_to_desc(type);
        struct nmiaction *n, *found = NULL;
        unsigned long flags;

        raw_spin_lock_irqsave(&desc->lock, flags);

        list_for_each_entry_rcu(n, &desc->head, list) {
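                /*
                 * The name passed in to describe the nmi handler
                 * is used as the lookup key.
                 */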
                if (!strcmp(n->name, name)) {
                        WARN(in_nmi(),
                             "Trying to free NMI (%s) from NMI context!\n", n->name);
                        list_del_rcu(&n->list);
                        found = n;
                        break;
                }
        }

        raw_spin_unlock_irqrestore(&desc->lock, flags);

        if (found) {
                synchronize_rcu();
                INIT_LIST_HEAD(&found->list);
        }
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);

static void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
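        /* check to see if anyone registered against these types of errors */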
        if (nmi_handle(NMI_SERR, regs))
                return;

        pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
                 reason, smp_processor_id());

        if (panic_on_unrecovered_nmi)
                nmi_panic(regs, "NMI: Not continuing");

        pr_emerg("Dazed and confused, but trying to continue\n");

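        /* Clear and disable the PCI SERR error line. */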
        reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
        outb(reason, NMI_REASON_PORT);
}
NOKPROBE_SYMBOL(pci_serr_error);

static void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
        unsigned long i;

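        /* check to see if anyone registered against these types of errors */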
        if (nmi_handle(NMI_IO_CHECK, regs))
                return;

        pr_emerg(
        "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
                 reason, smp_processor_id());
        show_regs(regs);

        if (panic_on_io_nmi) {
                nmi_panic(regs, "NMI IOCK error: Not continuing");

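                /*
                 * If we end up here, it means we received an NMI while
                 * processing panic().  Simply return without delaying
                 * and re-enabling NMIs.
                 */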
                return;
        }

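        /* Re-enable the IOCK line, wait for a few seconds */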
        reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
        outb(reason, NMI_REASON_PORT);

        i = 20000;
        while (--i) {
                touch_nmi_watchdog();
                udelay(100);
        }

        reason &= ~NMI_REASON_CLEAR_IOCHK;
        outb(reason, NMI_REASON_PORT);
}
NOKPROBE_SYMBOL(io_check_error);

static void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
        int handled;

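        /*
         * Back-to-back NMIs are dealt with one level up, in default_do_nmi().
         * Here the handlers registered for unknown NMIs simply get a chance
         * to claim the event before it is reported.
         */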
        handled = nmi_handle(NMI_UNKNOWN, regs);
        if (handled) {
                __this_cpu_add(nmi_stats.unknown, handled);
                return;
        }

        __this_cpu_add(nmi_stats.unknown, 1);

        pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
                 reason, smp_processor_id());

        if (unknown_nmi_panic || panic_on_unrecovered_nmi)
                nmi_panic(regs, "NMI: Not continuing");

        pr_emerg("Dazed and confused, but trying to continue\n");
}
NOKPROBE_SYMBOL(unknown_nmi_error);

static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);

static noinstr void default_do_nmi(struct pt_regs *regs)
{
        unsigned char reason = 0;
        int handled;
        bool b2b = false;

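        /*
         * CPU-specific NMIs must be processed before non-CPU-specific ones,
         * otherwise we may lose them, because a CPU-specific NMI cannot be
         * detected or processed on other CPUs.
         *
         * Back-to-back NMIs are interesting because they can either be two
         * NMIs or more (anything over two is dropped due to NMIs being
         * edge-triggered).  If this is the second half of a back-to-back
         * NMI, assume we dropped things and process more handlers.
         * Otherwise, reset the 'swallow' NMI behaviour.
         */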
        if (regs->ip == __this_cpu_read(last_nmi_rip))
                b2b = true;
        else
                __this_cpu_write(swallow_nmi, false);

        __this_cpu_write(last_nmi_rip, regs->ip);

        instrumentation_begin();

        handled = nmi_handle(NMI_LOCAL, regs);
        __this_cpu_add(nmi_stats.normal, handled);
        if (handled) {
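                /*
                 * There are cases when an NMI handler handles multiple
                 * events in the current NMI.  One of these events may
                 * be queued for the next NMI.  Because the event is
                 * already handled, the next NMI would show up as an
                 * unknown NMI.  Instead, let that unknown NMI be
                 * swallowed.
                 */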
                if (handled > 1)
                        __this_cpu_write(swallow_nmi, true);
                goto out;
        }

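        /*
         * Non-CPU-specific NMI: NMI sources can be processed on any CPU.
         *
         * Another CPU may be processing panic routines while holding
         * nmi_reason_lock.  Check if this CPU was sent the crash-dump IPI,
         * and if so call its callback directly.  If no CPU is preparing a
         * crash dump, simply spin here until the lock is available.
         */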
        while (!raw_spin_trylock(&nmi_reason_lock)) {
                run_crash_ipi_callback(regs);
                cpu_relax();
        }

        reason = x86_platform.get_nmi_reason();

        if (reason & NMI_REASON_MASK) {
                if (reason & NMI_REASON_SERR)
                        pci_serr_error(reason, regs);
                else if (reason & NMI_REASON_IOCHK)
                        io_check_error(reason, regs);
#ifdef CONFIG_X86_32
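                /*
                 * Reassert NMI in case it became active
                 * meanwhile as it's edge-triggered:
                 */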
                reassert_nmi();
#endif
                __this_cpu_add(nmi_stats.external, 1);
                raw_spin_unlock(&nmi_reason_lock);
                goto out;
        }
        raw_spin_unlock(&nmi_reason_lock);

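        /*
         * Only one NMI can be latched at a time.  To handle this we may
         * process multiple NMI handlers at once to cover the case where an
         * NMI is dropped.  The downside to this approach is that we may
         * process an NMI prematurely, while its real NMI is still latched,
         * which then shows up as an unknown NMI on the next round of NMI
         * processing.
         *
         * That condition was flagged above by setting swallow_nmi when a
         * handler claimed more than one event, and it is only relevant on
         * the second half of a back-to-back NMI.  If both are true, assume
         * this unknown NMI was already processed and swallow it; otherwise
         * report it.  This can occasionally swallow a genuine unknown NMI,
         * but that should be rare and is not a big deal.
         */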
        if (b2b && __this_cpu_read(swallow_nmi))
                __this_cpu_add(nmi_stats.swallow, 1);
        else
                unknown_nmi_error(reason, regs);

out:
        instrumentation_end();
}

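/*
 * NMIs can hit breakpoints or take page faults, and the resulting IRET
 * re-enables NMIs, so NMIs are able to nest: on x86_64 the asm entry code
 * protects against nesting when the outer NMI came from kernel mode, but
 * nesting can still happen if it came from user mode.
 *
 * To handle nested NMIs we use a per-CPU state machine with three states:
 * "not running", "executing" and "latched".  When no NMI is in progress
 * the state is "not running"; an incoming NMI moves it to "executing".
 * If another NMI arrives while one is executing, it merely sets the state
 * to "latched" and returns, mirroring the hardware, which can latch at
 * most one additional NMI.  When the first NMI finishes it decrements the
 * state: a non-zero result means an NMI was latched meanwhile, so we jump
 * back to nmi_restart and run the handlers again.  CR2 is saved and
 * restored around the handlers so that a page fault taken inside a nested
 * NMI cannot corrupt the interrupted context's CR2.
 */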
enum nmi_states {
        NMI_NOT_RUNNING = 0,
        NMI_EXECUTING,
        NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
static DEFINE_PER_CPU(unsigned long, nmi_cr2);
static DEFINE_PER_CPU(unsigned long, nmi_dr7);

DEFINE_IDTENTRY_RAW(exc_nmi)
{
        irqentry_state_t irq_state;

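        /*
         * Re-enable NMIs right here when running as an SEV-ES guest.  This
         * might cause nested NMIs, but those can be handled safely.
         */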
        sev_es_nmi_complete();

        if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
                return;

        if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
                this_cpu_write(nmi_state, NMI_LATCHED);
                return;
        }
        this_cpu_write(nmi_state, NMI_EXECUTING);
        this_cpu_write(nmi_cr2, read_cr2());
nmi_restart:

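        /*
         * Needs to happen before DR7 is accessed, because the hypervisor can
         * intercept DR7 reads/writes, turning those into #VC exceptions.
         */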
        sev_es_ist_enter(regs);

        this_cpu_write(nmi_dr7, local_db_save());

        irq_state = irqentry_nmi_enter(regs);

        inc_irq_stat(__nmi_count);

        if (!ignore_nmis)
                default_do_nmi(regs);

        irqentry_nmi_exit(regs, irq_state);

        local_db_restore(this_cpu_read(nmi_dr7));

        sev_es_ist_exit();

        if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
                write_cr2(this_cpu_read(nmi_cr2));
        if (this_cpu_dec_return(nmi_state))
                goto nmi_restart;

        if (user_mode(regs))
                mds_user_clear_cpu_buffers();
}

#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
DEFINE_IDTENTRY_RAW(exc_nmi_noist)
{
        exc_nmi(regs);
}
#endif
#if IS_MODULE(CONFIG_KVM_INTEL)
EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
#endif

void stop_nmi(void)
{
        ignore_nmis++;
}

void restart_nmi(void)
{
        ignore_nmis--;
}

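/* reset the back-to-back NMI logic */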
void local_touch_nmi(void)
{
        __this_cpu_write(last_nmi_rip, 0);
}
EXPORT_SYMBOL_GPL(local_touch_nmi);