0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0014
0015 #include <linux/context_tracking.h>
0016 #include <linux/interrupt.h>
0017 #include <linux/kallsyms.h>
0018 #include <linux/spinlock.h>
0019 #include <linux/kprobes.h>
0020 #include <linux/uaccess.h>
0021 #include <linux/kdebug.h>
0022 #include <linux/kgdb.h>
0023 #include <linux/kernel.h>
0024 #include <linux/export.h>
0025 #include <linux/ptrace.h>
0026 #include <linux/uprobes.h>
0027 #include <linux/string.h>
0028 #include <linux/delay.h>
0029 #include <linux/errno.h>
0030 #include <linux/kexec.h>
0031 #include <linux/sched.h>
0032 #include <linux/sched/task_stack.h>
0033 #include <linux/timer.h>
0034 #include <linux/init.h>
0035 #include <linux/bug.h>
0036 #include <linux/nmi.h>
0037 #include <linux/mm.h>
0038 #include <linux/smp.h>
0039 #include <linux/io.h>
0040 #include <linux/hardirq.h>
0041 #include <linux/atomic.h>
0042 #include <linux/ioasid.h>
0043
0044 #include <asm/stacktrace.h>
0045 #include <asm/processor.h>
0046 #include <asm/debugreg.h>
0047 #include <asm/realmode.h>
0048 #include <asm/text-patching.h>
0049 #include <asm/ftrace.h>
0050 #include <asm/traps.h>
0051 #include <asm/desc.h>
0052 #include <asm/fpu/api.h>
0053 #include <asm/cpu.h>
0054 #include <asm/cpu_entry_area.h>
0055 #include <asm/mce.h>
0056 #include <asm/fixmap.h>
0057 #include <asm/mach_traps.h>
0058 #include <asm/alternative.h>
0059 #include <asm/fpu/xstate.h>
0060 #include <asm/vm86.h>
0061 #include <asm/umip.h>
0062 #include <asm/insn.h>
0063 #include <asm/insn-eval.h>
0064 #include <asm/vdso.h>
0065 #include <asm/tdx.h>
0066
0067 #ifdef CONFIG_X86_64
0068 #include <asm/x86_init.h>
0069 #include <asm/proto.h>
0070 #else
0071 #include <asm/processor-flags.h>
0072 #include <asm/setup.h>
0073 #include <asm/proto.h>
0074 #endif
0075
0076 DECLARE_BITMAP(system_vectors, NR_VECTORS);
0077
/* Re-enable interrupts only if they were enabled in the interrupted context. */
static inline void cond_local_irq_enable(struct pt_regs *regs)
{
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_enable();
}
0083
/* Counterpart of cond_local_irq_enable(): restore the IRQ-disabled state. */
static inline void cond_local_irq_disable(struct pt_regs *regs)
{
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_disable();
}
0089
/*
 * Check whether a #UD fault address points at a UD2 instruction, i.e. a
 * BUG()/WARN() site.  Only kernel addresses qualify.
 */
__always_inline int is_valid_bugaddr(unsigned long addr)
{
	if (addr < TASK_SIZE_MAX)
		return 0;

	/*
	 * We got #UD, if the text isn't readable we'd have gotten a
	 * different exception, so the dereference below is safe here.
	 */
	return *(unsigned short *)addr == INSN_UD2;
}
0101
/*
 * Common trap handling that does not raise a signal itself.
 *
 * Returns 0 when the trap was fully handled (vm86 forwarded it, or a
 * kernel/vdso fixup resolved it) and -1 when the caller should go on to
 * deliver a signal to the task.
 */
static nokprobe_inline int
do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
		  struct pt_regs *regs, long error_code)
{
	if (v8086_mode(regs)) {
		/*
		 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
		 * On nmi (interrupt 2), do_trap should not be called.
		 */
		if (trapnr < X86_TRAP_UD) {
			if (!handle_vm86_trap((struct kernel_vm86_regs *) regs,
						error_code, trapnr))
				return 0;
		}
	} else if (!user_mode(regs)) {
		if (fixup_exception(regs, trapnr, error_code, 0))
			return 0;

		tsk->thread.error_code = error_code;
		tsk->thread.trap_nr = trapnr;
		die(str, regs, error_code);
	} else {
		if (fixup_vdso_exception(regs, trapnr, error_code, 0))
			return 0;
	}

	/*
	 * We want error_code and trap_nr set for userspace faults and
	 * kernelspace faults which result in die(), but not kernelspace
	 * faults which are fixed up.  die() gives the process no chance to
	 * handle the signal and notice the kernel fault information, so
	 * that won't result in polluting the information about previously
	 * queued, but not yet delivered, faults.
	 */
	tsk->thread.error_code = error_code;
	tsk->thread.trap_nr = trapnr;

	return -1;
}
0142
/*
 * Rate-limited diagnostic for signals that the task does not handle
 * itself; gated on the show_unhandled_signals sysctl.
 */
static void show_signal(struct task_struct *tsk, int signr,
			const char *type, const char *desc,
			struct pt_regs *regs, long error_code)
{
	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
	    printk_ratelimit()) {
		pr_info("%s[%d] %s%s ip:%lx sp:%lx error:%lx",
			tsk->comm, task_pid_nr(tsk), type, desc,
			regs->ip, regs->sp, error_code);
		print_vma_addr(KERN_CONT " in ", regs->ip);
		pr_cont("\n");
	}
}
0156
/*
 * Deliver a trap to the current task: try the no-signal paths first and
 * fall back to forcing the appropriate signal (with siginfo if sicode is
 * non-zero) on the task.
 */
static void
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
	long error_code, int sicode, void __user *addr)
{
	struct task_struct *tsk = current;

	if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
		return;

	show_signal(tsk, signr, "trap ", str, regs, error_code);

	if (!sicode)
		force_sig(signr);
	else
		force_sig_fault(signr, sicode, addr);
}
NOKPROBE_SYMBOL(do_trap);
0174
/*
 * Generic entry for the simple error traps: give the die notifier chain a
 * chance to swallow the event, otherwise deliver it via do_trap() with
 * interrupts conditionally enabled around the delivery.
 */
static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
	unsigned long trapnr, int signr, int sicode, void __user *addr)
{
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");

	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
			NOTIFY_STOP) {
		cond_local_irq_enable(regs);
		do_trap(trapnr, signr, str, regs, error_code, sicode, addr);
		cond_local_irq_disable(regs);
	}
}
0187
0188
0189
0190
0191
0192
0193
0194
0195
0196
0197
/*
 * Address to report in siginfo for a trap; uprobes may redirect the
 * apparent trap address for single-stepped-out-of-line instructions.
 */
static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs)
{
	return (void __user *)uprobe_get_trap_addr(regs);
}
0202
/* #DE - Divide Error: SIGFPE/FPE_INTDIV to the faulting task. */
DEFINE_IDTENTRY(exc_divide_error)
{
	do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
		      FPE_INTDIV, error_get_trap_addr(regs));
}
0208
/* #OF - Overflow (INTO instruction): delivered as SIGSEGV. */
DEFINE_IDTENTRY(exc_overflow)
{
	do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL);
}
0213
0214 #ifdef CONFIG_X86_KERNEL_IBT
0215
/* When false ("ibt=warn"), a missing ENDBR is reported as a WARN, not BUG. */
static __ro_after_init bool ibt_fatal = true;

/* Target label inside ibt_selftest(); reached via an indirect jump. */
extern void ibt_selftest_ip(void);

/*
 * #CP error-code layout: the low 15 bits (CP_EC) encode the violation
 * kind; CP_ENCL flags a fault originating inside an SGX enclave.
 */
enum cp_error_code {
	CP_EC = (1 << 15) - 1,

	CP_RET = 1,
	CP_IRET = 2,
	CP_ENDBR = 3,
	CP_RSTRORSSP = 4,
	CP_SETSSBSY = 5,

	CP_ENCL = 1 << 15,
};
0231
/*
 * #CP - Control Protection.  With only IBT enabled, the sole expected
 * cause is a kernel-mode indirect branch to a location lacking ENDBR.
 */
DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
{
	if (!cpu_feature_enabled(X86_FEATURE_IBT)) {
		pr_err("Unexpected #CP\n");
		BUG();
	}

	if (WARN_ON_ONCE(user_mode(regs) || (error_code & CP_EC) != CP_ENDBR))
		return;

	/* The boot-time selftest lands here on purpose; signal success. */
	if (unlikely(regs->ip == (unsigned long)&ibt_selftest_ip)) {
		regs->ax = 0;
		return;
	}

	pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs));
	if (!ibt_fatal) {
		/* "ibt=warn": emit a WARN-style report and keep running. */
		printk(KERN_DEFAULT CUT_HERE);
		__warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL);
		return;
	}
	BUG();
}
0255
0256
/*
 * Verify IBT works: jump indirectly to a label that deliberately lacks
 * ENDBR.  If #CP fires, exc_control_protection() zeroes %rax and we
 * return true; if execution falls through unimpeded, %rax stays non-zero
 * and we return false.
 */
noinline bool ibt_selftest(void)
{
	unsigned long ret;

	asm ("	lea ibt_selftest_ip(%%rip), %%rax\n\t"
	     ANNOTATE_RETPOLINE_SAFE
	     "	jmp *%%rax\n\t"
	     "ibt_selftest_ip:\n\t"
	     UNWIND_HINT_FUNC
	     ANNOTATE_NOENDBR
	     "	nop\n\t"

	     : "=a" (ret) : : "memory");

	return !ret;
}
0273
0274 static int __init ibt_setup(char *str)
0275 {
0276 if (!strcmp(str, "off"))
0277 setup_clear_cpu_cap(X86_FEATURE_IBT);
0278
0279 if (!strcmp(str, "warn"))
0280 ibt_fatal = false;
0281
0282 return 1;
0283 }
0284
0285 __setup("ibt=", ibt_setup);
0286
0287 #endif
0288
/*
 * #UD delivery helper.  Non-static on F00F-bug-afflicted CPUs because the
 * fault-path workaround elsewhere needs to call it directly.
 */
#ifdef CONFIG_X86_F00F_BUG
void handle_invalid_op(struct pt_regs *regs)
#else
static inline void handle_invalid_op(struct pt_regs *regs)
#endif
{
	do_error_trap(regs, 0, "invalid opcode", X86_TRAP_UD, SIGILL,
		      ILL_ILLOPN, error_get_trap_addr(regs));
}
0298
/*
 * Handle a #UD caused by a BUG()/WARN() UD2 site.  Returns true when the
 * trap was a WARN and execution may continue past the UD2.
 */
static noinstr bool handle_bug(struct pt_regs *regs)
{
	bool handled = false;

	if (!is_valid_bugaddr(regs->ip))
		return handled;

	/*
	 * All lies, just get the WARN/BUG out.  report_bug() and the
	 * printing it triggers are instrumentable, so bracket them.
	 */
	instrumentation_begin();

	/*
	 * Since we're emulating a CALL with exceptions, restore the interrupt
	 * state to what it was at the exception site.
	 */
	if (regs->flags & X86_EFLAGS_IF)
		raw_local_irq_enable();
	if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
		regs->ip += LEN_UD2;	/* skip the UD2 and resume */
		handled = true;
	}
	if (regs->flags & X86_EFLAGS_IF)
		raw_local_irq_disable();
	instrumentation_end();

	return handled;
}
0326
/* #UD - Invalid Opcode: WARN/BUG sites first, then normal trap delivery. */
DEFINE_IDTENTRY_RAW(exc_invalid_op)
{
	irqentry_state_t state;

	/*
	 * We use UD2 as a poor man's WARN()/BUG() implementation.  Handle
	 * kernel-mode WARNs before irqentry_enter() so the entry
	 * accounting is not disturbed for the common WARN case.
	 */
	if (!user_mode(regs) && handle_bug(regs))
		return;

	state = irqentry_enter(regs);
	instrumentation_begin();
	handle_invalid_op(regs);
	instrumentation_end();
	irqentry_exit(regs, state);
}
0345
/* #MF-era trap 9 - Coprocessor Segment Overrun (legacy CPUs only). */
DEFINE_IDTENTRY(exc_coproc_segment_overrun)
{
	do_error_trap(regs, 0, "coprocessor segment overrun",
		      X86_TRAP_OLD_MF, SIGFPE, 0, NULL);
}
0351
/* #TS - Invalid TSS. */
DEFINE_IDTENTRY_ERRORCODE(exc_invalid_tss)
{
	do_error_trap(regs, error_code, "invalid TSS", X86_TRAP_TS, SIGSEGV,
		      0, NULL);
}
0357
/* #NP - Segment Not Present. */
DEFINE_IDTENTRY_ERRORCODE(exc_segment_not_present)
{
	do_error_trap(regs, error_code, "segment not present", X86_TRAP_NP,
		      SIGBUS, 0, NULL);
}
0363
/* #SS - Stack Segment fault. */
DEFINE_IDTENTRY_ERRORCODE(exc_stack_segment)
{
	do_error_trap(regs, error_code, "stack segment", X86_TRAP_SS, SIGBUS,
		      0, NULL);
}
0369
/*
 * #AC - Alignment Check.  In the kernel a #AC can only come from the
 * split-lock detection machinery, hence the die() message below; in user
 * mode it is either a split lock (handled/ratelimited) or a real
 * unaligned access with CR0.AM/EFLAGS.AC set.
 */
DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check)
{
	char *str = "alignment check";

	if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP)
		return;

	if (!user_mode(regs))
		die("Split lock detected\n", regs, error_code);

	local_irq_enable();

	if (handle_user_split_lock(regs, error_code))
		goto out;

	do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
		error_code, BUS_ADRALN, NULL);

out:
	local_irq_disable();
}
0391
#ifdef CONFIG_VMAP_STACK
/*
 * Terminal handler for a fault that hit a vmap'd stack's guard page.
 * Never returns: die() is tried first, and if that somehow comes back
 * we panic, since the stack is unusable.
 */
__visible void __noreturn handle_stack_overflow(struct pt_regs *regs,
						unsigned long fault_address,
						struct stack_info *info)
{
	const char *name = stack_type_name(info->type);

	printk(KERN_EMERG "BUG: %s stack guard page was hit at %p (stack is %p..%p)\n",
	       name, (void *)fault_address, info->begin, info->end);

	die("stack guard page", regs, 0);

	/* Be absolutely certain we don't return. */
	panic("%s stack guard hit", name);
}
#endif
0408
0409
0410
0411
0412
0413
0414
0415
0416
0417
0418
0419
0420
0421
0422
0423
0424
0425
0426
0427
/*
 * #DF - Double Fault.  Runs on its own IST stack.  Normally fatal, with
 * two special cases handled below: a recoverable IRET fault on the
 * espfix64 stack, and a vmap-stack overflow which gets a proper report.
 */
DEFINE_IDTENTRY_DF(exc_double_fault)
{
	static const char str[] = "double fault";
	struct task_struct *tsk = current;

#ifdef CONFIG_VMAP_STACK
	unsigned long address = read_cr2();
	struct stack_info info;
#endif

#ifdef CONFIG_X86_ESPFIX64
	extern unsigned char native_irq_return_iret[];

	/*
	 * If IRET takes a non-IST fault on the espfix64 stack, then we
	 * end up promoting it to a doublefault.  In that case, take
	 * advantage of the fact that we're not using the normal (TSS.sp0)
	 * stack right now.  We can write a fake #GP(0) frame at TSS.sp0
	 * and then modify our own IRET frame so that, when we return,
	 * we land directly at the #GP(0) vector with the stack already
	 * set up according to its expectations.
	 *
	 * The net result is that our #GP handler will think that we
	 * entered from usermode with the bad user context.
	 */
	if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
		regs->cs == __KERNEL_CS &&
		regs->ip == (unsigned long)native_irq_return_iret)
	{
		struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
		unsigned long *p = (unsigned long *)regs->sp;

		/*
		 * regs->sp points to the failing IRET frame on the espfix64
		 * stack.  Copy it to the entry stack, filling in
		 * gpregs->ip through gpregs->ss.
		 */
		gpregs->ip = p[0];
		gpregs->cs = p[1];
		gpregs->flags = p[2];
		gpregs->sp = p[3];
		gpregs->ss = p[4];
		gpregs->orig_ax = 0;	/* the (lost) #GP error code */

		/*
		 * Adjust our own IRET frame so that, on return, we land at
		 * the #GP entry with RSP pointing at the fake frame built
		 * above.  Nothing can clobber that frame before we get
		 * there: interrupts stay off and we don't schedule.
		 */
		regs->ip = (unsigned long)asm_exc_general_protection;
		regs->sp = (unsigned long)&gpregs->orig_ax;

		return;
	}
#endif

	irqentry_nmi_enter(regs);
	instrumentation_begin();
	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);

	tsk->thread.error_code = error_code;
	tsk->thread.trap_nr = X86_TRAP_DF;

#ifdef CONFIG_VMAP_STACK
	/*
	 * If we overflow the stack into a guard page, the CPU cannot push
	 * the exception frame for the original fault and promotes it to a
	 * double fault.  CR2 (read above, before it could be clobbered)
	 * tells us whether the fault address sits in a stack guard page;
	 * if so, report it as a stack overflow instead of a bare #DF.
	 */
	if (get_stack_guard_info((void *)address, &info))
		handle_stack_overflow(regs, address, &info);
#endif

	pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
	die("double fault", regs, error_code);
	panic("Machine halted.");
	instrumentation_end();
}
0547
/* #BR - Bound Range Exceeded (BOUND instruction): fatal in the kernel. */
DEFINE_IDTENTRY(exc_bounds)
{
	if (notify_die(DIE_TRAP, "bounds", regs, 0,
			X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
		return;
	cond_local_irq_enable(regs);

	if (!user_mode(regs))
		die("bounds", regs, 0);

	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, 0, 0, NULL);

	cond_local_irq_disable(regs);
}
0562
/* Classification of the decoded memory operand of a kernel #GP. */
enum kernel_gp_hint {
	GP_NO_HINT,
	GP_NON_CANONICAL,
	GP_CANONICAL
};
0568
0569
0570
0571
0572
0573
/*
 * When an uncaught #GP occurs, try to determine the memory address
 * accessed by the instruction and retrieve the hint as to whether the
 * access is canonical.  *addr is only valid when a hint other than
 * GP_NO_HINT is returned.
 */
static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
						 unsigned long *addr)
{
	u8 insn_buf[MAX_INSN_SIZE];
	struct insn insn;
	int ret;

	if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip,
			MAX_INSN_SIZE))
		return GP_NO_HINT;

	ret = insn_decode_kernel(&insn, insn_buf);
	if (ret < 0)
		return GP_NO_HINT;

	*addr = (unsigned long)insn_get_addr_ref(&insn, regs);
	if (*addr == -1UL)
		return GP_NO_HINT;

#ifdef CONFIG_X86_64
	/*
	 * Check that:
	 *  - the operand is not in the kernel half
	 *  - the last byte of the operand is not in the user canonical half
	 * i.e. the access straddles or exceeds the canonical boundary.
	 */
	if (*addr < ~__VIRTUAL_MASK &&
	    *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK)
		return GP_NON_CANONICAL;
#endif

	return GP_CANONICAL;
}
0606
0607 #define GPFSTR "general protection fault"
0608
0609 static bool fixup_iopl_exception(struct pt_regs *regs)
0610 {
0611 struct thread_struct *t = ¤t->thread;
0612 unsigned char byte;
0613 unsigned long ip;
0614
0615 if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3)
0616 return false;
0617
0618 if (insn_get_effective_ip(regs, &ip))
0619 return false;
0620
0621 if (get_user(byte, (const char __user *)ip))
0622 return false;
0623
0624 if (byte != 0xfa && byte != 0xfb)
0625 return false;
0626
0627 if (!t->iopl_warn && printk_ratelimit()) {
0628 pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
0629 current->comm, task_pid_nr(current), ip);
0630 print_vma_addr(KERN_CONT " in ", ip);
0631 pr_cont("\n");
0632 t->iopl_warn = 1;
0633 }
0634
0635 regs->ip += 1;
0636 return true;
0637 }
0638
0639
0640
0641
0642
0643
/*
 * ENQCMD faults with #GP when the PASID MSR is not yet valid for the
 * task.  If the mm has a PASID allocated, program the MSR lazily here
 * and retry; returns true when the fault was fixed up.
 */
static bool try_fixup_enqcmd_gp(void)
{
#ifdef CONFIG_IOMMU_SVA
	u32 pasid;

	/*
	 * MSR_IA32_PASID is managed using XSAVE.  Directly writing to the
	 * MSR is only possible when fpregs are valid and the fpstate is
	 * not.  This is guaranteed when handling a userspace exception
	 * *before* interrupts are re-enabled.
	 */
	lockdep_assert_irqs_disabled();

	/*
	 * Hardware without ENQCMD will not generate #GPs that can be
	 * fixed up here.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
		return false;

	pasid = current->mm->pasid;

	/*
	 * If the mm has not been allocated a PASID, the #GP can not be
	 * fixed up.
	 */
	if (!pasid_valid(pasid))
		return false;

	/*
	 * Did this thread already have its PASID activated?  If so, the
	 * #GP must be for some other reason.
	 */
	if (current->pasid_activated)
		return false;

	wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID);
	current->pasid_activated = 1;

	return true;
#else
	return false;
#endif
}
0689
/*
 * Kernel-mode #GP/#VE fixup pipeline: extable fixup, then kprobes, then
 * the die notifier chain.  Returns true when the fault was consumed.
 */
static bool gp_try_fixup_and_notify(struct pt_regs *regs, int trapnr,
				    unsigned long error_code, const char *str)
{
	if (fixup_exception(regs, trapnr, error_code, 0))
		return true;

	current->thread.error_code = error_code;
	current->thread.trap_nr = trapnr;

	/*
	 * To be potentially processing a kprobe fault and to trust the
	 * result from kprobe_running(), we have to be non-preemptible.
	 */
	if (!preemptible() && kprobe_running() &&
	    kprobe_fault_handler(regs, trapnr))
		return true;

	return notify_die(DIE_GPF, str, regs, error_code, trapnr, SIGSEGV) == NOTIFY_STOP;
}
0709
/* Record the trap in the thread struct and force SIGSEGV on the task. */
static void gp_user_force_sig_segv(struct pt_regs *regs, int trapnr,
				   unsigned long error_code, const char *str)
{
	current->thread.error_code = error_code;
	current->thread.trap_nr = trapnr;
	show_signal(current, SIGSEGV, "", str, regs, error_code);
	force_sig(SIGSEGV);
}
0718
/*
 * #GP - General Protection fault.  Tries the various fixup paths
 * (ENQCMD PASID, UMIP emulation, vm86, iopl emulation, vdso, extable)
 * before signalling user space or die()ing in the kernel, optionally
 * with a decoded-operand address hint in the kernel case.
 */
DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
{
	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
	enum kernel_gp_hint hint = GP_NO_HINT;
	unsigned long gp_addr;

	/* Must run before interrupts are enabled (fpregs must be valid). */
	if (user_mode(regs) && try_fixup_enqcmd_gp())
		return;

	cond_local_irq_enable(regs);

	if (static_cpu_has(X86_FEATURE_UMIP)) {
		if (user_mode(regs) && fixup_umip_exception(regs))
			goto exit;
	}

	if (v8086_mode(regs)) {
		local_irq_enable();
		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
		local_irq_disable();
		return;
	}

	if (user_mode(regs)) {
		if (fixup_iopl_exception(regs))
			goto exit;

		if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0))
			goto exit;

		gp_user_force_sig_segv(regs, X86_TRAP_GP, error_code, desc);
		goto exit;
	}

	if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc))
		goto exit;

	/* A non-zero error code indicates a segment-related fault. */
	if (error_code)
		snprintf(desc, sizeof(desc), "segment-related " GPFSTR);
	else
		hint = get_kernel_gp_address(regs, &gp_addr);

	if (hint != GP_NO_HINT)
		snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx",
			 (hint == GP_NON_CANONICAL) ? "probably for non-canonical address"
						    : "maybe for address",
			 gp_addr);

	/*
	 * KASAN is interested only in the non-canonical case, clear it
	 * otherwise.
	 */
	if (hint != GP_NON_CANONICAL)
		gp_addr = 0;

	die_addr(desc, regs, error_code, gp_addr);

exit:
	cond_local_irq_disable(regs);
}
0779
/*
 * Give KGDB, kprobes and the die notifier chain a chance to consume an
 * int3.  Returns true when one of them handled it.
 */
static bool do_int3(struct pt_regs *regs)
{
	int res;

#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
	if (kgdb_ll_trap(DIE_INT3, "int3", regs, 0, X86_TRAP_BP,
			 SIGTRAP) == NOTIFY_STOP)
		return true;
#endif

#ifdef CONFIG_KPROBES
	if (kprobe_int3_handler(regs))
		return true;
#endif
	res = notify_die(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, SIGTRAP);

	return res == NOTIFY_STOP;
}
NOKPROBE_SYMBOL(do_int3);
0799
/* User-mode int3: if no in-kernel consumer claims it, deliver SIGTRAP. */
static void do_int3_user(struct pt_regs *regs)
{
	if (do_int3(regs))
		return;

	cond_local_irq_enable(regs);
	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, 0, 0, NULL);
	cond_local_irq_disable(regs);
}
0809
/* #BP - Breakpoint (int3). */
DEFINE_IDTENTRY_RAW(exc_int3)
{
	/*
	 * poke_int3_handler() is completely self contained code; it does (and
	 * must) *NOT* call out to anything, lest it hits upon yet another
	 * INT3.  It must run before any entry accounting below.
	 */
	if (poke_int3_handler(regs))
		return;

	/*
	 * irqentry_enter_from_user_mode() uses static_branch_{,un}likely()
	 * and therefore can trigger INT3, hence poke_int3_handler() must
	 * be done before.  If the entry came from kernel mode, then use
	 * nmi_enter() because the INT3 could be on the single-step path
	 * of an NMI (or similarly fragile context).
	 */
	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		instrumentation_begin();
		do_int3_user(regs);
		instrumentation_end();
		irqentry_exit_to_user_mode(regs);
	} else {
		irqentry_state_t irq_state = irqentry_nmi_enter(regs);

		instrumentation_begin();
		if (!do_int3(regs))
			die("int3", regs, 0);
		instrumentation_end();
		irqentry_nmi_exit(regs, irq_state);
	}
}
0843
0844 #ifdef CONFIG_X86_64
0845
0846
0847
0848
0849
/*
 * Help handler_for_kernel_mode_faults... entry code: copy the IST-stack
 * pt_regs to the "normal" per-task kernel stack so the handler can run
 * on a schedulable stack.  Returns the (possibly new) regs pointer.
 */
asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
{
	struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
	if (regs != eregs)
		*regs = *eregs;
	return regs;
}
0857
0858 #ifdef CONFIG_AMD_MEM_ENCRYPT
#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
 * Move off the #VC IST stack so that nested #VC exceptions cannot
 * corrupt the frame.  Picks the interrupted stack when it is safe to
 * reuse, or the VC2 fallback IST stack otherwise, copies regs there and
 * returns the new pt_regs pointer.
 */
asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *regs)
{
	unsigned long sp, *stack;
	struct stack_info info;
	struct pt_regs *regs_ret;

	/*
	 * In the SYSCALL entry path the RSP value comes from user-space - don't
	 * trust it and switch to the current kernel stack.
	 */
	if (ip_within_syscall_gap(regs)) {
		sp = this_cpu_read(cpu_current_top_of_stack);
		goto sync;
	}

	/*
	 * From here on the RSP value is trusted.  Now check whether entry
	 * happened from a safe stack.  Not safe are the entry stack and the
	 * IST stacks (except the exception stacks); use the fallback stack
	 * in those cases.
	 */
	sp    = regs->sp;
	stack = (unsigned long *)sp;

	if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
	    info.type > STACK_TYPE_EXCEPTION_LAST)
		sp = __this_cpu_ist_top_va(VC2);

sync:
	/*
	 * Found a safe stack - switch to it as if the entry didn't happen via
	 * IST stack.  The code below only copies pt_regs, the real switch happens
	 * in assembly code.
	 */
	sp = ALIGN_DOWN(sp, 8) - sizeof(*regs_ret);

	regs_ret = (struct pt_regs *)sp;
	*regs_ret = *regs;

	return regs_ret;
}
#endif
0899 #endif
0900
/*
 * Called from entry_64.S when IRET to user mode faults: rebuild a sane
 * pt_regs on the per-CPU entry stack (combining the saved GP registers
 * with the failing IRET frame) so the fault can be handled as if it had
 * come from user mode.  Returns the rebuilt frame.
 */
asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
{
	struct pt_regs tmp, *new_stack;

	/*
	 * Build the full pt_regs in a temporary first, then publish it to
	 * the entry stack in one go; the entry stack frame is where the
	 * subsequent exception entry expects to find it.
	 */
	new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;

	/* Copy the IRET target (ip, cs, flags, sp, ss) to the temporary storage. */
	__memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8);

	/* Copy the remainder of the stack from the current stack. */
	__memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip));

	/* Update the entry stack */
	__memcpy(new_stack, &tmp, sizeof(tmp));

	/* The IRET fault can only plausibly happen returning to user mode. */
	BUG_ON(!user_mode(new_stack));
	return new_stack;
}
0927 #endif
0928
/*
 * Detect a single-step #DB taken inside the SYSENTER entry region.
 * SYSENTER does not clear TF, so user code can cause the kernel entry
 * path itself to be single-stepped; those events must be suppressed.
 */
static bool is_sysenter_singlestep(struct pt_regs *regs)
{
	/*
	 * The region is bounded by linker symbols around the SYSENTER
	 * entry text; the unsigned subtraction-compare is a single range
	 * check for begin <= ip < end.
	 */
#ifdef CONFIG_X86_32
	return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) <
		(unsigned long)__end_SYSENTER_singlestep_region -
		(unsigned long)__begin_SYSENTER_singlestep_region;
#elif defined(CONFIG_IA32_EMULATION)
	return (regs->ip - (unsigned long)entry_SYSENTER_compat) <
		(unsigned long)__end_entry_SYSENTER_compat -
		(unsigned long)entry_SYSENTER_compat;
#else
	return false;
#endif
}
0951
/*
 * Read and reset DR6.  The CPU merges new #DB causes into DR6 rather
 * than replacing it, so the handler must clear it (to the architectural
 * reserved pattern) to avoid stale bits confusing the next #DB.  The
 * XOR flips the always-set reserved bits so the returned value has only
 * genuine cause bits set.
 */
static __always_inline unsigned long debug_read_clear_dr6(void)
{
	unsigned long dr6;

	get_debugreg(dr6, 6);
	set_debugreg(DR6_RESERVED, 6);	/* Flip to the architectural init value */
	dr6 ^= DR6_RESERVED;

	return dr6;
}
0973
0974
0975
0976
0977
0978
0979
0980
0981
0982
0983
0984
0985
0986
0987
0988
0989
0990
0991
0992
0993
0994
0995
0996
0997
0998
/*
 * Run the DIE_DEBUG notifier chain (kgdb, kprobes post-handlers, etc).
 * Notifiers may modify dr6 through the pointer passed in the die args.
 * Returns true when a notifier consumed the #DB.
 */
static bool notify_debug(struct pt_regs *regs, unsigned long *dr6)
{
	/*
	 * Notifiers will clear bits in @dr6 to indicate the event has been
	 * consumed - hence the cast of &dr6 into the "long" die argument.
	 */
	if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP)
		return true;

	return false;
}
1013
/*
 * Kernel-mode #DB handling.  Runs in NMI-like context: breakpoints are
 * disabled for the duration (local_db_save) to prevent #DB recursion.
 */
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
					     unsigned long dr6)
{
	/*
	 * Disable breakpoints during exception handling; recursive
	 * exceptions are exceedingly 'fun'.  Must happen before
	 * irqentry_nmi_enter() so no instrumentable code can trip a
	 * breakpoint first.
	 */
	unsigned long dr7 = local_db_save();
	irqentry_state_t irq_state = irqentry_nmi_enter(regs);
	instrumentation_begin();

	/*
	 * If something gets miswired and we end up here for a user mode
	 * #DB, we will malfunction.
	 */
	WARN_ON_ONCE(user_mode(regs));

	if (test_thread_flag(TIF_BLOCKSTEP)) {
		/*
		 * The debug exception is presumed to have cleared the BTF
		 * (branch-trap) flag; re-set it so block-stepping resumes
		 * for the task.  (NOTE(review): hardware-clears-BTF is SDM
		 * behavior — confirm against current SDM.)
		 */
		unsigned long debugctl;

		rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
		debugctl |= DEBUGCTLMSR_BTF;
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	}

	/*
	 * Catch SYSENTER with TF set and clear DR_STEP.  If this hit a
	 * watchpoint at the same time then that will still be handled.
	 */
	if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
		dr6 &= ~DR_STEP;

	/*
	 * If DR6 is zero, no point in trying to handle it.
	 */
	if (!dr6)
		goto out;

	if (notify_debug(regs, &dr6))
		goto out;

	/*
	 * The kernel doesn't use TF single-step outside of kprobes/KGDB,
	 * which were consumed above; anything still carrying DR_STEP here
	 * is wonky, so warn and clear TF to stop the stepping.
	 */
	if (WARN_ON_ONCE(dr6 & DR_STEP))
		regs->flags &= ~X86_EFLAGS_TF;
out:
	instrumentation_end();
	irqentry_nmi_exit(regs, irq_state);

	local_db_restore(dr7);
}
1087
/*
 * User-mode #DB handling: maintain the ptrace-visible virtual DR6,
 * forward to vm86 / bus-lock handling as appropriate, and send SIGTRAP
 * for step/watchpoint/icebp events.
 */
static __always_inline void exc_debug_user(struct pt_regs *regs,
					   unsigned long dr6)
{
	bool icebp;

	/*
	 * If something gets miswired and we end up here for a kernel mode
	 * #DB, we will malfunction.
	 */
	WARN_ON_ONCE(!user_mode(regs));

	irqentry_enter_from_user_mode(regs);
	instrumentation_begin();

	/*
	 * Start the virtual/ptrace DR6 value with just the DR_STEP mask;
	 * the real trap bits are merged back in before the signal below.
	 */
	current->thread.virtual_dr6 = (dr6 & DR_STEP);

	/*
	 * Clear TIF_BLOCKSTEP to keep it in sync with the hardware BTF
	 * flag, which the #DB is presumed to have cleared.
	 */
	clear_thread_flag(TIF_BLOCKSTEP);

	/*
	 * If dr6 has no reason to give us about the origin of this trap,
	 * then it's very likely the result of an icebp/int01 trap.
	 * User wants a sigtrap for that.
	 */
	icebp = !dr6;

	if (notify_debug(regs, &dr6))
		goto out;

	/* It's safe to allow irq's after DR6 has been saved */
	local_irq_enable();

	if (v8086_mode(regs)) {
		handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB);
		goto out_irq;
	}

	/* #DB for bus lock can only be triggered from userspace. */
	if (dr6 & DR_BUS_LOCK)
		handle_bus_lock(regs);

	/* Add the virtual_dr6 bits for signals. */
	dr6 |= current->thread.virtual_dr6;
	if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)
		send_sigtrap(regs, 0, get_si_code(dr6));

out_irq:
	local_irq_disable();
out:
	instrumentation_end();
	irqentry_exit_to_user_mode(regs);
}
1160
1161 #ifdef CONFIG_X86_64
1162
/* IST stack entry: #DB raised in kernel mode (64-bit). */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
	exc_debug_kernel(regs, debug_read_clear_dr6());
}
1167
1168
/* Task stack entry: #DB raised in user mode (64-bit). */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
	exc_debug_user(regs, debug_read_clear_dr6());
}
1173 #else
1174
/* 32-bit: single #DB entry point, dispatched on the interrupted mode. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
	unsigned long dr6 = debug_read_clear_dr6();

	if (user_mode(regs))
		exc_debug_user(regs, dr6);
	else
		exc_debug_kernel(regs, dr6);
}
1184 #endif
1185
1186
1187
1188
1189
1190
/*
 * Common handling for #MF (x87) and #XF (SIMD) floating-point
 * exceptions: kernel faults go through extable/die, user faults get a
 * SIGFPE with the si_code derived from the saved FPU state.
 */
static void math_error(struct pt_regs *regs, int trapnr)
{
	struct task_struct *task = current;
	struct fpu *fpu = &task->thread.fpu;
	int si_code;
	char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
						"simd exception";

	cond_local_irq_enable(regs);

	if (!user_mode(regs)) {
		if (fixup_exception(regs, trapnr, 0, 0))
			goto exit;

		task->thread.error_code = 0;
		task->thread.trap_nr = trapnr;

		if (notify_die(DIE_TRAP, str, regs, 0, trapnr,
			       SIGFPE) != NOTIFY_STOP)
			die(str, regs, 0);
		goto exit;
	}

	/*
	 * Synchronize the task's in-memory FPU state with the registers so
	 * the exception status below reflects the faulting operation.
	 */
	fpu_sync_fpstate(fpu);

	task->thread.trap_nr	= trapnr;
	task->thread.error_code = 0;

	si_code = fpu__exception_code(fpu, trapnr);
	/* Retry when we get spurious exceptions: */
	if (!si_code)
		goto exit;

	if (fixup_vdso_exception(regs, trapnr, 0, 0))
		goto exit;

	force_sig_fault(SIGFPE, si_code,
			(void __user *)uprobe_get_trap_addr(regs));
exit:
	cond_local_irq_disable(regs);
}
1236
/* #MF - x87 Floating-Point Exception. */
DEFINE_IDTENTRY(exc_coprocessor_error)
{
	math_error(regs, X86_TRAP_MF);
}
1241
/* #XF - SIMD Floating-Point Exception. */
DEFINE_IDTENTRY(exc_simd_coprocessor_error)
{
	if (IS_ENABLED(CONFIG_X86_INVD_BUG)) {
		/* AMD 486 bug: INVD in CPL 0 raises #XF instead of #GP */
		if (!static_cpu_has(X86_FEATURE_XMM)) {
			__exc_general_protection(regs, 0);
			return;
		}
	}
	math_error(regs, X86_TRAP_XF);
}
1253
/*
 * Trap 15 - reserved/spurious interrupt vector.  Deliberately a no-op:
 * some old hardware can deliver spurious events on this vector
 * (historically an APIC erratum — NOTE(review): exact erratum details
 * not visible here), and ignoring them is the correct response.
 */
DEFINE_IDTENTRY(exc_spurious_interrupt_bug)
{
	/* Intentionally empty. */
}
1276
/*
 * #NM caused by eXtended Feature Disable (XFD): a user task touched a
 * dynamically-enabled XSTATE feature (e.g. AMX) before its buffer was
 * allocated.  Try to enable the feature; signal the task on failure.
 * Returns true when the #NM was an XFD event (handled one way or the
 * other), false to continue normal #NM processing.
 */
static bool handle_xfd_event(struct pt_regs *regs)
{
	u64 xfd_err;
	int err;

	if (!IS_ENABLED(CONFIG_X86_64) || !cpu_feature_enabled(X86_FEATURE_XFD))
		return false;

	rdmsrl(MSR_IA32_XFD_ERR, xfd_err);
	if (!xfd_err)
		return false;

	/* Clear the sticky error MSR before anything else can fault. */
	wrmsrl(MSR_IA32_XFD_ERR, 0);

	/* Die if that happens in kernel space */
	if (WARN_ON(!user_mode(regs)))
		return false;

	local_irq_enable();

	err = xfd_enable_feature(xfd_err);

	switch (err) {
	case -EPERM:
		force_sig_fault(SIGILL, ILL_ILLOPC, error_get_trap_addr(regs));
		break;
	case -EFAULT:
		force_sig(SIGSEGV);
		break;
	}

	local_irq_disable();
	return true;
}
1311
/*
 * #NM - Device Not Available.  Either an XFD event (dynamic XSTATE
 * feature), x87 math emulation on FPU-less hardware, or a stray CR0.TS
 * — with eager FPU switching the kernel never expects a real lazy-FPU
 * #NM, so anything else is fatal.
 */
DEFINE_IDTENTRY(exc_device_not_available)
{
	unsigned long cr0 = read_cr0();

	if (handle_xfd_event(regs))
		return;

#ifdef CONFIG_MATH_EMULATION
	if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
		struct math_emu_info info = { };

		cond_local_irq_enable(regs);

		info.regs = regs;
		math_emulate(&info);

		cond_local_irq_disable(regs);
		return;
	}
#endif

	/* This should not happen. */
	if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) {
		/* Try to fix it up and carry on. */
		write_cr0(cr0 & ~X86_CR0_TS);
	} else {
		/*
		 * Something terrible happened, and we're better off trying
		 * to kill the task than getting stuck in a never-ending
		 * loop of #NM faults.
		 */
		die("unexpected #NM exception", regs, 0);
	}
}
1346
1347 #ifdef CONFIG_INTEL_TDX_GUEST
1348
1349 #define VE_FAULT_STR "VE fault"
1350
/*
 * Deliver an unhandled TDX #VE like a #GP: SIGSEGV for user mode,
 * extable/notify then die for kernel mode.
 */
static void ve_raise_fault(struct pt_regs *regs, long error_code)
{
	if (user_mode(regs)) {
		gp_user_force_sig_segv(regs, X86_TRAP_VE, error_code, VE_FAULT_STR);
		return;
	}

	if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, VE_FAULT_STR))
		return;

	die_addr(VE_FAULT_STR, regs, error_code, 0);
}
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
/*
 * #VE - Virtualization Exception (Intel TDX guest).  The TDX module
 * injects #VE for operations the guest must emulate via hypercalls.
 */
DEFINE_IDTENTRY(exc_virtualization_exception)
{
	struct ve_info ve;

	/*
	 * Retrieve the #VE info from the TDX module first: the "get ve
	 * info" TDCALL must happen before anything that could cause a
	 * nested #VE clobbers the stored information.
	 */
	tdx_get_ve_info(&ve);

	cond_local_irq_enable(regs);

	/*
	 * If tdx_handle_virt_exception() could not process it, treat it
	 * as a regular fault.
	 */
	if (!tdx_handle_virt_exception(regs, &ve))
		ve_raise_fault(regs, 0);

	cond_local_irq_disable(regs);
}
1429
1430 #endif
1431
1432 #ifdef CONFIG_X86_32
/* 32-bit only: software-invoked handler for a faulting IRET (SIGILL). */
DEFINE_IDTENTRY_SW(iret_error)
{
	local_irq_enable();
	if (notify_die(DIE_TRAP, "iret exception", regs, 0,
			X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
		do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, 0,
			ILL_BADSTK, (void __user *)NULL);
	}
	local_irq_disable();
}
1443 #endif
1444
/*
 * Boot-time trap setup.  Ordering matters: the cpu_entry_area must
 * exist before the #VC handler and exception stacks are configured,
 * which in turn must precede installing the IDT entries.
 */
void __init trap_init(void)
{
	/* Init cpu_entry_area before IST entries are set up */
	setup_cpu_entry_areas();

	/* Init GHCB memory pages when running as an SEV-ES guest */
	sev_es_init_vc_handling();

	/* Initialize TSS before setting up traps so ISTs work */
	cpu_init_exception_handling();
	/* Setup traps as cpu_init() might #GP */
	idt_setup_traps();
	cpu_init();
}