Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 #include <linux/extable.h>
0003 #include <linux/uaccess.h>
0004 #include <linux/sched/debug.h>
0005 #include <linux/bitfield.h>
0006 #include <xen/xen.h>
0007 
0008 #include <asm/fpu/api.h>
0009 #include <asm/sev.h>
0010 #include <asm/traps.h>
0011 #include <asm/kdebug.h>
0012 #include <asm/insn-eval.h>
0013 #include <asm/sgx.h>
0014 
/*
 * Return a pointer to the pt_regs slot selected by register number @nr.
 *
 * The slot is located by adding the byte offset reported by
 * pt_regs_offset() to the address of @regs.  A negative offset triggers a
 * one-time warning and yields the address of a static dummy word instead.
 */
static inline unsigned long *pt_regs_nr(struct pt_regs *regs, int nr)
{
	static unsigned long dummy_slot;
	int off = pt_regs_offset(regs, nr);

	if (!WARN_ON_ONCE(off < 0))
		return (unsigned long *)((unsigned long)regs + off);

	return &dummy_slot;
}
0025 
0026 static inline unsigned long
0027 ex_fixup_addr(const struct exception_table_entry *x)
0028 {
0029     return (unsigned long)&x->fixup + x->fixup;
0030 }
0031 
0032 static bool ex_handler_default(const struct exception_table_entry *e,
0033                    struct pt_regs *regs)
0034 {
0035     if (e->data & EX_FLAG_CLEAR_AX)
0036         regs->ax = 0;
0037     if (e->data & EX_FLAG_CLEAR_DX)
0038         regs->dx = 0;
0039 
0040     regs->ip = ex_fixup_addr(e);
0041     return true;
0042 }
0043 
0044 /*
0045  * This is the *very* rare case where we do a "load_unaligned_zeropad()"
0046  * and it's a page crosser into a non-existent page.
0047  *
0048  * This happens when we optimistically load a pathname a word-at-a-time
0049  * and the name is less than the full word and the  next page is not
0050  * mapped. Typically that only happens for CONFIG_DEBUG_PAGEALLOC.
0051  *
0052  * NOTE! The faulting address is always a 'mov mem,reg' type instruction
0053  * of size 'long', and the exception fixup must always point to right
0054  * after the instruction.
0055  */
0056 static bool ex_handler_zeropad(const struct exception_table_entry *e,
0057                    struct pt_regs *regs,
0058                    unsigned long fault_addr)
0059 {
0060     struct insn insn;
0061     const unsigned long mask = sizeof(long) - 1;
0062     unsigned long offset, addr, next_ip, len;
0063     unsigned long *reg;
0064 
0065     next_ip = ex_fixup_addr(e);
0066     len = next_ip - regs->ip;
0067     if (len > MAX_INSN_SIZE)
0068         return false;
0069 
0070     if (insn_decode(&insn, (void *) regs->ip, len, INSN_MODE_KERN))
0071         return false;
0072     if (insn.length != len)
0073         return false;
0074 
0075     if (insn.opcode.bytes[0] != 0x8b)
0076         return false;
0077     if (insn.opnd_bytes != sizeof(long))
0078         return false;
0079 
0080     addr = (unsigned long) insn_get_addr_ref(&insn, regs);
0081     if (addr == ~0ul)
0082         return false;
0083 
0084     offset = addr & mask;
0085     addr = addr & ~mask;
0086     if (fault_addr != addr + sizeof(long))
0087         return false;
0088 
0089     reg = insn_get_modrm_reg_ptr(&insn, regs);
0090     if (!reg)
0091         return false;
0092 
0093     *reg = *(unsigned long *)addr >> (offset * 8);
0094     return ex_handler_default(e, regs);
0095 }
0096 
0097 static bool ex_handler_fault(const struct exception_table_entry *fixup,
0098                  struct pt_regs *regs, int trapnr)
0099 {
0100     regs->ax = trapnr;
0101     return ex_handler_default(fixup, regs);
0102 }
0103 
0104 static bool ex_handler_sgx(const struct exception_table_entry *fixup,
0105                struct pt_regs *regs, int trapnr)
0106 {
0107     regs->ax = trapnr | SGX_ENCLS_FAULT_FLAG;
0108     return ex_handler_default(fixup, regs);
0109 }
0110 
0111 /*
0112  * Handler for when we fail to restore a task's FPU state.  We should never get
0113  * here because the FPU state of a task using the FPU (task->thread.fpu.state)
0114  * should always be valid.  However, past bugs have allowed userspace to set
0115  * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
0116  * These caused XRSTOR to fail when switching to the task, leaking the FPU
0117  * registers of the task previously executing on the CPU.  Mitigate this class
0118  * of vulnerability by restoring from the initial state (essentially, zeroing
0119  * out all the FPU registers) if we can't restore from the task's FPU state.
0120  */
0121 static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
0122                  struct pt_regs *regs)
0123 {
0124     regs->ip = ex_fixup_addr(fixup);
0125 
0126     WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
0127           (void *)instruction_pointer(regs));
0128 
0129     fpu_reset_from_exception_fixup();
0130     return true;
0131 }
0132 
0133 static bool ex_handler_uaccess(const struct exception_table_entry *fixup,
0134                    struct pt_regs *regs, int trapnr)
0135 {
0136     WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
0137     return ex_handler_default(fixup, regs);
0138 }
0139 
0140 static bool ex_handler_copy(const struct exception_table_entry *fixup,
0141                 struct pt_regs *regs, int trapnr)
0142 {
0143     WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
0144     return ex_handler_fault(fixup, regs, trapnr);
0145 }
0146 
0147 static bool ex_handler_msr(const struct exception_table_entry *fixup,
0148                struct pt_regs *regs, bool wrmsr, bool safe, int reg)
0149 {
0150     if (__ONCE_LITE_IF(!safe && wrmsr)) {
0151         pr_warn("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
0152             (unsigned int)regs->cx, (unsigned int)regs->dx,
0153             (unsigned int)regs->ax,  regs->ip, (void *)regs->ip);
0154         show_stack_regs(regs);
0155     }
0156 
0157     if (__ONCE_LITE_IF(!safe && !wrmsr)) {
0158         pr_warn("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
0159             (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
0160         show_stack_regs(regs);
0161     }
0162 
0163     if (!wrmsr) {
0164         /* Pretend that the read succeeded and returned 0. */
0165         regs->ax = 0;
0166         regs->dx = 0;
0167     }
0168 
0169     if (safe)
0170         *pt_regs_nr(regs, reg) = -EIO;
0171 
0172     return ex_handler_default(fixup, regs);
0173 }
0174 
0175 static bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
0176                 struct pt_regs *regs)
0177 {
0178     if (static_cpu_has(X86_BUG_NULL_SEG))
0179         asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
0180     asm volatile ("mov %0, %%fs" : : "rm" (0));
0181     return ex_handler_default(fixup, regs);
0182 }
0183 
0184 static bool ex_handler_imm_reg(const struct exception_table_entry *fixup,
0185                    struct pt_regs *regs, int reg, int imm)
0186 {
0187     *pt_regs_nr(regs, reg) = (long)imm;
0188     return ex_handler_default(fixup, regs);
0189 }
0190 
0191 static bool ex_handler_ucopy_len(const struct exception_table_entry *fixup,
0192                   struct pt_regs *regs, int trapnr, int reg, int imm)
0193 {
0194     regs->cx = imm * regs->cx + *pt_regs_nr(regs, reg);
0195     return ex_handler_uaccess(fixup, regs, trapnr);
0196 }
0197 
0198 int ex_get_fixup_type(unsigned long ip)
0199 {
0200     const struct exception_table_entry *e = search_exception_tables(ip);
0201 
0202     return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE;
0203 }
0204 
0205 int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
0206             unsigned long fault_addr)
0207 {
0208     const struct exception_table_entry *e;
0209     int type, reg, imm;
0210 
0211 #ifdef CONFIG_PNPBIOS
0212     if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
0213         extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
0214         extern u32 pnp_bios_is_utter_crap;
0215         pnp_bios_is_utter_crap = 1;
0216         printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
0217         __asm__ volatile(
0218             "movl %0, %%esp\n\t"
0219             "jmp *%1\n\t"
0220             : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
0221         panic("do_trap: can't hit this");
0222     }
0223 #endif
0224 
0225     e = search_exception_tables(regs->ip);
0226     if (!e)
0227         return 0;
0228 
0229     type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
0230     reg  = FIELD_GET(EX_DATA_REG_MASK,  e->data);
0231     imm  = FIELD_GET(EX_DATA_IMM_MASK,  e->data);
0232 
0233     switch (type) {
0234     case EX_TYPE_DEFAULT:
0235     case EX_TYPE_DEFAULT_MCE_SAFE:
0236         return ex_handler_default(e, regs);
0237     case EX_TYPE_FAULT:
0238     case EX_TYPE_FAULT_MCE_SAFE:
0239         return ex_handler_fault(e, regs, trapnr);
0240     case EX_TYPE_UACCESS:
0241         return ex_handler_uaccess(e, regs, trapnr);
0242     case EX_TYPE_COPY:
0243         return ex_handler_copy(e, regs, trapnr);
0244     case EX_TYPE_CLEAR_FS:
0245         return ex_handler_clear_fs(e, regs);
0246     case EX_TYPE_FPU_RESTORE:
0247         return ex_handler_fprestore(e, regs);
0248     case EX_TYPE_BPF:
0249         return ex_handler_bpf(e, regs);
0250     case EX_TYPE_WRMSR:
0251         return ex_handler_msr(e, regs, true, false, reg);
0252     case EX_TYPE_RDMSR:
0253         return ex_handler_msr(e, regs, false, false, reg);
0254     case EX_TYPE_WRMSR_SAFE:
0255         return ex_handler_msr(e, regs, true, true, reg);
0256     case EX_TYPE_RDMSR_SAFE:
0257         return ex_handler_msr(e, regs, false, true, reg);
0258     case EX_TYPE_WRMSR_IN_MCE:
0259         ex_handler_msr_mce(regs, true);
0260         break;
0261     case EX_TYPE_RDMSR_IN_MCE:
0262         ex_handler_msr_mce(regs, false);
0263         break;
0264     case EX_TYPE_POP_REG:
0265         regs->sp += sizeof(long);
0266         fallthrough;
0267     case EX_TYPE_IMM_REG:
0268         return ex_handler_imm_reg(e, regs, reg, imm);
0269     case EX_TYPE_FAULT_SGX:
0270         return ex_handler_sgx(e, regs, trapnr);
0271     case EX_TYPE_UCOPY_LEN:
0272         return ex_handler_ucopy_len(e, regs, trapnr, reg, imm);
0273     case EX_TYPE_ZEROPAD:
0274         return ex_handler_zeropad(e, regs, fault_addr);
0275     }
0276     BUG();
0277 }
0278 
0279 extern unsigned int early_recursion_flag;
0280 
0281 /* Restricted version used during very early boot */
0282 void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
0283 {
0284     /* Ignore early NMIs. */
0285     if (trapnr == X86_TRAP_NMI)
0286         return;
0287 
0288     if (early_recursion_flag > 2)
0289         goto halt_loop;
0290 
0291     /*
0292      * Old CPUs leave the high bits of CS on the stack
0293      * undefined.  I'm not sure which CPUs do this, but at least
0294      * the 486 DX works this way.
0295      * Xen pv domains are not using the default __KERNEL_CS.
0296      */
0297     if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
0298         goto fail;
0299 
0300     /*
0301      * The full exception fixup machinery is available as soon as
0302      * the early IDT is loaded.  This means that it is the
0303      * responsibility of extable users to either function correctly
0304      * when handlers are invoked early or to simply avoid causing
0305      * exceptions before they're ready to handle them.
0306      *
0307      * This is better than filtering which handlers can be used,
0308      * because refusing to call a handler here is guaranteed to
0309      * result in a hard-to-debug panic.
0310      *
0311      * Keep in mind that not all vectors actually get here.  Early
0312      * page faults, for example, are special.
0313      */
0314     if (fixup_exception(regs, trapnr, regs->orig_ax, 0))
0315         return;
0316 
0317     if (trapnr == X86_TRAP_UD) {
0318         if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
0319             /* Skip the ud2. */
0320             regs->ip += LEN_UD2;
0321             return;
0322         }
0323 
0324         /*
0325          * If this was a BUG and report_bug returns or if this
0326          * was just a normal #UD, we want to continue onward and
0327          * crash.
0328          */
0329     }
0330 
0331 fail:
0332     early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
0333              (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
0334              regs->orig_ax, read_cr2());
0335 
0336     show_regs(regs);
0337 
0338 halt_loop:
0339     while (true)
0340         halt();
0341 }