// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/arch/arm/mm/fault.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Modifications for ARM processor (c) 1995-2004 Russell King
 */
#include <linux/extable.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/kfence.h>

#include <asm/system_misc.h>
#include <asm/system_info.h>
#include <asm/tlbflush.h>

#include "fault.h"

#ifdef CONFIG_MMU

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{
    pgd_t *pgd;

    if (!mm)
        mm = &init_mm;

    pgd = pgd_offset(mm, addr);
    printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));

    do {
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        p4d = p4d_offset(pgd, addr);
        if (p4d_none(*p4d))
            break;

        if (p4d_bad(*p4d)) {
            pr_cont("(bad)");
            break;
        }

        pud = pud_offset(p4d, addr);
        if (PTRS_PER_PUD != 1)
            pr_cont(", *pud=%08llx", (long long)pud_val(*pud));

        if (pud_none(*pud))
            break;

        if (pud_bad(*pud)) {
            pr_cont("(bad)");
            break;
        }

        pmd = pmd_offset(pud, addr);
        if (PTRS_PER_PMD != 1)
            pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));

        if (pmd_none(*pmd))
            break;

        if (pmd_bad(*pmd)) {
            pr_cont("(bad)");
            break;
        }

        /* We must not map this if we have highmem enabled */
        if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
            break;

        pte = pte_offset_map(pmd, addr);
        pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
#ifndef CONFIG_ARM_LPAE
        pr_cont(", *ppte=%08llx",
               (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
#endif
        pte_unmap(pte);
    } while(0);

    pr_cont("\n");
}
#else                   /* CONFIG_MMU */
void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{ }
#endif                  /* CONFIG_MMU */

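/*
 * Treat the fault as a write only when the FSR reports a write access
 * and the cache maintenance bit (FSR_CM) is clear.
 */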
static inline bool is_write_fault(unsigned int fsr)
{
    return (fsr & FSR_WRITE) && !(fsr & FSR_CM);
}

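/*
 * Report an unrecoverable kernel-mode fault: dump the faulting address
 * and page table state, oops, and kill the current task.
 */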
static void die_kernel_fault(const char *msg, struct mm_struct *mm,
                 unsigned long addr, unsigned int fsr,
                 struct pt_regs *regs)
{
    bust_spinlocks(1);
    pr_alert("8<--- cut here ---\n");
    pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
         msg, addr);

    show_pte(KERN_ALERT, mm, addr);
    die("Oops", regs, fsr);
    bust_spinlocks(0);
    make_task_dead(SIGKILL);
}

/*
 * Oops.  The kernel tried to access some page that wasn't present.
 */
static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
          struct pt_regs *regs)
{
    const char *msg;
    /*
     * Are we prepared to handle this kernel fault?
     */
    if (fixup_exception(regs))
        return;

    /*
     * No handler, we'll have to terminate things with extreme prejudice.
     */
    if (addr < PAGE_SIZE) {
        msg = "NULL pointer dereference";
    } else {
        if (kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
            return;

        msg = "paging request";
    }

    die_kernel_fault(msg, mm, addr, fsr, regs);
}

/*
 * Something tried to access memory that isn't in our memory map..
 * User mode accesses just cause a SIGSEGV
 */
static void
__do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
        int code, struct pt_regs *regs)
{
    struct task_struct *tsk = current;

    if (addr > TASK_SIZE)
        harden_branch_predictor();

#ifdef CONFIG_DEBUG_USER
    if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
        ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
        pr_err("8<--- cut here ---\n");
        pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
               tsk->comm, sig, addr, fsr);
        show_pte(KERN_ERR, tsk->mm, addr);
        show_regs(regs);
    }
#endif
#ifndef CONFIG_KUSER_HELPERS
    if ((sig == SIGSEGV) && ((addr & PAGE_MASK) == 0xffff0000))
        printk_ratelimited(KERN_DEBUG
                   "%s: CONFIG_KUSER_HELPERS disabled at 0x%08lx\n",
                   tsk->comm, addr);
#endif

    tsk->thread.address = addr;
    tsk->thread.error_code = fsr;
    tsk->thread.trap_no = 14;
    force_sig_fault(sig, code, (void __user *)addr);
}

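/*
 * Handle a fault for an address we have no mapping for: user-mode
 * accesses get a SIGSEGV, kernel-mode accesses end up in an oops.
 */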
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
    struct task_struct *tsk = current;
    struct mm_struct *mm = tsk->active_mm;

    /*
     * If we are in kernel mode at this point, we
     * have no context to handle this fault with.
     */
    if (user_mode(regs))
        __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
    else
        __do_kernel_fault(mm, addr, fsr, regs);
}

#ifdef CONFIG_MMU
#define VM_FAULT_BADMAP     ((__force vm_fault_t)0x010000)
#define VM_FAULT_BADACCESS  ((__force vm_fault_t)0x020000)

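/*
 * Decode the fault status to check whether this was a permission fault
 * (as opposed to, e.g., a translation fault).
 */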
static inline bool is_permission_fault(unsigned int fsr)
{
    int fs = fsr_fs(fsr);
#ifdef CONFIG_ARM_LPAE
    if ((fs & FS_PERM_NOLL_MASK) == FS_PERM_NOLL)
        return true;
#else
    if (fs == FS_L1_PERM || fs == FS_L2_PERM)
        return true;
#endif
    return false;
}

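/*
 * Look up the VMA covering 'addr' (expanding the stack if necessary),
 * check that its permissions allow the access described by 'vma_flags',
 * and hand the fault to handle_mm_fault().  VM_FAULT_BADMAP and
 * VM_FAULT_BADACCESS are returned when no suitable VMA exists or the
 * access is not permitted.
 */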
static vm_fault_t __kprobes
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int flags,
        unsigned long vma_flags, struct pt_regs *regs)
{
    struct vm_area_struct *vma = find_vma(mm, addr);
    if (unlikely(!vma))
        return VM_FAULT_BADMAP;

    if (unlikely(vma->vm_start > addr)) {
        if (!(vma->vm_flags & VM_GROWSDOWN))
            return VM_FAULT_BADMAP;
        if (addr < FIRST_USER_ADDRESS)
            return VM_FAULT_BADMAP;
        if (expand_stack(vma, addr))
            return VM_FAULT_BADMAP;
    }

    /*
     * Ok, we have a good vm_area for this memory access; check that
     * the permissions on the VMA allow for the fault which occurred.
     */
    if (!(vma->vm_flags & vma_flags))
        return VM_FAULT_BADACCESS;

    return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
}

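/*
 * Top-level handler for a faulting data access: set up the fault flags,
 * take the mmap read lock, call __do_page_fault(), and turn the result
 * into a signal for user space or an oops for the kernel.
 */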
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
    struct mm_struct *mm = current->mm;
    int sig, code;
    vm_fault_t fault;
    unsigned int flags = FAULT_FLAG_DEFAULT;
    unsigned long vm_flags = VM_ACCESS_FLAGS;

    if (kprobe_page_fault(regs, fsr))
        return 0;

    /* Enable interrupts if they were enabled in the parent context. */
    if (interrupts_enabled(regs))
        local_irq_enable();

    /*
     * If we're in an interrupt or have no user
     * context, we must not take the fault..
     */
    if (faulthandler_disabled() || !mm)
        goto no_context;

    if (user_mode(regs))
        flags |= FAULT_FLAG_USER;

    if (is_write_fault(fsr)) {
        flags |= FAULT_FLAG_WRITE;
        vm_flags = VM_WRITE;
    }

    if (fsr & FSR_LNX_PF) {
        vm_flags = VM_EXEC;

        if (is_permission_fault(fsr) && !user_mode(regs))
            die_kernel_fault("execution of memory",
                     mm, addr, fsr, regs);
    }

    perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

    /*
     * As per x86, we may deadlock here.  However, since the kernel only
     * validly references user space from well defined areas of the code,
     * we can bug out early if this is from code which shouldn't.
     */
    if (!mmap_read_trylock(mm)) {
        if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
            goto no_context;
retry:
        mmap_read_lock(mm);
    } else {
        /*
         * The above mmap_read_trylock() might have succeeded in
         * which case, we'll have missed the might_sleep() from
         * mmap_read_lock().
         */
        might_sleep();
#ifdef CONFIG_DEBUG_VM
        if (!user_mode(regs) &&
            !search_exception_tables(regs->ARM_pc))
            goto no_context;
#endif
    }

    fault = __do_page_fault(mm, addr, flags, vm_flags, regs);

    /* If we need to retry but a fatal signal is pending, handle the
     * signal first. We do not need to release the mmap_lock because
     * it would already be released in __lock_page_or_retry in
     * mm/filemap.c. */
    if (fault_signal_pending(fault, regs)) {
        if (!user_mode(regs))
            goto no_context;
        return 0;
    }

    /* The fault is fully completed (including releasing mmap lock) */
    if (fault & VM_FAULT_COMPLETED)
        return 0;

    if (!(fault & VM_FAULT_ERROR)) {
        if (fault & VM_FAULT_RETRY) {
            flags |= FAULT_FLAG_TRIED;
            goto retry;
        }
    }

    mmap_read_unlock(mm);

    /*
     * Handle the "normal" case first - VM_FAULT_MAJOR
     */
    if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
        return 0;

    /*
     * If we are in kernel mode at this point, we
     * have no context to handle this fault with.
     */
    if (!user_mode(regs))
        goto no_context;

    if (fault & VM_FAULT_OOM) {
        /*
         * We ran out of memory, call the OOM killer, and return to
         * userspace (which will retry the fault, or kill us if we
         * got oom-killed)
         */
        pagefault_out_of_memory();
        return 0;
    }

    if (fault & VM_FAULT_SIGBUS) {
        /*
         * We had some memory, but were unable to
         * successfully fix up this page fault.
         */
        sig = SIGBUS;
        code = BUS_ADRERR;
    } else {
        /*
         * Something tried to access memory that
         * isn't in our memory map..
         */
        sig = SIGSEGV;
        code = fault == VM_FAULT_BADACCESS ?
            SEGV_ACCERR : SEGV_MAPERR;
    }

    __do_user_fault(addr, fsr, sig, code, regs);
    return 0;

no_context:
    __do_kernel_fault(mm, addr, fsr, regs);
    return 0;
}
#else                   /* CONFIG_MMU */
static int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
    return 0;
}
#endif                  /* CONFIG_MMU */

/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * If the init_task's first level page tables contain the relevant
 * entry, we copy it to this task.  If not, we send the process
 * a signal, fixup the exception, or oops the kernel.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
#ifdef CONFIG_MMU
static int __kprobes
do_translation_fault(unsigned long addr, unsigned int fsr,
             struct pt_regs *regs)
{
    unsigned int index;
    pgd_t *pgd, *pgd_k;
    p4d_t *p4d, *p4d_k;
    pud_t *pud, *pud_k;
    pmd_t *pmd, *pmd_k;

    if (addr < TASK_SIZE)
        return do_page_fault(addr, fsr, regs);

    if (user_mode(regs))
        goto bad_area;

    index = pgd_index(addr);

    pgd = cpu_get_pgd() + index;
    pgd_k = init_mm.pgd + index;

    p4d = p4d_offset(pgd, addr);
    p4d_k = p4d_offset(pgd_k, addr);

    if (p4d_none(*p4d_k))
        goto bad_area;
    if (!p4d_present(*p4d))
        set_p4d(p4d, *p4d_k);

    pud = pud_offset(p4d, addr);
    pud_k = pud_offset(p4d_k, addr);

    if (pud_none(*pud_k))
        goto bad_area;
    if (!pud_present(*pud))
        set_pud(pud, *pud_k);

    pmd = pmd_offset(pud, addr);
    pmd_k = pmd_offset(pud_k, addr);

#ifdef CONFIG_ARM_LPAE
    /*
     * Only one hardware entry per PMD with LPAE.
     */
    index = 0;
#else
    /*
     * On ARM one Linux PGD entry contains two hardware entries (see page
     * tables layout in pgtable.h). We normally guarantee that we always
     * fill both L1 entries. But create_mapping() doesn't follow the rule.
     * It can create individual L1 entries, so here we have to do the
     * pmd_none() check on the entry that really corresponds to the
     * address, not on the first entry of the pair.
     */
    index = (addr >> SECTION_SHIFT) & 1;
#endif
    if (pmd_none(pmd_k[index]))
        goto bad_area;

    copy_pmd(pmd, pmd_k);
    return 0;

bad_area:
    do_bad_area(addr, fsr, regs);
    return 0;
}
#else                   /* CONFIG_MMU */
static int
do_translation_fault(unsigned long addr, unsigned int fsr,
             struct pt_regs *regs)
{
    return 0;
}
#endif                  /* CONFIG_MMU */

/*
 * Some section permission faults need to be handled gracefully.
 * They can happen due to a __{get,put}_user during an oops.
 */
#ifndef CONFIG_ARM_LPAE
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
    do_bad_area(addr, fsr, regs);
    return 0;
}
#endif /* CONFIG_ARM_LPAE */

/*
 * This abort handler always returns "fault".
 */
static int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
    return 1;
}

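/*
 * One entry of the fault status decode tables (fsr_info/ifsr_info):
 * the handler to call, plus the signal and si_code to deliver if the
 * handler reports the fault as unhandled.
 */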
struct fsr_info {
    int (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
    int sig;
    int code;
    const char *name;
};

/* FSR definition */
#ifdef CONFIG_ARM_LPAE
#include "fsr-3level.c"
#else
#include "fsr-2level.c"
#endif

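/*
 * Allow platform or CPU specific code to replace the handler for a
 * given data abort fault status code at boot time.
 */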
void __init
hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
        int sig, int code, const char *name)
{
    if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
        BUG();

    fsr_info[nr].fn   = fn;
    fsr_info[nr].sig  = sig;
    fsr_info[nr].code = code;
    fsr_info[nr].name = name;
}

/*
 * Dispatch a data abort to the relevant handler.
 */
asmlinkage void
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
    const struct fsr_info *inf = fsr_info + fsr_fs(fsr);

    if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
        return;

    pr_alert("8<--- cut here ---\n");
    pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
        inf->name, fsr, addr);
    show_pte(KERN_ALERT, current->mm, addr);

    arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
               fsr, 0);
}

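/*
 * As hook_fault_code(), but for the prefetch abort (instruction fault
 * status) decode table.
 */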
void __init
hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
         int sig, int code, const char *name)
{
    if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info))
        BUG();

    ifsr_info[nr].fn   = fn;
    ifsr_info[nr].sig  = sig;
    ifsr_info[nr].code = code;
    ifsr_info[nr].name = name;
}

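/*
 * Dispatch a prefetch abort to the relevant handler.
 */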
asmlinkage void
do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
{
    const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);

    if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
        return;

    pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
        inf->name, ifsr, addr);

    arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
               ifsr, 0);
}

/*
 * Abort handler to be used only during first unmasking of asynchronous aborts
 * on the boot CPU. This makes sure that the machine will not die if the
 * firmware/bootloader left an imprecise abort pending for us to trip over.
 */
static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
                      struct pt_regs *regs)
{
    pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during "
        "first unmask, this is most likely caused by a "
        "firmware/bootloader bug.\n", fsr);

    return 0;
}

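/*
 * Temporarily install the early handler, unmask asynchronous aborts on
 * the boot CPU, then restore the default do_bad() handler.
 */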
void __init early_abt_enable(void)
{
    fsr_info[FSR_FS_AEA].fn = early_abort_handler;
    local_abt_enable();
    fsr_info[FSR_FS_AEA].fn = do_bad;
}

#ifndef CONFIG_ARM_LPAE
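/*
 * Register additional fault handlers that only apply to newer
 * architecture versions (ARMv6/ARMv7 with the short-descriptor format).
 */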
static int __init exceptions_init(void)
{
    if (cpu_architecture() >= CPU_ARCH_ARMv6) {
        hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
                "I-cache maintenance fault");
    }

    if (cpu_architecture() >= CPU_ARCH_ARMv7) {
        /*
         * TODO: Access flag faults introduced in ARMv6K.
         * Runtime check for 'K' extension is needed
         */
        hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
                "section access flag fault");
        hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
                "section access flag fault");
    }

    return 0;
}

arch_initcall(exceptions_init);
#endif