0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *  Copyright (C) 1995-1996  Gary Thomas (gdt@linuxppc.org)
0004  *  Copyright 2007-2010 Freescale Semiconductor, Inc.
0005  *
0006  *  Modified by Cort Dougan (cort@cs.nmt.edu)
0007  *  and Paul Mackerras (paulus@samba.org)
0008  */
0009 
0010 /*
0011  * This file handles the architecture-dependent parts of hardware exceptions
0012  */
0013 
0014 #include <linux/errno.h>
0015 #include <linux/sched.h>
0016 #include <linux/sched/debug.h>
0017 #include <linux/kernel.h>
0018 #include <linux/mm.h>
0019 #include <linux/pkeys.h>
0020 #include <linux/stddef.h>
0021 #include <linux/unistd.h>
0022 #include <linux/ptrace.h>
0023 #include <linux/user.h>
0024 #include <linux/interrupt.h>
0025 #include <linux/init.h>
0026 #include <linux/extable.h>
0027 #include <linux/module.h>   /* print_modules */
0028 #include <linux/prctl.h>
0029 #include <linux/delay.h>
0030 #include <linux/kprobes.h>
0031 #include <linux/kexec.h>
0032 #include <linux/backlight.h>
0033 #include <linux/bug.h>
0034 #include <linux/kdebug.h>
0035 #include <linux/ratelimit.h>
0036 #include <linux/context_tracking.h>
0037 #include <linux/smp.h>
0038 #include <linux/console.h>
0039 #include <linux/kmsg_dump.h>
0040 #include <linux/debugfs.h>
0041 
0042 #include <asm/emulated_ops.h>
0043 #include <linux/uaccess.h>
0044 #include <asm/interrupt.h>
0045 #include <asm/io.h>
0046 #include <asm/machdep.h>
0047 #include <asm/rtas.h>
0048 #include <asm/pmc.h>
0049 #include <asm/reg.h>
0050 #ifdef CONFIG_PMAC_BACKLIGHT
0051 #include <asm/backlight.h>
0052 #endif
0053 #ifdef CONFIG_PPC64
0054 #include <asm/firmware.h>
0055 #include <asm/processor.h>
0056 #endif
0057 #include <asm/kexec.h>
0058 #include <asm/ppc-opcode.h>
0059 #include <asm/rio.h>
0060 #include <asm/fadump.h>
0061 #include <asm/switch_to.h>
0062 #include <asm/tm.h>
0063 #include <asm/debug.h>
0064 #include <asm/asm-prototypes.h>
0065 #include <asm/hmi.h>
0066 #include <sysdev/fsl_pci.h>
0067 #include <asm/kprobes.h>
0068 #include <asm/stacktrace.h>
0069 #include <asm/nmi.h>
0070 #include <asm/disassemble.h>
0071 
0072 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
0073 int (*__debugger)(struct pt_regs *regs) __read_mostly;
0074 int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
0075 int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
0076 int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly;
0077 int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly;
0078 int (*__debugger_break_match)(struct pt_regs *regs) __read_mostly;
0079 int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly;
0080 
0081 EXPORT_SYMBOL(__debugger);
0082 EXPORT_SYMBOL(__debugger_ipi);
0083 EXPORT_SYMBOL(__debugger_bpt);
0084 EXPORT_SYMBOL(__debugger_sstep);
0085 EXPORT_SYMBOL(__debugger_iabr_match);
0086 EXPORT_SYMBOL(__debugger_break_match);
0087 EXPORT_SYMBOL(__debugger_fault_handler);
0088 #endif
0089 
0090 /* Transactional Memory trap debug */
0091 #ifdef TM_DEBUG_SW
0092 #define TM_DEBUG(x...) printk(KERN_INFO x)
0093 #else
0094 #define TM_DEBUG(x...) do { } while (0)
0095 #endif
0096 
0097 static const char *signame(int signr)
0098 {
0099     switch (signr) {
0100     case SIGBUS:    return "bus error";
0101     case SIGFPE:    return "floating point exception";
0102     case SIGILL:    return "illegal instruction";
0103     case SIGSEGV:   return "segfault";
0104     case SIGTRAP:   return "unhandled trap";
0105     }
0106 
0107     return "unknown signal";
0108 }
0109 
0110 /*
0111  * Trap & Exception support
0112  */
0113 
0114 #ifdef CONFIG_PMAC_BACKLIGHT
0115 static void pmac_backlight_unblank(void)
0116 {
0117     mutex_lock(&pmac_backlight_mutex);
0118     if (pmac_backlight) {
0119         struct backlight_properties *props;
0120 
0121         props = &pmac_backlight->props;
0122         props->brightness = props->max_brightness;
0123         props->power = FB_BLANK_UNBLANK;
0124         backlight_update_status(pmac_backlight);
0125     }
0126     mutex_unlock(&pmac_backlight_mutex);
0127 }
0128 #else
0129 static inline void pmac_backlight_unblank(void) { }
0130 #endif
0131 
0132 /*
0133  * If oops/die is expected to crash the machine, return true here.
0134  *
0135  * This should not be expected to be 100% accurate, there may be
0136  * notifiers registered or other unexpected conditions that may bring
0137  * down the kernel. Or if the current process in the kernel is holding
0138  * locks or has other critical state, the kernel may become effectively
0139  * unusable anyway.
0140  */
0141 bool die_will_crash(void)
0142 {
0143     if (should_fadump_crash())
0144         return true;
0145     if (kexec_should_crash(current))
0146         return true;
0147     if (in_interrupt() || panic_on_oops ||
0148             !current->pid || is_global_init(current))
0149         return true;
0150 
0151     return false;
0152 }
0153 
0154 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
0155 static int die_owner = -1;
0156 static unsigned int die_nest_count;
0157 static int die_counter;
0158 
0159 extern void panic_flush_kmsg_start(void)
0160 {
0161     /*
0162      * These steps are mostly taken from kernel/panic.c, but try to do
0163      * relatively minimal work. Don't use delay functions (the TB may
0164      * be broken), don't crash dump (we need to set a firmware log),
0165      * don't run notifiers. We do want to get some information out
0166      * to the Linux console.
0167      */
0168     console_verbose();
0169     bust_spinlocks(1);
0170 }
0171 
0172 extern void panic_flush_kmsg_end(void)
0173 {
0174     kmsg_dump(KMSG_DUMP_PANIC);
0175     bust_spinlocks(0);
0176     debug_locks_off();
0177     console_flush_on_panic(CONSOLE_FLUSH_PENDING);
0178 }
0179 
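/*
 * oops_begin()/oops_end() serialise oops output across CPUs with
 * die_lock, while tolerating recursion: if this CPU already holds the
 * lock (a nested oops) we carry on without taking it again and let
 * die_nest_count track the depth; other CPUs spin until the first
 * oops has finished printing.
 */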
0180 static unsigned long oops_begin(struct pt_regs *regs)
0181 {
0182     int cpu;
0183     unsigned long flags;
0184 
0185     oops_enter();
0186 
0187     /* racy, but better than risking deadlock. */
0188     raw_local_irq_save(flags);
0189     cpu = smp_processor_id();
0190     if (!arch_spin_trylock(&die_lock)) {
0191         if (cpu == die_owner)
0192             /* nested oops. should stop eventually */;
0193         else
0194             arch_spin_lock(&die_lock);
0195     }
0196     die_nest_count++;
0197     die_owner = cpu;
0198     console_verbose();
0199     bust_spinlocks(1);
0200     if (machine_is(powermac))
0201         pmac_backlight_unblank();
0202     return flags;
0203 }
0204 NOKPROBE_SYMBOL(oops_begin);
0205 
0206 static void oops_end(unsigned long flags, struct pt_regs *regs,
0207                    int signr)
0208 {
0209     bust_spinlocks(0);
0210     add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
0211     die_nest_count--;
0212     oops_exit();
0213     printk("\n");
0214     if (!die_nest_count) {
0215         /* Nest count reaches zero, release the lock. */
0216         die_owner = -1;
0217         arch_spin_unlock(&die_lock);
0218     }
0219     raw_local_irq_restore(flags);
0220 
0221     /*
0222      * system_reset_exception handles debugger, crash dump, panic, for 0x100
0223      */
0224     if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
0225         return;
0226 
0227     crash_fadump(regs, "die oops");
0228 
0229     if (kexec_should_crash(current))
0230         crash_kexec(regs);
0231 
0232     if (!signr)
0233         return;
0234 
0235     /*
0236      * While our oops output is serialised by a spinlock, output
0237      * from panic() called below can race and corrupt it. If we
0238      * know we are going to panic, delay for 1 second so we have a
0239      * chance to get clean backtraces from all CPUs that are oopsing.
0240      */
0241     if (in_interrupt() || panic_on_oops || !current->pid ||
0242         is_global_init(current)) {
0243         mdelay(MSEC_PER_SEC);
0244     }
0245 
0246     if (panic_on_oops)
0247         panic("Fatal exception");
0248     make_task_dead(signr);
0249 }
0250 NOKPROBE_SYMBOL(oops_end);
0251 
0252 static char *get_mmu_str(void)
0253 {
0254     if (early_radix_enabled())
0255         return " MMU=Radix";
0256     if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
0257         return " MMU=Hash";
0258     return "";
0259 }
0260 
0261 static int __die(const char *str, struct pt_regs *regs, long err)
0262 {
0263     printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
0264 
0265     printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
0266            IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
0267            PAGE_SIZE / 1024, get_mmu_str(),
0268            IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
0269            IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
0270            IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
0271            debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
0272            IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
0273            ppc_md.name ? ppc_md.name : "");
0274 
0275     if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
0276         return 1;
0277 
0278     print_modules();
0279     show_regs(regs);
0280 
0281     return 0;
0282 }
0283 NOKPROBE_SYMBOL(__die);
0284 
0285 void die(const char *str, struct pt_regs *regs, long err)
0286 {
0287     unsigned long flags;
0288 
0289     /*
0290      * system_reset_exception handles debugger, crash dump, panic, for 0x100
0291      */
0292     if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) {
0293         if (debugger(regs))
0294             return;
0295     }
0296 
0297     flags = oops_begin(regs);
0298     if (__die(str, regs, err))
0299         err = 0;
0300     oops_end(flags, regs, err);
0301 }
0302 NOKPROBE_SYMBOL(die);
0303 
0304 void user_single_step_report(struct pt_regs *regs)
0305 {
0306     force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);
0307 }
0308 
0309 static void show_signal_msg(int signr, struct pt_regs *regs, int code,
0310                 unsigned long addr)
0311 {
0312     static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
0313                       DEFAULT_RATELIMIT_BURST);
0314 
0315     if (!show_unhandled_signals)
0316         return;
0317 
0318     if (!unhandled_signal(current, signr))
0319         return;
0320 
0321     if (!__ratelimit(&rs))
0322         return;
0323 
0324     pr_info("%s[%d]: %s (%d) at %lx nip %lx lr %lx code %x",
0325         current->comm, current->pid, signame(signr), signr,
0326         addr, regs->nip, regs->link, code);
0327 
0328     print_vma_addr(KERN_CONT " in ", regs->nip);
0329 
0330     pr_cont("\n");
0331 
0332     show_user_instructions(regs);
0333 }
0334 
0335 static bool exception_common(int signr, struct pt_regs *regs, int code,
0336                   unsigned long addr)
0337 {
0338     if (!user_mode(regs)) {
0339         die("Exception in kernel mode", regs, signr);
0340         return false;
0341     }
0342 
0343     /*
0344      * Must not enable interrupts even for user-mode exception, because
0345      * this can be called from machine check, which may be a NMI or IRQ
0346      * which don't like interrupts being enabled. Could check for
0347      * in_hardirq || in_nmi perhaps, but there doesn't seem to be a good
0348      * reason why _exception() should enable irqs for an exception handler,
0349      * the handlers themselves do that directly.
0350      */
0351 
0352     show_signal_msg(signr, regs, code, addr);
0353 
0354     current->thread.trap_nr = code;
0355 
0356     return true;
0357 }
0358 
0359 void _exception_pkey(struct pt_regs *regs, unsigned long addr, int key)
0360 {
0361     if (!exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
0362         return;
0363 
0364     force_sig_pkuerr((void __user *) addr, key);
0365 }
0366 
0367 void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
0368 {
0369     if (!exception_common(signr, regs, code, addr))
0370         return;
0371 
0372     force_sig_fault(signr, code, (void __user *)addr);
0373 }
0374 
0375 /*
0376  * The interrupt architecture has a quirk in that the HV interrupts excluding
0377  * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
0378  * that an interrupt handler must do is save off a GPR into a scratch register,
0379  * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
0380  * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
0381  * that it is non-reentrant, which leads to random data corruption.
0382  *
0383  * The solution is for NMI interrupts in HV mode to check if they originated
0384  * from these critical HV interrupt regions. If so, then mark them not
0385  * recoverable.
0386  *
0387  * An alternative would be for HV NMIs to use SPRG for scratch to avoid the
0388  * HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux
0389  * guests should always have MSR[RI]=0 when their scratch SPRG is in use, so
0390  * that would work. However any other guest OS that may have the SPRG live
0391  * and MSR[RI]=1 could encounter silent corruption.
0392  *
0393  * Builds that do not support KVM could take this second option to increase
0394  * the recoverability of NMIs.
0395  */
0396 noinstr void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
0397 {
0398 #ifdef CONFIG_PPC_POWERNV
0399     unsigned long kbase = (unsigned long)_stext;
0400     unsigned long nip = regs->nip;
0401 
0402     if (!(regs->msr & MSR_RI))
0403         return;
0404     if (!(regs->msr & MSR_HV))
0405         return;
0406     if (regs->msr & MSR_PR)
0407         return;
0408 
0409     /*
0410      * Now test if the interrupt has hit a range that may be using
0411      * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
0412      * problem ranges all run un-relocated. Test real and virt modes
0413      * at the same time by dropping the high bit of the nip (virt mode
0414      * entry points still have the +0x4000 offset).
0415      */
0416     nip &= ~0xc000000000000000ULL;
0417     if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600))
0418         goto nonrecoverable;
0419     if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00))
0420         goto nonrecoverable;
0421     if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0))
0422         goto nonrecoverable;
0423     if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
0424         goto nonrecoverable;
0425 
0426     /* Trampoline code runs un-relocated so subtract kbase. */
0427     if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
0428             nip < (unsigned long)(end_real_trampolines - kbase))
0429         goto nonrecoverable;
0430     if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
0431             nip < (unsigned long)(end_virt_trampolines - kbase))
0432         goto nonrecoverable;
0433     return;
0434 
0435 nonrecoverable:
0436     regs->msr &= ~MSR_RI;
0437     local_paca->hsrr_valid = 0;
0438     local_paca->srr_valid = 0;
0439 #endif
0440 }
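
/*
 * System reset (0x100) arrives as an NMI. The handler below gives the
 * platform and the debugger first refusal, then treats the reset as a
 * request to dump: fadump if registered, then kdump, and finally a
 * plain die()/panic() if nothing else claimed it.
 */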
0441 DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
0442 {
0443     unsigned long hsrr0, hsrr1;
0444     bool saved_hsrrs = false;
0445 
0446     /*
0447      * System reset can interrupt code where HSRRs are live and MSR[RI]=1.
0448      * The system reset interrupt itself may clobber HSRRs (e.g., to call
0449      * OPAL), so save them here and restore them before returning.
0450      *
0451      * Machine checks don't need to save HSRRs, as the real mode handler
0452      * is careful to avoid them, and the regular handler is not delivered
0453      * as an NMI.
0454      */
0455     if (cpu_has_feature(CPU_FTR_HVMODE)) {
0456         hsrr0 = mfspr(SPRN_HSRR0);
0457         hsrr1 = mfspr(SPRN_HSRR1);
0458         saved_hsrrs = true;
0459     }
0460 
0461     hv_nmi_check_nonrecoverable(regs);
0462 
0463     __this_cpu_inc(irq_stat.sreset_irqs);
0464 
0465     /* See if there are any machine-dependent calls */
0466     if (ppc_md.system_reset_exception) {
0467         if (ppc_md.system_reset_exception(regs))
0468             goto out;
0469     }
0470 
0471     if (debugger(regs))
0472         goto out;
0473 
0474     kmsg_dump(KMSG_DUMP_OOPS);
0475     /*
0476      * A system reset is a request to dump, so we always send
0477      * it through the crashdump code (if fadump or kdump are
0478      * registered).
0479      */
0480     crash_fadump(regs, "System Reset");
0481 
0482     crash_kexec(regs);
0483 
0484     /*
0485      * If we aren't the primary crash CPU, send this CPU into a
0486      * holding pattern so that it doesn't end up running through
0487      * the panic code.
0488      */
0489     crash_kexec_secondary(regs);
0490 
0491     /*
0492      * No debugger or crash dump registered, print logs then
0493      * panic.
0494      */
0495     die("System Reset", regs, SIGABRT);
0496 
0497     mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
0498     add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
0499     nmi_panic(regs, "System Reset");
0500 
0501 out:
0502 #ifdef CONFIG_PPC_BOOK3S_64
0503     BUG_ON(get_paca()->in_nmi == 0);
0504     if (get_paca()->in_nmi > 1)
0505         die("Unrecoverable nested System Reset", regs, SIGABRT);
0506 #endif
0507     /* Must die if the interrupt is not recoverable */
0508     if (regs_is_unrecoverable(regs)) {
0509         /* For the reason explained in die_mce, nmi_exit before die */
0510         nmi_exit();
0511         die("Unrecoverable System Reset", regs, SIGABRT);
0512     }
0513 
0514     if (saved_hsrrs) {
0515         mtspr(SPRN_HSRR0, hsrr0);
0516         mtspr(SPRN_HSRR1, hsrr1);
0517     }
0518 
0519     /* What should we do here? We could issue a shutdown or hard reset. */
0520 
0521     return 0;
0522 }
0523 
0524 /*
0525  * I/O accesses can cause machine checks on powermacs.
0526  * Check if the NIP corresponds to the address of a sync
0527  * instruction for which there is an entry in the exception
0528  * table.
0529  *  -- paulus.
0530  */
0531 static inline int check_io_access(struct pt_regs *regs)
0532 {
0533 #ifdef CONFIG_PPC32
0534     unsigned long msr = regs->msr;
0535     const struct exception_table_entry *entry;
0536     unsigned int *nip = (unsigned int *)regs->nip;
0537 
0538     if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000)))
0539         && (entry = search_exception_tables(regs->nip)) != NULL) {
0540         /*
0541          * Check that it's a sync instruction, or somewhere
0542          * in the twi; isync; nop sequence that inb/inw/inl uses.
0543          * As the address is in the exception table
0544          * we should be able to read the instr there.
0545          * For the debug message, we look at the preceding
0546          * load or store.
0547          */
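        /*
         * For reference, inb() and friends are expected to expand to
         * something like (a sketch, not the exact generated code):
         *
         *     lbzx rD,0,rB     <- faulting I/O load, port address in rB
         *     twi  0,rD,0
         *     isync
         *     nop
         *
         * so after stepping nip back past the nop/isync, *nip is the
         * twi (or the sync of an out), and the instruction before that
         * supplies the port register decoded below.
         */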
0548         if (*nip == PPC_RAW_NOP())
0549             nip -= 2;
0550         else if (*nip == PPC_RAW_ISYNC())
0551             --nip;
0552         if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) {
0553             unsigned int rb;
0554 
0555             --nip;
0556             rb = (*nip >> 11) & 0x1f;
0557             printk(KERN_DEBUG "%s bad port %lx at %p\n",
0558                    (*nip & 0x100)? "OUT to": "IN from",
0559                    regs->gpr[rb] - _IO_BASE, nip);
0560             regs_set_recoverable(regs);
0561             regs_set_return_ip(regs, extable_fixup(entry));
0562             return 1;
0563         }
0564     }
0565 #endif /* CONFIG_PPC32 */
0566     return 0;
0567 }
0568 
0569 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
0570 /* On 4xx, the reason for the machine check or program exception
0571    is in the ESR. */
0572 #define get_reason(regs)    ((regs)->esr)
0573 #define REASON_FP       ESR_FP
0574 #define REASON_ILLEGAL      (ESR_PIL | ESR_PUO)
0575 #define REASON_PRIVILEGED   ESR_PPR
0576 #define REASON_TRAP     ESR_PTR
0577 #define REASON_PREFIXED     0
0578 #define REASON_BOUNDARY     0
0579 
0580 /* single-step stuff */
0581 #define single_stepping(regs)   (current->thread.debug.dbcr0 & DBCR0_IC)
0582 #define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC)
0583 #define clear_br_trace(regs)    do {} while(0)
0584 #else
0585 /* On non-4xx, the reason for the machine check or program
0586    exception is in the MSR. */
0587 #define get_reason(regs)    ((regs)->msr)
0588 #define REASON_TM       SRR1_PROGTM
0589 #define REASON_FP       SRR1_PROGFPE
0590 #define REASON_ILLEGAL      SRR1_PROGILL
0591 #define REASON_PRIVILEGED   SRR1_PROGPRIV
0592 #define REASON_TRAP     SRR1_PROGTRAP
0593 #define REASON_PREFIXED     SRR1_PREFIXED
0594 #define REASON_BOUNDARY     SRR1_BOUNDARY
0595 
0596 #define single_stepping(regs)   ((regs)->msr & MSR_SE)
0597 #define clear_single_step(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_SE))
0598 #define clear_br_trace(regs)    (regs_set_return_msr((regs), (regs)->msr & ~MSR_BE))
0599 #endif
0600 
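/*
 * ISA v3.1 prefixed instructions are two words long, and SRR1 (via
 * REASON_PREFIXED above) flags when the faulting instruction was
 * prefixed, so emulation paths skip 8 bytes instead of 4.
 */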
0601 #define inst_length(reason) (((reason) & REASON_PREFIXED) ? 8 : 4)
0602 
0603 #if defined(CONFIG_E500)
0604 int machine_check_e500mc(struct pt_regs *regs)
0605 {
0606     unsigned long mcsr = mfspr(SPRN_MCSR);
0607     unsigned long pvr = mfspr(SPRN_PVR);
0608     unsigned long reason = mcsr;
0609     int recoverable = 1;
0610 
0611     if (reason & MCSR_LD) {
0612         recoverable = fsl_rio_mcheck_exception(regs);
0613         if (recoverable == 1)
0614             goto silent_out;
0615     }
0616 
0617     printk("Machine check in kernel mode.\n");
0618     printk("Caused by (from MCSR=%lx): ", reason);
0619 
0620     if (reason & MCSR_MCP)
0621         pr_cont("Machine Check Signal\n");
0622 
0623     if (reason & MCSR_ICPERR) {
0624         pr_cont("Instruction Cache Parity Error\n");
0625 
0626         /*
0627          * This is recoverable by invalidating the i-cache.
0628          */
0629         mtspr(SPRN_L1CSR1, mfspr(SPRN_L1CSR1) | L1CSR1_ICFI);
0630         while (mfspr(SPRN_L1CSR1) & L1CSR1_ICFI)
0631             ;
0632 
0633         /*
0634          * This will generally be accompanied by an instruction
0635          * fetch error report -- only treat MCSR_IF as fatal
0636          * if it wasn't due to an L1 parity error.
0637          */
0638         reason &= ~MCSR_IF;
0639     }
0640 
0641     if (reason & MCSR_DCPERR_MC) {
0642         pr_cont("Data Cache Parity Error\n");
0643 
0644         /*
0645          * In write shadow mode we auto-recover from the error, but it
0646          * may still get logged and cause a machine check.  We should
0647          * only treat the non-write shadow case as non-recoverable.
0648          */
0649         /* On e6500 core, L1 DCWS (Data cache write shadow mode) bit
0650          * is not implemented but L1 data cache always runs in write
0651          * shadow mode. Hence on data cache parity errors HW will
0652          * automatically invalidate the L1 Data Cache.
0653          */
0654         if (PVR_VER(pvr) != PVR_VER_E6500) {
0655             if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
0656                 recoverable = 0;
0657         }
0658     }
0659 
0660     if (reason & MCSR_L2MMU_MHIT) {
0661         pr_cont("Hit on multiple TLB entries\n");
0662         recoverable = 0;
0663     }
0664 
0665     if (reason & MCSR_NMI)
0666         pr_cont("Non-maskable interrupt\n");
0667 
0668     if (reason & MCSR_IF) {
0669         pr_cont("Instruction Fetch Error Report\n");
0670         recoverable = 0;
0671     }
0672 
0673     if (reason & MCSR_LD) {
0674         pr_cont("Load Error Report\n");
0675         recoverable = 0;
0676     }
0677 
0678     if (reason & MCSR_ST) {
0679         pr_cont("Store Error Report\n");
0680         recoverable = 0;
0681     }
0682 
0683     if (reason & MCSR_LDG) {
0684         pr_cont("Guarded Load Error Report\n");
0685         recoverable = 0;
0686     }
0687 
0688     if (reason & MCSR_TLBSYNC)
0689         pr_cont("Simultaneous tlbsync operations\n");
0690 
0691     if (reason & MCSR_BSL2_ERR) {
0692         pr_cont("Level 2 Cache Error\n");
0693         recoverable = 0;
0694     }
0695 
0696     if (reason & MCSR_MAV) {
0697         u64 addr;
0698 
0699         addr = mfspr(SPRN_MCAR);
0700         addr |= (u64)mfspr(SPRN_MCARU) << 32;
0701 
0702         pr_cont("Machine Check %s Address: %#llx\n",
0703                reason & MCSR_MEA ? "Effective" : "Physical", addr);
0704     }
0705 
0706 silent_out:
0707     mtspr(SPRN_MCSR, mcsr);
0708     return mfspr(SPRN_MCSR) == 0 && recoverable;
0709 }
0710 
0711 int machine_check_e500(struct pt_regs *regs)
0712 {
0713     unsigned long reason = mfspr(SPRN_MCSR);
0714 
0715     if (reason & MCSR_BUS_RBERR) {
0716         if (fsl_rio_mcheck_exception(regs))
0717             return 1;
0718         if (fsl_pci_mcheck_exception(regs))
0719             return 1;
0720     }
0721 
0722     printk("Machine check in kernel mode.\n");
0723     printk("Caused by (from MCSR=%lx): ", reason);
0724 
0725     if (reason & MCSR_MCP)
0726         pr_cont("Machine Check Signal\n");
0727     if (reason & MCSR_ICPERR)
0728         pr_cont("Instruction Cache Parity Error\n");
0729     if (reason & MCSR_DCP_PERR)
0730         pr_cont("Data Cache Push Parity Error\n");
0731     if (reason & MCSR_DCPERR)
0732         pr_cont("Data Cache Parity Error\n");
0733     if (reason & MCSR_BUS_IAERR)
0734         pr_cont("Bus - Instruction Address Error\n");
0735     if (reason & MCSR_BUS_RAERR)
0736         pr_cont("Bus - Read Address Error\n");
0737     if (reason & MCSR_BUS_WAERR)
0738         pr_cont("Bus - Write Address Error\n");
0739     if (reason & MCSR_BUS_IBERR)
0740         pr_cont("Bus - Instruction Data Error\n");
0741     if (reason & MCSR_BUS_RBERR)
0742         pr_cont("Bus - Read Data Bus Error\n");
0743     if (reason & MCSR_BUS_WBERR)
0744         pr_cont("Bus - Write Data Bus Error\n");
0745     if (reason & MCSR_BUS_IPERR)
0746         pr_cont("Bus - Instruction Parity Error\n");
0747     if (reason & MCSR_BUS_RPERR)
0748         pr_cont("Bus - Read Parity Error\n");
0749 
0750     return 0;
0751 }
0752 
0753 int machine_check_generic(struct pt_regs *regs)
0754 {
0755     return 0;
0756 }
0757 #elif defined(CONFIG_PPC32)
0758 int machine_check_generic(struct pt_regs *regs)
0759 {
0760     unsigned long reason = regs->msr;
0761 
0762     printk("Machine check in kernel mode.\n");
0763     printk("Caused by (from SRR1=%lx): ", reason);
0764     switch (reason & 0x601F0000) {
0765     case 0x80000:
0766         pr_cont("Machine check signal\n");
0767         break;
0768     case 0x40000:
0769     case 0x140000:  /* 7450 MSS error and TEA */
0770         pr_cont("Transfer error ack signal\n");
0771         break;
0772     case 0x20000:
0773         pr_cont("Data parity error signal\n");
0774         break;
0775     case 0x10000:
0776         pr_cont("Address parity error signal\n");
0777         break;
0778     case 0x20000000:
0779         pr_cont("L1 Data Cache error\n");
0780         break;
0781     case 0x40000000:
0782         pr_cont("L1 Instruction Cache error\n");
0783         break;
0784     case 0x00100000:
0785         pr_cont("L2 data cache parity error\n");
0786         break;
0787     default:
0788         pr_cont("Unknown values in msr\n");
0789     }
0790     return 0;
0791 }
0792 #endif /* everything else */
0793 
0794 void die_mce(const char *str, struct pt_regs *regs, long err)
0795 {
0796     /*
0797      * The machine check wants to kill the interrupted context,
0798      * but make_task_dead() checks for in_interrupt() and panics
0799      * in that case, so exit the irq/nmi before calling die.
0800      */
0801     if (in_nmi())
0802         nmi_exit();
0803     else
0804         irq_exit();
0805     die(str, regs, err);
0806 }
0807 
0808 /*
0809  * BOOK3S_64 does not usually call this handler as a non-maskable interrupt
0810  * (it uses its own early real-mode handler to handle the MCE proper
0811  * and then raises irq_work to call this handler when interrupts are
0812  * enabled). The only time when this is not true is if the early handler
0813  * is unrecoverable, then it does call this directly to try to get a
0814  * message out.
0815  */
0816 static void __machine_check_exception(struct pt_regs *regs)
0817 {
0818     int recover = 0;
0819 
0820     __this_cpu_inc(irq_stat.mce_exceptions);
0821 
0822     add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
0823 
0824     /* See if there are any machine-dependent calls. In theory, we would want
0825      * to call the CPU first, and call the ppc_md. one if the CPU
0826      * one returns a positive number. However there is existing code
0827      * that assumes the board gets a first chance, so let's keep it
0828      * that way for now and fix things later. --BenH.
0829      */
0830     if (ppc_md.machine_check_exception)
0831         recover = ppc_md.machine_check_exception(regs);
0832     else if (cur_cpu_spec->machine_check)
0833         recover = cur_cpu_spec->machine_check(regs);
0834 
0835     if (recover > 0)
0836         goto bail;
0837 
0838     if (debugger_fault_handler(regs))
0839         goto bail;
0840 
0841     if (check_io_access(regs))
0842         goto bail;
0843 
0844     die_mce("Machine check", regs, SIGBUS);
0845 
0846 bail:
0847     /* Must die if the interrupt is not recoverable */
0848     if (regs_is_unrecoverable(regs))
0849         die_mce("Unrecoverable Machine check", regs, SIGBUS);
0850 }
0851 
0852 #ifdef CONFIG_PPC_BOOK3S_64
0853 DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async)
0854 {
0855     __machine_check_exception(regs);
0856 }
0857 #endif
0858 DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
0859 {
0860     __machine_check_exception(regs);
0861 
0862     return 0;
0863 }
0864 
0865 DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */
0866 {
0867     die("System Management Interrupt", regs, SIGABRT);
0868 }
0869 
0870 #ifdef CONFIG_VSX
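/*
 * Emulate a cache-inhibited vector load that the real-mode HMI handler
 * flagged for us (hmi_p9_special_emu): fetch the user instruction,
 * decode the VSX load, read the 16 bytes by hand and deposit them into
 * the task's vector/FP state, honouring userspace endianness.
 */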
0871 static void p9_hmi_special_emu(struct pt_regs *regs)
0872 {
0873     unsigned int ra, rb, t, i, sel, instr, rc;
0874     const void __user *addr;
0875     u8 vbuf[16] __aligned(16), *vdst;
0876     unsigned long ea, msr, msr_mask;
0877     bool swap;
0878 
0879     if (__get_user(instr, (unsigned int __user *)regs->nip))
0880         return;
0881 
0882     /*
0883      * lxvb16x  opcode: 0x7c0006d8
0884      * lxvd2x   opcode: 0x7c000698
0885      * lxvh8x   opcode: 0x7c000658
0886      * lxvw4x   opcode: 0x7c000618
0887      */
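    /*
     * The four opcodes above differ only in bits 0x0c0 (the element
     * size, decoded as "sel" further down) and bit 0x001 (which VSR
     * bank the target lives in), so one masked compare catches them all.
     */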
0888     if ((instr & 0xfc00073e) != 0x7c000618) {
0889         pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
0890              " instr=%08x\n",
0891              smp_processor_id(), current->comm, current->pid,
0892              regs->nip, instr);
0893         return;
0894     }
0895 
0896     /* Grab vector registers into the task struct */
0897     msr = regs->msr; /* Grab msr before we flush the bits */
0898     flush_vsx_to_thread(current);
0899     enable_kernel_altivec();
0900 
0901     /*
0902      * Is userspace running with a different endian (this is rare but
0903      * not impossible)?
0904      */
0905     swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
0906 
0907     /* Decode the instruction */
0908     ra = (instr >> 16) & 0x1f;
0909     rb = (instr >> 11) & 0x1f;
0910     t = (instr >> 21) & 0x1f;
0911     if (instr & 1)
0912         vdst = (u8 *)&current->thread.vr_state.vr[t];
0913     else
0914         vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
0915 
0916     /* Grab the vector address */
0917     ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
0918     if (is_32bit_task())
0919         ea &= 0xfffffffful;
0920     addr = (__force const void __user *)ea;
0921 
0922     /* Check it */
0923     if (!access_ok(addr, 16)) {
0924         pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
0925              " instr=%08x addr=%016lx\n",
0926              smp_processor_id(), current->comm, current->pid,
0927              regs->nip, instr, (unsigned long)addr);
0928         return;
0929     }
0930 
0931     /* Read the vector */
0932     rc = 0;
0933     if ((unsigned long)addr & 0xfUL)
0934         /* unaligned case */
0935         rc = __copy_from_user_inatomic(vbuf, addr, 16);
0936     else
0937         __get_user_atomic_128_aligned(vbuf, addr, rc);
0938     if (rc) {
0939         pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
0940              " instr=%08x addr=%016lx\n",
0941              smp_processor_id(), current->comm, current->pid,
0942              regs->nip, instr, (unsigned long)addr);
0943         return;
0944     }
0945 
0946     pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
0947          " instr=%08x addr=%016lx\n",
0948          smp_processor_id(), current->comm, current->pid, regs->nip,
0949          instr, (unsigned long) addr);
0950 
0951     /* Grab instruction "selector" */
0952     sel = (instr >> 6) & 3;
0953 
0954     /*
0955      * Check to make sure the facility is actually enabled. This
0956      * could happen if we get a false positive hit.
0957      *
0958      * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
0959      * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
0960      */
0961     msr_mask = MSR_VSX;
0962     if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
0963         msr_mask = MSR_VEC;
0964     if (!(msr & msr_mask)) {
0965         pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
0966              " instr=%08x msr:%016lx\n",
0967              smp_processor_id(), current->comm, current->pid,
0968              regs->nip, instr, msr);
0969         return;
0970     }
0971 
0972     /* Do logging here before we modify sel based on endian */
0973     switch (sel) {
0974     case 0: /* lxvw4x */
0975         PPC_WARN_EMULATED(lxvw4x, regs);
0976         break;
0977     case 1: /* lxvh8x */
0978         PPC_WARN_EMULATED(lxvh8x, regs);
0979         break;
0980     case 2: /* lxvd2x */
0981         PPC_WARN_EMULATED(lxvd2x, regs);
0982         break;
0983     case 3: /* lxvb16x */
0984         PPC_WARN_EMULATED(lxvb16x, regs);
0985         break;
0986     }
0987 
0988 #ifdef __LITTLE_ENDIAN__
0989     /*
0990      * An LE kernel stores the vector in the task struct as an LE
0991      * byte array (effectively swapping both the components and
0992      * the content of the components). Those instructions expect
0993      * the components to remain in ascending address order, so we
0994      * swap them back.
0995      *
0996      * If we are running a BE user space, the expectation is that
0997      * of a simple memcpy, so forcing the emulation to look like
0998      * a lxvb16x should do the trick.
0999      */
1000     if (swap)
1001         sel = 3;
1002 
1003     switch (sel) {
1004     case 0: /* lxvw4x */
1005         for (i = 0; i < 4; i++)
1006             ((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
1007         break;
1008     case 1: /* lxvh8x */
1009         for (i = 0; i < 8; i++)
1010             ((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
1011         break;
1012     case 2: /* lxvd2x */
1013         for (i = 0; i < 2; i++)
1014             ((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
1015         break;
1016     case 3: /* lxvb16x */
1017         for (i = 0; i < 16; i++)
1018             vdst[i] = vbuf[15-i];
1019         break;
1020     }
1021 #else /* __LITTLE_ENDIAN__ */
1022     /* On a big endian kernel, a BE userspace only needs a memcpy */
1023     if (!swap)
1024         sel = 3;
1025 
1026     /* Otherwise, we need to swap the content of the components */
1027     switch (sel) {
1028     case 0: /* lxvw4x */
1029         for (i = 0; i < 4; i++)
1030             ((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
1031         break;
1032     case 1: /* lxvh8x */
1033         for (i = 0; i < 8; i++)
1034             ((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
1035         break;
1036     case 2: /* lxvd2x */
1037         for (i = 0; i < 2; i++)
1038             ((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
1039         break;
1040     case 3: /* lxvb16x */
1041         memcpy(vdst, vbuf, 16);
1042         break;
1043     }
1044 #endif /* !__LITTLE_ENDIAN__ */
1045 
1046     /* Go to next instruction */
1047     regs_add_return_ip(regs, 4);
1048 }
1049 #endif /* CONFIG_VSX */
1050 
1051 DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
1052 {
1053     struct pt_regs *old_regs;
1054 
1055     old_regs = set_irq_regs(regs);
1056 
1057 #ifdef CONFIG_VSX
1058     /* Real mode flagged P9 special emu is needed */
1059     if (local_paca->hmi_p9_special_emu) {
1060         local_paca->hmi_p9_special_emu = 0;
1061 
1062         /*
1063          * We don't want to take page faults while doing the
1064          * emulation, we just replay the instruction if necessary.
1065          */
1066         pagefault_disable();
1067         p9_hmi_special_emu(regs);
1068         pagefault_enable();
1069     }
1070 #endif /* CONFIG_VSX */
1071 
1072     if (ppc_md.handle_hmi_exception)
1073         ppc_md.handle_hmi_exception(regs);
1074 
1075     set_irq_regs(old_regs);
1076 }
1077 
1078 DEFINE_INTERRUPT_HANDLER(unknown_exception)
1079 {
1080     printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
1081            regs->nip, regs->msr, regs->trap);
1082 
1083     _exception(SIGTRAP, regs, TRAP_UNK, 0);
1084 }
1085 
1086 DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
1087 {
1088     printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
1089            regs->nip, regs->msr, regs->trap);
1090 
1091     _exception(SIGTRAP, regs, TRAP_UNK, 0);
1092 }
1093 
1094 DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception)
1095 {
1096     printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
1097            regs->nip, regs->msr, regs->trap);
1098 
1099     _exception(SIGTRAP, regs, TRAP_UNK, 0);
1100 
1101     return 0;
1102 }
1103 
1104 DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
1105 {
1106     if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
1107                     5, SIGTRAP) == NOTIFY_STOP)
1108         return;
1109     if (debugger_iabr_match(regs))
1110         return;
1111     _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
1112 }
1113 
1114 DEFINE_INTERRUPT_HANDLER(RunModeException)
1115 {
1116     _exception(SIGTRAP, regs, TRAP_UNK, 0);
1117 }
1118 
1119 static void __single_step_exception(struct pt_regs *regs)
1120 {
1121     clear_single_step(regs);
1122     clear_br_trace(regs);
1123 
1124     if (kprobe_post_handler(regs))
1125         return;
1126 
1127     if (notify_die(DIE_SSTEP, "single_step", regs, 5,
1128                     5, SIGTRAP) == NOTIFY_STOP)
1129         return;
1130     if (debugger_sstep(regs))
1131         return;
1132 
1133     _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
1134 }
1135 
1136 DEFINE_INTERRUPT_HANDLER(single_step_exception)
1137 {
1138     __single_step_exception(regs);
1139 }
1140 
1141 /*
1142  * After we have successfully emulated an instruction, we have to
1143  * check if the instruction was being single-stepped, and if so,
1144  * pretend we got a single-step exception.  This was pointed out
1145  * by Kumar Gala.  -- paulus
1146  */
1147 static void emulate_single_step(struct pt_regs *regs)
1148 {
1149     if (single_stepping(regs))
1150         __single_step_exception(regs);
1151 }
1152 
1153 static inline int __parse_fpscr(unsigned long fpscr)
1154 {
1155     int ret = FPE_FLTUNK;
1156 
1157     /* Invalid operation */
1158     if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
1159         ret = FPE_FLTINV;
1160 
1161     /* Overflow */
1162     else if ((fpscr & FPSCR_OE) && (fpscr & FPSCR_OX))
1163         ret = FPE_FLTOVF;
1164 
1165     /* Underflow */
1166     else if ((fpscr & FPSCR_UE) && (fpscr & FPSCR_UX))
1167         ret = FPE_FLTUND;
1168 
1169     /* Divide by zero */
1170     else if ((fpscr & FPSCR_ZE) && (fpscr & FPSCR_ZX))
1171         ret = FPE_FLTDIV;
1172 
1173     /* Inexact result */
1174     else if ((fpscr & FPSCR_XE) && (fpscr & FPSCR_XX))
1175         ret = FPE_FLTRES;
1176 
1177     return ret;
1178 }
1179 
1180 static void parse_fpe(struct pt_regs *regs)
1181 {
1182     int code = 0;
1183 
1184     flush_fp_to_thread(current);
1185 
1186 #ifdef CONFIG_PPC_FPU_REGS
1187     code = __parse_fpscr(current->thread.fp_state.fpscr);
1188 #endif
1189 
1190     _exception(SIGFPE, regs, code, regs->nip);
1191 }
1192 
1193 /*
1194  * Illegal instruction emulation support.  Originally written to
1195  * provide the PVR to user applications using the mfspr rd, PVR.
1196  * Return non-zero if we can't emulate, or -EFAULT if the associated
1197  * memory access caused an access fault.  Return zero on success.
1198  *
1199  * There are a couple of ways to do this, either "decode" the instruction
1200  * or directly match lots of bits.  In this case, matching lots of
1201  * bits is faster and easier.
1202  *
1203  */
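
/*
 * Load/store string semantics, for reference: NB bytes (NB=0 means 32)
 * move between memory and successive GPRs starting at rT, four bytes
 * per register, most-significant byte first. A worked example, assuming
 * NB=6:
 *
 *     lswi r5,r4,6  =>  r5 = mem[EA..EA+3],
 *                       r6 = mem[EA+4..EA+5] in its two high bytes
 *
 * hence the per-byte shift of 8 * (3 - (pos & 3)) below.
 */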
1204 static int emulate_string_inst(struct pt_regs *regs, u32 instword)
1205 {
1206     u8 rT = (instword >> 21) & 0x1f;
1207     u8 rA = (instword >> 16) & 0x1f;
1208     u8 NB_RB = (instword >> 11) & 0x1f;
1209     u32 num_bytes;
1210     unsigned long EA;
1211     int pos = 0;
1212 
1213     /* Early out if we are an invalid form of lswx */
1214     if ((instword & PPC_INST_STRING_MASK) == PPC_INST_LSWX)
1215         if ((rT == rA) || (rT == NB_RB))
1216             return -EINVAL;
1217 
1218     EA = (rA == 0) ? 0 : regs->gpr[rA];
1219 
1220     switch (instword & PPC_INST_STRING_MASK) {
1221         case PPC_INST_LSWX:
1222         case PPC_INST_STSWX:
1223             EA += NB_RB;
1224             num_bytes = regs->xer & 0x7f;
1225             break;
1226         case PPC_INST_LSWI:
1227         case PPC_INST_STSWI:
1228             num_bytes = (NB_RB == 0) ? 32 : NB_RB;
1229             break;
1230         default:
1231             return -EINVAL;
1232     }
1233 
1234     while (num_bytes != 0)
1235     {
1236         u8 val;
1237         u32 shift = 8 * (3 - (pos & 0x3));
1238 
1239         /* if process is 32-bit, clear upper 32 bits of EA */
1240         if ((regs->msr & MSR_64BIT) == 0)
1241             EA &= 0xFFFFFFFF;
1242 
1243         switch ((instword & PPC_INST_STRING_MASK)) {
1244             case PPC_INST_LSWX:
1245             case PPC_INST_LSWI:
1246                 if (get_user(val, (u8 __user *)EA))
1247                     return -EFAULT;
1248                 /* first time updating this reg,
1249                  * zero it out */
1250                 if (pos == 0)
1251                     regs->gpr[rT] = 0;
1252                 regs->gpr[rT] |= val << shift;
1253                 break;
1254             case PPC_INST_STSWI:
1255             case PPC_INST_STSWX:
1256                 val = regs->gpr[rT] >> shift;
1257                 if (put_user(val, (u8 __user *)EA))
1258                     return -EFAULT;
1259                 break;
1260         }
1261         /* move EA to next address */
1262         EA += 1;
1263         num_bytes--;
1264 
1265         /* manage our position within the register */
1266         if (++pos == 4) {
1267             pos = 0;
1268             if (++rT == 32)
1269                 rT = 0;
1270         }
1271     }
1272 
1273     return 0;
1274 }
1275 
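/*
 * Classic SWAR population count, stopped at per-byte totals: the first
 * step leaves a 2-bit count in each bit pair, the second sums pairs
 * into nibbles, the third sums nibbles within each byte. popcntb wants
 * a count per byte, so there is no final fold across bytes.
 */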
1276 static int emulate_popcntb_inst(struct pt_regs *regs, u32 instword)
1277 {
1278     u32 ra, rs;
1279     unsigned long tmp;
1280 
1281     ra = (instword >> 16) & 0x1f;
1282     rs = (instword >> 21) & 0x1f;
1283 
1284     tmp = regs->gpr[rs];
1285     tmp = tmp - ((tmp >> 1) & 0x5555555555555555ULL);
1286     tmp = (tmp & 0x3333333333333333ULL) + ((tmp >> 2) & 0x3333333333333333ULL);
1287     tmp = (tmp + (tmp >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
1288     regs->gpr[ra] = tmp;
1289 
1290     return 0;
1291 }
1292 
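/*
 * isel rT,rA,rB,BC for reference: rT = CR[BC] ? rA : rB, where rA=0
 * reads as the literal value 0. CR bit BC counts from the most
 * significant bit, hence the (31 - BC) shift below.
 */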
1293 static int emulate_isel(struct pt_regs *regs, u32 instword)
1294 {
1295     u8 rT = (instword >> 21) & 0x1f;
1296     u8 rA = (instword >> 16) & 0x1f;
1297     u8 rB = (instword >> 11) & 0x1f;
1298     u8 BC = (instword >> 6) & 0x1f;
1299     u8 bit;
1300     unsigned long tmp;
1301 
1302     tmp = (rA == 0) ? 0 : regs->gpr[rA];
1303     bit = (regs->ccr >> (31 - BC)) & 0x1;
1304 
1305     regs->gpr[rT] = bit ? tmp : regs->gpr[rB];
1306 
1307     return 0;
1308 }
1309 
1310 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1311 static inline bool tm_abort_check(struct pt_regs *regs, int cause)
1312 {
0313     /* If we're emulating a load/store in an active transaction, we cannot
0314      * emulate it as the kernel operates in transaction suspended context.
0315      * We need to abort the transaction.  This creates a persistent TM
0316      * abort so tell the user what caused it with a new code.
0317      */
1318     if (MSR_TM_TRANSACTIONAL(regs->msr)) {
1319         tm_enable();
1320         tm_abort(cause);
1321         return true;
1322     }
1323     return false;
1324 }
1325 #else
1326 static inline bool tm_abort_check(struct pt_regs *regs, int reason)
1327 {
1328     return false;
1329 }
1330 #endif
1331 
1332 static int emulate_instruction(struct pt_regs *regs)
1333 {
1334     u32 instword;
1335     u32 rd;
1336 
1337     if (!user_mode(regs))
1338         return -EINVAL;
1339 
1340     if (get_user(instword, (u32 __user *)(regs->nip)))
1341         return -EFAULT;
1342 
1343     /* Emulate the mfspr rD, PVR. */
1344     if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) {
1345         PPC_WARN_EMULATED(mfpvr, regs);
1346         rd = (instword >> 21) & 0x1f;
1347         regs->gpr[rd] = mfspr(SPRN_PVR);
1348         return 0;
1349     }
1350 
1351     /* Emulating the dcba insn is just a no-op.  */
1352     if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) {
1353         PPC_WARN_EMULATED(dcba, regs);
1354         return 0;
1355     }
1356 
1357     /* Emulate the mcrxr insn.  */
1358     if ((instword & PPC_INST_MCRXR_MASK) == PPC_INST_MCRXR) {
1359         int shift = (instword >> 21) & 0x1c;    /* 4 * BF (the CR field) */
1360         unsigned long msk = 0xf0000000UL >> shift;
1361 
1362         PPC_WARN_EMULATED(mcrxr, regs);
1363         regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
1364         regs->xer &= ~0xf0000000UL;
1365         return 0;
1366     }
1367 
1368     /* Emulate load/store string insn. */
1369     if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
1370         if (tm_abort_check(regs,
1371                    TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
1372             return -EINVAL;
1373         PPC_WARN_EMULATED(string, regs);
1374         return emulate_string_inst(regs, instword);
1375     }
1376 
1377     /* Emulate the popcntb (Population Count Bytes) instruction. */
1378     if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) {
1379         PPC_WARN_EMULATED(popcntb, regs);
1380         return emulate_popcntb_inst(regs, instword);
1381     }
1382 
1383     /* Emulate isel (Integer Select) instruction */
1384     if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) {
1385         PPC_WARN_EMULATED(isel, regs);
1386         return emulate_isel(regs, instword);
1387     }
1388 
1389     /* Emulate sync instruction variants */
1390     if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
1391         PPC_WARN_EMULATED(sync, regs);
1392         asm volatile("sync");
1393         return 0;
1394     }
1395 
1396 #ifdef CONFIG_PPC64
1397     /* Emulate the mfspr rD, DSCR. */
1398     if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) ==
1399         PPC_INST_MFSPR_DSCR_USER) ||
1400          ((instword & PPC_INST_MFSPR_DSCR_MASK) ==
1401         PPC_INST_MFSPR_DSCR)) &&
1402             cpu_has_feature(CPU_FTR_DSCR)) {
1403         PPC_WARN_EMULATED(mfdscr, regs);
1404         rd = (instword >> 21) & 0x1f;
1405         regs->gpr[rd] = mfspr(SPRN_DSCR);
1406         return 0;
1407     }
1408     /* Emulate the mtspr DSCR, rD. */
1409     if ((((instword & PPC_INST_MTSPR_DSCR_USER_MASK) ==
1410         PPC_INST_MTSPR_DSCR_USER) ||
1411          ((instword & PPC_INST_MTSPR_DSCR_MASK) ==
1412         PPC_INST_MTSPR_DSCR)) &&
1413             cpu_has_feature(CPU_FTR_DSCR)) {
1414         PPC_WARN_EMULATED(mtdscr, regs);
1415         rd = (instword >> 21) & 0x1f;
1416         current->thread.dscr = regs->gpr[rd];
1417         current->thread.dscr_inherit = 1;
1418         mtspr(SPRN_DSCR, current->thread.dscr);
1419         return 0;
1420     }
1421 #endif
1422 
1423     return -EINVAL;
1424 }
1425 
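/*
 * BUG()/WARN() are implemented as trap instructions with an entry in
 * the bug table; report_bug() calls back here to sanity-check that a
 * trapping address plausibly came from kernel text.
 */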
1426 int is_valid_bugaddr(unsigned long addr)
1427 {
1428     return is_kernel_addr(addr);
1429 }
1430 
1431 #ifdef CONFIG_MATH_EMULATION
1432 static int emulate_math(struct pt_regs *regs)
1433 {
1434     int ret;
1435 
1436     ret = do_mathemu(regs);
1437     if (ret >= 0)
1438         PPC_WARN_EMULATED(math, regs);
1439 
1440     switch (ret) {
1441     case 0:
1442         emulate_single_step(regs);
1443         return 0;
1444     case 1: {
1445             int code = 0;
1446             code = __parse_fpscr(current->thread.fp_state.fpscr);
1447             _exception(SIGFPE, regs, code, regs->nip);
1448             return 0;
1449         }
1450     case -EFAULT:
1451         _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
1452         return 0;
1453     }
1454 
1455     return -1;
1456 }
1457 #else
1458 static inline int emulate_math(struct pt_regs *regs) { return -1; }
1459 #endif
1460 
1461 static void do_program_check(struct pt_regs *regs)
1462 {
1463     unsigned int reason = get_reason(regs);
1464 
1465     /* We can now get here via a FP Unavailable exception if the core
1466      * has no FPU; in that case the reason flags will be 0. */
1467 
1468     if (reason & REASON_FP) {
1469         /* IEEE FP exception */
1470         parse_fpe(regs);
1471         return;
1472     }
1473     if (reason & REASON_TRAP) {
1474         unsigned long bugaddr;
1475         /* Debugger is first in line to stop recursive faults in
1476          * rcu_lock, notify_die, or atomic_notifier_call_chain */
1477         if (debugger_bpt(regs))
1478             return;
1479 
1480         if (kprobe_handler(regs))
1481             return;
1482 
1483         /* trap exception */
1484         if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
1485                 == NOTIFY_STOP)
1486             return;
1487 
1488         bugaddr = regs->nip;
1489         /*
1490          * Fixup bugaddr for BUG_ON() in real mode
1491          */
1492         if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
1493             bugaddr += PAGE_OFFSET;
1494 
1495         if (!(regs->msr & MSR_PR) &&  /* not user-mode */
1496             report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
1497             const struct exception_table_entry *entry;
1498 
1499             entry = search_exception_tables(bugaddr);
1500             if (entry) {
1501                 regs_set_return_ip(regs, extable_fixup(entry) + regs->nip - bugaddr);
1502                 return;
1503             }
1504         }
1505         _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
1506         return;
1507     }
1508 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1509     if (reason & REASON_TM) {
1510         /* This is a TM "Bad Thing Exception" program check.
1511          * This occurs when:
1512          * -  An rfid/hrfid/mtmsrd attempts to cause an illegal
1513          *    transition in TM states.
1514          * -  A trechkpt is attempted when transactional.
1515          * -  A treclaim is attempted when non transactional.
1516          * -  A tend is illegally attempted.
1517          * -  writing a TM SPR when transactional.
1518          *
1519          * If usermode caused this, it's done something illegal and
1520          * gets a SIGILL slap on the wrist.  We call it an illegal
1521          * operand to distinguish from the instruction just being bad
1522          * (e.g. executing a 'tend' on a CPU without TM!); it's an
1523          * illegal /placement/ of a valid instruction.
1524          */
1525         if (user_mode(regs)) {
1526             _exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
1527             return;
1528         } else {
1529             printk(KERN_EMERG "Unexpected TM Bad Thing exception "
1530                    "at %lx (msr 0x%lx) tm_scratch=%llx\n",
1531                    regs->nip, regs->msr, get_paca()->tm_scratch);
1532             die("Unrecoverable exception", regs, SIGABRT);
1533         }
1534     }
1535 #endif
1536 
1537     /*
1538      * If we took the program check in the kernel skip down to sending a
1539      * SIGILL. The subsequent cases all relate to emulating instructions
1540      * which we should only do for userspace. We also do not want to enable
1541      * interrupts for kernel faults because that might lead to further
1542      * faults, and lose the context of the original exception.
1543      */
1544     if (!user_mode(regs))
1545         goto sigill;
1546 
1547     interrupt_cond_local_irq_enable(regs);
1548 
1549     /* (reason & REASON_ILLEGAL) would be the obvious thing here,
1550      * but there seems to be a hardware bug on the 405GP (RevD)
1551      * that means ESR is sometimes set incorrectly - either to
1552      * ESR_DST (!?) or 0.  In the process of chasing this with the
1553      * hardware people - not sure if it can happen on any illegal
1554      * instruction or only on FP instructions, whether there is a
1555      * pattern to occurrences etc. -dgibson 31/Mar/2003
1556      */
1557     if (!emulate_math(regs))
1558         return;
1559 
1560     /* Try to emulate it if we should. */
1561     if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
1562         switch (emulate_instruction(regs)) {
1563         case 0:
1564             regs_add_return_ip(regs, 4);
1565             emulate_single_step(regs);
1566             return;
1567         case -EFAULT:
1568             _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
1569             return;
1570         }
1571     }
1572 
1573 sigill:
1574     if (reason & REASON_PRIVILEGED)
1575         _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
1576     else
1577         _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1578 
1579 }
1580 
1581 DEFINE_INTERRUPT_HANDLER(program_check_exception)
1582 {
1583     do_program_check(regs);
1584 }
1585 
1586 /*
1587  * This occurs when running in hypervisor mode on POWER6 or later and an
1588  * illegal instruction is encountered: flag it and reuse do_program_check().
1589  */
1590 DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
1591 {
1592     regs_set_return_msr(regs, regs->msr | REASON_ILLEGAL);
1593     do_program_check(regs);
1594 }
1595 
1596 DEFINE_INTERRUPT_HANDLER(alignment_exception)
1597 {
1598     int sig, code, fixed = 0;
1599     unsigned long  reason;
1600 
1601     interrupt_cond_local_irq_enable(regs);
1602 
1603     reason = get_reason(regs);
1604     if (reason & REASON_BOUNDARY) {
1605         sig = SIGBUS;
1606         code = BUS_ADRALN;
1607         goto bad;
1608     }
1609 
1610     if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
1611         return;
1612 
1613     /* we don't implement logging of alignment exceptions */
1614     if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
1615         fixed = fix_alignment(regs);
1616 
1617     if (fixed == 1) {
1618         /* skip over emulated instruction */
1619         regs_add_return_ip(regs, inst_length(reason));
1620         emulate_single_step(regs);
1621         return;
1622     }
1623 
1624     /* Operand address was bad */
1625     if (fixed == -EFAULT) {
1626         sig = SIGSEGV;
1627         code = SEGV_ACCERR;
1628     } else {
1629         sig = SIGBUS;
1630         code = BUS_ADRALN;
1631     }
1632 bad:
1633     if (user_mode(regs))
1634         _exception(sig, regs, code, regs->dar);
1635     else
1636         bad_page_fault(regs, sig);
1637 }
1638 
1639 DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
1640 {
1641     die("Kernel stack overflow", regs, SIGSEGV);
1642 }
1643 
1644 DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
1645 {
1646     printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
1647               "%lx at %lx\n", regs->trap, regs->nip);
1648     die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
1649 }
1650 
1651 DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
1652 {
1653     if (user_mode(regs)) {
1654         /* A user program has executed an altivec instruction,
1655            but this kernel doesn't support altivec. */
1656         _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1657         return;
1658     }
1659 
1660     printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
1661             "%lx at %lx\n", regs->trap, regs->nip);
1662     die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
1663 }
1664 
1665 DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
1666 {
1667     if (user_mode(regs)) {
1668     /* A user program has executed a vsx instruction,
1669            but this kernel doesn't support vsx. */
1670         _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1671         return;
1672     }
1673 
1674     printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception "
1675             "%lx at %lx\n", regs->trap, regs->nip);
1676     die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
1677 }
1678 
1679 #ifdef CONFIG_PPC_BOOK3S_64
1680 static void tm_unavailable(struct pt_regs *regs)
1681 {
1682 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1683     if (user_mode(regs)) {
1684         current->thread.load_tm++;
1685         regs_set_return_msr(regs, regs->msr | MSR_TM);
1686         tm_enable();
1687         tm_restore_sprs(&current->thread);
1688         return;
1689     }
1690 #endif
1691     pr_emerg("Unrecoverable TM Unavailable Exception "
1692             "%lx at %lx\n", regs->trap, regs->nip);
1693     die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
1694 }
1695 
1696 DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
1697 {
1698     static char *facility_strings[] = {
1699         [FSCR_FP_LG] = "FPU",
1700         [FSCR_VECVSX_LG] = "VMX/VSX",
1701         [FSCR_DSCR_LG] = "DSCR",
1702         [FSCR_PM_LG] = "PMU SPRs",
1703         [FSCR_BHRB_LG] = "BHRB",
1704         [FSCR_TM_LG] = "TM",
1705         [FSCR_EBB_LG] = "EBB",
1706         [FSCR_TAR_LG] = "TAR",
1707         [FSCR_MSGP_LG] = "MSGP",
1708         [FSCR_SCV_LG] = "SCV",
1709         [FSCR_PREFIX_LG] = "PREFIX",
1710     };
1711     char *facility = "unknown";
1712     u64 value;
1713     u32 instword, rd;
1714     u8 status;
1715     bool hv;
1716 
1717     hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL);
1718     if (hv)
1719         value = mfspr(SPRN_HFSCR);
1720     else
1721         value = mfspr(SPRN_FSCR);
1722 
1723     status = value >> 56;
1724     if ((hv || status >= 2) &&
1725         (status < ARRAY_SIZE(facility_strings)) &&
1726         facility_strings[status])
1727         facility = facility_strings[status];
1728 
1729     /* We should not have taken this interrupt in kernel mode */
1730     if (!user_mode(regs)) {
1731         pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
1732              facility, status, regs->nip);
1733         die("Unexpected facility unavailable exception", regs, SIGABRT);
1734     }
1735 
1736     interrupt_cond_local_irq_enable(regs);
1737 
1738     if (status == FSCR_DSCR_LG) {
1739         /*
1740          * The user is accessing the DSCR through the problem-state-only
1741          * SPR number (0x03), using either a mfspr or a mtspr
1742          * instruction. If it is a write attempt through a mtspr,
1743          * we set the inherit bit and also set the FSCR DSCR bit,
1744          * which lets the user read and write the register directly
1745          * from then on. If it is instead a read attempt through a
1746          * mfspr instruction, we just emulate the instruction. This
1747          * code path therefore keeps emulating every mfspr until
1748          * the user has attempted at least one mtspr instruction.
1749          * That preserves the same behaviour as when the user
1750          * accesses the DSCR through the privilege-level-only SPR
1751          * number (0x11), where accesses are emulated through the
1752          * illegal instruction exception. We always leave HFSCR
1753          * DSCR set.
1754          */
1755         if (get_user(instword, (u32 __user *)(regs->nip))) {
1756             pr_err("Failed to fetch the user instruction\n");
1757             return;
1758         }
1759 
1760         /* Write into DSCR (mtspr 0x03, RS) */
1761         if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
1762                 == PPC_INST_MTSPR_DSCR_USER) {
1763             rd = (instword >> 21) & 0x1f;
1764             current->thread.dscr = regs->gpr[rd];
1765             current->thread.dscr_inherit = 1;
1766             current->thread.fscr |= FSCR_DSCR;
1767             mtspr(SPRN_FSCR, current->thread.fscr);
1768         }
1769 
1770         /* Read from DSCR (mfspr RT, 0x03) */
1771         if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
1772                 == PPC_INST_MFSPR_DSCR_USER) {
1773             if (emulate_instruction(regs)) {
1774                 pr_err("DSCR based mfspr emulation failed\n");
1775                 return;
1776             }
1777             regs_add_return_ip(regs, 4);
1778             emulate_single_step(regs);
1779         }
1780         return;
1781     }
1782 
1783     if (status == FSCR_TM_LG) {
1784         /*
1785          * If we're here then the hardware is TM aware because it
1786          * generated an exception with FSCR_TM set.
1787          *
1788          * If cpu_has_feature(CPU_FTR_TM) is false, then either firmware
1789          * told us not to do TM, or the kernel is not built with TM
1790          * support.
1791          *
1792          * If both of those things are true, then userspace can spam the
1793          * console by triggering the printk() below just by continually
1794          * doing tbegin (or any TM instruction). So in that case just
1795          * send the process a SIGILL immediately.
1796          */
1797         if (!cpu_has_feature(CPU_FTR_TM))
1798             goto out;
1799 
1800         tm_unavailable(regs);
1801         return;
1802     }
1803 
1804     pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
1805         hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
1806 
1807 out:
1808     _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
1809 }
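
/*
 * For reference, the mtspr/mfspr matching in the DSCR path above works
 * on the raw instruction word: the RS/RT field sits at bits 21-25 (as
 * extracted with ">> 21" there), and the 10-bit SPR field at bits
 * 11-20 carries its two 5-bit halves swapped. A hypothetical decoder
 * that recovers the SPR number (0x03 for the user DSCR) could be:
 */
static unsigned int spr_from_inst_sketch(u32 instword)
{
    unsigned int spr = (instword >> 11) & 0x3ff;

    /* undo the swapped halves to get the architected SPR number */
    return ((spr & 0x1f) << 5) | (spr >> 5);
}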
1810 #endif
1811 
1812 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1813 
1814 DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm)
1815 {
1816     /* Note:  This does not handle any kind of FP laziness. */
1817 
1818     TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",
1819          regs->nip, regs->msr);
1820 
1821     /* We can only have got here if the task started using FP after
1822      * beginning the transaction.  So, the transactional regs are just a
1823      * copy of the checkpointed ones.  But, we still need to recheckpoint
1824      * as we're enabling FP for the process; it will return, abort the
1825      * transaction, and probably retry but now with FP enabled.  So the
1826      * checkpointed FP registers need to be loaded.
1827      */
1828     tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1829 
1830     /*
1831      * The reclaim first saved the bogus (lazy) FPRs out to ckfp_state,
1832      * which was then overwritten with thr->fp_state by tm_reclaim_thread().
1833      *
1834      * At this point, ck{fp,vr}_state contains the exact values we want to
1835      * recheckpoint.
1836      */
1837 
1838     /* Enable FP for the task: */
1839     current->thread.load_fp = 1;
1840 
1841     /*
1842      * Recheckpoint from the checkpointed ckpt and ck{fp,vr}_state registers.
1843      */
1844     tm_recheckpoint(&current->thread);
1845 }
1846 
1847 DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm)
1848 {
1849     /* See the comments in fp_unavailable_tm().  This function operates
1850      * the same way.
1851      */
1852 
1853     TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx, "
1854          "MSR=%lx\n",
1855          regs->nip, regs->msr);
1856     tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1857     current->thread.load_vec = 1;
1858     tm_recheckpoint(&current->thread);
1859     current->thread.used_vr = 1;
1860 }
1861 
1862 DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm)
1863 {
1864     /* See the comments in fp_unavailable_tm().  This works similarly,
1865      * though we're loading both FP and VEC registers in here.
1866      *
1867      * If FP isn't in use, load FP regs.  If VEC isn't in use, load VEC
1868      * regs.  Either way, set MSR_VSX.
1869      */
1870 
1871     TM_DEBUG("VSX Unavailable trap whilst transactional at 0x%lx, "
1872          "MSR=%lx\n",
1873          regs->nip, regs->msr);
1874 
1875     current->thread.used_vsr = 1;
1876 
1877     /* This reclaims FP and/or VR regs if they're already enabled */
1878     tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1879 
1880     current->thread.load_vec = 1;
1881     current->thread.load_fp = 1;
1882 
1883     tm_recheckpoint(&current->thread);
1884 }
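
/*
 * The three TM facility-unavailable handlers above share one shape.
 * A condensed sketch of that common pattern, using the field names
 * from the code above (a summary for reading, not a refactoring):
 */
static void tm_fac_unavailable_sketch(bool want_fp, bool want_vec)
{
    /* stash the transactional state, recording the abort cause */
    tm_reclaim_current(TM_CAUSE_FAC_UNAV);

    /* ask the context-switch code to load the facility on return */
    if (want_fp)
        current->thread.load_fp = 1;
    if (want_vec)
        current->thread.load_vec = 1;

    /* reload the checkpointed ck{fp,vr}_state into the thread */
    tm_recheckpoint(&current->thread);
}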
1885 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
1886 
1887 #ifdef CONFIG_PPC64
1888 DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi);
1889 DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
1890 {
1891     __this_cpu_inc(irq_stat.pmu_irqs);
1892 
1893     perf_irq(regs);
1894 
1895     return 0;
1896 }
1897 #endif
1898 
1899 DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async);
1900 DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async)
1901 {
1902     __this_cpu_inc(irq_stat.pmu_irqs);
1903 
1904     perf_irq(regs);
1905 }
1906 
1907 DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
1908 {
1909     /*
1910      * On 64-bit, if perf interrupts hit in a local_irq_disable
1911      * (soft-masked) region, we treat them as NMIs. This is required to
1912      * prevent hash faults on user addresses when reading callchains (and
1913      * looks better from an irq tracing perspective).
1914      */
1915     if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
1916         performance_monitor_exception_nmi(regs);
1917     else
1918         performance_monitor_exception_async(regs);
1919 
1920     return 0;
1921 }
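
/*
 * The routing above hinges on arch_irq_disabled_regs(), which on
 * ppc64 can tell after the fact whether the interrupted context had
 * soft-masked interrupts, because the soft-mask state is captured in
 * the regs snapshot. A sketch of the idea (an assumption about the
 * helper, which is defined in the arch headers, not here):
 */
static bool arch_irq_disabled_regs_sketch(struct pt_regs *regs)
{
    return regs->softe == IRQS_DISABLED;    /* hypothetical check */
}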
1922 
1923 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
1924 static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
1925 {
1926     int changed = 0;
1927     /*
1928      * Determine the cause of the debug event, clear the
1929      * event flags and send a trap to the handler. Torez
1930      * event flags and send a trap to the handler.
1931     if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
1932         dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);
1933 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
1934         current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
1935 #endif
1936         do_send_trap(regs, mfspr(SPRN_DAC1), debug_status,
1937                  5);
1938         changed |= 0x01;
1939     }  else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) {
1940         dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W);
1941         do_send_trap(regs, mfspr(SPRN_DAC2), debug_status,
1942                  6);
1943         changed |= 0x01;
1944     }  else if (debug_status & DBSR_IAC1) {
1945         current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
1946         dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
1947         do_send_trap(regs, mfspr(SPRN_IAC1), debug_status,
1948                  1);
1949         changed |= 0x01;
1950     }  else if (debug_status & DBSR_IAC2) {
1951         current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
1952         do_send_trap(regs, mfspr(SPRN_IAC2), debug_status,
1953                  2);
1954         changed |= 0x01;
1955     }  else if (debug_status & DBSR_IAC3) {
1956         current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
1957         dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
1958         do_send_trap(regs, mfspr(SPRN_IAC3), debug_status,
1959                  3);
1960         changed |= 0x01;
1961     }  else if (debug_status & DBSR_IAC4) {
1962         current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
1963         do_send_trap(regs, mfspr(SPRN_IAC4), debug_status,
1964                  4);
1965         changed |= 0x01;
1966     }
1967     /*
1968      * At the point this routine was called, the MSR(DE) was turned off.
1969      * Check all other debug flags and see if that bit needs to be turned
1970      * back on or not.
1971      */
1972     if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
1973                    current->thread.debug.dbcr1))
1974         regs_set_return_msr(regs, regs->msr | MSR_DE);
1975     else
1976         /* Make sure the IDM flag is off */
1977         current->thread.debug.dbcr0 &= ~DBCR0_IDM;
1978 
1979     if (changed & 0x01)
1980         mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
1981 }
1982 
1983 DEFINE_INTERRUPT_HANDLER(DebugException)
1984 {
1985     unsigned long debug_status = regs->dsisr;
1986 
1987     current->thread.debug.dbsr = debug_status;
1988 
1989     /* Hack alert: On BookE, Branch Taken stops on the branch itself, while
1990      * on server it stops on the target of the branch. To simulate the
1991      * server behaviour, we restart right away with a single step rather
1992      * than stopping here on a BT event.
1993      */
1994     if (debug_status & DBSR_BT) {
1995         regs_set_return_msr(regs, regs->msr & ~MSR_DE);
1996 
1997         /* Disable BT */
1998         mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
1999         /* Clear the BT event */
2000         mtspr(SPRN_DBSR, DBSR_BT);
2001 
2002         /* Do the single step trick only when coming from userspace */
2003         if (user_mode(regs)) {
2004             current->thread.debug.dbcr0 &= ~DBCR0_BT;
2005             current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
2006             regs_set_return_msr(regs, regs->msr | MSR_DE);
2007             return;
2008         }
2009 
2010         if (kprobe_post_handler(regs))
2011             return;
2012 
2013         if (notify_die(DIE_SSTEP, "block_step", regs, 5,
2014                    5, SIGTRAP) == NOTIFY_STOP) {
2015             return;
2016         }
2017         if (debugger_sstep(regs))
2018             return;
2019     } else if (debug_status & DBSR_IC) {    /* Instruction complete */
2020         regs_set_return_msr(regs, regs->msr & ~MSR_DE);
2021 
2022         /* Disable instruction completion */
2023         mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
2024         /* Clear the instruction completion event */
2025         mtspr(SPRN_DBSR, DBSR_IC);
2026 
2027         if (kprobe_post_handler(regs))
2028             return;
2029 
2030         if (notify_die(DIE_SSTEP, "single_step", regs, 5,
2031                    5, SIGTRAP) == NOTIFY_STOP) {
2032             return;
2033         }
2034 
2035         if (debugger_sstep(regs))
2036             return;
2037 
2038         if (user_mode(regs)) {
2039             current->thread.debug.dbcr0 &= ~DBCR0_IC;
2040             if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
2041                            current->thread.debug.dbcr1))
2042                 regs_set_return_msr(regs, regs->msr | MSR_DE);
2043             else
2044                 /* Make sure the IDM bit is off */
2045                 current->thread.debug.dbcr0 &= ~DBCR0_IDM;
2046         }
2047 
2048         _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
2049     } else
2050         handle_debug(regs, debug_status);
2051 }
2052 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
2053 
2054 #ifdef CONFIG_ALTIVEC
2055 DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
2056 {
2057     int err;
2058 
2059     if (!user_mode(regs)) {
2060         printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
2061                " at %lx\n", regs->nip);
2062         die("Kernel VMX/Altivec assist exception", regs, SIGILL);
2063     }
2064 
2065     flush_altivec_to_thread(current);
2066 
2067     PPC_WARN_EMULATED(altivec, regs);
2068     err = emulate_altivec(regs);
2069     if (err == 0) {
2070         regs_add_return_ip(regs, 4); /* skip emulated instruction */
2071         emulate_single_step(regs);
2072         return;
2073     }
2074 
2075     if (err == -EFAULT) {
2076         /* got an error reading the instruction */
2077         _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2078     } else {
2079         /* didn't recognize the instruction */
2080         /* XXX quick hack for now: set the non-Java bit in the VSCR */
2081         printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
2082                    "in %s at %lx\n", current->comm, regs->nip);
2083         current->thread.vr_state.vscr.u[3] |= 0x10000;
2084     }
2085 }
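
/*
 * The "quick hack" above sets VSCR[NJ] (non-Java mode, bit 16 of the
 * 32-bit VSCR word, i.e. the 0x10000 literal) so that denormalized
 * inputs are flushed to zero instead of repeatedly faulting back into
 * this assist handler. A named version of the same store (the
 * VSCR_NJ_SKETCH name is illustrative, not an existing macro):
 */
#define VSCR_NJ_SKETCH  0x00010000

static void vscr_set_nj_sketch(struct thread_struct *t)
{
    t->vr_state.vscr.u[3] |= VSCR_NJ_SKETCH;
}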
2086 #endif /* CONFIG_ALTIVEC */
2087 
2088 #ifdef CONFIG_FSL_BOOKE
2089 DEFINE_INTERRUPT_HANDLER(CacheLockingException)
2090 {
2091     unsigned long error_code = regs->dsisr;
2092 
2093     /* We treat cache locking instructions from the user
2094      * as privileged ops; in the future we could try to do
2095      * something smarter.
2096      */
2097     if (error_code & (ESR_DLK|ESR_ILK))
2098         _exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
2099     return;
2100 }
2101 #endif /* CONFIG_FSL_BOOKE */
2102 
2103 #ifdef CONFIG_SPE
2104 DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException)
2105 {
2106     extern int do_spe_mathemu(struct pt_regs *regs);
2107     unsigned long spefscr;
2108     int fpexc_mode;
2109     int code = FPE_FLTUNK;
2110     int err;
2111 
2112     interrupt_cond_local_irq_enable(regs);
2113 
2114     flush_spe_to_thread(current);
2115 
2116     spefscr = current->thread.spefscr;
2117     fpexc_mode = current->thread.fpexc_mode;
2118 
2119     if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) {
2120         code = FPE_FLTOVF;
2121     } else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) {
2122         code = FPE_FLTUND;
2123     } else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV)) {
2124         code = FPE_FLTDIV;
2125     } else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) {
2126         code = FPE_FLTINV;
2127     } else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) &&
2128            (fpexc_mode & PR_FP_EXC_RES)) {
2129         code = FPE_FLTRES;
2130     }
2132 
2133     err = do_spe_mathemu(regs);
2134     if (err == 0) {
2135         regs_add_return_ip(regs, 4); /* skip emulated instruction */
2136         emulate_single_step(regs);
2137         return;
2138     }
2139 
2140     if (err == -EFAULT) {
2141         /* got an error reading the instruction */
2142         _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2143     } else if (err == -EINVAL) {
2144         /* didn't recognize the instruction */
2145         printk(KERN_ERR "unrecognized SPE instruction "
2146                "in %s at %lx\n", current->comm, regs->nip);
2147     } else {
2148         _exception(SIGFPE, regs, code, regs->nip);
2149     }
2150 
2151     return;
2152 }
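
/*
 * The fpexc_mode tests above reflect userspace policy: a task chooses
 * which SPE FP conditions should raise SIGFPE via prctl(PR_SET_FPEXC),
 * which is what populates thread.fpexc_mode. Illustrative userspace
 * usage (hypothetical program; the prctl option and flags are real):
 *
 *    #include <sys/prctl.h>
 *    #include <linux/prctl.h>
 *
 *    // Request SIGFPE on overflow and divide-by-zero, delivered
 *    // precisely at the faulting instruction.
 *    prctl(PR_SET_FPEXC,
 *          PR_FP_EXC_OVF | PR_FP_EXC_DIV | PR_FP_EXC_PRECISE);
 */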
2153 
2154 DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
2155 {
2156     extern int speround_handler(struct pt_regs *regs);
2157     int err;
2158 
2159     interrupt_cond_local_irq_enable(regs);
2160 
2161     preempt_disable();
2162     if (regs->msr & MSR_SPE)
2163         giveup_spe(current);
2164     preempt_enable();
2165 
2166     regs_add_return_ip(regs, -4);
2167     err = speround_handler(regs);
2168     if (err == 0) {
2169         regs_add_return_ip(regs, 4); /* skip emulated instruction */
2170         emulate_single_step(regs);
2171         return;
2172     }
2173 
2174     if (err == -EFAULT) {
2175         /* got an error reading the instruction */
2176         _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
2177     } else if (err == -EINVAL) {
2178         /* didn't recognize the instruction */
2179         printk(KERN_ERR "unrecognized SPE instruction "
2180                "in %s at %lx\n", current->comm, regs->nip);
2181     } else {
2182         _exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
2183         return;
2184     }
2185 }
2186 #endif
2187 
2188 /*
2189  * We enter here if we get an unrecoverable exception, that is, one
2190  * that happened at a point where the RI (recoverable interrupt) bit
2191  * in the MSR is 0.  This indicates that SRR0/1 are live, and that
2192  * we therefore lost state by taking this exception.
2193  */
2194 void __noreturn unrecoverable_exception(struct pt_regs *regs)
2195 {
2196     pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
2197          regs->trap, regs->nip, regs->msr);
2198     die("Unrecoverable exception", regs, SIGABRT);
2199     /* die() should not return */
2200     for (;;)
2201         ;
2202 }
2203 
2204 #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
2205 /*
2206  * Default handler for a Watchdog exception: it simply masks further
2207  * watchdog interrupts by clearing TCR[WIE]; boards are expected to
2208  * override it with something smarter.
2209  */
2210 void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
2211 {
2212     /* Generic WatchdogHandler, implement your own */
2213     mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE);
2214 }
2215 
2216 DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException)
2217 {
2218     printk(KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
2219     WatchdogHandler(regs);
2220     return 0;
2221 }
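
/*
 * WatchdogHandler is declared weak above precisely so that a board
 * port can supply a stronger definition in its own platform file. A
 * hypothetical override (sketch; ppc_md.restart is the usual machdep
 * restart hook, but the right action is a board decision):
 *
 *    void WatchdogHandler(struct pt_regs *regs)
 *    {
 *        pr_emerg("Watchdog fired, restarting\n");
 *        if (ppc_md.restart)
 *            ppc_md.restart(NULL);
 *    }
 */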
2222 #endif
2223 
2224 /*
2225  * We enter here if we discover during exception entry that we are
2226  * running in supervisor mode with a userspace value in the stack pointer.
2227  */
2228 DEFINE_INTERRUPT_HANDLER(kernel_bad_stack)
2229 {
2230     printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
2231            regs->gpr[1], regs->nip);
2232     die("Bad kernel stack pointer", regs, SIGABRT);
2233 }
2234 
2235 #ifdef CONFIG_PPC_EMULATED_STATS
2236 
2237 #define WARN_EMULATED_SETUP(type)   .type = { .name = #type }
2238 
2239 struct ppc_emulated ppc_emulated = {
2240 #ifdef CONFIG_ALTIVEC
2241     WARN_EMULATED_SETUP(altivec),
2242 #endif
2243     WARN_EMULATED_SETUP(dcba),
2244     WARN_EMULATED_SETUP(dcbz),
2245     WARN_EMULATED_SETUP(fp_pair),
2246     WARN_EMULATED_SETUP(isel),
2247     WARN_EMULATED_SETUP(mcrxr),
2248     WARN_EMULATED_SETUP(mfpvr),
2249     WARN_EMULATED_SETUP(multiple),
2250     WARN_EMULATED_SETUP(popcntb),
2251     WARN_EMULATED_SETUP(spe),
2252     WARN_EMULATED_SETUP(string),
2253     WARN_EMULATED_SETUP(sync),
2254     WARN_EMULATED_SETUP(unaligned),
2255 #ifdef CONFIG_MATH_EMULATION
2256     WARN_EMULATED_SETUP(math),
2257 #endif
2258 #ifdef CONFIG_VSX
2259     WARN_EMULATED_SETUP(vsx),
2260 #endif
2261 #ifdef CONFIG_PPC64
2262     WARN_EMULATED_SETUP(mfdscr),
2263     WARN_EMULATED_SETUP(mtdscr),
2264     WARN_EMULATED_SETUP(lq_stq),
2265     WARN_EMULATED_SETUP(lxvw4x),
2266     WARN_EMULATED_SETUP(lxvh8x),
2267     WARN_EMULATED_SETUP(lxvd2x),
2268     WARN_EMULATED_SETUP(lxvb16x),
2269 #endif
2270 };
2271 
2272 u32 ppc_warn_emulated;
2273 
2274 void ppc_warn_emulated_print(const char *type)
2275 {
2276     pr_warn_ratelimited("%s used emulated %s instruction\n", current->comm,
2277                 type);
2278 }
2279 
2280 static int __init ppc_warn_emulated_init(void)
2281 {
2282     struct dentry *dir;
2283     unsigned int i;
2284     struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
2285 
2286     dir = debugfs_create_dir("emulated_instructions",
2287                  arch_debugfs_dir);
2288 
2289     debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated);
2290 
2291     for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++)
2292         debugfs_create_u32(entries[i].name, 0644, dir,
2293                    (u32 *)&entries[i].val.counter);
2294 
2295     return 0;
2296 }
2297 
2298 device_initcall(ppc_warn_emulated_init);
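
/*
 * With CONFIG_PPC_EMULATED_STATS enabled and debugfs mounted, each
 * counter registered above is a plain u32 file. Illustrative
 * userspace read (the "powerpc" path component is an assumption about
 * arch_debugfs_dir; write a non-zero value to the do_warn file to
 * enable the ratelimited warnings):
 *
 *    #include <stdio.h>
 *
 *    FILE *f = fopen("/sys/kernel/debug/powerpc/"
 *                    "emulated_instructions/unaligned", "r");
 *    unsigned int count = 0;
 *    if (f) {
 *        fscanf(f, "%u", &count);
 *        fclose(f);
 *    }
 */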
2299 
2300 #endif /* CONFIG_PPC_EMULATED_STATS */