// SPDX-License-Identifier: GPL-2.0-only
/*
 * Architecture specific (PPC64) functions for kexec based crash dumps.
 *
 * Copyright (C) 2005, IBM Corp.
 *
 * Created by: Haren Myneni
 */

#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/reboot.h>
#include <linux/kexec.h>
#include <linux/export.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/types.h>

#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/kexec.h>
#include <asm/smp.h>
#include <asm/setjmp.h>
#include <asm/debug.h>
#include <asm/interrupt.h>

/*
 * The primary CPU waits a while for all secondary CPUs to enter. This is to
 * avoid sending an IPI if the secondary CPUs are entering
 * crash_kexec_secondary on their own (e.g. via a system reset).
 *
 * The secondary timeout has to be longer than the primary. Both timeouts are
 * in milliseconds.
 */
#define PRIMARY_TIMEOUT     500
#define SECONDARY_TIMEOUT   1000

#define IPI_TIMEOUT     10000
#define REAL_MODE_TIMEOUT   10000

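/* Set by the crashing CPU to tell the secondaries to start the kdump boot */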
static int time_to_dump;

/*
 * In case of a system reset, secondary CPUs enter crash_kexec_secondary()
 * without an IPI having to be sent explicitly. So, note whether the crash
 * came in via system reset to avoid sending another IPI.
 */
static int is_via_system_reset;

/*
 * crash_wake_offline should be set to 1 by platforms that intend to wake
 * up offline cpus prior to jumping to a kdump kernel. Currently powernv
 * sets it to 1, since we want to avoid problems when an offline CPU wakes
 * up due to something like an HMI (malfunction error), which propagates
 * to all threads.
 */
int crash_wake_offline;

#define CRASH_HANDLER_MAX 3
/* List of shutdown handles */
static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
static DEFINE_SPINLOCK(crash_handlers_lock);

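/*
 * setjmp buffer and owning CPU used to recover if a crash shutdown handler
 * faults; see handle_fault() below.
 */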
static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
static int crash_shutdown_cpu = -1;

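/*
 * Installed as the debugger fault handler while crash code runs: if the CPU
 * that owns crash_shutdown_buf faults, longjmp back to the setjmp site
 * instead of taking the fault.
 */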
static int handle_fault(struct pt_regs *regs)
{
    if (crash_shutdown_cpu == smp_processor_id())
        longjmp(crash_shutdown_buf, 1);
    return 0;
}

#ifdef CONFIG_SMP

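/* Number of CPUs that have entered crash_ipi_callback() */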
static atomic_t cpus_in_crash;
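/*
 * Run on every secondary CPU that joins the crash, either from the crash IPI
 * or via crash_kexec_secondary(): save the CPU state once, mark the CPU as
 * in-crash, then spin until the crashing CPU starts the kdump boot.
 */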
void crash_ipi_callback(struct pt_regs *regs)
{
    static cpumask_t cpus_state_saved = CPU_MASK_NONE;

    int cpu = smp_processor_id();

    hard_irq_disable();
    if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
        crash_save_cpu(regs, cpu);
        cpumask_set_cpu(cpu, &cpus_state_saved);
    }

    atomic_inc(&cpus_in_crash);
    smp_mb__after_atomic();

    /*
     * Wait until the crashing CPU signals the start of the kdump boot.
     * Spinning here keeps this CPU stopped until then.
     */
    while (!time_to_dump)
        cpu_relax();

    if (ppc_md.kexec_cpu_down)
        ppc_md.kexec_cpu_down(1, 1);

#ifdef CONFIG_PPC64
    kexec_smp_wait();
#else
    for (;;);   /* FIXME */
#endif

    /* NOTREACHED */
}

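/*
 * Bring the other CPUs into the crash path: either wait for them to come in
 * on their own after a system reset, or send them the crash IPI. If some
 * CPUs still don't respond, ask the operator to activate a system reset and
 * try once more.
 */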
static void crash_kexec_prepare_cpus(void)
{
    unsigned int msecs;
    volatile unsigned int ncpus = num_online_cpus() - 1; /* Excluding the panic cpu */
    volatile int tries = 0;
    int (*old_handler)(struct pt_regs *regs);

    printk(KERN_EMERG "Sending IPI to other CPUs\n");

    if (crash_wake_offline)
        ncpus = num_present_cpus() - 1;

    /*
     * If we came in via system reset, the secondaries enter via
     * crash_kexec_secondary() on their own, so just wait a while for them.
     * Otherwise, send an IPI to all other CPUs.
     */
    if (is_via_system_reset)
        mdelay(PRIMARY_TIMEOUT);
    else
        crash_send_ipi(crash_ipi_callback);
    smp_wmb();

again:
    /*
     * FIXME: Until we have a way to stop other CPUs reliably, the crash
     * CPU sends an IPI and waits for the other CPUs to respond.
     */
    msecs = IPI_TIMEOUT;
    while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
        mdelay(1);

    /* Would it be better to replace the trap vector here? */

    if (atomic_read(&cpus_in_crash) >= ncpus) {
        printk(KERN_EMERG "IPI complete\n");
        return;
    }

    printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
        ncpus - atomic_read(&cpus_in_crash));

    /*
     * If we have a panic timeout set then we can't wait indefinitely
     * for someone to activate the system reset. We also give up on the
     * second time through if the system reset fails to work.
     */
    if ((panic_timeout > 0) || (tries > 0))
        return;

    /*
     * A system reset will cause all CPUs to take an 0x100 exception.
     * The primary CPU returns here via setjmp, and the secondary
     * CPUs reexecute the crash_kexec_secondary path.
     */
    old_handler = __debugger;
    __debugger = handle_fault;
    crash_shutdown_cpu = smp_processor_id();

    if (setjmp(crash_shutdown_buf) == 0) {
        printk(KERN_EMERG "Activate system reset (dumprestart) "
                  "to stop other cpu(s)\n");

        /*
         * A system reset will force all CPUs to execute the
         * crash code again. We need to reset cpus_in_crash so we
         * wait for everyone to do this.
         */
        atomic_set(&cpus_in_crash, 0);
        smp_mb();

        while (atomic_read(&cpus_in_crash) < ncpus)
            cpu_relax();
    }

    crash_shutdown_cpu = -1;
    __debugger = old_handler;

    tries++;
    goto again;
}

/*
 * Entry point for secondary CPUs, e.g. when they take a system reset during
 * a crash. Wait for the crashing CPU to announce itself, then join the
 * crash via crash_ipi_callback(). If no crashing CPU shows up within
 * SECONDARY_TIMEOUT, return, as the kdump image may not have been loaded.
 */
void crash_kexec_secondary(struct pt_regs *regs)
{
    unsigned long flags;
    int msecs = SECONDARY_TIMEOUT;

    local_irq_save(flags);

    /* Wait for the primary crash CPU to signal its progress */
    while (crashing_cpu < 0) {
        if (--msecs < 0) {
            /* No response, kdump image may not have been loaded */
            local_irq_restore(flags);
            return;
        }

        mdelay(1);
    }

    crash_ipi_callback(regs);
}

#else   /* ! CONFIG_SMP */

static void crash_kexec_prepare_cpus(void)
{
    /*
     * Move the secondaries to us so that we can copy the new kernel to
     * 0-0x100 safely.
     *
     * Should this be done at kexec time in setup.c?
     */
#ifdef CONFIG_PPC64
    smp_release_cpus();
#else
    /* FIXME */
#endif
}

void crash_kexec_secondary(struct pt_regs *regs)
{
}
#endif  /* CONFIG_SMP */

/* Wait for all the CPUs to hit real mode, but time out if they don't come in */
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
{
    unsigned int msecs;
    int i;

    msecs = REAL_MODE_TIMEOUT;
    for (i = 0; i < nr_cpu_ids && msecs > 0; i++) {
        if (i == cpu)
            continue;

        while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
            barrier();
            if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
                break;
            msecs--;
            mdelay(1);
        }
    }
    mb();
}
#else
static inline void crash_kexec_wait_realmode(int cpu) {}
#endif  /* CONFIG_SMP && CONFIG_PPC64 */

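/*
 * Common crash preparation: note the crashing CPU and stop all the other
 * CPUs before the kexec'd kernel is started.
 */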
void crash_kexec_prepare(void)
{
    /* Avoid hardlocking with an unresponsive CPU holding logbuf_lock */
    printk_deferred_enter();

    /*
     * This function is only called after the system has panicked or is
     * otherwise in a critical state. The minimum amount of code to allow
     * a kexec'd kernel to run successfully needs to happen here.
     *
     * In practice this means stopping other cpus in an SMP system.
     * The kernel is broken so disable interrupts.
     */
    hard_irq_disable();

    /*
     * Make a note of the crashing cpu. It will be used in machine_kexec
     * so that another IPI is not sent.
     */
    crashing_cpu = smp_processor_id();

    crash_kexec_prepare_cpus();
}

/*
 * Register a function to be called on shutdown.  Only use this if you
 * can't reset your device in the second kernel.
 */
int crash_shutdown_register(crash_shutdown_t handler)
{
    unsigned int i, rc;

    spin_lock(&crash_handlers_lock);
    for (i = 0; i < CRASH_HANDLER_MAX; i++)
        if (!crash_shutdown_handles[i]) {
            /* Insert handle at first empty entry */
            crash_shutdown_handles[i] = handler;
            rc = 0;
            break;
        }

    if (i == CRASH_HANDLER_MAX) {
        printk(KERN_ERR "Crash shutdown handles full, "
               "not registered.\n");
        rc = 1;
    }

    spin_unlock(&crash_handlers_lock);
    return rc;
}
EXPORT_SYMBOL(crash_shutdown_register);

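/*
 * Unregister a previously registered shutdown handler and shift the
 * remaining handles down so that free slots stay at the end of the list.
 */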
int crash_shutdown_unregister(crash_shutdown_t handler)
{
    unsigned int i, rc;

    spin_lock(&crash_handlers_lock);
    for (i = 0; i < CRASH_HANDLER_MAX; i++)
        if (crash_shutdown_handles[i] == handler)
            break;

    if (i == CRASH_HANDLER_MAX) {
        printk(KERN_ERR "Crash shutdown handle not found\n");
        rc = 1;
    } else {
        /* Shift handles down */
        for (; i < (CRASH_HANDLER_MAX - 1); i++)
            crash_shutdown_handles[i] =
                crash_shutdown_handles[i+1];
        /*
         * Reset the last entry to NULL now that it has been shifted down,
         * so that new handles can be added in its place.
         */
        crash_shutdown_handles[i] = NULL;
        rc = 0;
    }

    spin_unlock(&crash_handlers_lock);
    return rc;
}
EXPORT_SYMBOL(crash_shutdown_unregister);

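/*
 * The powerpc crash shutdown path: stop the other CPUs, save this CPU's
 * registers, wait for the secondaries to reach real mode, mask interrupts,
 * and run any registered crash shutdown handlers, each protected by setjmp
 * in case it faults.
 */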
void default_machine_crash_shutdown(struct pt_regs *regs)
{
    unsigned int i;
    int (*old_handler)(struct pt_regs *regs);

    if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
        is_via_system_reset = 1;

    crash_smp_send_stop();

    crash_save_cpu(regs, crashing_cpu);

    time_to_dump = 1;

    crash_kexec_wait_realmode(crashing_cpu);

    machine_kexec_mask_interrupts();

    /*
     * Call registered shutdown routines safely.  Swap out
     * __debugger_fault_handler, and replace on exit.
     */
    old_handler = __debugger_fault_handler;
    __debugger_fault_handler = handle_fault;
    crash_shutdown_cpu = smp_processor_id();
    for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
        if (setjmp(crash_shutdown_buf) == 0) {
            /*
             * Insert syncs and delay to ensure
             * instructions in the dangerous region don't
             * leak away from this protected region.
             */
            asm volatile("sync; isync");
            /* dangerous region */
            crash_shutdown_handles[i]();
            asm volatile("sync; isync");
        }
    }
    crash_shutdown_cpu = -1;
    __debugger_fault_handler = old_handler;

    if (ppc_md.kexec_cpu_down)
        ppc_md.kexec_cpu_down(1, 0);
}