// SPDX-License-Identifier: GPL-2.0-only
/* Pseudo NMI support on sparc64 systems.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * The NMI watchdog support and infrastructure is based almost
 * entirely upon the x86 NMI support code.
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <linux/kernel_stat.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/ptrace.h>
#include <asm/pcr.h>

#include "kstack.h"

/* We don't have a real NMI on sparc64, but we can fake one
 * up using profiling counter overflow interrupts and interrupt
 * levels.
 *
 * The profile overflow interrupts at level 15, so we use
 * level 14 as our IRQ off level.
 */
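
/*
 * Illustrative sketch (an editor's addition, not part of the original
 * file): on sparc64, local_irq_disable() raises %pil only to level 14,
 * so the level-15 profile counter overflow keeps firing even inside
 * "IRQs off" regions -- which is what makes it usable as a pseudo-NMI.
 */
#if 0	/* hypothetical demo, never built */
static void pseudo_nmi_illustration(void)
{
    local_irq_disable();    /* %pil = 14: levels 1-14 are masked */
    /*
     * A CPU stuck spinning here still takes the level-15 perf
     * counter overflow trap, so perfctr_irq() below keeps running
     * and can notice the lockup.
     */
    local_irq_enable();
}
#endif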

static int panic_on_timeout;

/* nmi_active:
 * >0: the NMI watchdog is active, but can be disabled
 * <0: the NMI watchdog has not been set up, and cannot be enabled
 *  0: the NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);       /* oprofile uses this */
EXPORT_SYMBOL(nmi_active);
static int nmi_init_done;
static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;

static DEFINE_PER_CPU(unsigned int, last_irq_sum);
static DEFINE_PER_CPU(long, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void arch_touch_nmi_watchdog(void)
{
    if (atomic_read(&nmi_active)) {
        int cpu;

        for_each_present_cpu(cpu) {
            if (per_cpu(nmi_touch, cpu) != 1)
                per_cpu(nmi_touch, cpu) = 1;
        }
    }
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);

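/*
 * Usage sketch (an illustration, not from this file): long-running
 * kernel loops pet the watchdog via the generic touch_nmi_watchdog()
 * wrapper, which calls arch_touch_nmi_watchdog() above; the nmi_touch
 * flag then forces perfctr_irq() to reset alert_counter on each CPU.
 */
#if 0	/* hypothetical caller, never built */
    while (!done) {             /* e.g. a slow firmware poll */
        poll_hardware();        /* hypothetical helper */
        touch_nmi_watchdog();   /* keep the lockup detector quiet */
    }
#endif
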
static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
    int this_cpu = smp_processor_id();

    if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
               pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
        return;

    if (do_panic || panic_on_oops)
        panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
    else
        WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
}

notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
{
    unsigned int sum, touched = 0;
    void *orig_sp;

    clear_softint(1 << irq);

    local_cpu_data().__nmi_count++;

    nmi_enter();

    orig_sp = set_hardirq_stack();

    if (notify_die(DIE_NMI, "nmi", regs, 0,
               pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
        touched = 1;
    else
        pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);

    sum = local_cpu_data().irq0_irqs;
    if (__this_cpu_read(nmi_touch)) {
        __this_cpu_write(nmi_touch, 0);
        touched = 1;
    }
    if (!touched && __this_cpu_read(last_irq_sum) == sum) {
        __this_cpu_inc(alert_counter);
        if (__this_cpu_read(alert_counter) == 30 * nmi_hz)
            die_nmi("BUG: NMI Watchdog detected LOCKUP",
                regs, panic_on_timeout);
    } else {
        __this_cpu_write(last_irq_sum, sum);
        __this_cpu_write(alert_counter, 0);
    }
    if (__this_cpu_read(wd_enabled)) {
        pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
        pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
    }

    restore_hardirq_stack(orig_sp);

    nmi_exit();
}
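
/*
 * Worked example of the threshold above (editor's illustration):
 * perfctr_irq() fires nmi_hz times per second, and alert_counter is
 * reset whenever the timer-tick count (irq0_irqs) advances or
 * nmi_touch is set. A CPU therefore has to sit with timer interrupts
 * stalled for 30 * nmi_hz consecutive overflow interrupts -- about
 * 30 seconds -- before die_nmi() fires. After check_nmi_watchdog()
 * drops nmi_hz to 1, that is 30 interrupts at one per second.
 */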

static inline unsigned int get_nmi_count(int cpu)
{
    return cpu_data(cpu).__nmi_count;
}

static __init void nmi_cpu_busy(void *data)
{
    while (endflag == 0)
        mb();
}

static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
{
    printk(KERN_CONT "\n");

    printk(KERN_WARNING
        "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
            cpu, prev_nmi_count[cpu], get_nmi_count(cpu));

    printk(KERN_WARNING
        "Please report this to bugzilla.kernel.org,\n");
    printk(KERN_WARNING
        "and attach the output of the 'dmesg' command.\n");

    per_cpu(wd_enabled, cpu) = 0;
    atomic_dec(&nmi_active);
}

void stop_nmi_watchdog(void *unused)
{
    if (!__this_cpu_read(wd_enabled))
        return;
    pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
    __this_cpu_write(wd_enabled, 0);
    atomic_dec(&nmi_active);
}

static int __init check_nmi_watchdog(void)
{
    unsigned int *prev_nmi_count;
    int cpu, err;

    if (!atomic_read(&nmi_active))
        return 0;

    prev_nmi_count = kmalloc_array(nr_cpu_ids, sizeof(unsigned int),
                       GFP_KERNEL);
    if (!prev_nmi_count) {
        err = -ENOMEM;
        goto error;
    }

    printk(KERN_INFO "Testing NMI watchdog ... ");

    smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);

    for_each_possible_cpu(cpu)
        prev_nmi_count[cpu] = get_nmi_count(cpu);
    local_irq_enable();
    mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

    for_each_online_cpu(cpu) {
        if (!per_cpu(wd_enabled, cpu))
            continue;
        if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
            report_broken_nmi(cpu, prev_nmi_count);
    }
    endflag = 1;
    if (!atomic_read(&nmi_active)) {
        kfree(prev_nmi_count);
        atomic_set(&nmi_active, -1);
        err = -ENODEV;
        goto error;
    }
    printk("OK.\n");

    nmi_hz = 1;

    kfree(prev_nmi_count);
    return 0;
error:
    on_each_cpu(stop_nmi_watchdog, NULL, 1);
    return err;
}
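
/*
 * Worked example of the self-test above (editor's illustration): with
 * nmi_hz == HZ, mdelay((20 * 1000) / nmi_hz) waits roughly 20 watchdog
 * periods while every other CPU spins in nmi_cpu_busy(). A working
 * counter should overflow about 20 times in that window, so a delta of
 * 5 or fewer interrupts marks that CPU's watchdog as broken.
 */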

void start_nmi_watchdog(void *unused)
{
    if (__this_cpu_read(wd_enabled))
        return;

    __this_cpu_write(wd_enabled, 1);
    atomic_inc(&nmi_active);

    pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
    pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));

    pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}

static void nmi_adjust_hz_one(void *unused)
{
    if (!__this_cpu_read(wd_enabled))
        return;

    pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
    pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));

    pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}

void nmi_adjust_hz(unsigned int new_hz)
{
    nmi_hz = new_hz;
    on_each_cpu(nmi_adjust_hz_one, NULL, 1);
}
EXPORT_SYMBOL_GPL(nmi_adjust_hz);
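
/*
 * Usage sketch (hypothetical caller, not part of this file): a
 * profiling client -- the nmi_active comment above names oprofile --
 * can retune the overflow rate on every CPU, e.g. ~100 samples per
 * second while profiling, then back to the idle watchdog rate:
 */
#if 0	/* illustrative only, never built */
    nmi_adjust_hz(100);     /* reprogram the PIC/PCR on all CPUs */
    /* ... collect samples ... */
    nmi_adjust_hz(1);       /* back to one overflow per second */
#endif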

static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p)
{
    on_each_cpu(stop_nmi_watchdog, NULL, 1);
    return 0;
}

static struct notifier_block nmi_reboot_notifier = {
    .notifier_call = nmi_shutdown,
};

int __init nmi_init(void)
{
    int err;

    on_each_cpu(start_nmi_watchdog, NULL, 1);

    err = check_nmi_watchdog();
    if (!err) {
        err = register_reboot_notifier(&nmi_reboot_notifier);
        if (err) {
            on_each_cpu(stop_nmi_watchdog, NULL, 1);
            atomic_set(&nmi_active, -1);
        }
    }

    nmi_init_done = 1;

    return err;
}

static int __init setup_nmi_watchdog(char *str)
{
    if (!strncmp(str, "panic", 5))
        panic_on_timeout = 1;

    return 0;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
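
/*
 * How-to sketch (editor's illustration): booting with
 *
 *	nmi_watchdog=panic
 *
 * on the kernel command line sets panic_on_timeout, so a detected
 * hard lockup panics the machine (useful with kdump) instead of
 * emitting a WARN and continuing.
 */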

/*
 * sparc-specific NMI watchdog enable function.
 * Enables the watchdog if it is not enabled already.
 */
int watchdog_nmi_enable(unsigned int cpu)
{
    if (atomic_read(&nmi_active) == -1) {
        pr_warn("NMI watchdog cannot be enabled or disabled\n");
        return -1;
    }

    /*
     * The watchdog thread could start even before nmi_init is called.
     * Just return in that case. Let nmi_init finish the init
     * process first.
     */
    if (!nmi_init_done)
        return 0;

    smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1);

    return 0;
}
/*
 * sparc-specific NMI watchdog disable function.
 * Disables the watchdog if it is not disabled already.
 */
void watchdog_nmi_disable(unsigned int cpu)
{
    if (atomic_read(&nmi_active) == -1)
        pr_warn_once("NMI watchdog cannot be enabled or disabled\n");
    else
        smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1);
}