Back to home page

LXR

 
 

    


0001 /*
0002  * Detect hard lockups on a system
0003  *
0004  * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
0005  *
0006  * Note: Most of this code is borrowed heavily from the original softlockup
0007  * detector, so thanks to Ingo for the initial implementation.
0008  * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
0009  * to those contributors as well.
0010  */
0011 
0012 #define pr_fmt(fmt) "NMI watchdog: " fmt
0013 
0014 #include <linux/nmi.h>
0015 #include <linux/module.h>
0016 #include <asm/irq_regs.h>
0017 #include <linux/perf_event.h>
0018 
0019 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
0020 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
0021 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
0022 
0023 /* boot commands */
0024 /*
0025  * Should we panic when a soft-lockup or hard-lockup occurs:
0026  */
0027 unsigned int __read_mostly hardlockup_panic =
0028             CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
0029 static unsigned long hardlockup_allcpu_dumped;
0030 /*
0031  * We may not want to enable hard lockup detection by default in all cases,
0032  * for example when running the kernel as a guest on a hypervisor. In these
0033  * cases this function can be called to disable hard lockup detection. This
0034  * function should only be executed once by the boot processor before the
0035  * kernel command line parameters are parsed, because otherwise it is not
0036  * possible to override this in hardlockup_panic_setup().
0037  */
0038 void hardlockup_detector_disable(void)
0039 {
0040     watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
0041 }
0042 
0043 static int __init hardlockup_panic_setup(char *str)
0044 {
0045     if (!strncmp(str, "panic", 5))
0046         hardlockup_panic = 1;
0047     else if (!strncmp(str, "nopanic", 7))
0048         hardlockup_panic = 0;
0049     else if (!strncmp(str, "0", 1))
0050         watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
0051     else if (!strncmp(str, "1", 1))
0052         watchdog_enabled |= NMI_WATCHDOG_ENABLED;
0053     return 1;
0054 }
0055 __setup("nmi_watchdog=", hardlockup_panic_setup);
0056 
0057 void touch_nmi_watchdog(void)
0058 {
0059     /*
0060      * Using __raw here because some code paths have
0061      * preemption enabled.  If preemption is enabled
0062      * then interrupts should be enabled too, in which
0063      * case we shouldn't have to worry about the watchdog
0064      * going off.
0065      */
0066     raw_cpu_write(watchdog_nmi_touch, true);
0067     touch_softlockup_watchdog();
0068 }
0069 EXPORT_SYMBOL(touch_nmi_watchdog);
0070 
0071 static struct perf_event_attr wd_hw_attr = {
0072     .type       = PERF_TYPE_HARDWARE,
0073     .config     = PERF_COUNT_HW_CPU_CYCLES,
0074     .size       = sizeof(struct perf_event_attr),
0075     .pinned     = 1,
0076     .disabled   = 1,
0077 };
0078 
0079 /* Callback function for perf event subsystem */
0080 static void watchdog_overflow_callback(struct perf_event *event,
0081          struct perf_sample_data *data,
0082          struct pt_regs *regs)
0083 {
0084     /* Ensure the watchdog never gets throttled */
0085     event->hw.interrupts = 0;
0086 
0087     if (atomic_read(&watchdog_park_in_progress) != 0)
0088         return;
0089 
0090     if (__this_cpu_read(watchdog_nmi_touch) == true) {
0091         __this_cpu_write(watchdog_nmi_touch, false);
0092         return;
0093     }
0094 
0095     /* check for a hardlockup
0096      * This is done by making sure our timer interrupt
0097      * is incrementing.  The timer interrupt should have
0098      * fired multiple times before we overflow'd.  If it hasn't
0099      * then this is a good indication the cpu is stuck
0100      */
0101     if (is_hardlockup()) {
0102         int this_cpu = smp_processor_id();
0103 
0104         /* only print hardlockups once */
0105         if (__this_cpu_read(hard_watchdog_warn) == true)
0106             return;
0107 
0108         pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
0109         print_modules();
0110         print_irqtrace_events(current);
0111         if (regs)
0112             show_regs(regs);
0113         else
0114             dump_stack();
0115 
0116         /*
0117          * Perform all-CPU dump only once to avoid multiple hardlockups
0118          * generating interleaving traces
0119          */
0120         if (sysctl_hardlockup_all_cpu_backtrace &&
0121                 !test_and_set_bit(0, &hardlockup_allcpu_dumped))
0122             trigger_allbutself_cpu_backtrace();
0123 
0124         if (hardlockup_panic)
0125             nmi_panic(regs, "Hard LOCKUP");
0126 
0127         __this_cpu_write(hard_watchdog_warn, true);
0128         return;
0129     }
0130 
0131     __this_cpu_write(hard_watchdog_warn, false);
0132     return;
0133 }
0134 
0135 /*
0136  * People like the simple clean cpu node info on boot.
0137  * Reduce the watchdog noise by only printing messages
0138  * that are different from what cpu0 displayed.
0139  */
0140 static unsigned long cpu0_err;
0141 
0142 int watchdog_nmi_enable(unsigned int cpu)
0143 {
0144     struct perf_event_attr *wd_attr;
0145     struct perf_event *event = per_cpu(watchdog_ev, cpu);
0146 
0147     /* nothing to do if the hard lockup detector is disabled */
0148     if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
0149         goto out;
0150 
0151     /* is it already setup and enabled? */
0152     if (event && event->state > PERF_EVENT_STATE_OFF)
0153         goto out;
0154 
0155     /* it is setup but not enabled */
0156     if (event != NULL)
0157         goto out_enable;
0158 
0159     wd_attr = &wd_hw_attr;
0160     wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
0161 
0162     /* Try to register using hardware perf events */
0163     event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
0164 
0165     /* save cpu0 error for future comparision */
0166     if (cpu == 0 && IS_ERR(event))
0167         cpu0_err = PTR_ERR(event);
0168 
0169     if (!IS_ERR(event)) {
0170         /* only print for cpu0 or different than cpu0 */
0171         if (cpu == 0 || cpu0_err)
0172             pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
0173         goto out_save;
0174     }
0175 
0176     /*
0177      * Disable the hard lockup detector if _any_ CPU fails to set up
0178      * set up the hardware perf event. The watchdog() function checks
0179      * the NMI_WATCHDOG_ENABLED bit periodically.
0180      *
0181      * The barriers are for syncing up watchdog_enabled across all the
0182      * cpus, as clear_bit() does not use barriers.
0183      */
0184     smp_mb__before_atomic();
0185     clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
0186     smp_mb__after_atomic();
0187 
0188     /* skip displaying the same error again */
0189     if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
0190         return PTR_ERR(event);
0191 
0192     /* vary the KERN level based on the returned errno */
0193     if (PTR_ERR(event) == -EOPNOTSUPP)
0194         pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
0195     else if (PTR_ERR(event) == -ENOENT)
0196         pr_warn("disabled (cpu%i): hardware events not enabled\n",
0197              cpu);
0198     else
0199         pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
0200             cpu, PTR_ERR(event));
0201 
0202     pr_info("Shutting down hard lockup detector on all cpus\n");
0203 
0204     return PTR_ERR(event);
0205 
0206     /* success path */
0207 out_save:
0208     per_cpu(watchdog_ev, cpu) = event;
0209 out_enable:
0210     perf_event_enable(per_cpu(watchdog_ev, cpu));
0211 out:
0212     return 0;
0213 }
0214 
0215 void watchdog_nmi_disable(unsigned int cpu)
0216 {
0217     struct perf_event *event = per_cpu(watchdog_ev, cpu);
0218 
0219     if (event) {
0220         perf_event_disable(event);
0221         per_cpu(watchdog_ev, cpu) = NULL;
0222 
0223         /* should be in cleanup, but blocks oprofile */
0224         perf_event_release_kernel(event);
0225     }
0226     if (cpu == 0) {
0227         /* watchdog_nmi_enable() expects this to be zero initially. */
0228         cpu0_err = 0;
0229     }
0230 }