/*
 * Detect hard and soft lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */

#define pr_fmt(fmt) "NMI watchdog: " fmt

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
#include <linux/tick.h>
#include <linux/workqueue.h>

#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <linux/kthread.h>

static DEFINE_MUTEX(watchdog_proc_mutex);

#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
int __read_mostly nmi_watchdog_enabled;
int __read_mostly soft_watchdog_enabled;
int __read_mostly watchdog_user_enabled;
int __read_mostly watchdog_thresh = 10;

#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
#endif
static struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);

/* Helper for online, unparked cpus. */
#define for_each_watchdog_cpu(cpu) \
    for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)

atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);

/*
 * The 'watchdog_running' variable is set to 1 when the watchdog threads
 * are registered/started and is set to 0 when the watchdog threads are
 * unregistered/stopped, so it is an indicator whether the threads exist.
 */
static int __read_mostly watchdog_running;
/*
 * If a subsystem has a need to deactivate the watchdog temporarily, it
 * can use the suspend/resume interface to achieve this. The content of
 * the 'watchdog_suspended' variable reflects this state. Existing threads
 * are parked/unparked by the lockup_detector_{suspend|resume} functions
 * (see comment blocks pertaining to those functions for further details).
 *
 * 'watchdog_suspended' also prevents threads from being registered/started
 * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
 * of 'watchdog_running' cannot change while the watchdog is deactivated
 * temporarily (see related code in 'proc' handlers).
 */
static int __read_mostly watchdog_suspended;

static u64 __read_mostly sample_period;

static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;

unsigned int __read_mostly softlockup_panic =
            CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;

static int __init softlockup_panic_setup(char *str)
{
    softlockup_panic = simple_strtoul(str, NULL, 0);

    return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);

static int __init nowatchdog_setup(char *str)
{
    watchdog_enabled = 0;
    return 1;
}
__setup("nowatchdog", nowatchdog_setup);

static int __init nosoftlockup_setup(char *str)
{
    watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
    return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);

#ifdef CONFIG_SMP
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
    sysctl_softlockup_all_cpu_backtrace =
        !!simple_strtol(str, NULL, 0);
    return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{
    sysctl_hardlockup_all_cpu_backtrace =
        !!simple_strtol(str, NULL, 0);
    return 1;
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
#endif
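
/*
 * The __setup() hooks above correspond to kernel boot parameters. As an
 * illustration (values shown are examples, not defaults), a command line
 * could contain:
 *
 *   softlockup_panic=1 softlockup_all_cpu_backtrace=1
 *   hardlockup_all_cpu_backtrace=1
 *
 * or, to turn the detectors off at boot:
 *
 *   nowatchdog     (clears both SOFT_WATCHDOG_ENABLED and NMI_WATCHDOG_ENABLED)
 *   nosoftlockup   (clears only SOFT_WATCHDOG_ENABLED)
 */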

/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft
 * lockups can have false positives under extreme conditions, so we generally
 * want a higher threshold for soft lockups than for hard lockups. The two
 * thresholds are therefore coupled by a factor: the soft threshold is twice
 * the hard threshold.
 */
static int get_softlockup_thresh(void)
{
    return watchdog_thresh * 2;
}

/*
 * Returns seconds, approximately.  We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
    return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
}
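
/*
 * For illustration: a running_clock() value of 5,000,000,000 ns (5 s)
 * shifted right by 30 yields 4, since each unit is 2^30 ns ~= 1.074 s.
 * The ~7% error is well below the multi-second thresholds used here.
 */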

static void set_sample_period(void)
{
    /*
     * convert watchdog_thresh from seconds to ns
     * the divide by 5 is to give hrtimer several chances (two
     * or three with the current relation between the soft
     * and hard thresholds) to increment before the
     * hardlockup detector generates a warning
     */
    sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
}
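
/*
 * Worked example with the default watchdog_thresh of 10 seconds:
 *   get_softlockup_thresh() = 10 * 2          = 20 s
 *   sample_period           = 20 * (1e9 / 5)  = 4e9 ns = 4 s
 * so the hrtimer fires every 4 seconds, giving it several chances to run
 * within the 10 s hard-lockup window and the 20 s soft-lockup window.
 */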

/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
    __this_cpu_write(watchdog_touch_ts, get_timestamp());
}

/**
 * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
 *
 * Call when the scheduler may have stalled for legitimate reasons
 * preventing the watchdog task from executing - e.g. the scheduler
 * entering idle state.  This should only be used for scheduler events.
 * Use touch_softlockup_watchdog() for everything else.
 */
void touch_softlockup_watchdog_sched(void)
{
    /*
     * Preemption can be enabled.  It doesn't matter which CPU's timestamp
     * gets zeroed here, so use the raw_ operation.
     */
    raw_cpu_write(watchdog_touch_ts, 0);
}

void touch_softlockup_watchdog(void)
{
    touch_softlockup_watchdog_sched();
    wq_watchdog_touch(raw_smp_processor_id());
}
EXPORT_SYMBOL(touch_softlockup_watchdog);

void touch_all_softlockup_watchdogs(void)
{
    int cpu;

    /*
     * This is done locklessly. Do we care if a 0 races with a timestamp?
     * All it means is that the softlockup check starts one cycle later.
     */
    for_each_watchdog_cpu(cpu)
        per_cpu(watchdog_touch_ts, cpu) = 0;
    wq_watchdog_touch(-1);
}

void touch_softlockup_watchdog_sync(void)
{
    __this_cpu_write(softlockup_touch_sync, true);
    __this_cpu_write(watchdog_touch_ts, 0);
}

/* watchdog detector functions */
bool is_hardlockup(void)
{
    unsigned long hrint = __this_cpu_read(hrtimer_interrupts);

    if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
        return true;

    __this_cpu_write(hrtimer_interrupts_saved, hrint);
    return false;
}
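
/*
 * is_hardlockup() is intended to be called from the hard lockup detector's
 * periodic check (typically the perf NMI callback when
 * CONFIG_HARDLOCKUP_DETECTOR is enabled). If the hrtimer interrupt count
 * has not advanced since the previous check, the hrtimer - and therefore
 * normal interrupt processing - appears to be stuck on this CPU.
 */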

static int is_softlockup(unsigned long touch_ts)
{
    unsigned long now = get_timestamp();

    if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh) {
        /* Warn about unreasonable delays. */
        if (time_after(now, touch_ts + get_softlockup_thresh()))
            return now - touch_ts;
    }
    return 0;
}
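
/*
 * For illustration, with watchdog_thresh = 10 (soft threshold 20 s): if the
 * watchdog thread last touched its timestamp at t = 100 s and the hrtimer
 * callback runs at t = 125 s, is_softlockup() returns 25, which is then
 * reported as "stuck for 25s" by watchdog_timer_fn().
 */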

static void watchdog_interrupt_count(void)
{
    __this_cpu_inc(hrtimer_interrupts);
}

/*
 * These two functions are mostly architecture specific, so they are
 * defined as weak stubs here and may be overridden by the architecture.
 */
int __weak watchdog_nmi_enable(unsigned int cpu)
{
    return 0;
}
void __weak watchdog_nmi_disable(unsigned int cpu)
{
}

static int watchdog_enable_all_cpus(void);
static void watchdog_disable_all_cpus(void);

/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
    unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
    struct pt_regs *regs = get_irq_regs();
    int duration;
    int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

    if (atomic_read(&watchdog_park_in_progress) != 0)
        return HRTIMER_NORESTART;

    /* kick the hardlockup detector */
    watchdog_interrupt_count();

    /* kick the softlockup detector */
    wake_up_process(__this_cpu_read(softlockup_watchdog));

    /* .. and repeat */
    hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

    if (touch_ts == 0) {
        if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
            /*
             * If the time stamp was touched atomically
             * make sure the scheduler tick is up to date.
             */
            __this_cpu_write(softlockup_touch_sync, false);
            sched_clock_tick();
        }

        /* Clear the guest paused flag on watchdog reset */
        kvm_check_and_clear_guest_paused();
        __touch_watchdog();
        return HRTIMER_RESTART;
    }

    /*
     * Check for a softlockup. This is done by making sure a high
     * priority task is being scheduled. The task touches the watchdog
     * to indicate it is getting cpu time. If it hasn't, that is a good
     * indication some task is hogging the cpu.
     */
    duration = is_softlockup(touch_ts);
    if (unlikely(duration)) {
        /*
         * If a virtual machine is stopped by the host it can look to
         * the watchdog like a soft lockup. Check to see if the host
         * stopped the vm before we issue the warning.
         */
        if (kvm_check_and_clear_guest_paused())
            return HRTIMER_RESTART;

        /* only warn once */
        if (__this_cpu_read(soft_watchdog_warn) == true) {
            /*
             * When multiple processes are causing softlockups the
             * softlockup detector only warns on the first one
             * because the code relies on a full quiet cycle to
             * re-arm.  The second process prevents the quiet cycle
             * and never gets reported.  Use task pointers to detect
             * this.
             */
            if (__this_cpu_read(softlockup_task_ptr_saved) !=
                current) {
                __this_cpu_write(soft_watchdog_warn, false);
                __touch_watchdog();
            }
            return HRTIMER_RESTART;
        }

        if (softlockup_all_cpu_backtrace) {
            /* Prevent multiple soft-lockup reports if one cpu is already
             * engaged in dumping cpu back traces
             */
            if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
                /* Someone else will report us. Let's give up */
                __this_cpu_write(soft_watchdog_warn, true);
                return HRTIMER_RESTART;
            }
        }

        pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
            smp_processor_id(), duration,
            current->comm, task_pid_nr(current));
        __this_cpu_write(softlockup_task_ptr_saved, current);
        print_modules();
        print_irqtrace_events(current);
        if (regs)
            show_regs(regs);
        else
            dump_stack();

        if (softlockup_all_cpu_backtrace) {
            /* Avoid generating two back traces for current
             * given that one is already made above
             */
            trigger_allbutself_cpu_backtrace();

            clear_bit(0, &soft_lockup_nmi_warn);
            /* Barrier to sync with other cpus */
            smp_mb__after_atomic();
        }

        add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
        if (softlockup_panic)
            panic("softlockup: hung tasks");
        __this_cpu_write(soft_watchdog_warn, true);
    } else
        __this_cpu_write(soft_watchdog_warn, false);

    return HRTIMER_RESTART;
}
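
/*
 * The resulting report begins with a line of the following form (values are
 * illustrative only), prefixed by pr_fmt() defined at the top of this file:
 *
 *   NMI watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [kworker/2:1:370]
 *
 * followed by the module list, irq trace events, and a register dump or
 * stack trace for the offending CPU.
 */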

static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
    struct sched_param param = { .sched_priority = prio };

    sched_setscheduler(current, policy, &param);
}

static void watchdog_enable(unsigned int cpu)
{
    struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

    /* kick off the timer for the hardlockup detector */
    hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    hrtimer->function = watchdog_timer_fn;

    /* Enable the perf event */
    watchdog_nmi_enable(cpu);

    /* done here because hrtimer_start can only pin to smp_processor_id() */
    hrtimer_start(hrtimer, ns_to_ktime(sample_period),
              HRTIMER_MODE_REL_PINNED);

    /* initialize timestamp */
    watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
    __touch_watchdog();
}

static void watchdog_disable(unsigned int cpu)
{
    struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

    watchdog_set_prio(SCHED_NORMAL, 0);
    hrtimer_cancel(hrtimer);
    /* disable the perf event */
    watchdog_nmi_disable(cpu);
}

static void watchdog_cleanup(unsigned int cpu, bool online)
{
    watchdog_disable(cpu);
}

static int watchdog_should_run(unsigned int cpu)
{
    return __this_cpu_read(hrtimer_interrupts) !=
        __this_cpu_read(soft_lockup_hrtimer_cnt);
}

/*
 * The watchdog thread function - touches the timestamp.
 *
 * It only runs once per sample period (4 seconds by default) to reset
 * the softlockup timestamp. If this gets delayed for more than
 * 2*watchdog_thresh seconds then the debug-printout triggers in
 * watchdog_timer_fn().
 */
static void watchdog(unsigned int cpu)
{
    __this_cpu_write(soft_lockup_hrtimer_cnt,
             __this_cpu_read(hrtimer_interrupts));
    __touch_watchdog();

    /*
     * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
     * failure path. Check for failures that can occur asynchronously -
     * for example, when CPUs are on-lined - and shut down the hardware
     * perf event on each CPU accordingly.
     *
     * The only non-obvious place this bit can be cleared is through
     * watchdog_nmi_enable(), so a pr_info() is placed there.  Placing a
     * pr_info here would be too noisy as it would result in a message
     * every few seconds if the hardlockup was disabled but the softlockup
     * enabled.
     */
    if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
        watchdog_nmi_disable(cpu);
}

static struct smp_hotplug_thread watchdog_threads = {
    .store          = &softlockup_watchdog,
    .thread_should_run  = watchdog_should_run,
    .thread_fn      = watchdog,
    .thread_comm        = "watchdog/%u",
    .setup          = watchdog_enable,
    .cleanup        = watchdog_cleanup,
    .park           = watchdog_disable,
    .unpark         = watchdog_enable,
};
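
/*
 * This descriptor is handed to the smpboot infrastructure, which manages a
 * per-CPU "watchdog/N" kthread: .setup/.unpark arm the hrtimer and perf
 * event via watchdog_enable(), while .park/.cleanup disarm them via
 * watchdog_disable(). Only CPUs in the watchdog cpumask keep their thread
 * unparked (see for_each_watchdog_cpu() above).
 */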

/*
 * park all watchdog threads that are specified in 'watchdog_cpumask'
 *
 * This function returns an error if kthread_park() of a watchdog thread
 * fails. In this situation, the watchdog threads of some CPUs can already
 * be parked and the watchdog threads of other CPUs can still be runnable.
 * Callers are expected to handle this special condition as appropriate in
 * their context.
 *
 * This function may only be called in a context that is protected against
 * races with CPU hotplug - for example, via get_online_cpus().
 */
static int watchdog_park_threads(void)
{
    int cpu, ret = 0;

    atomic_set(&watchdog_park_in_progress, 1);

    for_each_watchdog_cpu(cpu) {
        ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
        if (ret)
            break;
    }

    atomic_set(&watchdog_park_in_progress, 0);

    return ret;
}

/*
 * unpark all watchdog threads that are specified in 'watchdog_cpumask'
 *
 * This function may only be called in a context that is protected against
 * races with CPU hotplug - for example, via get_online_cpus().
 */
static void watchdog_unpark_threads(void)
{
    int cpu;

    for_each_watchdog_cpu(cpu)
        kthread_unpark(per_cpu(softlockup_watchdog, cpu));
}

/*
 * Suspend the hard and soft lockup detector by parking the watchdog threads.
 */
int lockup_detector_suspend(void)
{
    int ret = 0;

    get_online_cpus();
    mutex_lock(&watchdog_proc_mutex);
    /*
     * Multiple suspend requests can be active in parallel (counted by
     * the 'watchdog_suspended' variable). If the watchdog threads are
     * running, the first caller takes care that they will be parked.
     * The state of 'watchdog_running' cannot change while a suspend
     * request is active (see related code in 'proc' handlers).
     */
    if (watchdog_running && !watchdog_suspended)
        ret = watchdog_park_threads();

    if (ret == 0)
        watchdog_suspended++;
    else {
        watchdog_disable_all_cpus();
        pr_err("Failed to suspend lockup detectors, disabled\n");
        watchdog_enabled = 0;
    }

    mutex_unlock(&watchdog_proc_mutex);

    return ret;
}

/*
 * Resume the hard and soft lockup detector by unparking the watchdog threads.
 */
void lockup_detector_resume(void)
{
    mutex_lock(&watchdog_proc_mutex);

    watchdog_suspended--;
    /*
     * The watchdog threads are unparked if they were previously running
     * and if there is no more active suspend request.
     */
    if (watchdog_running && !watchdog_suspended)
        watchdog_unpark_threads();

    mutex_unlock(&watchdog_proc_mutex);
    put_online_cpus();
}
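
/*
 * A sketch of how a subsystem might use the suspend/resume interface to keep
 * the detectors quiet across an operation that legitimately stalls CPUs
 * (error handling elided; do_long_cpu_hogging_operation() is hypothetical):
 *
 *   if (!lockup_detector_suspend()) {
 *           do_long_cpu_hogging_operation();
 *           lockup_detector_resume();
 *   }
 */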

static int update_watchdog_all_cpus(void)
{
    int ret;

    ret = watchdog_park_threads();
    if (ret)
        return ret;

    watchdog_unpark_threads();

    return 0;
}

static int watchdog_enable_all_cpus(void)
{
    int err = 0;

    if (!watchdog_running) {
        err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
                                 &watchdog_cpumask);
        if (err)
            pr_err("Failed to create watchdog threads, disabled\n");
        else
            watchdog_running = 1;
    } else {
        /*
         * Enable/disable the lockup detectors or
         * change the sample period 'on the fly'.
         */
        err = update_watchdog_all_cpus();

        if (err) {
            watchdog_disable_all_cpus();
            pr_err("Failed to update lockup detectors, disabled\n");
        }
    }

    if (err)
        watchdog_enabled = 0;

    return err;
}

static void watchdog_disable_all_cpus(void)
{
    if (watchdog_running) {
        watchdog_running = 0;
        smpboot_unregister_percpu_thread(&watchdog_threads);
    }
}

#ifdef CONFIG_SYSCTL

/*
 * Update the run state of the lockup detectors.
 */
static int proc_watchdog_update(void)
{
    int err = 0;

    /*
     * Watchdog threads won't be started if they are already active.
     * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
     * care of this. If those threads are already active, the sample
     * period will be updated and the lockup detectors will be enabled
     * or disabled 'on the fly'.
     */
    if (watchdog_enabled && watchdog_thresh)
        err = watchdog_enable_all_cpus();
    else
        watchdog_disable_all_cpus();

    return err;
}

/*
 * common function for the watchdog, nmi_watchdog and soft_watchdog parameters
 *
 * caller             | table->data points to | 'which' contains the flag(s)
 * -------------------|-----------------------|-----------------------------
 * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
 *                    |                       | with SOFT_WATCHDOG_ENABLED
 * -------------------|-----------------------|-----------------------------
 * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
 * -------------------|-----------------------|-----------------------------
 * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
 */
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
                void __user *buffer, size_t *lenp, loff_t *ppos)
{
    int err, old, new;
    int *watchdog_param = (int *)table->data;

    get_online_cpus();
    mutex_lock(&watchdog_proc_mutex);

    if (watchdog_suspended) {
        /* no parameter changes allowed while watchdog is suspended */
        err = -EAGAIN;
        goto out;
    }

    /*
     * If the parameter is being read return the state of the corresponding
     * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
     * run state of the lockup detectors.
     */
    if (!write) {
        *watchdog_param = (watchdog_enabled & which) != 0;
        err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
    } else {
        err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (err)
            goto out;

        /*
         * There is a race window between fetching the current value
         * from 'watchdog_enabled' and storing the new value. During
         * this race window, watchdog_nmi_enable() can sneak in and
         * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
         * The 'cmpxchg' detects this race and the loop retries.
         */
        do {
            old = watchdog_enabled;
            /*
             * If the parameter value is not zero set the
             * corresponding bit(s), else clear it(them).
             */
            if (*watchdog_param)
                new = old | which;
            else
                new = old & ~which;
        } while (cmpxchg(&watchdog_enabled, old, new) != old);

        /*
         * Update the run state of the lockup detectors. There is _no_
         * need to check the value returned by proc_watchdog_update()
         * and to restore the previous value of 'watchdog_enabled' as
         * both lockup detectors are disabled if proc_watchdog_update()
         * returns an error.
         */
        if (old == new)
            goto out;

        err = proc_watchdog_update();
    }
out:
    mutex_unlock(&watchdog_proc_mutex);
    put_online_cpus();
    return err;
}

/*
 * /proc/sys/kernel/watchdog
 */
int proc_watchdog(struct ctl_table *table, int write,
          void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
                    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/nmi_watchdog
 */
int proc_nmi_watchdog(struct ctl_table *table, int write,
              void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
                    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/soft_watchdog
 */
int proc_soft_watchdog(struct ctl_table *table, int write,
            void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
                    table, write, buffer, lenp, ppos);
}
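
/*
 * From user space these handlers are reached through the sysctl files named
 * in the comments above (only present with CONFIG_SYSCTL). Illustrative
 * shell usage:
 *
 *   cat /proc/sys/kernel/watchdog            # 1 if either detector is enabled
 *   echo 0 > /proc/sys/kernel/nmi_watchdog   # disable only the hard lockup detector
 *   echo 1 > /proc/sys/kernel/soft_watchdog  # enable the soft lockup detector
 */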

/*
 * /proc/sys/kernel/watchdog_thresh
 */
int proc_watchdog_thresh(struct ctl_table *table, int write,
             void __user *buffer, size_t *lenp, loff_t *ppos)
{
    int err, old, new;

    get_online_cpus();
    mutex_lock(&watchdog_proc_mutex);

    if (watchdog_suspended) {
        /* no parameter changes allowed while watchdog is suspended */
        err = -EAGAIN;
        goto out;
    }

    old = ACCESS_ONCE(watchdog_thresh);
    err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

    if (err || !write)
        goto out;

    /*
     * Update the sample period. Restore on failure.
     */
    new = ACCESS_ONCE(watchdog_thresh);
    if (old == new)
        goto out;

    set_sample_period();
    err = proc_watchdog_update();
    if (err) {
        watchdog_thresh = old;
        set_sample_period();
    }
out:
    mutex_unlock(&watchdog_proc_mutex);
    put_online_cpus();
    return err;
}
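
/*
 * For example, "echo 20 > /proc/sys/kernel/watchdog_thresh" raises the hard
 * lockup threshold to 20 s; the soft lockup threshold becomes 40 s and the
 * hrtimer sample period becomes 8 s (see set_sample_period() above).
 */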

/*
 * The cpumask is the mask of possible cpus that the watchdog can run
 * on, not the mask of cpus it is actually running on.  This allows the
 * user to specify a mask that will include cpus that have not yet
 * been brought online, if desired.
 */
int proc_watchdog_cpumask(struct ctl_table *table, int write,
              void __user *buffer, size_t *lenp, loff_t *ppos)
{
    int err;

    get_online_cpus();
    mutex_lock(&watchdog_proc_mutex);

    if (watchdog_suspended) {
        /* no parameter changes allowed while watchdog is suspended */
        err = -EAGAIN;
        goto out;
    }

    err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
    if (!err && write) {
        /* Remove impossible cpus to keep sysctl output cleaner. */
        cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
                cpu_possible_mask);

        if (watchdog_running) {
            /*
             * Failure would be due to being unable to allocate
             * a temporary cpumask, so we are likely not in a
             * position to do much else to make things better.
             */
            if (smpboot_update_cpumask_percpu_thread(
                    &watchdog_threads, &watchdog_cpumask) != 0)
                pr_err("cpumask update failed\n");
        }
    }
out:
    mutex_unlock(&watchdog_proc_mutex);
    put_online_cpus();
    return err;
}
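
/*
 * Writes are parsed by proc_do_large_bitmap(), which accepts a CPU range
 * list, so restricting the watchdog to CPUs 0-3 would look roughly like:
 *
 *   echo 0-3 > /proc/sys/kernel/watchdog_cpumask
 *
 * (illustrative; the exact accepted syntax is that of proc_do_large_bitmap().)
 */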

#endif /* CONFIG_SYSCTL */

void __init lockup_detector_init(void)
{
    set_sample_period();

#ifdef CONFIG_NO_HZ_FULL
    if (tick_nohz_full_enabled()) {
        pr_info("Disabling watchdog on nohz_full cores by default\n");
        cpumask_copy(&watchdog_cpumask, housekeeping_mask);
    } else
        cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#else
    cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#endif

    if (watchdog_enabled)
        watchdog_enable_all_cpus();
}