0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <linux/mm.h>
0010 #include <linux/cpu.h>
0011 #include <linux/nmi.h>
0012 #include <linux/init.h>
0013 #include <linux/delay.h>
0014 #include <linux/freezer.h>
0015 #include <linux/kthread.h>
0016 #include <linux/lockdep.h>
0017 #include <linux/export.h>
0018 #include <linux/panic_notifier.h>
0019 #include <linux/sysctl.h>
0020 #include <linux/suspend.h>
0021 #include <linux/utsname.h>
0022 #include <linux/sched/signal.h>
0023 #include <linux/sched/debug.h>
0024 #include <linux/sched/sysctl.h>
0025
0026 #include <trace/events/sched.h>
0027
0028
0029
0030
0031 int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
0032
0033
0034
0035
0036
0037
0038
0039
0040 #define HUNG_TASK_LOCK_BREAK (HZ / 10)
0041
0042
0043
0044
0045 unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
0046
0047
0048
0049
0050 unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
0051
0052 int __read_mostly sysctl_hung_task_warnings = 10;
0053
0054 static int __read_mostly did_panic;
0055 static bool hung_task_show_lock;
0056 static bool hung_task_call_panic;
0057 static bool hung_task_show_all_bt;
0058
0059 static struct task_struct *watchdog_task;
0060
#ifdef CONFIG_SMP
/*
 * When non-zero, a hung task report also asks for a backtrace of all CPUs
 * (see hung_task_show_all_bt).
 */
static unsigned int sysctl_hung_task_all_cpu_backtrace __read_mostly;
#else
/* Without CONFIG_SMP the knob does not exist and reads as constant zero. */
#define sysctl_hung_task_all_cpu_backtrace	0
#endif /* CONFIG_SMP */
0070
0071
0072
0073
0074
0075 unsigned int __read_mostly sysctl_hung_task_panic =
0076 IS_ENABLED(CONFIG_BOOTPARAM_HUNG_TASK_PANIC);
0077
0078 static int
0079 hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
0080 {
0081 did_panic = 1;
0082
0083 return NOTIFY_DONE;
0084 }
0085
0086 static struct notifier_block panic_block = {
0087 .notifier_call = hung_task_panic,
0088 };
0089
0090 static void check_hung_task(struct task_struct *t, unsigned long timeout)
0091 {
0092 unsigned long switch_count = t->nvcsw + t->nivcsw;
0093
0094
0095
0096
0097
0098 if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP)))
0099 return;
0100
0101
0102
0103
0104
0105
0106 if (unlikely(!switch_count))
0107 return;
0108
0109 if (switch_count != t->last_switch_count) {
0110 t->last_switch_count = switch_count;
0111 t->last_switch_time = jiffies;
0112 return;
0113 }
0114 if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
0115 return;
0116
0117 trace_sched_process_hang(t);
0118
0119 if (sysctl_hung_task_panic) {
0120 console_verbose();
0121 hung_task_show_lock = true;
0122 hung_task_call_panic = true;
0123 }
0124
0125
0126
0127
0128
0129 if (sysctl_hung_task_warnings) {
0130 if (sysctl_hung_task_warnings > 0)
0131 sysctl_hung_task_warnings--;
0132 pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
0133 t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
0134 pr_err(" %s %s %.*s\n",
0135 print_tainted(), init_utsname()->release,
0136 (int)strcspn(init_utsname()->version, " "),
0137 init_utsname()->version);
0138 pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
0139 " disables this message.\n");
0140 sched_show_task(t);
0141 hung_task_show_lock = true;
0142
0143 if (sysctl_hung_task_all_cpu_backtrace)
0144 hung_task_show_all_bt = true;
0145 }
0146
0147 touch_nmi_watchdog();
0148 }
0149
0150
0151
0152
0153
0154
0155
0156
0157 static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
0158 {
0159 bool can_cont;
0160
0161 get_task_struct(g);
0162 get_task_struct(t);
0163 rcu_read_unlock();
0164 cond_resched();
0165 rcu_read_lock();
0166 can_cont = pid_alive(g) && pid_alive(t);
0167 put_task_struct(t);
0168 put_task_struct(g);
0169
0170 return can_cont;
0171 }
0172
0173
0174
0175
0176
0177
0178 static void check_hung_uninterruptible_tasks(unsigned long timeout)
0179 {
0180 int max_count = sysctl_hung_task_check_count;
0181 unsigned long last_break = jiffies;
0182 struct task_struct *g, *t;
0183
0184
0185
0186
0187
0188 if (test_taint(TAINT_DIE) || did_panic)
0189 return;
0190
0191 hung_task_show_lock = false;
0192 rcu_read_lock();
0193 for_each_process_thread(g, t) {
0194 if (!max_count--)
0195 goto unlock;
0196 if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
0197 if (!rcu_lock_break(g, t))
0198 goto unlock;
0199 last_break = jiffies;
0200 }
0201
0202 if (READ_ONCE(t->__state) == TASK_UNINTERRUPTIBLE)
0203 check_hung_task(t, timeout);
0204 }
0205 unlock:
0206 rcu_read_unlock();
0207 if (hung_task_show_lock)
0208 debug_show_all_locks();
0209
0210 if (hung_task_show_all_bt) {
0211 hung_task_show_all_bt = false;
0212 trigger_all_cpu_backtrace();
0213 }
0214
0215 if (hung_task_call_panic)
0216 panic("hung_task: blocked tasks");
0217 }
0218
0219 static long hung_timeout_jiffies(unsigned long last_checked,
0220 unsigned long timeout)
0221 {
0222
0223 return timeout ? last_checked - jiffies + timeout * HZ :
0224 MAX_SCHEDULE_TIMEOUT;
0225 }
0226
0227 #ifdef CONFIG_SYSCTL
0228
0229
0230
0231 static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
0232 void *buffer,
0233 size_t *lenp, loff_t *ppos)
0234 {
0235 int ret;
0236
0237 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
0238
0239 if (ret || !write)
0240 goto out;
0241
0242 wake_up_process(watchdog_task);
0243
0244 out:
0245 return ret;
0246 }
0247
0248
0249
0250
0251
0252 static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
0253 static struct ctl_table hung_task_sysctls[] = {
0254 #ifdef CONFIG_SMP
0255 {
0256 .procname = "hung_task_all_cpu_backtrace",
0257 .data = &sysctl_hung_task_all_cpu_backtrace,
0258 .maxlen = sizeof(int),
0259 .mode = 0644,
0260 .proc_handler = proc_dointvec_minmax,
0261 .extra1 = SYSCTL_ZERO,
0262 .extra2 = SYSCTL_ONE,
0263 },
0264 #endif
0265 {
0266 .procname = "hung_task_panic",
0267 .data = &sysctl_hung_task_panic,
0268 .maxlen = sizeof(int),
0269 .mode = 0644,
0270 .proc_handler = proc_dointvec_minmax,
0271 .extra1 = SYSCTL_ZERO,
0272 .extra2 = SYSCTL_ONE,
0273 },
0274 {
0275 .procname = "hung_task_check_count",
0276 .data = &sysctl_hung_task_check_count,
0277 .maxlen = sizeof(int),
0278 .mode = 0644,
0279 .proc_handler = proc_dointvec_minmax,
0280 .extra1 = SYSCTL_ZERO,
0281 },
0282 {
0283 .procname = "hung_task_timeout_secs",
0284 .data = &sysctl_hung_task_timeout_secs,
0285 .maxlen = sizeof(unsigned long),
0286 .mode = 0644,
0287 .proc_handler = proc_dohung_task_timeout_secs,
0288 .extra2 = (void *)&hung_task_timeout_max,
0289 },
0290 {
0291 .procname = "hung_task_check_interval_secs",
0292 .data = &sysctl_hung_task_check_interval_secs,
0293 .maxlen = sizeof(unsigned long),
0294 .mode = 0644,
0295 .proc_handler = proc_dohung_task_timeout_secs,
0296 .extra2 = (void *)&hung_task_timeout_max,
0297 },
0298 {
0299 .procname = "hung_task_warnings",
0300 .data = &sysctl_hung_task_warnings,
0301 .maxlen = sizeof(int),
0302 .mode = 0644,
0303 .proc_handler = proc_dointvec_minmax,
0304 .extra1 = SYSCTL_NEG_ONE,
0305 },
0306 {}
0307 };
0308
0309 static void __init hung_task_sysctl_init(void)
0310 {
0311 register_sysctl_init("kernel", hung_task_sysctls);
0312 }
0313 #else
0314 #define hung_task_sysctl_init() do { } while (0)
0315 #endif
0316
0317
0318 static atomic_t reset_hung_task = ATOMIC_INIT(0);
0319
0320 void reset_hung_task_detector(void)
0321 {
0322 atomic_set(&reset_hung_task, 1);
0323 }
0324 EXPORT_SYMBOL_GPL(reset_hung_task_detector);
0325
0326 static bool hung_detector_suspended;
0327
0328 static int hungtask_pm_notify(struct notifier_block *self,
0329 unsigned long action, void *hcpu)
0330 {
0331 switch (action) {
0332 case PM_SUSPEND_PREPARE:
0333 case PM_HIBERNATION_PREPARE:
0334 case PM_RESTORE_PREPARE:
0335 hung_detector_suspended = true;
0336 break;
0337 case PM_POST_SUSPEND:
0338 case PM_POST_HIBERNATION:
0339 case PM_POST_RESTORE:
0340 hung_detector_suspended = false;
0341 break;
0342 default:
0343 break;
0344 }
0345 return NOTIFY_OK;
0346 }
0347
0348
0349
0350
0351 static int watchdog(void *dummy)
0352 {
0353 unsigned long hung_last_checked = jiffies;
0354
0355 set_user_nice(current, 0);
0356
0357 for ( ; ; ) {
0358 unsigned long timeout = sysctl_hung_task_timeout_secs;
0359 unsigned long interval = sysctl_hung_task_check_interval_secs;
0360 long t;
0361
0362 if (interval == 0)
0363 interval = timeout;
0364 interval = min_t(unsigned long, interval, timeout);
0365 t = hung_timeout_jiffies(hung_last_checked, interval);
0366 if (t <= 0) {
0367 if (!atomic_xchg(&reset_hung_task, 0) &&
0368 !hung_detector_suspended)
0369 check_hung_uninterruptible_tasks(timeout);
0370 hung_last_checked = jiffies;
0371 continue;
0372 }
0373 schedule_timeout_interruptible(t);
0374 }
0375
0376 return 0;
0377 }
0378
0379 static int __init hung_task_init(void)
0380 {
0381 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
0382
0383
0384 pm_notifier(hungtask_pm_notify, 0);
0385
0386 watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
0387 hung_task_sysctl_init();
0388
0389 return 0;
0390 }
0391 subsys_initcall(hung_task_init);