0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * kernel/sched/debug.c
0004  *
0005  * Print the CFS rbtree and other debugging details
0006  *
0007  * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
0008  */
0009 
0010 /*
0011  * This allows printing both to /proc/sched_debug and
0012  * to the console
0013  */
0014 #define SEQ_printf(m, x...)         \
0015  do {                       \
0016     if (m)                  \
0017         seq_printf(m, x);       \
0018     else                    \
0019         pr_cont(x);         \
0020  } while (0)
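     /*
      * sysrq_sched_debug_show() below passes a NULL seq_file (e.g.
      * print_cpu(NULL, cpu)), in which case the output falls back to
      * pr_cont() and ends up in the kernel log instead of the seq_file.
      */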
0021 
0022 /*
0023  * Ease the printing of nsec fields:
0024  */
0025 static long long nsec_high(unsigned long long nsec)
0026 {
0027     if ((long long)nsec < 0) {
0028         nsec = -nsec;
0029         do_div(nsec, 1000000);
0030         return -nsec;
0031     }
0032     do_div(nsec, 1000000);
0033 
0034     return nsec;
0035 }
0036 
0037 static unsigned long nsec_low(unsigned long long nsec)
0038 {
0039     if ((long long)nsec < 0)
0040         nsec = -nsec;
0041 
0042     return do_div(nsec, 1000000);
0043 }
0044 
0045 #define SPLIT_NS(x) nsec_high(x), nsec_low(x)
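     /*
      * do_div() divides the 64-bit value in place and returns the remainder,
      * so nsec_high()/nsec_low() split a nanosecond count into whole
      * milliseconds and the sub-millisecond rest. For example,
      * SPLIT_NS(1234567890) expands to 1234, 567890 and is printed as
      * "1234.567890" by a "%Ld.%06ld" format string.
      */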
0046 
0047 #define SCHED_FEAT(name, enabled)   \
0048     #name ,
0049 
0050 static const char * const sched_feat_names[] = {
0051 #include "features.h"
0052 };
0053 
0054 #undef SCHED_FEAT
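     /*
      * features.h is an x-macro list of SCHED_FEAT(name, enabled) entries
      * (e.g. SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)). With the #define above,
      * each entry expands to its quoted name, so sched_feat_names[] has the
      * same order as the __SCHED_FEAT_* enum used by sched_feat_show() and
      * sched_feat_set() below.
      */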
0055 
0056 static int sched_feat_show(struct seq_file *m, void *v)
0057 {
0058     int i;
0059 
0060     for (i = 0; i < __SCHED_FEAT_NR; i++) {
0061         if (!(sysctl_sched_features & (1UL << i)))
0062             seq_puts(m, "NO_");
0063         seq_printf(m, "%s ", sched_feat_names[i]);
0064     }
0065     seq_puts(m, "\n");
0066 
0067     return 0;
0068 }
0069 
0070 #ifdef CONFIG_JUMP_LABEL
0071 
0072 #define jump_label_key__true  STATIC_KEY_INIT_TRUE
0073 #define jump_label_key__false STATIC_KEY_INIT_FALSE
0074 
0075 #define SCHED_FEAT(name, enabled)   \
0076     jump_label_key__##enabled ,
0077 
0078 struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
0079 #include "features.h"
0080 };
0081 
0082 #undef SCHED_FEAT
0083 
0084 static void sched_feat_disable(int i)
0085 {
0086     static_key_disable_cpuslocked(&sched_feat_keys[i]);
0087 }
0088 
0089 static void sched_feat_enable(int i)
0090 {
0091     static_key_enable_cpuslocked(&sched_feat_keys[i]);
0092 }
0093 #else
0094 static void sched_feat_disable(int i) { };
0095 static void sched_feat_enable(int i) { };
0096 #endif /* CONFIG_JUMP_LABEL */
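     /*
      * With CONFIG_JUMP_LABEL, sched_feat() tests elsewhere in the scheduler
      * are implemented as static branches, so toggling a feature here patches
      * the affected code paths instead of testing the bitmask at run time.
      */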
0097 
0098 static int sched_feat_set(char *cmp)
0099 {
0100     int i;
0101     int neg = 0;
0102 
0103     if (strncmp(cmp, "NO_", 3) == 0) {
0104         neg = 1;
0105         cmp += 3;
0106     }
0107 
0108     i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp);
0109     if (i < 0)
0110         return i;
0111 
0112     if (neg) {
0113         sysctl_sched_features &= ~(1UL << i);
0114         sched_feat_disable(i);
0115     } else {
0116         sysctl_sched_features |= (1UL << i);
0117         sched_feat_enable(i);
0118     }
0119 
0120     return 0;
0121 }
0122 
0123 static ssize_t
0124 sched_feat_write(struct file *filp, const char __user *ubuf,
0125         size_t cnt, loff_t *ppos)
0126 {
0127     char buf[64];
0128     char *cmp;
0129     int ret;
0130     struct inode *inode;
0131 
0132     if (cnt > 63)
0133         cnt = 63;
0134 
0135     if (copy_from_user(&buf, ubuf, cnt))
0136         return -EFAULT;
0137 
0138     buf[cnt] = 0;
0139     cmp = strstrip(buf);
0140 
0141     /* Ensure the static_key remains in a consistent state */
0142     inode = file_inode(filp);
0143     cpus_read_lock();
0144     inode_lock(inode);
0145     ret = sched_feat_set(cmp);
0146     inode_unlock(inode);
0147     cpus_read_unlock();
0148     if (ret < 0)
0149         return ret;
0150 
0151     *ppos += cnt;
0152 
0153     return cnt;
0154 }
0155 
0156 static int sched_feat_open(struct inode *inode, struct file *filp)
0157 {
0158     return single_open(filp, sched_feat_show, NULL);
0159 }
0160 
0161 static const struct file_operations sched_feat_fops = {
0162     .open       = sched_feat_open,
0163     .write      = sched_feat_write,
0164     .read       = seq_read,
0165     .llseek     = seq_lseek,
0166     .release    = single_release,
0167 };
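     /*
      * Backs <debugfs>/sched/features (typically
      * /sys/kernel/debug/sched/features). Reading lists every feature,
      * prefixing disabled ones with "NO_"; writing "FOO" or "NO_FOO" enables
      * or disables the named feature via sched_feat_set() above.
      */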
0168 
0169 #ifdef CONFIG_SMP
0170 
0171 static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
0172                    size_t cnt, loff_t *ppos)
0173 {
0174     char buf[16];
0175     unsigned int scaling;
0176 
0177     if (cnt > 15)
0178         cnt = 15;
0179 
0180     if (copy_from_user(&buf, ubuf, cnt))
0181         return -EFAULT;
0182     buf[cnt] = '\0';
0183 
0184     if (kstrtouint(buf, 10, &scaling))
0185         return -EINVAL;
0186 
0187     if (scaling >= SCHED_TUNABLESCALING_END)
0188         return -EINVAL;
0189 
0190     sysctl_sched_tunable_scaling = scaling;
0191     if (sched_update_scaling())
0192         return -EINVAL;
0193 
0194     *ppos += cnt;
0195     return cnt;
0196 }
0197 
0198 static int sched_scaling_show(struct seq_file *m, void *v)
0199 {
0200     seq_printf(m, "%d\n", sysctl_sched_tunable_scaling);
0201     return 0;
0202 }
0203 
0204 static int sched_scaling_open(struct inode *inode, struct file *filp)
0205 {
0206     return single_open(filp, sched_scaling_show, NULL);
0207 }
0208 
0209 static const struct file_operations sched_scaling_fops = {
0210     .open       = sched_scaling_open,
0211     .write      = sched_scaling_write,
0212     .read       = seq_read,
0213     .llseek     = seq_lseek,
0214     .release    = single_release,
0215 };
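     /*
      * Backs <debugfs>/sched/tunable_scaling; valid values are 0 (none),
      * 1 (logarithmic) and 2 (linear), matching
      * sched_tunable_scaling_names[] further down.
      */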
0216 
0217 #endif /* SMP */
0218 
0219 #ifdef CONFIG_PREEMPT_DYNAMIC
0220 
0221 static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
0222                    size_t cnt, loff_t *ppos)
0223 {
0224     char buf[16];
0225     int mode;
0226 
0227     if (cnt > 15)
0228         cnt = 15;
0229 
0230     if (copy_from_user(&buf, ubuf, cnt))
0231         return -EFAULT;
0232 
0233     buf[cnt] = 0;
0234     mode = sched_dynamic_mode(strstrip(buf));
0235     if (mode < 0)
0236         return mode;
0237 
0238     sched_dynamic_update(mode);
0239 
0240     *ppos += cnt;
0241 
0242     return cnt;
0243 }
0244 
0245 static int sched_dynamic_show(struct seq_file *m, void *v)
0246 {
0247     static const char * preempt_modes[] = {
0248         "none", "voluntary", "full"
0249     };
0250     int i;
0251 
0252     for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
0253         if (preempt_dynamic_mode == i)
0254             seq_puts(m, "(");
0255         seq_puts(m, preempt_modes[i]);
0256         if (preempt_dynamic_mode == i)
0257             seq_puts(m, ")");
0258 
0259         seq_puts(m, " ");
0260     }
0261 
0262     seq_puts(m, "\n");
0263     return 0;
0264 }
0265 
0266 static int sched_dynamic_open(struct inode *inode, struct file *filp)
0267 {
0268     return single_open(filp, sched_dynamic_show, NULL);
0269 }
0270 
0271 static const struct file_operations sched_dynamic_fops = {
0272     .open       = sched_dynamic_open,
0273     .write      = sched_dynamic_write,
0274     .read       = seq_read,
0275     .llseek     = seq_lseek,
0276     .release    = single_release,
0277 };
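     /*
      * Backs <debugfs>/sched/preempt. Reading shows the available preemption
      * modes with the current one in parentheses, e.g. "none voluntary (full)";
      * writing one of those strings switches the dynamic preemption mode at
      * run time.
      */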
0278 
0279 #endif /* CONFIG_PREEMPT_DYNAMIC */
0280 
0281 __read_mostly bool sched_debug_verbose;
0282 
0283 static const struct seq_operations sched_debug_sops;
0284 
0285 static int sched_debug_open(struct inode *inode, struct file *filp)
0286 {
0287     return seq_open(filp, &sched_debug_sops);
0288 }
0289 
0290 static const struct file_operations sched_debug_fops = {
0291     .open       = sched_debug_open,
0292     .read       = seq_read,
0293     .llseek     = seq_lseek,
0294     .release    = seq_release,
0295 };
0296 
0297 static struct dentry *debugfs_sched;
0298 
0299 static __init int sched_init_debug(void)
0300 {
0301     struct dentry __maybe_unused *numa;
0302 
0303     debugfs_sched = debugfs_create_dir("sched", NULL);
0304 
0305     debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops);
0306     debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose);
0307 #ifdef CONFIG_PREEMPT_DYNAMIC
0308     debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
0309 #endif
0310 
0311     debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency);
0312     debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity);
0313     debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity);
0314     debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity);
0315 
0316     debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
0317     debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
0318 
0319 #ifdef CONFIG_SMP
0320     debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
0321     debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
0322     debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
0323 
0324     mutex_lock(&sched_domains_mutex);
0325     update_sched_domain_debugfs();
0326     mutex_unlock(&sched_domains_mutex);
0327 #endif
0328 
0329 #ifdef CONFIG_NUMA_BALANCING
0330     numa = debugfs_create_dir("numa_balancing", debugfs_sched);
0331 
0332     debugfs_create_u32("scan_delay_ms", 0644, numa, &sysctl_numa_balancing_scan_delay);
0333     debugfs_create_u32("scan_period_min_ms", 0644, numa, &sysctl_numa_balancing_scan_period_min);
0334     debugfs_create_u32("scan_period_max_ms", 0644, numa, &sysctl_numa_balancing_scan_period_max);
0335     debugfs_create_u32("scan_size_mb", 0644, numa, &sysctl_numa_balancing_scan_size);
0336 #endif
0337 
0338     debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
0339 
0340     return 0;
0341 }
0342 late_initcall(sched_init_debug);
0343 
0344 #ifdef CONFIG_SMP
0345 
0346 static cpumask_var_t        sd_sysctl_cpus;
0347 static struct dentry        *sd_dentry;
0348 
0349 static int sd_flags_show(struct seq_file *m, void *v)
0350 {
0351     unsigned long flags = *(unsigned int *)m->private;
0352     int idx;
0353 
0354     for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
0355         seq_puts(m, sd_flag_debug[idx].name);
0356         seq_puts(m, " ");
0357     }
0358     seq_puts(m, "\n");
0359 
0360     return 0;
0361 }
0362 
0363 static int sd_flags_open(struct inode *inode, struct file *file)
0364 {
0365     return single_open(file, sd_flags_show, inode->i_private);
0366 }
0367 
0368 static const struct file_operations sd_flags_fops = {
0369     .open       = sd_flags_open,
0370     .read       = seq_read,
0371     .llseek     = seq_lseek,
0372     .release    = single_release,
0373 };
0374 
0375 static void register_sd(struct sched_domain *sd, struct dentry *parent)
0376 {
0377 #define SDM(type, mode, member) \
0378     debugfs_create_##type(#member, mode, parent, &sd->member)
0379 
0380     SDM(ulong, 0644, min_interval);
0381     SDM(ulong, 0644, max_interval);
0382     SDM(u64,   0644, max_newidle_lb_cost);
0383     SDM(u32,   0644, busy_factor);
0384     SDM(u32,   0644, imbalance_pct);
0385     SDM(u32,   0644, cache_nice_tries);
0386     SDM(str,   0444, name);
0387 
0388 #undef SDM
0389 
0390     debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
0391 }
0392 
0393 void update_sched_domain_debugfs(void)
0394 {
0395     int cpu, i;
0396 
0397     /*
0398      * This can unfortunately be invoked before sched_debug_init() creates
0399      * the debug directory. Don't touch sd_sysctl_cpus until then.
0400      */
0401     if (!debugfs_sched)
0402         return;
0403 
0404     if (!cpumask_available(sd_sysctl_cpus)) {
0405         if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
0406             return;
0407         cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
0408     }
0409 
0410     if (!sd_dentry)
0411         sd_dentry = debugfs_create_dir("domains", debugfs_sched);
0412 
0413     for_each_cpu(cpu, sd_sysctl_cpus) {
0414         struct sched_domain *sd;
0415         struct dentry *d_cpu;
0416         char buf[32];
0417 
0418         snprintf(buf, sizeof(buf), "cpu%d", cpu);
0419         debugfs_lookup_and_remove(buf, sd_dentry);
0420         d_cpu = debugfs_create_dir(buf, sd_dentry);
0421 
0422         i = 0;
0423         for_each_domain(cpu, sd) {
0424             struct dentry *d_sd;
0425 
0426             snprintf(buf, sizeof(buf), "domain%d", i);
0427             d_sd = debugfs_create_dir(buf, d_cpu);
0428 
0429             register_sd(sd, d_sd);
0430             i++;
0431         }
0432 
0433         __cpumask_clear_cpu(cpu, sd_sysctl_cpus);
0434     }
0435 }
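     /*
      * The resulting layout is <debugfs>/sched/domains/cpuN/domainM/ with one
      * file per SDM() entry above (min_interval, max_interval, ..., name) plus
      * a read-only "flags" file, rebuilt for every CPU that
      * dirty_sched_domain_sysctl() has marked in sd_sysctl_cpus.
      */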
0436 
0437 void dirty_sched_domain_sysctl(int cpu)
0438 {
0439     if (cpumask_available(sd_sysctl_cpus))
0440         __cpumask_set_cpu(cpu, sd_sysctl_cpus);
0441 }
0442 
0443 #endif /* CONFIG_SMP */
0444 
0445 #ifdef CONFIG_FAIR_GROUP_SCHED
0446 static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
0447 {
0448     struct sched_entity *se = tg->se[cpu];
0449 
0450 #define P(F)        SEQ_printf(m, "  .%-30s: %lld\n",   #F, (long long)F)
0451 #define P_SCHEDSTAT(F)  SEQ_printf(m, "  .%-30s: %lld\n",   \
0452         #F, (long long)schedstat_val(stats->F))
0453 #define PN(F)       SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
0454 #define PN_SCHEDSTAT(F) SEQ_printf(m, "  .%-30s: %lld.%06ld\n", \
0455         #F, SPLIT_NS((long long)schedstat_val(stats->F)))
0456 
0457     if (!se)
0458         return;
0459 
0460     PN(se->exec_start);
0461     PN(se->vruntime);
0462     PN(se->sum_exec_runtime);
0463 
0464     if (schedstat_enabled()) {
0465         struct sched_statistics *stats;
0466         stats = __schedstats_from_se(se);
0467 
0468         PN_SCHEDSTAT(wait_start);
0469         PN_SCHEDSTAT(sleep_start);
0470         PN_SCHEDSTAT(block_start);
0471         PN_SCHEDSTAT(sleep_max);
0472         PN_SCHEDSTAT(block_max);
0473         PN_SCHEDSTAT(exec_max);
0474         PN_SCHEDSTAT(slice_max);
0475         PN_SCHEDSTAT(wait_max);
0476         PN_SCHEDSTAT(wait_sum);
0477         P_SCHEDSTAT(wait_count);
0478     }
0479 
0480     P(se->load.weight);
0481 #ifdef CONFIG_SMP
0482     P(se->avg.load_avg);
0483     P(se->avg.util_avg);
0484     P(se->avg.runnable_avg);
0485 #endif
0486 
0487 #undef PN_SCHEDSTAT
0488 #undef PN
0489 #undef P_SCHEDSTAT
0490 #undef P
0491 }
0492 #endif
0493 
0494 #ifdef CONFIG_CGROUP_SCHED
0495 static DEFINE_SPINLOCK(sched_debug_lock);
0496 static char group_path[PATH_MAX];
0497 
0498 static void task_group_path(struct task_group *tg, char *path, int plen)
0499 {
0500     if (autogroup_path(tg, path, plen))
0501         return;
0502 
0503     cgroup_path(tg->css.cgroup, path, plen);
0504 }
0505 
0506 /*
0507  * Only 1 SEQ_printf_task_group_path() caller can use the full length
0508  * group_path[] for cgroup path. Other simultaneous callers will have
0509  * to use a shorter stack buffer. A "..." suffix is appended at the end
0510  * of the stack buffer so that it will show up in case the output length
0511  * matches the given buffer size to indicate possible path name truncation.
0512  */
0513 #define SEQ_printf_task_group_path(m, tg, fmt...)           \
0514 {                                   \
0515     if (spin_trylock(&sched_debug_lock)) {              \
0516         task_group_path(tg, group_path, sizeof(group_path));    \
0517         SEQ_printf(m, fmt, group_path);             \
0518         spin_unlock(&sched_debug_lock);             \
0519     } else {                            \
0520         char buf[128];                      \
0521         char *bufend = buf + sizeof(buf) - 3;           \
0522         task_group_path(tg, buf, bufend - buf);         \
0523         strcpy(bufend - 1, "...");              \
0524         SEQ_printf(m, fmt, buf);                \
0525     }                               \
0526 }
0527 #endif
0528 
0529 static void
0530 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
0531 {
0532     if (task_current(rq, p))
0533         SEQ_printf(m, ">R");
0534     else
0535         SEQ_printf(m, " %c", task_state_to_char(p));
0536 
0537     SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
0538         p->comm, task_pid_nr(p),
0539         SPLIT_NS(p->se.vruntime),
0540         (long long)(p->nvcsw + p->nivcsw),
0541         p->prio);
0542 
0543     SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld %9lld.%06ld",
0544         SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
0545         SPLIT_NS(p->se.sum_exec_runtime),
0546         SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
0547         SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
0548 
0549 #ifdef CONFIG_NUMA_BALANCING
0550     SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
0551 #endif
0552 #ifdef CONFIG_CGROUP_SCHED
0553     SEQ_printf_task_group_path(m, task_group(p), " %s")
0554 #endif
0555 
0556     SEQ_printf(m, "\n");
0557 }
0558 
0559 static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
0560 {
0561     struct task_struct *g, *p;
0562 
0563     SEQ_printf(m, "\n");
0564     SEQ_printf(m, "runnable tasks:\n");
0565     SEQ_printf(m, " S            task   PID         tree-key  switches  prio"
0566            "     wait-time             sum-exec        sum-sleep\n");
0567     SEQ_printf(m, "-------------------------------------------------------"
0568            "------------------------------------------------------\n");
0569 
0570     rcu_read_lock();
0571     for_each_process_thread(g, p) {
0572         if (task_cpu(p) != rq_cpu)
0573             continue;
0574 
0575         print_task(m, rq, p);
0576     }
0577     rcu_read_unlock();
0578 }
0579 
0580 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
0581 {
0582     s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
0583         spread, rq0_min_vruntime, spread0;
0584     struct rq *rq = cpu_rq(cpu);
0585     struct sched_entity *last;
0586     unsigned long flags;
0587 
0588 #ifdef CONFIG_FAIR_GROUP_SCHED
0589     SEQ_printf(m, "\n");
0590     SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu);
0591 #else
0592     SEQ_printf(m, "\n");
0593     SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
0594 #endif
0595     SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
0596             SPLIT_NS(cfs_rq->exec_clock));
0597 
0598     raw_spin_rq_lock_irqsave(rq, flags);
0599     if (rb_first_cached(&cfs_rq->tasks_timeline))
0600         MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
0601     last = __pick_last_entity(cfs_rq);
0602     if (last)
0603         max_vruntime = last->vruntime;
0604     min_vruntime = cfs_rq->min_vruntime;
0605     rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
0606     raw_spin_rq_unlock_irqrestore(rq, flags);
0607     SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
0608             SPLIT_NS(MIN_vruntime));
0609     SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
0610             SPLIT_NS(min_vruntime));
0611     SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
0612             SPLIT_NS(max_vruntime));
0613     spread = max_vruntime - MIN_vruntime;
0614     SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
0615             SPLIT_NS(spread));
0616     spread0 = min_vruntime - rq0_min_vruntime;
0617     SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
0618             SPLIT_NS(spread0));
0619     SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
0620             cfs_rq->nr_spread_over);
0621     SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
0622     SEQ_printf(m, "  .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
0623     SEQ_printf(m, "  .%-30s: %d\n", "idle_nr_running",
0624             cfs_rq->idle_nr_running);
0625     SEQ_printf(m, "  .%-30s: %d\n", "idle_h_nr_running",
0626             cfs_rq->idle_h_nr_running);
0627     SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
0628 #ifdef CONFIG_SMP
0629     SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
0630             cfs_rq->avg.load_avg);
0631     SEQ_printf(m, "  .%-30s: %lu\n", "runnable_avg",
0632             cfs_rq->avg.runnable_avg);
0633     SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
0634             cfs_rq->avg.util_avg);
0635     SEQ_printf(m, "  .%-30s: %u\n", "util_est_enqueued",
0636             cfs_rq->avg.util_est.enqueued);
0637     SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg",
0638             cfs_rq->removed.load_avg);
0639     SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg",
0640             cfs_rq->removed.util_avg);
0641     SEQ_printf(m, "  .%-30s: %ld\n", "removed.runnable_avg",
0642             cfs_rq->removed.runnable_avg);
0643 #ifdef CONFIG_FAIR_GROUP_SCHED
0644     SEQ_printf(m, "  .%-30s: %lu\n", "tg_load_avg_contrib",
0645             cfs_rq->tg_load_avg_contrib);
0646     SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg",
0647             atomic_long_read(&cfs_rq->tg->load_avg));
0648 #endif
0649 #endif
0650 #ifdef CONFIG_CFS_BANDWIDTH
0651     SEQ_printf(m, "  .%-30s: %d\n", "throttled",
0652             cfs_rq->throttled);
0653     SEQ_printf(m, "  .%-30s: %d\n", "throttle_count",
0654             cfs_rq->throttle_count);
0655 #endif
0656 
0657 #ifdef CONFIG_FAIR_GROUP_SCHED
0658     print_cfs_group_stats(m, cpu, cfs_rq->tg);
0659 #endif
0660 }
0661 
0662 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
0663 {
0664 #ifdef CONFIG_RT_GROUP_SCHED
0665     SEQ_printf(m, "\n");
0666     SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu);
0667 #else
0668     SEQ_printf(m, "\n");
0669     SEQ_printf(m, "rt_rq[%d]:\n", cpu);
0670 #endif
0671 
0672 #define P(x) \
0673     SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
0674 #define PU(x) \
0675     SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
0676 #define PN(x) \
0677     SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
0678 
0679     PU(rt_nr_running);
0680 #ifdef CONFIG_SMP
0681     PU(rt_nr_migratory);
0682 #endif
0683     P(rt_throttled);
0684     PN(rt_time);
0685     PN(rt_runtime);
0686 
0687 #undef PN
0688 #undef PU
0689 #undef P
0690 }
0691 
0692 void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
0693 {
0694     struct dl_bw *dl_bw;
0695 
0696     SEQ_printf(m, "\n");
0697     SEQ_printf(m, "dl_rq[%d]:\n", cpu);
0698 
0699 #define PU(x) \
0700     SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
0701 
0702     PU(dl_nr_running);
0703 #ifdef CONFIG_SMP
0704     PU(dl_nr_migratory);
0705     dl_bw = &cpu_rq(cpu)->rd->dl_bw;
0706 #else
0707     dl_bw = &dl_rq->dl_bw;
0708 #endif
0709     SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
0710     SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
0711 
0712 #undef PU
0713 }
0714 
0715 static void print_cpu(struct seq_file *m, int cpu)
0716 {
0717     struct rq *rq = cpu_rq(cpu);
0718 
0719 #ifdef CONFIG_X86
0720     {
0721         unsigned int freq = cpu_khz ? : 1;
0722 
0723         SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
0724                cpu, freq / 1000, (freq % 1000));
0725     }
0726 #else
0727     SEQ_printf(m, "cpu#%d\n", cpu);
0728 #endif
0729 
0730 #define P(x)                                \
0731 do {                                    \
0732     if (sizeof(rq->x) == 4)                     \
0733         SEQ_printf(m, "  .%-30s: %ld\n", #x, (long)(rq->x));    \
0734     else                                \
0735         SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
0736 } while (0)
0737 
0738 #define PN(x) \
0739     SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
0740 
0741     P(nr_running);
0742     P(nr_switches);
0743     P(nr_uninterruptible);
0744     PN(next_balance);
0745     SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
0746     PN(clock);
0747     PN(clock_task);
0748 #undef P
0749 #undef PN
0750 
0751 #ifdef CONFIG_SMP
0752 #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
0753     P64(avg_idle);
0754     P64(max_idle_balance_cost);
0755 #undef P64
0756 #endif
0757 
0758 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
0759     if (schedstat_enabled()) {
0760         P(yld_count);
0761         P(sched_count);
0762         P(sched_goidle);
0763         P(ttwu_count);
0764         P(ttwu_local);
0765     }
0766 #undef P
0767 
0768     print_cfs_stats(m, cpu);
0769     print_rt_stats(m, cpu);
0770     print_dl_stats(m, cpu);
0771 
0772     print_rq(m, rq, cpu);
0773     SEQ_printf(m, "\n");
0774 }
0775 
0776 static const char *sched_tunable_scaling_names[] = {
0777     "none",
0778     "logarithmic",
0779     "linear"
0780 };
0781 
0782 static void sched_debug_header(struct seq_file *m)
0783 {
0784     u64 ktime, sched_clk, cpu_clk;
0785     unsigned long flags;
0786 
0787     local_irq_save(flags);
0788     ktime = ktime_to_ns(ktime_get());
0789     sched_clk = sched_clock();
0790     cpu_clk = local_clock();
0791     local_irq_restore(flags);
0792 
0793     SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
0794         init_utsname()->release,
0795         (int)strcspn(init_utsname()->version, " "),
0796         init_utsname()->version);
0797 
0798 #define P(x) \
0799     SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
0800 #define PN(x) \
0801     SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
0802     PN(ktime);
0803     PN(sched_clk);
0804     PN(cpu_clk);
0805     P(jiffies);
0806 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
0807     P(sched_clock_stable());
0808 #endif
0809 #undef PN
0810 #undef P
0811 
0812     SEQ_printf(m, "\n");
0813     SEQ_printf(m, "sysctl_sched\n");
0814 
0815 #define P(x) \
0816     SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
0817 #define PN(x) \
0818     SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
0819     PN(sysctl_sched_latency);
0820     PN(sysctl_sched_min_granularity);
0821     PN(sysctl_sched_idle_min_granularity);
0822     PN(sysctl_sched_wakeup_granularity);
0823     P(sysctl_sched_child_runs_first);
0824     P(sysctl_sched_features);
0825 #undef PN
0826 #undef P
0827 
0828     SEQ_printf(m, "  .%-40s: %d (%s)\n",
0829         "sysctl_sched_tunable_scaling",
0830         sysctl_sched_tunable_scaling,
0831         sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
0832     SEQ_printf(m, "\n");
0833 }
0834 
0835 static int sched_debug_show(struct seq_file *m, void *v)
0836 {
0837     int cpu = (unsigned long)(v - 2);
0838 
0839     if (cpu != -1)
0840         print_cpu(m, cpu);
0841     else
0842         sched_debug_header(m);
0843 
0844     return 0;
0845 }
0846 
0847 void sysrq_sched_debug_show(void)
0848 {
0849     int cpu;
0850 
0851     sched_debug_header(NULL);
0852     for_each_online_cpu(cpu) {
0853         /*
0854          * Need to reset softlockup watchdogs on all CPUs, because
0855          * another CPU might be blocked waiting for us to process
0856          * an IPI or stop_machine.
0857          */
0858         touch_nmi_watchdog();
0859         touch_all_softlockup_watchdogs();
0860         print_cpu(NULL, cpu);
0861     }
0862 }
0863 
0864 /*
0865  * This iterator needs some explanation.
0866  * It returns 1 for the header position.
0867  * This means 2 is CPU 0.
0868  * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
0869  * to use cpumask_* to iterate over the CPUs.
0870  */
0871 static void *sched_debug_start(struct seq_file *file, loff_t *offset)
0872 {
0873     unsigned long n = *offset;
0874 
0875     if (n == 0)
0876         return (void *) 1;
0877 
0878     n--;
0879 
0880     if (n > 0)
0881         n = cpumask_next(n - 1, cpu_online_mask);
0882     else
0883         n = cpumask_first(cpu_online_mask);
0884 
0885     *offset = n + 1;
0886 
0887     if (n < nr_cpu_ids)
0888         return (void *)(unsigned long)(n + 2);
0889 
0890     return NULL;
0891 }
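     /*
      * So *offset == 0 yields the header token (void *)1, and larger offsets
      * walk cpu_online_mask, returning cpu + 2 for each online CPU.
      * sched_debug_show() recovers the CPU with "cpu = (unsigned long)(v - 2)",
      * which turns the header token back into -1.
      */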
0892 
0893 static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
0894 {
0895     (*offset)++;
0896     return sched_debug_start(file, offset);
0897 }
0898 
0899 static void sched_debug_stop(struct seq_file *file, void *data)
0900 {
0901 }
0902 
0903 static const struct seq_operations sched_debug_sops = {
0904     .start      = sched_debug_start,
0905     .next       = sched_debug_next,
0906     .stop       = sched_debug_stop,
0907     .show       = sched_debug_show,
0908 };
0909 
0910 #define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
0911 #define __P(F) __PS(#F, F)
0912 #define   P(F) __PS(#F, p->F)
0913 #define   PM(F, M) __PS(#F, p->F & (M))
0914 #define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
0915 #define __PN(F) __PSN(#F, F)
0916 #define   PN(F) __PSN(#F, p->F)
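     /*
      * These helpers print one "field : value" line per task field and are
      * used by proc_sched_show_task() below, i.e. they produce the body of
      * /proc/<pid>/sched.
      */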
0917 
0918 
0919 #ifdef CONFIG_NUMA_BALANCING
0920 void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
0921         unsigned long tpf, unsigned long gsf, unsigned long gpf)
0922 {
0923     SEQ_printf(m, "numa_faults node=%d ", node);
0924     SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf);
0925     SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf);
0926 }
0927 #endif
0928 
0929 
0930 static void sched_show_numa(struct task_struct *p, struct seq_file *m)
0931 {
0932 #ifdef CONFIG_NUMA_BALANCING
0933     if (p->mm)
0934         P(mm->numa_scan_seq);
0935 
0936     P(numa_pages_migrated);
0937     P(numa_preferred_nid);
0938     P(total_numa_faults);
0939     SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
0940             task_node(p), task_numa_group_id(p));
0941     show_numa_stats(p, m);
0942 #endif
0943 }
0944 
0945 void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
0946                           struct seq_file *m)
0947 {
0948     unsigned long nr_switches;
0949 
0950     SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
0951                         get_nr_threads(p));
0952     SEQ_printf(m,
0953         "---------------------------------------------------------"
0954         "----------\n");
0955 
0956 #define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->stats.F))
0957 #define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F))
0958 
0959     PN(se.exec_start);
0960     PN(se.vruntime);
0961     PN(se.sum_exec_runtime);
0962 
0963     nr_switches = p->nvcsw + p->nivcsw;
0964 
0965     P(se.nr_migrations);
0966 
0967     if (schedstat_enabled()) {
0968         u64 avg_atom, avg_per_cpu;
0969 
0970         PN_SCHEDSTAT(sum_sleep_runtime);
0971         PN_SCHEDSTAT(sum_block_runtime);
0972         PN_SCHEDSTAT(wait_start);
0973         PN_SCHEDSTAT(sleep_start);
0974         PN_SCHEDSTAT(block_start);
0975         PN_SCHEDSTAT(sleep_max);
0976         PN_SCHEDSTAT(block_max);
0977         PN_SCHEDSTAT(exec_max);
0978         PN_SCHEDSTAT(slice_max);
0979         PN_SCHEDSTAT(wait_max);
0980         PN_SCHEDSTAT(wait_sum);
0981         P_SCHEDSTAT(wait_count);
0982         PN_SCHEDSTAT(iowait_sum);
0983         P_SCHEDSTAT(iowait_count);
0984         P_SCHEDSTAT(nr_migrations_cold);
0985         P_SCHEDSTAT(nr_failed_migrations_affine);
0986         P_SCHEDSTAT(nr_failed_migrations_running);
0987         P_SCHEDSTAT(nr_failed_migrations_hot);
0988         P_SCHEDSTAT(nr_forced_migrations);
0989         P_SCHEDSTAT(nr_wakeups);
0990         P_SCHEDSTAT(nr_wakeups_sync);
0991         P_SCHEDSTAT(nr_wakeups_migrate);
0992         P_SCHEDSTAT(nr_wakeups_local);
0993         P_SCHEDSTAT(nr_wakeups_remote);
0994         P_SCHEDSTAT(nr_wakeups_affine);
0995         P_SCHEDSTAT(nr_wakeups_affine_attempts);
0996         P_SCHEDSTAT(nr_wakeups_passive);
0997         P_SCHEDSTAT(nr_wakeups_idle);
0998 
0999         avg_atom = p->se.sum_exec_runtime;
1000         if (nr_switches)
1001             avg_atom = div64_ul(avg_atom, nr_switches);
1002         else
1003             avg_atom = -1LL;
1004 
1005         avg_per_cpu = p->se.sum_exec_runtime;
1006         if (p->se.nr_migrations) {
1007             avg_per_cpu = div64_u64(avg_per_cpu,
1008                         p->se.nr_migrations);
1009         } else {
1010             avg_per_cpu = -1LL;
1011         }
1012 
1013         __PN(avg_atom);
1014         __PN(avg_per_cpu);
1015 
1016 #ifdef CONFIG_SCHED_CORE
1017         PN_SCHEDSTAT(core_forceidle_sum);
1018 #endif
1019     }
1020 
1021     __P(nr_switches);
1022     __PS("nr_voluntary_switches", p->nvcsw);
1023     __PS("nr_involuntary_switches", p->nivcsw);
1024 
1025     P(se.load.weight);
1026 #ifdef CONFIG_SMP
1027     P(se.avg.load_sum);
1028     P(se.avg.runnable_sum);
1029     P(se.avg.util_sum);
1030     P(se.avg.load_avg);
1031     P(se.avg.runnable_avg);
1032     P(se.avg.util_avg);
1033     P(se.avg.last_update_time);
1034     P(se.avg.util_est.ewma);
1035     PM(se.avg.util_est.enqueued, ~UTIL_AVG_UNCHANGED);
1036 #endif
1037 #ifdef CONFIG_UCLAMP_TASK
1038     __PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
1039     __PS("uclamp.max", p->uclamp_req[UCLAMP_MAX].value);
1040     __PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
1041     __PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
1042 #endif
1043     P(policy);
1044     P(prio);
1045     if (task_has_dl_policy(p)) {
1046         P(dl.runtime);
1047         P(dl.deadline);
1048     }
1049 #undef PN_SCHEDSTAT
1050 #undef P_SCHEDSTAT
1051 
1052     {
1053         unsigned int this_cpu = raw_smp_processor_id();
1054         u64 t0, t1;
1055 
1056         t0 = cpu_clock(this_cpu);
1057         t1 = cpu_clock(this_cpu);
1058         __PS("clock-delta", t1-t0);
1059     }
1060 
1061     sched_show_numa(p, m);
1062 }
1063 
1064 void proc_sched_set_task(struct task_struct *p)
1065 {
1066 #ifdef CONFIG_SCHEDSTATS
1067     memset(&p->stats, 0, sizeof(p->stats));
1068 #endif
1069 }
1070 
1071 void resched_latency_warn(int cpu, u64 latency)
1072 {
1073     static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1);
1074 
1075     WARN(__ratelimit(&latency_check_ratelimit),
1076          "sched: CPU %d need_resched set for > %llu ns (%d ticks) "
1077          "without schedule\n",
1078          cpu, latency, cpu_rq(cpu)->ticks_without_resched);
1079 }