/*
 * latencytop.c: Latency display infrastructure
 *
 * (C) Copyright 2008 Intel Corporation
 * Author: Arjan van de Ven <arjan@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

/*
 * CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is
 * used by the "latencytop" userspace tool. The latency that is tracked is not
 * the 'traditional' interrupt latency (which is primarily caused by something
 * else consuming CPU), but instead, it is the latency an application encounters
 * because the kernel sleeps on its behalf for various reasons.
 *
 * This code tracks two levels of statistics:
 * 1) System level latency
 * 2) Per process latency
 *
 * The latency is stored in fixed-size data structures in an accumulated form;
 * if the "same" latency cause is hit twice, this will be tracked as one entry
 * in the data structure. The count, total accumulated latency and maximum
 * latency are all tracked in this data structure. When the fixed-size
 * structure is full, no new causes are tracked until the buffer is flushed by
 * writing to the /proc file; the userspace tool does this on a regular basis.
 *
 * A latency cause is identified by a stringified backtrace at the point that
 * the scheduler gets invoked. The userland tool will use this string to
 * identify the cause of the latency in human readable form.
 *
 * The information is exported via /proc/latency_stats and /proc/<pid>/latency.
 * These files look like this:
 *
 * Latency Top version : v0.1
 * 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl
 * |    |    |    |
 * |    |    |    +----> the stringified backtrace
 * |    |    +---------> the maximum latency for this entry in microseconds
 * |    +--------------> the accumulated latency for this entry (microseconds)
 * +-------------------> the number of times this entry is hit
 *
 * (note: the average latency is the accumulated latency divided by the number
 * of times)
 */
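
/*
 * A minimal sketch (not part of the kernel build) of how a userspace
 * consumer might parse one record of the format above. The function
 * name, the buffer size and the use of sscanf() are illustrative
 * assumptions, not something the latencytop tool is known to do
 * verbatim.
 */
#if 0
#include <stdio.h>

static int parse_latency_record(const char *line)
{
	unsigned long count, total_usecs, max_usecs;
	char backtrace[512];	/* hypothetical size, not part of the ABI */

	/* hit count, accumulated usecs, max usecs, stringified backtrace */
	if (sscanf(line, "%lu %lu %lu %511[^\n]",
		   &count, &total_usecs, &max_usecs, backtrace) != 4)
		return -1;

	printf("%lu hits, avg %lu usec, worst %lu usec: %s\n",
	       count, count ? total_usecs / count : 0, max_usecs, backtrace);
	return 0;
}
#endif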

#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/latencytop.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/stacktrace.h>

/* Protects the global table as well as the per-task latency records. */
static DEFINE_RAW_SPINLOCK(latency_lock);

/* Fixed-size system-wide accumulation table; one slot per distinct backtrace. */
#define MAXLR 128
static struct latency_record latency_record[MAXLR];

/* Toggled through the kernel.latencytop sysctl (see sysctl_latencytop()). */
int latencytop_enabled;
/* Reset the per-task latency records of @p. */
void clear_all_latency_tracing(struct task_struct *p)
{
	unsigned long flags;

	if (!latencytop_enabled)
		return;

	raw_spin_lock_irqsave(&latency_lock, flags);
	memset(&p->latency_record, 0, sizeof(p->latency_record));
	p->latency_record_count = 0;
	raw_spin_unlock_irqrestore(&latency_lock, flags);
}

/* Reset the system-wide table; triggered by a write to /proc/latency_stats. */
static void clear_global_latency_tracing(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&latency_lock, flags);
	memset(&latency_record, 0, sizeof(latency_record));
	raw_spin_unlock_irqrestore(&latency_lock, flags);
}

/*
 * Fold one latency record into the system-wide table: bump the entry
 * with the same backtrace if there is one, else claim the first empty
 * slot. Must be called with latency_lock held.
 */
static void __sched
account_global_scheduler_latency(struct task_struct *tsk,
				 struct latency_record *lat)
{
	int firstnonnull = MAXLR + 1;
	int i;

	if (!latencytop_enabled)
		return;

	/* skip kernel threads for now */
	if (!tsk->mm)
		return;

	for (i = 0; i < MAXLR; i++) {
		int q, same = 1;

		/* Nothing stored; remember the first empty slot: */
		if (!latency_record[i].backtrace[0]) {
			if (firstnonnull > i)
				firstnonnull = i;
			continue;
		}
		for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
			unsigned long record = lat->backtrace[q];

			if (latency_record[i].backtrace[q] != record) {
				same = 0;
				break;
			}

			/* 0 and ULONG_MAX entries mean end of backtrace: */
			if (record == 0 || record == ULONG_MAX)
				break;
		}
		if (same) {
			latency_record[i].count++;
			latency_record[i].time += lat->time;
			if (lat->time > latency_record[i].max)
				latency_record[i].max = lat->time;
			return;
		}
	}

	/* Table is full: drop this cause until the table is flushed: */
	i = firstnonnull;
	if (i >= MAXLR)
		return;

	/* Allocate a new one: */
	memcpy(&latency_record[i], lat, sizeof(struct latency_record));
}
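
/*
 * Worked example (hypothetical numbers): if the backtrace
 * "i915_irq_wait drm_ioctl ..." is accounted twice, first with 3000
 * and then with 5000 usecs, its slot ends up with count == 2,
 * time == 8000 and max == 5000; /proc/latency_stats then reports an
 * average of time / count == 4000 usecs for that cause.
 */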

/*
 * Helper to store a backtrace into a latency record entry
 */
static inline void store_stacktrace(struct task_struct *tsk,
				    struct latency_record *lat)
{
	struct stack_trace trace;

	memset(&trace, 0, sizeof(trace));
	trace.max_entries = LT_BACKTRACEDEPTH;
	trace.entries = &lat->backtrace[0];
	save_stack_trace_tsk(tsk, &trace);
}
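
/*
 * The resulting lat->backtrace[] is an array of kernel return
 * addresses; an entry of 0 or ULONG_MAX marks the end of the trace,
 * which is why the comparison loops above and below treat both values
 * as terminators.
 */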

/**
 * __account_scheduler_latency - record an occurred latency
 * @tsk: the task struct of the task hitting the latency
 * @usecs: the duration of the latency in microseconds
 * @inter: 1 if the sleep was interruptible, 0 if uninterruptible
 *
 * This function is the main entry point for recording latency entries
 * as called by the scheduler.
 *
 * This function has a few special cases to deal with normal 'non-latency'
 * sleeps: specifically, interruptible sleep longer than 5 msec is skipped
 * since this usually is caused by waiting for events via select() and co.
 *
 * Negative latencies (caused by time going backwards) are also explicitly
 * skipped.
 */
void __sched
__account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
{
	unsigned long flags;
	int i, q;
	struct latency_record lat;

	/* Long interruptible waits are generally user requested... */
	if (inter && usecs > 5000)
		return;

	/* Negative sleeps are time going backwards */
	/* Zero-time sleeps are non-interesting */
	if (usecs <= 0)
		return;

	memset(&lat, 0, sizeof(lat));
	lat.count = 1;
	lat.time = usecs;
	lat.max = usecs;
	store_stacktrace(tsk, &lat);

	raw_spin_lock_irqsave(&latency_lock, flags);

	account_global_scheduler_latency(tsk, &lat);

	for (i = 0; i < tsk->latency_record_count; i++) {
		struct latency_record *mylat;
		int same = 1;

		mylat = &tsk->latency_record[i];
		for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
			unsigned long record = lat.backtrace[q];

			if (mylat->backtrace[q] != record) {
				same = 0;
				break;
			}

			/* 0 and ULONG_MAX entries mean end of backtrace: */
			if (record == 0 || record == ULONG_MAX)
				break;
		}
		if (same) {
			mylat->count++;
			mylat->time += lat.time;
			if (lat.time > mylat->max)
				mylat->max = lat.time;
			goto out_unlock;
		}
	}

	/*
	 * Short term hack: once LT_SAVECOUNT records are in use we stop;
	 * in the future we could recycle old entries instead:
	 */
	if (tsk->latency_record_count >= LT_SAVECOUNT)
		goto out_unlock;

	/* Allocate a new one: */
	i = tsk->latency_record_count++;
	memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));

out_unlock:
	raw_spin_unlock_irqrestore(&latency_lock, flags);
}
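
/*
 * For reference, the scheduler reaches this function through an inline
 * wrapper in <linux/latencytop.h> that looks roughly like the sketch
 * below (see the header for the authoritative version):
 *
 *	static inline void
 *	account_scheduler_latency(struct task_struct *task, int usecs, int inter)
 *	{
 *		if (unlikely(latencytop_enabled))
 *			__account_scheduler_latency(task, usecs, inter);
 *	}
 */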

/* Emit the system-wide table in the format documented at the top of this file. */
static int lstats_show(struct seq_file *m, void *v)
{
	int i;

	seq_puts(m, "Latency Top version : v0.1\n");

	for (i = 0; i < MAXLR; i++) {
		struct latency_record *lr = &latency_record[i];

		if (lr->backtrace[0]) {
			int q;

			seq_printf(m, "%i %lu %lu",
				   lr->count, lr->time, lr->max);
			for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
				unsigned long bt = lr->backtrace[q];

				if (!bt)
					break;
				if (bt == ULONG_MAX)
					break;
				seq_printf(m, " %ps", (void *)bt);
			}
			seq_puts(m, "\n");
		}
	}
	return 0;
}

/*
 * Any write to /proc/latency_stats flushes the system-wide table; the
 * latencytop tool does this on a regular basis.
 */
static ssize_t
lstats_write(struct file *file, const char __user *buf, size_t count,
	     loff_t *offs)
{
	clear_global_latency_tracing();

	return count;
}

static int lstats_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, lstats_show, NULL);
}

static const struct file_operations lstats_fops = {
	.open		= lstats_open,
	.read		= seq_read,
	.write		= lstats_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init init_lstats_procfs(void)
{
	proc_create("latency_stats", 0644, NULL, &lstats_fops);
	return 0;
}

/*
 * Handler for the kernel.latencytop sysctl: latency accounting relies
 * on schedstats, so force those on whenever latencytop is enabled.
 */
int sysctl_latencytop(struct ctl_table *table, int write,
		      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err;

	err = proc_dointvec(table, write, buffer, lenp, ppos);
	if (latencytop_enabled)
		force_schedstat_enabled();

	return err;
}
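
/*
 * For reference, the matching table entry in kernel/sysctl.c looks
 * roughly like the sketch below (field values are from memory and may
 * differ between kernel versions):
 *
 *	{
 *		.procname	= "latencytop",
 *		.data		= &latencytop_enabled,
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= sysctl_latencytop,
 *	},
 */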
device_initcall(init_lstats_procfs);