Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH
0004  * Author: Viktor Rosendahl (viktor.rosendahl@bmw.de)
0005  */
0006 
0007 #define _GNU_SOURCE
0008 #define _POSIX_C_SOURCE 200809L
0009 
0010 #include <ctype.h>
0011 #include <stdbool.h>
0012 #include <stdio.h>
0013 #include <stdlib.h>
0014 #include <string.h>
0015 
0016 #include <err.h>
0017 #include <errno.h>
0018 #include <fcntl.h>
0019 #include <getopt.h>
0020 #include <sched.h>
0021 #include <linux/unistd.h>
0022 #include <signal.h>
0023 #include <sys/inotify.h>
0024 #include <unistd.h>
0025 #include <pthread.h>
0026 #include <tracefs.h>
0027 
0028 static const char *prg_name;
0029 static const char *prg_unknown = "unknown program name";
0030 
0031 static int fd_stdout;
0032 
0033 static int sched_policy;
0034 static bool sched_policy_set;
0035 
0036 static int sched_pri;
0037 static bool sched_pri_set;
0038 
0039 static bool trace_enable = true;
0040 static bool setup_ftrace = true;
0041 static bool use_random_sleep;
0042 
0043 #define TRACE_OPTS              \
0044     C(FUNC_TR, "function-trace"),       \
0045     C(DISP_GR, "display-graph"),        \
0046     C(NR,       NULL)
0047 
0048 #undef C
0049 #define C(a, b) OPTIDX_##a
0050 
0051 enum traceopt {
0052     TRACE_OPTS
0053 };
0054 
0055 #undef C
0056 #define C(a, b)  b
0057 
0058 static const char *const optstr[] = {
0059     TRACE_OPTS
0060 };
0061 
0062 enum errhandling {
0063     ERR_EXIT = 0,
0064     ERR_WARN,
0065     ERR_CLEANUP,
0066 };
0067 
0068 static bool use_options[OPTIDX_NR];
0069 
0070 static char inotify_buffer[655360];
0071 
0072 #define likely(x)      __builtin_expect(!!(x), 1)
0073 #define unlikely(x)    __builtin_expect(!!(x), 0)
0074 #define bool2str(x)    (x ? "true":"false")
0075 
0076 #define DEFAULT_NR_PRINTER_THREADS (3)
0077 static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS;
0078 
0079 #define DEFAULT_TABLE_SIZE (2)
0080 static unsigned int table_startsize = DEFAULT_TABLE_SIZE;
0081 
0082 static int verbosity;
0083 
0084 #define verbose_sizechange() (verbosity >= 1)
0085 #define verbose_lostevent()  (verbosity >= 2)
0086 #define verbose_ftrace()     (verbosity >= 1)
0087 
0088 #define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0)
0089 #define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0)
0090 
0091 static const char *debug_tracefile;
0092 static const char *debug_tracefile_dflt;
0093 static const char *debug_maxlat;
0094 static const char *debug_maxlat_dflt;
0095 static const char * const DEBUG_NOFILE = "[file not found]";
0096 
0097 static const char * const TR_MAXLAT  = "tracing_max_latency";
0098 static const char * const TR_THRESH  = "tracing_thresh";
0099 static const char * const TR_CURRENT = "current_tracer";
0100 static const char * const TR_OPTIONS = "trace_options";
0101 
0102 static const char * const NOP_TRACER = "nop";
0103 
0104 static const char * const OPT_NO_PREFIX = "no";
0105 
0106 #define DFLT_THRESHOLD_US "0"
0107 static const char *threshold = DFLT_THRESHOLD_US;
0108 
0109 #define DEV_URANDOM     "/dev/urandom"
0110 #define RT_DEFAULT_PRI (99)
0111 #define DEFAULT_PRI    (0)
0112 
0113 #define USEC_PER_MSEC (1000L)
0114 #define NSEC_PER_USEC (1000L)
0115 #define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC)
0116 
0117 #define MSEC_PER_SEC (1000L)
0118 #define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC)
0119 #define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC)
0120 
0121 #define SLEEP_TIME_MS_DEFAULT (1000L)
0122 #define TRY_PRINTMUTEX_MS (1000)
0123 
0124 static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT);
0125 
0126 static const char * const queue_full_warning =
0127 "Could not queue trace for printing. It is likely that events happen faster\n"
0128 "than what they can be printed. Probably partly because of random sleeping\n";
0129 
0130 static const char * const no_tracer_msg =
0131 "Could not find any tracers! Running this program as root may help!\n";
0132 
0133 static const char * const no_latency_tr_msg =
0134 "No latency tracers are supported by your kernel!\n";
0135 
0136 struct policy {
0137     const char *name;
0138     int policy;
0139     int default_pri;
0140 };
0141 
0142 static const struct policy policies[] = {
0143     { "other", SCHED_OTHER, DEFAULT_PRI    },
0144     { "batch", SCHED_BATCH, DEFAULT_PRI    },
0145     { "idle",  SCHED_IDLE,  DEFAULT_PRI    },
0146     { "rr",    SCHED_RR,    RT_DEFAULT_PRI },
0147     { "fifo",  SCHED_FIFO,  RT_DEFAULT_PRI },
0148     { NULL,    0,           DEFAULT_PRI    }
0149 };
0150 
0151 /*
0152  * The default tracer will be the first on this list that is supported by the
0153  * currently running Linux kernel.
0154  */
0155 static const char * const relevant_tracers[] = {
0156     "preemptirqsoff",
0157     "preemptoff",
0158     "irqsoff",
0159     "wakeup",
0160     "wakeup_rt",
0161     "wakeup_dl",
0162     NULL
0163 };
0164 
0165 /* This is the list of tracers for which random sleep makes sense */
0166 static const char * const random_tracers[] = {
0167     "preemptirqsoff",
0168     "preemptoff",
0169     "irqsoff",
0170     NULL
0171 };
0172 
0173 static const char *current_tracer;
0174 static bool force_tracer;
0175 
0176 struct ftrace_state {
0177     char *tracer;
0178     char *thresh;
0179     bool opt[OPTIDX_NR];
0180     bool opt_valid[OPTIDX_NR];
0181     pthread_mutex_t mutex;
0182 };
0183 
0184 struct entry {
0185     int ticket;
0186     int ticket_completed_ref;
0187 };
0188 
0189 struct print_state {
0190     int ticket_counter;
0191     int ticket_completed;
0192     pthread_mutex_t mutex;
0193     pthread_cond_t cond;
0194     int cnt;
0195     pthread_mutex_t cnt_mutex;
0196 };
0197 
0198 struct short_msg {
0199     char buf[160];
0200     int len;
0201 };
0202 
0203 static struct print_state printstate;
0204 static struct ftrace_state save_state;
0205 volatile sig_atomic_t signal_flag;
0206 
0207 #define PROB_TABLE_MAX_SIZE (1000)
0208 
0209 int probabilities[PROB_TABLE_MAX_SIZE];
0210 
0211 struct sleep_table {
0212     int *table;
0213     int size;
0214     pthread_mutex_t mutex;
0215 };
0216 
0217 static struct sleep_table sleeptable;
0218 
0219 #define QUEUE_SIZE (10)
0220 
0221 struct queue {
0222     struct entry entries[QUEUE_SIZE];
0223     int next_prod_idx;
0224     int next_cons_idx;
0225     pthread_mutex_t mutex;
0226     pthread_cond_t cond;
0227 };
0228 
0229 #define MAX_THREADS (40)
0230 
0231 struct queue printqueue;
0232 pthread_t printthread[MAX_THREADS];
0233 pthread_mutex_t print_mtx;
0234 #define PRINT_BUFFER_SIZE (16 * 1024 * 1024)
0235 
0236 static void cleanup_exit(int status);
0237 static int set_trace_opt(const char *opt, bool value);
0238 
0239 static __always_inline void *malloc_or_die(size_t size)
0240 {
0241     void *ptr = malloc(size);
0242 
0243     if (unlikely(ptr == NULL)) {
0244         warn("malloc() failed");
0245         cleanup_exit(EXIT_FAILURE);
0246     }
0247     return ptr;
0248 }
0249 
0250 static __always_inline void *malloc_or_die_nocleanup(size_t size)
0251 {
0252     void *ptr = malloc(size);
0253 
0254     if (unlikely(ptr == NULL))
0255         err(0, "malloc() failed");
0256     return ptr;
0257 }
0258 
0259 static __always_inline void write_or_die(int fd, const char *buf, size_t count)
0260 {
0261     ssize_t r;
0262 
0263     do {
0264         r = write(fd, buf, count);
0265         if (unlikely(r < 0)) {
0266             if (errno == EINTR)
0267                 continue;
0268             warn("write() failed");
0269             cleanup_exit(EXIT_FAILURE);
0270         }
0271         count -= r;
0272         buf += r;
0273     } while (count > 0);
0274 }
0275 
0276 static __always_inline void clock_gettime_or_die(clockid_t clk_id,
0277                          struct timespec *tp)
0278 {
0279     int r = clock_gettime(clk_id, tp);
0280 
0281     if (unlikely(r != 0))
0282         err(EXIT_FAILURE, "clock_gettime() failed");
0283 }
0284 
0285 static __always_inline void sigemptyset_or_die(sigset_t *s)
0286 {
0287     if (unlikely(sigemptyset(s) != 0)) {
0288         warn("sigemptyset() failed");
0289         cleanup_exit(EXIT_FAILURE);
0290     }
0291 }
0292 
0293 static __always_inline void sigaddset_or_die(sigset_t *s, int signum)
0294 {
0295     if (unlikely(sigaddset(s, signum) != 0)) {
0296         warn("sigemptyset() failed");
0297         cleanup_exit(EXIT_FAILURE);
0298     }
0299 }
0300 
0301 static __always_inline void sigaction_or_die(int signum,
0302                          const struct sigaction *act,
0303                          struct sigaction *oldact)
0304 {
0305     if (unlikely(sigaction(signum, act, oldact) != 0)) {
0306         warn("sigaction() failed");
0307         cleanup_exit(EXIT_FAILURE);
0308     }
0309 }
0310 
0311 static void open_stdout(void)
0312 {
0313     if (setvbuf(stdout, NULL, _IONBF, 0) != 0)
0314         err(EXIT_FAILURE, "setvbuf() failed");
0315     fd_stdout = fileno(stdout);
0316     if (fd_stdout < 0)
0317         err(EXIT_FAILURE, "fileno() failed");
0318 }
0319 
0320 /*
0321  * It's not worth it to call cleanup_exit() from mutex functions because
0322  * cleanup_exit() uses mutexes.
0323  */
0324 static __always_inline void mutex_lock(pthread_mutex_t *mtx)
0325 {
0326     errno = pthread_mutex_lock(mtx);
0327     if (unlikely(errno))
0328         err(EXIT_FAILURE, "pthread_mutex_lock() failed");
0329 }
0330 
0331 
0332 static __always_inline void mutex_unlock(pthread_mutex_t *mtx)
0333 {
0334     errno = pthread_mutex_unlock(mtx);
0335     if (unlikely(errno))
0336         err(EXIT_FAILURE, "pthread_mutex_unlock() failed");
0337 }
0338 
0339 static __always_inline void cond_signal(pthread_cond_t *cond)
0340 {
0341     errno = pthread_cond_signal(cond);
0342     if (unlikely(errno))
0343         err(EXIT_FAILURE, "pthread_cond_signal() failed");
0344 }
0345 
0346 static __always_inline void cond_wait(pthread_cond_t *restrict cond,
0347                       pthread_mutex_t *restrict mutex)
0348 {
0349     errno = pthread_cond_wait(cond, mutex);
0350     if (unlikely(errno))
0351         err(EXIT_FAILURE, "pthread_cond_wait() failed");
0352 }
0353 
0354 static __always_inline void cond_broadcast(pthread_cond_t *cond)
0355 {
0356     errno = pthread_cond_broadcast(cond);
0357     if (unlikely(errno))
0358         err(EXIT_FAILURE, "pthread_cond_broadcast() failed");
0359 }
0360 
0361 static __always_inline void
0362 mutex_init(pthread_mutex_t *mutex,
0363        const pthread_mutexattr_t *attr)
0364 {
0365     errno = pthread_mutex_init(mutex, attr);
0366     if (errno)
0367         err(EXIT_FAILURE, "pthread_mutex_init() failed");
0368 }
0369 
0370 static __always_inline void mutexattr_init(pthread_mutexattr_t *attr)
0371 {
0372     errno = pthread_mutexattr_init(attr);
0373     if (errno)
0374         err(EXIT_FAILURE, "pthread_mutexattr_init() failed");
0375 }
0376 
0377 static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr)
0378 {
0379     errno = pthread_mutexattr_destroy(attr);
0380     if (errno)
0381         err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed");
0382 }
0383 
0384 static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr,
0385                           int type)
0386 {
0387     errno = pthread_mutexattr_settype(attr, type);
0388     if (errno)
0389         err(EXIT_FAILURE, "pthread_mutexattr_settype() failed");
0390 }
0391 
0392 static __always_inline void condattr_init(pthread_condattr_t *attr)
0393 {
0394     errno = pthread_condattr_init(attr);
0395     if (errno)
0396         err(EXIT_FAILURE, "pthread_condattr_init() failed");
0397 }
0398 
0399 static __always_inline void condattr_destroy(pthread_condattr_t *attr)
0400 {
0401     errno = pthread_condattr_destroy(attr);
0402     if (errno)
0403         err(EXIT_FAILURE, "pthread_condattr_destroy() failed");
0404 }
0405 
0406 static __always_inline void condattr_setclock(pthread_condattr_t *attr,
0407                           clockid_t clock_id)
0408 {
0409     errno = pthread_condattr_setclock(attr, clock_id);
0410     if (unlikely(errno))
0411         err(EXIT_FAILURE, "pthread_condattr_setclock() failed");
0412 }
0413 
0414 static __always_inline void cond_init(pthread_cond_t *cond,
0415                       const pthread_condattr_t *attr)
0416 {
0417     errno = pthread_cond_init(cond, attr);
0418     if (errno)
0419         err(EXIT_FAILURE, "pthread_cond_init() failed");
0420 }
0421 
0422 static __always_inline int
0423 cond_timedwait(pthread_cond_t *restrict cond,
0424            pthread_mutex_t *restrict mutex,
0425            const struct timespec *restrict abstime)
0426 {
0427     errno = pthread_cond_timedwait(cond, mutex, abstime);
0428     if (errno && errno != ETIMEDOUT)
0429         err(EXIT_FAILURE, "pthread_cond_timedwait() failed");
0430     return errno;
0431 }
0432 
0433 static void init_printstate(void)
0434 {
0435     pthread_condattr_t cattr;
0436 
0437     printstate.ticket_counter = 0;
0438     printstate.ticket_completed = 0;
0439     printstate.cnt = 0;
0440 
0441     mutex_init(&printstate.mutex, NULL);
0442 
0443     condattr_init(&cattr);
0444     condattr_setclock(&cattr, CLOCK_MONOTONIC);
0445     cond_init(&printstate.cond, &cattr);
0446     condattr_destroy(&cattr);
0447 }
0448 
0449 static void init_print_mtx(void)
0450 {
0451     pthread_mutexattr_t mattr;
0452 
0453     mutexattr_init(&mattr);
0454     mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
0455     mutex_init(&print_mtx, &mattr);
0456     mutexattr_destroy(&mattr);
0457 
0458 }
0459 
0460 static void signal_blocking(int how)
0461 {
0462     sigset_t s;
0463 
0464     sigemptyset_or_die(&s);
0465     sigaddset_or_die(&s, SIGHUP);
0466     sigaddset_or_die(&s, SIGTERM);
0467     sigaddset_or_die(&s, SIGINT);
0468 
0469     errno = pthread_sigmask(how, &s, NULL);
0470     if (unlikely(errno)) {
0471         warn("pthread_sigmask() failed");
0472         cleanup_exit(EXIT_FAILURE);
0473     }
0474 }
0475 
0476 static void signal_handler(int num)
0477 {
0478     signal_flag = num;
0479 }
0480 
0481 static void setup_sig_handler(void)
0482 {
0483     struct sigaction sa;
0484 
0485     memset(&sa, 0, sizeof(sa));
0486     sa.sa_handler = signal_handler;
0487 
0488     sigaction_or_die(SIGHUP, &sa, NULL);
0489     sigaction_or_die(SIGTERM, &sa, NULL);
0490     sigaction_or_die(SIGINT, &sa, NULL);
0491 }
0492 
0493 static void process_signal(int signal)
0494 {
0495     char *name;
0496 
0497     name = strsignal(signal);
0498     if (name == NULL)
0499         printf("Received signal %d\n", signal);
0500     else
0501         printf("Received signal %d (%s)\n", signal, name);
0502     cleanup_exit(EXIT_SUCCESS);
0503 }
0504 
0505 static __always_inline void check_signals(void)
0506 {
0507     int signal = signal_flag;
0508 
0509     if (unlikely(signal))
0510         process_signal(signal);
0511 }
0512 
0513 static __always_inline void get_time_in_future(struct timespec *future,
0514                            long time_us)
0515 {
0516     long nsec;
0517 
0518     clock_gettime_or_die(CLOCK_MONOTONIC, future);
0519     future->tv_sec += time_us / USEC_PER_SEC;
0520     nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC;
0521     if (nsec >= NSEC_PER_SEC) {
0522         future->tv_nsec = nsec % NSEC_PER_SEC;
0523         future->tv_sec += 1;
0524     }
0525 }
0526 
0527 static __always_inline bool time_has_passed(const struct timespec *time)
0528 {
0529     struct timespec now;
0530 
0531     clock_gettime_or_die(CLOCK_MONOTONIC, &now);
0532     if (now.tv_sec > time->tv_sec)
0533         return true;
0534     if (now.tv_sec < time->tv_sec)
0535         return false;
0536     return (now.tv_nsec >= time->tv_nsec);
0537 }
0538 
0539 static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms)
0540 {
0541     long time_us = time_ms * USEC_PER_MSEC;
0542     struct timespec limit;
0543 
0544     get_time_in_future(&limit, time_us);
0545     do {
0546         errno =  pthread_mutex_trylock(mutex);
0547         if (errno && errno != EBUSY)
0548             err(EXIT_FAILURE, "pthread_mutex_trylock() failed");
0549     } while (errno && !time_has_passed(&limit));
0550     return errno == 0;
0551 }
0552 
0553 static void restore_trace_opts(const struct ftrace_state *state,
0554                 const bool *cur)
0555 {
0556     int i;
0557     int r;
0558 
0559     for (i = 0; i < OPTIDX_NR; i++)
0560         if (state->opt_valid[i] && state->opt[i] != cur[i]) {
0561             r = set_trace_opt(optstr[i], state->opt[i]);
0562             if (r < 0)
0563                 warnx("Failed to restore the %s option to %s",
0564                       optstr[i], bool2str(state->opt[i]));
0565             else if (verbose_ftrace())
0566                 printf("Restored the %s option in %s to %s\n",
0567                        optstr[i], TR_OPTIONS,
0568                        bool2str(state->opt[i]));
0569         }
0570 }
0571 
0572 static char *read_file(const char *file, enum errhandling h)
0573 {
0574     int psize;
0575     char *r;
0576     static const char *emsg = "Failed to read the %s file";
0577 
0578     r = tracefs_instance_file_read(NULL, file, &psize);
0579     if (!r) {
0580         if (h) {
0581             warn(emsg, file);
0582             if (h == ERR_CLEANUP)
0583                 cleanup_exit(EXIT_FAILURE);
0584         } else
0585             errx(EXIT_FAILURE, emsg, file);
0586     }
0587 
0588     if (r && r[psize - 1] == '\n')
0589         r[psize - 1] = '\0';
0590     return r;
0591 }
0592 
0593 static void restore_file(const char *file, char **saved, const char *cur)
0594 {
0595     if (*saved && was_changed(*saved, cur)) {
0596         if (tracefs_instance_file_write(NULL, file, *saved) < 0)
0597             warnx("Failed to restore %s to %s!", file, *saved);
0598         else if (verbose_ftrace())
0599             printf("Restored %s to %s\n", file, *saved);
0600         free(*saved);
0601         *saved = NULL;
0602     }
0603 }
0604 
0605 static void restore_ftrace(void)
0606 {
0607     mutex_lock(&save_state.mutex);
0608 
0609     restore_file(TR_CURRENT, &save_state.tracer, current_tracer);
0610     restore_file(TR_THRESH, &save_state.thresh, threshold);
0611     restore_trace_opts(&save_state, use_options);
0612 
0613     mutex_unlock(&save_state.mutex);
0614 }
0615 
0616 static void cleanup_exit(int status)
0617 {
0618     char *maxlat;
0619 
0620     if (!setup_ftrace)
0621         exit(status);
0622 
0623     /*
0624      * We try the print_mtx for 1 sec in order to avoid garbled
0625      * output if possible, but if it cannot be obtained we proceed anyway.
0626      */
0627     mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS);
0628 
0629     maxlat = read_file(TR_MAXLAT, ERR_WARN);
0630     if (maxlat) {
0631         printf("The maximum detected latency was: %sus\n", maxlat);
0632         free(maxlat);
0633     }
0634 
0635     restore_ftrace();
0636     /*
0637      * We do not need to unlock the print_mtx here because we will exit at
0638      * the end of this function. Unlocking print_mtx causes problems if a
0639      * print thread happens to be waiting for the mutex because we have
0640      * just changed the ftrace settings to the original and thus the
0641      * print thread would output incorrect data from ftrace.
0642      */
0643     exit(status);
0644 }
0645 
0646 static void init_save_state(void)
0647 {
0648     pthread_mutexattr_t mattr;
0649 
0650     mutexattr_init(&mattr);
0651     mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
0652     mutex_init(&save_state.mutex, &mattr);
0653     mutexattr_destroy(&mattr);
0654 
0655     save_state.tracer = NULL;
0656     save_state.thresh = NULL;
0657     save_state.opt_valid[OPTIDX_FUNC_TR] = false;
0658     save_state.opt_valid[OPTIDX_DISP_GR] = false;
0659 }
0660 
0661 static int printstate_next_ticket(struct entry *req)
0662 {
0663     int r;
0664 
0665     r = ++(printstate.ticket_counter);
0666     req->ticket = r;
0667     req->ticket_completed_ref = printstate.ticket_completed;
0668     cond_broadcast(&printstate.cond);
0669     return r;
0670 }
0671 
0672 static __always_inline
0673 void printstate_mark_req_completed(const struct entry *req)
0674 {
0675     if (req->ticket > printstate.ticket_completed)
0676         printstate.ticket_completed = req->ticket;
0677 }
0678 
0679 static __always_inline
0680 bool printstate_has_new_req_arrived(const struct entry *req)
0681 {
0682     return (printstate.ticket_counter != req->ticket);
0683 }
0684 
0685 static __always_inline int printstate_cnt_inc(void)
0686 {
0687     int value;
0688 
0689     mutex_lock(&printstate.cnt_mutex);
0690     value = ++printstate.cnt;
0691     mutex_unlock(&printstate.cnt_mutex);
0692     return value;
0693 }
0694 
0695 static __always_inline int printstate_cnt_dec(void)
0696 {
0697     int value;
0698 
0699     mutex_lock(&printstate.cnt_mutex);
0700     value = --printstate.cnt;
0701     mutex_unlock(&printstate.cnt_mutex);
0702     return value;
0703 }
0704 
0705 static __always_inline int printstate_cnt_read(void)
0706 {
0707     int value;
0708 
0709     mutex_lock(&printstate.cnt_mutex);
0710     value = printstate.cnt;
0711     mutex_unlock(&printstate.cnt_mutex);
0712     return value;
0713 }
0714 
0715 static __always_inline
0716 bool prev_req_won_race(const struct entry *req)
0717 {
0718     return (printstate.ticket_completed != req->ticket_completed_ref);
0719 }
0720 
0721 static void sleeptable_resize(int size, bool printout, struct short_msg *msg)
0722 {
0723     int bytes;
0724 
0725     if (printout) {
0726         msg->len = 0;
0727         if (unlikely(size > PROB_TABLE_MAX_SIZE))
0728             bytes = snprintf(msg->buf, sizeof(msg->buf),
0729 "Cannot increase probability table to %d (maximum size reached)\n", size);
0730         else
0731             bytes = snprintf(msg->buf, sizeof(msg->buf),
0732 "Increasing probability table to %d\n", size);
0733         if (bytes < 0)
0734             warn("snprintf() failed");
0735         else
0736             msg->len = bytes;
0737     }
0738 
0739     if (unlikely(size < 0)) {
0740         /* Should never happen */
0741         warnx("Bad program state at %s:%d", __FILE__, __LINE__);
0742         cleanup_exit(EXIT_FAILURE);
0743         return;
0744     }
0745     sleeptable.size = size;
0746     sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size];
0747 }
0748 
0749 static void init_probabilities(void)
0750 {
0751     int i;
0752     int j = 1000;
0753 
0754     for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) {
0755         probabilities[i] = 1000 / j;
0756         j--;
0757     }
0758     mutex_init(&sleeptable.mutex, NULL);
0759 }
0760 
0761 static int table_get_probability(const struct entry *req,
0762                  struct short_msg *msg)
0763 {
0764     int diff = req->ticket - req->ticket_completed_ref;
0765     int rval = 0;
0766 
0767     msg->len = 0;
0768     diff--;
0769     /* Should never happen...*/
0770     if (unlikely(diff < 0)) {
0771         warnx("Programmer assumption error at %s:%d\n", __FILE__,
0772               __LINE__);
0773         cleanup_exit(EXIT_FAILURE);
0774     }
0775     mutex_lock(&sleeptable.mutex);
0776     if (diff >= (sleeptable.size - 1)) {
0777         rval = sleeptable.table[sleeptable.size - 1];
0778         sleeptable_resize(sleeptable.size + 1, verbose_sizechange(),
0779                   msg);
0780     } else {
0781         rval = sleeptable.table[diff];
0782     }
0783     mutex_unlock(&sleeptable.mutex);
0784     return rval;
0785 }
0786 
0787 static void init_queue(struct queue *q)
0788 {
0789     q->next_prod_idx = 0;
0790     q->next_cons_idx = 0;
0791     mutex_init(&q->mutex, NULL);
0792     errno = pthread_cond_init(&q->cond, NULL);
0793     if (errno)
0794         err(EXIT_FAILURE, "pthread_cond_init() failed");
0795 }
0796 
0797 static __always_inline int queue_len(const struct queue *q)
0798 {
0799     if (q->next_prod_idx >= q->next_cons_idx)
0800         return q->next_prod_idx - q->next_cons_idx;
0801     else
0802         return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx;
0803 }
0804 
0805 static __always_inline int queue_nr_free(const struct queue *q)
0806 {
0807     int nr_free = QUEUE_SIZE - queue_len(q);
0808 
0809     /*
0810      * If there is only one slot left we will anyway lie and claim that the
0811      * queue is full because adding an element will make it appear empty
0812      */
0813     if (nr_free == 1)
0814         nr_free = 0;
0815     return nr_free;
0816 }
0817 
0818 static __always_inline void queue_idx_inc(int *idx)
0819 {
0820     *idx = (*idx + 1) % QUEUE_SIZE;
0821 }
0822 
0823 static __always_inline void queue_push_to_back(struct queue *q,
0824                           const struct entry *e)
0825 {
0826     q->entries[q->next_prod_idx] = *e;
0827     queue_idx_inc(&q->next_prod_idx);
0828 }
0829 
0830 static __always_inline struct entry queue_pop_from_front(struct queue *q)
0831 {
0832     struct entry e = q->entries[q->next_cons_idx];
0833 
0834     queue_idx_inc(&q->next_cons_idx);
0835     return e;
0836 }
0837 
0838 static __always_inline void queue_cond_signal(struct queue *q)
0839 {
0840     cond_signal(&q->cond);
0841 }
0842 
0843 static __always_inline void queue_cond_wait(struct queue *q)
0844 {
0845     cond_wait(&q->cond, &q->mutex);
0846 }
0847 
0848 static __always_inline int queue_try_to_add_entry(struct queue *q,
0849                           const struct entry *e)
0850 {
0851     int r = 0;
0852 
0853     mutex_lock(&q->mutex);
0854     if (queue_nr_free(q) > 0) {
0855         queue_push_to_back(q, e);
0856         cond_signal(&q->cond);
0857     } else
0858         r = -1;
0859     mutex_unlock(&q->mutex);
0860     return r;
0861 }
0862 
0863 static struct entry queue_wait_for_entry(struct queue *q)
0864 {
0865     struct entry e;
0866 
0867     mutex_lock(&q->mutex);
0868     while (true) {
0869         if (queue_len(&printqueue) > 0) {
0870             e = queue_pop_from_front(q);
0871             break;
0872         }
0873         queue_cond_wait(q);
0874     }
0875     mutex_unlock(&q->mutex);
0876 
0877     return e;
0878 }
0879 
0880 static const struct policy *policy_from_name(const char *name)
0881 {
0882     const struct policy *p = &policies[0];
0883 
0884     while (p->name != NULL) {
0885         if (!strcmp(name, p->name))
0886             return p;
0887         p++;
0888     }
0889     return NULL;
0890 }
0891 
0892 static const char *policy_name(int policy)
0893 {
0894     const struct policy *p = &policies[0];
0895     static const char *rval = "unknown";
0896 
0897     while (p->name != NULL) {
0898         if (p->policy == policy)
0899             return p->name;
0900         p++;
0901     }
0902     return rval;
0903 }
0904 
0905 static bool is_relevant_tracer(const char *name)
0906 {
0907     unsigned int i;
0908 
0909     for (i = 0; relevant_tracers[i]; i++)
0910         if (!strcmp(name, relevant_tracers[i]))
0911             return true;
0912     return false;
0913 }
0914 
0915 static bool random_makes_sense(const char *name)
0916 {
0917     unsigned int i;
0918 
0919     for (i = 0; random_tracers[i]; i++)
0920         if (!strcmp(name, random_tracers[i]))
0921             return true;
0922     return false;
0923 }
0924 
0925 static void show_available(void)
0926 {
0927     char **tracers;
0928     int found = 0;
0929     int i;
0930 
0931     tracers = tracefs_tracers(NULL);
0932     for (i = 0; tracers && tracers[i]; i++) {
0933         if (is_relevant_tracer(tracers[i]))
0934             found++;
0935     }
0936 
0937     if (!tracers) {
0938         warnx(no_tracer_msg);
0939         return;
0940     }
0941 
0942     if (!found) {
0943         warnx(no_latency_tr_msg);
0944         tracefs_list_free(tracers);
0945         return;
0946     }
0947 
0948     printf("The following latency tracers are available on your system:\n");
0949     for (i = 0; tracers[i]; i++) {
0950         if (is_relevant_tracer(tracers[i]))
0951             printf("%s\n", tracers[i]);
0952     }
0953     tracefs_list_free(tracers);
0954 }
0955 
0956 static bool tracer_valid(const char *name, bool *notracer)
0957 {
0958     char **tracers;
0959     int i;
0960     bool rval = false;
0961 
0962     *notracer = false;
0963     tracers = tracefs_tracers(NULL);
0964     if (!tracers) {
0965         *notracer = true;
0966         return false;
0967     }
0968     for (i = 0; tracers[i]; i++)
0969         if (!strcmp(tracers[i], name)) {
0970             rval = true;
0971             break;
0972         }
0973     tracefs_list_free(tracers);
0974     return rval;
0975 }
0976 
0977 static const char *find_default_tracer(void)
0978 {
0979     int i;
0980     bool notracer;
0981     bool valid;
0982 
0983     for (i = 0; relevant_tracers[i]; i++) {
0984         valid = tracer_valid(relevant_tracers[i], &notracer);
0985         if (notracer)
0986             errx(EXIT_FAILURE, no_tracer_msg);
0987         if (valid)
0988             return relevant_tracers[i];
0989     }
0990     return NULL;
0991 }
0992 
0993 static bool toss_coin(struct drand48_data *buffer, unsigned int prob)
0994 {
0995     long r;
0996 
0997     if (unlikely(lrand48_r(buffer, &r))) {
0998         warnx("lrand48_r() failed");
0999         cleanup_exit(EXIT_FAILURE);
1000     }
1001     r = r % 1000L;
1002     if (r < prob)
1003         return true;
1004     else
1005         return false;
1006 }
1007 
1008 
1009 static long go_to_sleep(const struct entry *req)
1010 {
1011     struct timespec future;
1012     long delay = sleep_time;
1013 
1014     get_time_in_future(&future, delay);
1015 
1016     mutex_lock(&printstate.mutex);
1017     while (!printstate_has_new_req_arrived(req)) {
1018         cond_timedwait(&printstate.cond, &printstate.mutex, &future);
1019         if (time_has_passed(&future))
1020             break;
1021     }
1022 
1023     if (printstate_has_new_req_arrived(req))
1024         delay = -1;
1025     mutex_unlock(&printstate.mutex);
1026 
1027     return delay;
1028 }
1029 
1030 
1031 static void set_priority(void)
1032 {
1033     int r;
1034     pid_t pid;
1035     struct sched_param param;
1036 
1037     memset(&param, 0, sizeof(param));
1038     param.sched_priority = sched_pri;
1039 
1040     pid = getpid();
1041     r = sched_setscheduler(pid, sched_policy, &param);
1042 
1043     if (r != 0)
1044         err(EXIT_FAILURE, "sched_setscheduler() failed");
1045 }
1046 
1047 pid_t latency_collector_gettid(void)
1048 {
1049     return (pid_t) syscall(__NR_gettid);
1050 }
1051 
1052 static void print_priority(void)
1053 {
1054     pid_t tid;
1055     int policy;
1056     int r;
1057     struct sched_param param;
1058 
1059     tid = latency_collector_gettid();
1060     r = pthread_getschedparam(pthread_self(), &policy, &param);
1061     if (r != 0) {
1062         warn("pthread_getschedparam() failed");
1063         cleanup_exit(EXIT_FAILURE);
1064     }
1065     mutex_lock(&print_mtx);
1066     printf("Thread %d runs with scheduling policy %s and priority %d\n",
1067            tid, policy_name(policy), param.sched_priority);
1068     mutex_unlock(&print_mtx);
1069 }
1070 
1071 static __always_inline
1072 void __print_skipmessage(const struct short_msg *resize_msg,
1073              const struct timespec *timestamp, char *buffer,
1074              size_t bufspace, const struct entry *req, bool excuse,
1075              const char *str)
1076 {
1077     ssize_t bytes = 0;
1078     char *p = &buffer[0];
1079     long us, sec;
1080     int r;
1081 
1082     sec = timestamp->tv_sec;
1083     us = timestamp->tv_nsec / 1000;
1084 
1085     if (resize_msg != NULL && resize_msg->len > 0) {
1086         strncpy(p, resize_msg->buf, resize_msg->len);
1087         bytes += resize_msg->len;
1088         p += resize_msg->len;
1089         bufspace -= resize_msg->len;
1090     }
1091 
1092     if (excuse)
1093         r = snprintf(p, bufspace,
1094 "%ld.%06ld Latency %d printout skipped due to %s\n",
1095                  sec, us, req->ticket, str);
1096     else
1097         r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n",
1098                 sec, us, req->ticket);
1099 
1100     if (r < 0)
1101         warn("snprintf() failed");
1102     else
1103         bytes += r;
1104 
1105     /* These prints could happen concurrently */
1106     mutex_lock(&print_mtx);
1107     write_or_die(fd_stdout, buffer, bytes);
1108     mutex_unlock(&print_mtx);
1109 }
1110 
1111 static void print_skipmessage(const struct short_msg *resize_msg,
1112                   const struct timespec *timestamp, char *buffer,
1113                   size_t bufspace, const struct entry *req,
1114                   bool excuse)
1115 {
1116     __print_skipmessage(resize_msg, timestamp, buffer, bufspace, req,
1117                 excuse, "random delay");
1118 }
1119 
1120 static void print_lostmessage(const struct timespec *timestamp, char *buffer,
1121                   size_t bufspace, const struct entry *req,
1122                   const char *reason)
1123 {
1124     __print_skipmessage(NULL, timestamp, buffer, bufspace, req, true,
1125                 reason);
1126 }
1127 
1128 static void print_tracefile(const struct short_msg *resize_msg,
1129                 const struct timespec *timestamp, char *buffer,
1130                 size_t bufspace, long slept,
1131                 const struct entry *req)
1132 {
1133     static const int reserve = 256;
1134     char *p = &buffer[0];
1135     ssize_t bytes = 0;
1136     ssize_t bytes_tot = 0;
1137     long us, sec;
1138     long slept_ms;
1139     int trace_fd;
1140 
1141     /* Save some space for the final string and final null char */
1142     bufspace = bufspace - reserve - 1;
1143 
1144     if (resize_msg != NULL && resize_msg->len > 0) {
1145         bytes = resize_msg->len;
1146         strncpy(p, resize_msg->buf, bytes);
1147         bytes_tot += bytes;
1148         p += bytes;
1149         bufspace -= bytes;
1150     }
1151 
1152     trace_fd = open(debug_tracefile, O_RDONLY);
1153 
1154     if (trace_fd < 0) {
1155         warn("open() failed on %s", debug_tracefile);
1156         return;
1157     }
1158 
1159     sec = timestamp->tv_sec;
1160     us = timestamp->tv_nsec / 1000;
1161 
1162     if (slept != 0) {
1163         slept_ms = slept / 1000;
1164         bytes = snprintf(p, bufspace,
1165 "%ld.%06ld Latency %d randomly sleep for %ld ms before print\n",
1166                  sec, us, req->ticket, slept_ms);
1167     } else {
1168         bytes = snprintf(p, bufspace,
1169                  "%ld.%06ld Latency %d immediate print\n", sec,
1170                  us, req->ticket);
1171     }
1172 
1173     if (bytes < 0) {
1174         warn("snprintf() failed");
1175         return;
1176     }
1177     p += bytes;
1178     bufspace -= bytes;
1179     bytes_tot += bytes;
1180 
1181     bytes = snprintf(p, bufspace,
1182 ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"
1183         );
1184 
1185     if (bytes < 0) {
1186         warn("snprintf() failed");
1187         return;
1188     }
1189 
1190     p += bytes;
1191     bufspace -= bytes;
1192     bytes_tot += bytes;
1193 
1194     do {
1195         bytes = read(trace_fd, p, bufspace);
1196         if (bytes < 0) {
1197             if (errno == EINTR)
1198                 continue;
1199             warn("read() failed on %s", debug_tracefile);
1200             if (unlikely(close(trace_fd) != 0))
1201                 warn("close() failed on %s", debug_tracefile);
1202             return;
1203         }
1204         if (bytes == 0)
1205             break;
1206         p += bytes;
1207         bufspace -= bytes;
1208         bytes_tot += bytes;
1209     } while (true);
1210 
1211     if (unlikely(close(trace_fd) != 0))
1212         warn("close() failed on %s", debug_tracefile);
1213 
1214     printstate_cnt_dec();
1215     /* Add the reserve space back to the budget for the final string */
1216     bufspace += reserve;
1217 
1218     bytes = snprintf(p, bufspace,
1219              ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n");
1220 
1221     if (bytes < 0) {
1222         warn("snprintf() failed");
1223         return;
1224     }
1225 
1226     bytes_tot += bytes;
1227 
1228     /* These prints could happen concurrently */
1229     mutex_lock(&print_mtx);
1230     write_or_die(fd_stdout, buffer, bytes_tot);
1231     mutex_unlock(&print_mtx);
1232 }
1233 
1234 static char *get_no_opt(const char *opt)
1235 {
1236     char *no_opt;
1237     int s;
1238 
1239     s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1;
1240     /* We may be called from cleanup_exit() via set_trace_opt() */
1241     no_opt = malloc_or_die_nocleanup(s);
1242     strcpy(no_opt, OPT_NO_PREFIX);
1243     strcat(no_opt, opt);
1244     return no_opt;
1245 }
1246 
1247 static char *find_next_optstr(const char *allopt, const char **next)
1248 {
1249     const char *begin;
1250     const char *end;
1251     char *r;
1252     int s = 0;
1253 
1254     if (allopt == NULL)
1255         return NULL;
1256 
1257     for (begin = allopt; *begin != '\0'; begin++) {
1258         if (isgraph(*begin))
1259             break;
1260     }
1261 
1262     if (*begin == '\0')
1263         return NULL;
1264 
1265     for (end = begin; *end != '\0' && isgraph(*end); end++)
1266         s++;
1267 
1268     r = malloc_or_die_nocleanup(s + 1);
1269     strncpy(r, begin, s);
1270     r[s] = '\0';
1271     *next = begin + s;
1272     return r;
1273 }
1274 
1275 static bool get_trace_opt(const char *allopt, const char *opt, bool *found)
1276 {
1277     *found = false;
1278     char *no_opt;
1279     char *str;
1280     const char *next = allopt;
1281     bool rval = false;
1282 
1283     no_opt = get_no_opt(opt);
1284 
1285     do {
1286         str = find_next_optstr(next, &next);
1287         if (str == NULL)
1288             break;
1289         if (!strcmp(str, opt)) {
1290             *found = true;
1291             rval = true;
1292             free(str);
1293             break;
1294         }
1295         if (!strcmp(str, no_opt)) {
1296             *found = true;
1297             rval = false;
1298             free(str);
1299             break;
1300         }
1301         free(str);
1302     } while (true);
1303     free(no_opt);
1304 
1305     return rval;
1306 }
1307 
1308 static int set_trace_opt(const char *opt, bool value)
1309 {
1310     char *str;
1311     int r;
1312 
1313     if (value)
1314         str = strdup(opt);
1315     else
1316         str = get_no_opt(opt);
1317 
1318     r = tracefs_instance_file_write(NULL, TR_OPTIONS, str);
1319     free(str);
1320     return r;
1321 }
1322 
1323 void save_trace_opts(struct ftrace_state *state)
1324 {
1325     char *allopt;
1326     int psize;
1327     int i;
1328 
1329     allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize);
1330     if (!allopt)
1331         errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS);
1332 
1333     for (i = 0; i < OPTIDX_NR; i++)
1334         state->opt[i] = get_trace_opt(allopt, optstr[i],
1335                           &state->opt_valid[i]);
1336 
1337     free(allopt);
1338 }
1339 
1340 static void write_file(const char *file, const char *cur, const char *new,
1341                enum errhandling h)
1342 {
1343     int r;
1344     static const char *emsg = "Failed to write to the %s file!";
1345 
1346     /* Do nothing if we now that the current and new value are equal */
1347     if (cur && !needs_change(cur, new))
1348         return;
1349 
1350     r = tracefs_instance_file_write(NULL, file, new);
1351     if (r < 0) {
1352         if (h) {
1353             warnx(emsg, file);
1354             if (h == ERR_CLEANUP)
1355                 cleanup_exit(EXIT_FAILURE);
1356         } else
1357             errx(EXIT_FAILURE, emsg, file);
1358     }
1359     if (verbose_ftrace()) {
1360         mutex_lock(&print_mtx);
1361         printf("%s was set to %s\n", file, new);
1362         mutex_unlock(&print_mtx);
1363     }
1364 }
1365 
1366 static void reset_max_latency(void)
1367 {
1368     write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP);
1369 }
1370 
1371 static void save_and_disable_tracer(void)
1372 {
1373     char *orig_th;
1374     char *tracer;
1375     bool need_nop = false;
1376 
1377     mutex_lock(&save_state.mutex);
1378 
1379     save_trace_opts(&save_state);
1380     tracer = read_file(TR_CURRENT, ERR_EXIT);
1381     orig_th = read_file(TR_THRESH, ERR_EXIT);
1382 
1383     if (needs_change(tracer, NOP_TRACER)) {
1384         mutex_lock(&print_mtx);
1385         if (force_tracer) {
1386             printf(
1387                 "The %s tracer is already in use but proceeding anyway!\n",
1388                 tracer);
1389         } else {
1390             printf(
1391                 "The %s tracer is already in use, cowardly bailing out!\n"
1392                 "This could indicate that another program or instance is tracing.\n"
1393                 "Use the -F [--force] option to disregard the current tracer.\n", tracer);
1394             exit(0);
1395         }
1396         mutex_unlock(&print_mtx);
1397         need_nop = true;
1398     }
1399 
1400     save_state.tracer =  tracer;
1401     save_state.thresh = orig_th;
1402 
1403     if (need_nop)
1404         write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT);
1405 
1406     mutex_unlock(&save_state.mutex);
1407 }
1408 
1409 void set_trace_opts(struct ftrace_state *state, bool *new)
1410 {
1411     int i;
1412     int r;
1413 
1414     /*
1415      * We only set options if we earlier detected that the option exists in
1416      * the trace_options file and that the wanted setting is different from
1417      * the one we saw in save_and_disable_tracer()
1418      */
1419     for (i = 0; i < OPTIDX_NR; i++)
1420         if (state->opt_valid[i] &&
1421             state->opt[i] != new[i]) {
1422             r = set_trace_opt(optstr[i], new[i]);
1423             if (r < 0) {
1424                 warnx("Failed to set the %s option to %s",
1425                       optstr[i], bool2str(new[i]));
1426                 cleanup_exit(EXIT_FAILURE);
1427             }
1428             if (verbose_ftrace()) {
1429                 mutex_lock(&print_mtx);
1430                 printf("%s in %s was set to %s\n", optstr[i],
1431                        TR_OPTIONS, bool2str(new[i]));
1432                 mutex_unlock(&print_mtx);
1433             }
1434         }
1435 }
1436 
1437 static void enable_tracer(void)
1438 {
1439     mutex_lock(&save_state.mutex);
1440     set_trace_opts(&save_state, use_options);
1441 
1442     write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP);
1443     write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP);
1444 
1445     mutex_unlock(&save_state.mutex);
1446 }
1447 
1448 static void tracing_loop(void)
1449 {
1450     int ifd = inotify_init();
1451     int wd;
1452     const ssize_t bufsize = sizeof(inotify_buffer);
1453     const ssize_t istructsize = sizeof(struct inotify_event);
1454     char *buf = &inotify_buffer[0];
1455     ssize_t nr_read;
1456     char *p;
1457     int modified;
1458     struct inotify_event *event;
1459     struct entry req;
1460     char *buffer;
1461     const size_t bufspace = PRINT_BUFFER_SIZE;
1462     struct timespec timestamp;
1463 
1464     print_priority();
1465 
1466     buffer = malloc_or_die(bufspace);
1467 
1468     if (ifd < 0)
1469         err(EXIT_FAILURE, "inotify_init() failed!");
1470 
1471 
1472     if (setup_ftrace) {
1473         /*
1474          * We must disable the tracer before resetting the max_latency
1475          */
1476         save_and_disable_tracer();
1477         /*
1478          * We must reset the max_latency before the inotify_add_watch()
1479          * call.
1480          */
1481         reset_max_latency();
1482     }
1483 
1484     wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY);
1485     if (wd < 0)
1486         err(EXIT_FAILURE, "inotify_add_watch() failed!");
1487 
1488     if (setup_ftrace)
1489         enable_tracer();
1490 
1491     signal_blocking(SIG_UNBLOCK);
1492 
1493     while (true) {
1494         modified = 0;
1495         check_signals();
1496         nr_read = read(ifd, buf, bufsize);
1497         check_signals();
1498         if (nr_read < 0) {
1499             if (errno == EINTR)
1500                 continue;
1501             warn("read() failed on inotify fd!");
1502             cleanup_exit(EXIT_FAILURE);
1503         }
1504         if (nr_read == bufsize)
1505             warnx("inotify() buffer filled, skipping events");
1506         if (nr_read < istructsize) {
1507             warnx("read() returned too few bytes on inotify fd");
1508             cleanup_exit(EXIT_FAILURE);
1509         }
1510 
1511         for (p = buf; p < buf + nr_read;) {
1512             event = (struct inotify_event *) p;
1513             if ((event->mask & IN_MODIFY) != 0)
1514                 modified++;
1515             p += istructsize + event->len;
1516         }
1517         while (modified > 0) {
1518             check_signals();
1519             mutex_lock(&printstate.mutex);
1520             check_signals();
1521             printstate_next_ticket(&req);
1522             if (printstate_cnt_read() > 0) {
1523                 printstate_mark_req_completed(&req);
1524                 mutex_unlock(&printstate.mutex);
1525                 if (verbose_lostevent()) {
1526                     clock_gettime_or_die(CLOCK_MONOTONIC,
1527                                  &timestamp);
1528                     print_lostmessage(&timestamp, buffer,
1529                               bufspace, &req,
1530                               "inotify loop");
1531                 }
1532                 break;
1533             }
1534             mutex_unlock(&printstate.mutex);
1535             if (queue_try_to_add_entry(&printqueue, &req) != 0) {
1536                 /* These prints could happen concurrently */
1537                 check_signals();
1538                 mutex_lock(&print_mtx);
1539                 check_signals();
1540                 write_or_die(fd_stdout, queue_full_warning,
1541                          strlen(queue_full_warning));
1542                 mutex_unlock(&print_mtx);
1543             }
1544             modified--;
1545         }
1546     }
1547 }
1548 
1549 static void *do_printloop(void *arg)
1550 {
1551     const size_t bufspace = PRINT_BUFFER_SIZE;
1552     char *buffer;
1553     long *rseed = (long *) arg;
1554     struct drand48_data drandbuf;
1555     long slept = 0;
1556     struct entry req;
1557     int prob = 0;
1558     struct timespec timestamp;
1559     struct short_msg resize_msg;
1560 
1561     print_priority();
1562 
1563     if (srand48_r(*rseed, &drandbuf) != 0) {
1564         warn("srand48_r() failed!\n");
1565         cleanup_exit(EXIT_FAILURE);
1566     }
1567 
1568     buffer = malloc_or_die(bufspace);
1569 
1570     while (true) {
1571         req = queue_wait_for_entry(&printqueue);
1572         clock_gettime_or_die(CLOCK_MONOTONIC, &timestamp);
1573         mutex_lock(&printstate.mutex);
1574         if (prev_req_won_race(&req)) {
1575             printstate_mark_req_completed(&req);
1576             mutex_unlock(&printstate.mutex);
1577             if (verbose_lostevent())
1578                 print_lostmessage(&timestamp, buffer, bufspace,
1579                           &req, "print loop");
1580             continue;
1581         }
1582         mutex_unlock(&printstate.mutex);
1583 
1584         /*
1585          * Toss a coin to decide if we want to sleep before printing
1586          * out the backtrace. The reason for this is that opening
1587          * /sys/kernel/debug/tracing/trace will cause a blackout of
1588          * hundreds of ms, where no latencies will be noted by the
1589          * latency tracer. Thus by randomly sleeping we try to avoid
1590          * missing traces systematically due to this. With this option
1591          * we will sometimes get the first latency, some other times
1592          * some of the later ones, in case of closely spaced traces.
1593          */
1594         if (trace_enable && use_random_sleep) {
1595             slept = 0;
1596             prob = table_get_probability(&req, &resize_msg);
1597             if (!toss_coin(&drandbuf, prob))
1598                 slept = go_to_sleep(&req);
1599             if (slept >= 0) {
1600                 /* A print is ongoing */
1601                 printstate_cnt_inc();
1602                 /*
1603                  * We will do the printout below so we have to
1604                  * mark it as completed while we still have the
1605                  * mutex.
1606                  */
1607                 mutex_lock(&printstate.mutex);
1608                 printstate_mark_req_completed(&req);
1609                 mutex_unlock(&printstate.mutex);
1610             }
1611         }
1612         if (trace_enable) {
1613             /*
1614              * slept < 0  means that we detected another
1615              * notification in go_to_sleep() above
1616              */
1617             if (slept >= 0)
1618                 /*
1619                  * N.B. printstate_cnt_dec(); will be called
1620                  * inside print_tracefile()
1621                  */
1622                 print_tracefile(&resize_msg, &timestamp, buffer,
1623                         bufspace, slept, &req);
1624             else
1625                 print_skipmessage(&resize_msg, &timestamp,
1626                           buffer, bufspace, &req, true);
1627         } else {
1628             print_skipmessage(&resize_msg, &timestamp, buffer,
1629                       bufspace, &req, false);
1630         }
1631     }
1632     return NULL;
1633 }
1634 
1635 static void start_printthread(void)
1636 {
1637     unsigned int i;
1638     long *seed;
1639     int ufd;
1640 
1641     ufd = open(DEV_URANDOM, O_RDONLY);
1642     if (nr_threads > MAX_THREADS) {
1643         warnx(
1644 "Number of requested print threads was %d, max number is %d\n",
1645               nr_threads, MAX_THREADS);
1646         nr_threads = MAX_THREADS;
1647     }
1648     for (i = 0; i < nr_threads; i++) {
1649         seed = malloc_or_die(sizeof(*seed));
1650         if (ufd <  0 ||
1651             read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) {
1652             printf(
1653 "Warning! Using trivial random number seed, since %s not available\n",
1654             DEV_URANDOM);
1655             fflush(stdout);
1656             *seed = i;
1657         }
1658         errno = pthread_create(&printthread[i], NULL, do_printloop,
1659                        seed);
1660         if (errno)
1661             err(EXIT_FAILURE, "pthread_create()");
1662     }
1663     if (ufd > 0 && close(ufd) != 0)
1664         warn("close() failed");
1665 }
1666 
1667 static void show_usage(void)
1668 {
1669     printf(
1670 "Usage: %s [OPTION]...\n\n"
1671 "Collect closely occurring latencies from %s\n"
1672 "with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, "
1673 "wakeup,\nwakeup_dl, or wakeup_rt.\n\n"
1674 
1675 "The occurrence of a latency is detected by monitoring the file\n"
1676 "%s with inotify.\n\n"
1677 
1678 "The following options are supported:\n\n"
1679 
1680 "-l, --list\t\tList the latency tracers that are supported by the\n"
1681 "\t\t\tcurrently running Linux kernel. If you don't see the\n"
1682 "\t\t\ttracer that you want, you will probably need to\n"
1683 "\t\t\tchange your kernel config and build a new kernel.\n\n"
1684 
1685 "-t, --tracer TR\t\tUse the tracer TR. The default is to use the first\n"
1686 "\t\t\ttracer that is supported by the kernel in the following\n"
1687 "\t\t\torder of precedence:\n\n"
1688 "\t\t\tpreemptirqsoff\n"
1689 "\t\t\tpreemptoff\n"
1690 "\t\t\tirqsoff\n"
1691 "\t\t\twakeup\n"
1692 "\t\t\twakeup_rt\n"
1693 "\t\t\twakeup_dl\n"
1694 "\n"
1695 "\t\t\tIf TR is not on the list above, then a warning will be\n"
1696 "\t\t\tprinted.\n\n"
1697 
1698 "-F, --force\t\tProceed even if another ftrace tracer is active. Without\n"
1699 "\t\t\tthis option, the program will refuse to start tracing if\n"
1700 "\t\t\tany other tracer than the nop tracer is active.\n\n"
1701 
1702 "-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n"
1703 "\t\t\tfor the tracer. The default is 0, which means that\n"
1704 "\t\t\ttracing_max_latency will be used. tracing_max_latency is\n"
1705 "\t\t\tset to 0 when the program is started and contains the\n"
1706 "\t\t\tmaximum of the latencies that have been encountered.\n\n"
1707 
1708 "-f, --function\t\tEnable the function-trace option in trace_options. With\n"
1709 "\t\t\tthis option, ftrace will trace the functions that are\n"
1710 "\t\t\texecuted during a latency, without it we only get the\n"
1711 "\t\t\tbeginning, end, and backtrace.\n\n"
1712 
1713 "-g, --graph\t\tEnable the display-graph option in trace_option. This\n"
1714 "\t\t\toption causes ftrace to show the graph of how functions\n"
1715 "\t\t\tare calling other functions.\n\n"
1716 
1717 "-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n"
1718 "\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n"
1719 "\t\t\tusing rr or fifo, remember that these policies may cause\n"
1720 "\t\t\tother tasks to experience latencies.\n\n"
1721 
1722 "-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n"
1723 "\t\t\tof PRI depends on the scheduling policy.\n\n"
1724 
1725 "-n, --notrace\t\tIf latency is detected, do not print out the content of\n"
1726 "\t\t\tthe trace file to standard output\n\n"
1727 
1728 "-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n"
1729 
1730 "-r, --random\t\tArbitrarily sleep a certain amount of time, default\n"
1731 "\t\t\t%ld ms, before reading the trace file. The\n"
1732 "\t\t\tprobabilities for sleep are chosen so that the\n"
1733 "\t\t\tprobability of obtaining any of a cluster of closely\n"
1734 "\t\t\toccurring latencies are equal, i.e. we will randomly\n"
1735 "\t\t\tchoose which one we collect from the trace file.\n\n"
1736 "\t\t\tThis option is probably only useful with the irqsoff,\n"
1737 "\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n"
1738 
1739 "-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n"
1740 "\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n"
1741 "\t\t\tlatencies are detected during a run, this value will\n"
1742 "\t\t\tautomatically be increased to NRLAT + 1 and then to\n"
1743 "\t\t\tNRLAT + 2 and so on. The default is %d. This option\n"
1744 "\t\t\timplies -r. We need to know this number in order to\n"
1745 "\t\t\tbe able to calculate the probabilities of sleeping.\n"
1746 "\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n"
1747 "\t\t\tdo an immediate printout will be:\n\n"
1748 "\t\t\t1/NRLAT  1/(NRLAT - 1) ... 1/3  1/2  1\n\n"
1749 "\t\t\tThe probability of sleeping will be:\n\n"
1750 "\t\t\t1 - P, where P is from the series above\n\n"
1751 "\t\t\tThis descending probability will cause us to choose\n"
1752 "\t\t\tan occurrence at random. Observe that the final\n"
1753 "\t\t\tprobability is 0, it is when we reach this probability\n"
1754 "\t\t\tthat we increase NRLAT automatically. As an example,\n"
1755 "\t\t\twith the default value of 2, the probabilities will be:\n\n"
1756 "\t\t\t1/2  0\n\n"
1757 "\t\t\tThis means, when a latency is detected we will sleep\n"
1758 "\t\t\twith 50%% probability. If we ever detect another latency\n"
1759 "\t\t\tduring the sleep period, then the probability of sleep\n"
1760 "\t\t\twill be 0%% and the table will be expanded to:\n\n"
1761 "\t\t\t1/3  1/2  0\n\n"
1762 
1763 "-v, --verbose\t\tIncrease the verbosity. If this option is given once,\n"
1764 "\t\t\tthen print a message every time that the NRLAT value\n"
1765 "\t\t\tis automatically increased. It also causes a message to\n"
1766 "\t\t\tbe printed when the ftrace settings are changed. If this\n"
1767 "\t\t\toption is given at least twice, then also print a\n"
1768 "\t\t\twarning for lost events.\n\n"
1769 
1770 "-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n"
1771 "\t\t\tprinting out the trace from the trace file. The default\n"
1772 "\t\t\tis %ld ms. This option implies -r.\n\n"
1773 
1774 "-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n"
1775 "\t\t\tconfigures the ftrace files in sysfs such as\n"
1776 "\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n"
1777 
1778 "-i, --tracefile FILE\tUse FILE as trace file. The default is\n"
1779 "\t\t\t%s.\n"
1780 "\t\t\tThis options implies -x\n\n"
1781 
1782 "-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n"
1783 "\t\t\t%s.\n"
1784 "\t\t\tThis options implies -x\n\n"
1785 ,
1786 prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS,
1787 SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT,
1788 debug_tracefile_dflt, debug_maxlat_dflt);
1789 }
1790 
1791 static void find_tracefiles(void)
1792 {
1793     debug_tracefile_dflt = tracefs_get_tracing_file("trace");
1794     if (debug_tracefile_dflt == NULL) {
1795         /* This is needed in show_usage() */
1796         debug_tracefile_dflt = DEBUG_NOFILE;
1797     }
1798 
1799     debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency");
1800     if (debug_maxlat_dflt == NULL) {
1801         /* This is needed in show_usage() */
1802         debug_maxlat_dflt = DEBUG_NOFILE;
1803     }
1804 
1805     debug_tracefile = debug_tracefile_dflt;
1806     debug_maxlat = debug_maxlat_dflt;
1807 }
1808 
1809 bool alldigits(const char *s)
1810 {
1811     for (; *s != '\0'; s++)
1812         if (!isdigit(*s))
1813             return false;
1814     return true;
1815 }
1816 
1817 void check_alldigits(const char *optarg, const char *argname)
1818 {
1819     if (!alldigits(optarg))
1820         errx(EXIT_FAILURE,
1821              "The %s parameter expects a decimal argument\n", argname);
1822 }
1823 
1824 static void scan_arguments(int argc, char *argv[])
1825 {
1826     int c;
1827     int i;
1828     int option_idx = 0;
1829 
1830     static struct option long_options[] = {
1831         { "list",       no_argument,            0, 'l' },
1832         { "tracer", required_argument,  0, 't' },
1833         { "force",      no_argument,            0, 'F' },
1834         { "threshold",  required_argument,      0, 's' },
1835         { "function",   no_argument,            0, 'f' },
1836         { "graph",      no_argument,            0, 'g' },
1837         { "policy", required_argument,  0, 'c' },
1838         { "priority",   required_argument,  0, 'p' },
1839         { "help",   no_argument,        0, 'h' },
1840         { "notrace",    no_argument,        0, 'n' },
1841         { "random", no_argument,        0, 'r' },
1842         { "nrlat",  required_argument,  0, 'a' },
1843         { "threads",    required_argument,  0, 'e' },
1844         { "time",   required_argument,  0, 'u' },
1845         { "verbose",    no_argument,        0, 'v' },
1846         { "no-ftrace",  no_argument,            0, 'x' },
1847         { "tracefile",  required_argument,  0, 'i' },
1848         { "max-lat",    required_argument,  0, 'm' },
1849         { 0,        0,          0,  0  }
1850     };
1851     const struct policy *p;
1852     int max, min;
1853     int value;
1854     bool notracer, valid;
1855 
1856     /*
1857      * We must do this before parsing the arguments because show_usage()
1858      * needs to display these.
1859      */
1860     find_tracefiles();
1861 
1862     while (true) {
1863         c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:",
1864                 long_options, &option_idx);
1865         if (c == -1)
1866             break;
1867 
1868         switch (c) {
1869         case 'l':
1870             show_available();
1871             exit(0);
1872             break;
1873         case 't':
1874             current_tracer = strdup(optarg);
1875             if (!is_relevant_tracer(current_tracer)) {
1876                 warnx("%s is not a known latency tracer!\n",
1877                       current_tracer);
1878             }
1879             valid = tracer_valid(current_tracer, &notracer);
1880             if (notracer)
1881                 errx(EXIT_FAILURE, no_tracer_msg);
1882             if (!valid)
1883                 errx(EXIT_FAILURE,
1884 "The tracer %s is not supported by your kernel!\n", current_tracer);
1885             break;
1886         case 'F':
1887             force_tracer = true;
1888             break;
1889         case 's':
1890             check_alldigits(optarg, "-s [--threshold]");
1891             threshold = strdup(optarg);
1892             break;
1893         case 'f':
1894             use_options[OPTIDX_FUNC_TR] = true;
1895             break;
1896         case 'g':
1897             use_options[OPTIDX_DISP_GR] = true;
1898             break;
1899         case 'c':
1900             p = policy_from_name(optarg);
1901             if (p != NULL) {
1902                 sched_policy = p->policy;
1903                 sched_policy_set = true;
1904                 if (!sched_pri_set) {
1905                     sched_pri = p->default_pri;
1906                     sched_pri_set = true;
1907                 }
1908             } else {
1909                 warnx("Unknown scheduling %s\n", optarg);
1910                 show_usage();
1911                 exit(0);
1912             }
1913             break;
1914         case 'p':
1915             check_alldigits(optarg, "-p [--priority]");
1916             sched_pri = atoi(optarg);
1917             sched_pri_set = true;
1918             break;
1919         case 'h':
1920             show_usage();
1921             exit(0);
1922             break;
1923         case 'n':
1924             trace_enable = false;
1925             use_random_sleep = false;
1926             break;
1927         case 'e':
1928             check_alldigits(optarg, "-e [--threads]");
1929             value = atoi(optarg);
1930             if (value > 0)
1931                 nr_threads = value;
1932             else {
1933                 warnx("NRTHR must be > 0\n");
1934                 show_usage();
1935                 exit(0);
1936             }
1937             break;
1938         case 'u':
1939             check_alldigits(optarg, "-u [--time]");
1940             value = atoi(optarg);
1941             if (value < 0) {
1942                 warnx("TIME must be >= 0\n");
1943                 show_usage();
1944                 exit(0);
1945             }
1946             trace_enable = true;
1947             use_random_sleep = true;
1948             sleep_time = value * USEC_PER_MSEC;
1949             break;
1950         case 'v':
1951             verbosity++;
1952             break;
1953         case 'r':
1954             trace_enable = true;
1955             use_random_sleep = true;
1956             break;
1957         case 'a':
1958             check_alldigits(optarg, "-a [--nrlat]");
1959             value = atoi(optarg);
1960             if (value <= 0) {
1961                 warnx("NRLAT must be > 0\n");
1962                 show_usage();
1963                 exit(0);
1964             }
1965             trace_enable = true;
1966             use_random_sleep = true;
1967             table_startsize = value;
1968             break;
1969         case 'x':
1970             setup_ftrace = false;
1971             break;
1972         case 'i':
1973             setup_ftrace = false;
1974             debug_tracefile = strdup(optarg);
1975             break;
1976         case 'm':
1977             setup_ftrace = false;
1978             debug_maxlat = strdup(optarg);
1979             break;
1980         default:
1981             show_usage();
1982             exit(0);
1983             break;
1984         }
1985     }
1986 
1987     if (setup_ftrace) {
1988         if (!current_tracer) {
1989             current_tracer = find_default_tracer();
1990             if (!current_tracer)
1991                 errx(EXIT_FAILURE,
1992 "No default tracer found and tracer not specified\n");
1993         }
1994 
1995         if (use_random_sleep && !random_makes_sense(current_tracer)) {
1996             warnx("WARNING: The tracer is %s and random sleep has",
1997                   current_tracer);
1998             fprintf(stderr,
1999 "been enabled. Random sleep is intended for the following tracers:\n");
2000             for (i = 0; random_tracers[i]; i++)
2001                 fprintf(stderr, "%s\n", random_tracers[i]);
2002             fprintf(stderr, "\n");
2003         }
2004     }
2005 
2006     if (debug_tracefile == DEBUG_NOFILE ||
2007         debug_maxlat == DEBUG_NOFILE)
2008         errx(EXIT_FAILURE,
2009 "Could not find tracing directory e.g. /sys/kernel/tracing\n");
2010 
2011     if (!sched_policy_set) {
2012         sched_policy = SCHED_RR;
2013         sched_policy_set = true;
2014         if (!sched_pri_set) {
2015             sched_pri = RT_DEFAULT_PRI;
2016             sched_pri_set = true;
2017         }
2018     }
2019 
2020     max = sched_get_priority_max(sched_policy);
2021     min = sched_get_priority_min(sched_policy);
2022 
2023     if (sched_pri < min) {
2024         printf(
2025 "ATTENTION: Increasing priority to minimum, which is %d\n", min);
2026         sched_pri = min;
2027     }
2028     if (sched_pri > max) {
2029         printf(
2030 "ATTENTION: Reducing priority to maximum, which is %d\n", max);
2031         sched_pri = max;
2032     }
2033 }
2034 
2035 static void show_params(void)
2036 {
2037     printf(
2038 "\n"
2039 "Running with scheduling policy %s and priority %d. Using %d print threads.\n",
2040         policy_name(sched_policy), sched_pri, nr_threads);
2041     if (trace_enable) {
2042         if (use_random_sleep) {
2043             printf(
2044 "%s will be printed with random delay\n"
2045 "Start size of the probability table:\t\t\t%d\n"
2046 "Print a message when the prob. table changes size:\t%s\n"
2047 "Print a warning when an event has been lost:\t\t%s\n"
2048 "Sleep time is:\t\t\t\t\t\t%ld ms\n",
2049 debug_tracefile,
2050 table_startsize,
2051 bool2str(verbose_sizechange()),
2052 bool2str(verbose_lostevent()),
2053 sleep_time / USEC_PER_MSEC);
2054         } else {
2055             printf("%s will be printed immediately\n",
2056                    debug_tracefile);
2057         }
2058     } else {
2059         printf("%s will not be printed\n",
2060                debug_tracefile);
2061     }
2062     if (setup_ftrace) {
2063         printf("Tracer:\t\t\t\t\t\t\t%s\n"
2064                "%s option:\t\t\t\t\t%s\n"
2065                "%s option:\t\t\t\t\t%s\n",
2066                current_tracer,
2067                optstr[OPTIDX_FUNC_TR],
2068                bool2str(use_options[OPTIDX_FUNC_TR]),
2069                optstr[OPTIDX_DISP_GR],
2070                bool2str(use_options[OPTIDX_DISP_GR]));
2071         if (!strcmp(threshold, "0"))
2072             printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n");
2073         else
2074             printf("Threshold:\t\t\t\t\t\t%s\n", threshold);
2075     }
2076     printf("\n");
2077 }
2078 
2079 int main(int argc, char *argv[])
2080 {
2081     init_save_state();
2082     signal_blocking(SIG_BLOCK);
2083     setup_sig_handler();
2084     open_stdout();
2085 
2086     if (argc >= 1)
2087         prg_name = argv[0];
2088     else
2089         prg_name = prg_unknown;
2090 
2091     scan_arguments(argc, argv);
2092     show_params();
2093 
2094     init_printstate();
2095     init_print_mtx();
2096     if (use_random_sleep) {
2097         init_probabilities();
2098         if (verbose_sizechange())
2099             printf("Initializing probability table to %d\n",
2100                    table_startsize);
2101         sleeptable_resize(table_startsize, false, NULL);
2102     }
2103     set_priority();
2104     init_queue(&printqueue);
2105     start_printthread();
2106     tracing_loop();
2107     return 0;
2108 }