0001
0002
0003
0004
0005
0006
0007 #define _GNU_SOURCE
0008 #define _POSIX_C_SOURCE 200809L
0009
0010 #include <ctype.h>
0011 #include <stdbool.h>
0012 #include <stdio.h>
0013 #include <stdlib.h>
0014 #include <string.h>
0015
0016 #include <err.h>
0017 #include <errno.h>
0018 #include <fcntl.h>
0019 #include <getopt.h>
0020 #include <sched.h>
0021 #include <linux/unistd.h>
0022 #include <signal.h>
0023 #include <sys/inotify.h>
0024 #include <unistd.h>
0025 #include <pthread.h>
0026 #include <tracefs.h>
0027
0028 static const char *prg_name;
0029 static const char *prg_unknown = "unknown program name";
0030
0031 static int fd_stdout;
0032
0033 static int sched_policy;
0034 static bool sched_policy_set;
0035
0036 static int sched_pri;
0037 static bool sched_pri_set;
0038
0039 static bool trace_enable = true;
0040 static bool setup_ftrace = true;
0041 static bool use_random_sleep;
0042
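/*
 * TRACE_OPTS is expanded twice with different definitions of C(): once to
 * generate the OPTIDX_* enum values and once to generate the matching strings
 * in optstr[], keeping the two in sync. The combined effect is equivalent to:
 *
 *	enum traceopt { OPTIDX_FUNC_TR, OPTIDX_DISP_GR, OPTIDX_NR };
 *	static const char *const optstr[] = {
 *		"function-trace", "display-graph", NULL
 *	};
 */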
0043 #define TRACE_OPTS \
0044 C(FUNC_TR, "function-trace"), \
0045 C(DISP_GR, "display-graph"), \
0046 C(NR, NULL)
0047
0048 #undef C
0049 #define C(a, b) OPTIDX_##a
0050
0051 enum traceopt {
0052 TRACE_OPTS
0053 };
0054
0055 #undef C
0056 #define C(a, b) b
0057
0058 static const char *const optstr[] = {
0059 TRACE_OPTS
0060 };
0061
0062 enum errhandling {
0063 ERR_EXIT = 0,
0064 ERR_WARN,
0065 ERR_CLEANUP,
0066 };
0067
0068 static bool use_options[OPTIDX_NR];
0069
0070 static char inotify_buffer[655360];
0071
0072 #define likely(x) __builtin_expect(!!(x), 1)
0073 #define unlikely(x) __builtin_expect(!!(x), 0)
0074 #define bool2str(x) (x ? "true":"false")
0075
0076 #define DEFAULT_NR_PRINTER_THREADS (3)
0077 static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS;
0078
0079 #define DEFAULT_TABLE_SIZE (2)
0080 static unsigned int table_startsize = DEFAULT_TABLE_SIZE;
0081
0082 static int verbosity;
0083
0084 #define verbose_sizechange() (verbosity >= 1)
0085 #define verbose_lostevent() (verbosity >= 2)
0086 #define verbose_ftrace() (verbosity >= 1)
0087
0088 #define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0)
0089 #define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0)
0090
0091 static const char *debug_tracefile;
0092 static const char *debug_tracefile_dflt;
0093 static const char *debug_maxlat;
0094 static const char *debug_maxlat_dflt;
0095 static const char * const DEBUG_NOFILE = "[file not found]";
0096
0097 static const char * const TR_MAXLAT = "tracing_max_latency";
0098 static const char * const TR_THRESH = "tracing_thresh";
0099 static const char * const TR_CURRENT = "current_tracer";
0100 static const char * const TR_OPTIONS = "trace_options";
0101
0102 static const char * const NOP_TRACER = "nop";
0103
0104 static const char * const OPT_NO_PREFIX = "no";
0105
0106 #define DFLT_THRESHOLD_US "0"
0107 static const char *threshold = DFLT_THRESHOLD_US;
0108
0109 #define DEV_URANDOM "/dev/urandom"
0110 #define RT_DEFAULT_PRI (99)
0111 #define DEFAULT_PRI (0)
0112
0113 #define USEC_PER_MSEC (1000L)
0114 #define NSEC_PER_USEC (1000L)
0115 #define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC)
0116
0117 #define MSEC_PER_SEC (1000L)
0118 #define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC)
0119 #define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC)
0120
0121 #define SLEEP_TIME_MS_DEFAULT (1000L)
0122 #define TRY_PRINTMUTEX_MS (1000)
0123
0124 static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT);
0125
0126 static const char * const queue_full_warning =
0127 "Could not queue trace for printing. It is likely that events happen faster\n"
0128 "than what they can be printed. Probably partly because of random sleeping\n";
0129
0130 static const char * const no_tracer_msg =
0131 "Could not find any tracers! Running this program as root may help!\n";
0132
0133 static const char * const no_latency_tr_msg =
0134 "No latency tracers are supported by your kernel!\n";
0135
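/*
 * Scheduling policies accepted by the -c/--policy option, together with the
 * default priority that is used when -p/--priority is not given.
 */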
0136 struct policy {
0137 const char *name;
0138 int policy;
0139 int default_pri;
0140 };
0141
0142 static const struct policy policies[] = {
0143 { "other", SCHED_OTHER, DEFAULT_PRI },
0144 { "batch", SCHED_BATCH, DEFAULT_PRI },
0145 { "idle", SCHED_IDLE, DEFAULT_PRI },
0146 { "rr", SCHED_RR, RT_DEFAULT_PRI },
0147 { "fifo", SCHED_FIFO, RT_DEFAULT_PRI },
0148 { NULL, 0, DEFAULT_PRI }
0149 };
0150
0151
0152
0153
0154
0155 static const char * const relevant_tracers[] = {
0156 "preemptirqsoff",
0157 "preemptoff",
0158 "irqsoff",
0159 "wakeup",
0160 "wakeup_rt",
0161 "wakeup_dl",
0162 NULL
0163 };
0164
0165
0166 static const char * const random_tracers[] = {
0167 "preemptirqsoff",
0168 "preemptoff",
0169 "irqsoff",
0170 NULL
0171 };
0172
0173 static const char *current_tracer;
0174 static bool force_tracer;
0175
0176 struct ftrace_state {
0177 char *tracer;
0178 char *thresh;
0179 bool opt[OPTIDX_NR];
0180 bool opt_valid[OPTIDX_NR];
0181 pthread_mutex_t mutex;
0182 };
0183
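/*
 * A print request. ticket is the sequence number handed out by
 * printstate_next_ticket(); ticket_completed_ref is a snapshot of
 * printstate.ticket_completed taken at that time, used by prev_req_won_race()
 * and table_get_probability() to see how many requests were outstanding when
 * this one was created.
 */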
0184 struct entry {
0185 int ticket;
0186 int ticket_completed_ref;
0187 };
0188
0189 struct print_state {
0190 int ticket_counter;
0191 int ticket_completed;
0192 pthread_mutex_t mutex;
0193 pthread_cond_t cond;
0194 int cnt;
0195 pthread_mutex_t cnt_mutex;
0196 };
0197
0198 struct short_msg {
0199 char buf[160];
0200 int len;
0201 };
0202
0203 static struct print_state printstate;
0204 static struct ftrace_state save_state;
0205 volatile sig_atomic_t signal_flag;
0206
0207 #define PROB_TABLE_MAX_SIZE (1000)
0208
0209 int probabilities[PROB_TABLE_MAX_SIZE];
0210
0211 struct sleep_table {
0212 int *table;
0213 int size;
0214 pthread_mutex_t mutex;
0215 };
0216
0217 static struct sleep_table sleeptable;
0218
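/*
 * Fixed-size circular queue of print requests, filled by the inotify loop in
 * tracing_loop() and drained by the printer threads in do_printloop().
 */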
0219 #define QUEUE_SIZE (10)
0220
0221 struct queue {
0222 struct entry entries[QUEUE_SIZE];
0223 int next_prod_idx;
0224 int next_cons_idx;
0225 pthread_mutex_t mutex;
0226 pthread_cond_t cond;
0227 };
0228
0229 #define MAX_THREADS (40)
0230
0231 struct queue printqueue;
0232 pthread_t printthread[MAX_THREADS];
0233 pthread_mutex_t print_mtx;
0234 #define PRINT_BUFFER_SIZE (16 * 1024 * 1024)
0235
0236 static void cleanup_exit(int status);
0237 static int set_trace_opt(const char *opt, bool value);
0238
0239 static __always_inline void *malloc_or_die(size_t size)
0240 {
0241 void *ptr = malloc(size);
0242
0243 if (unlikely(ptr == NULL)) {
0244 warn("malloc() failed");
0245 cleanup_exit(EXIT_FAILURE);
0246 }
0247 return ptr;
0248 }
0249
0250 static __always_inline void *malloc_or_die_nocleanup(size_t size)
0251 {
0252 void *ptr = malloc(size);
0253
0254 if (unlikely(ptr == NULL))
0255 err(EXIT_FAILURE, "malloc() failed");
0256 return ptr;
0257 }
0258
0259 static __always_inline void write_or_die(int fd, const char *buf, size_t count)
0260 {
0261 ssize_t r;
0262
0263 do {
0264 r = write(fd, buf, count);
0265 if (unlikely(r < 0)) {
0266 if (errno == EINTR)
0267 continue;
0268 warn("write() failed");
0269 cleanup_exit(EXIT_FAILURE);
0270 }
0271 count -= r;
0272 buf += r;
0273 } while (count > 0);
0274 }
0275
0276 static __always_inline void clock_gettime_or_die(clockid_t clk_id,
0277 struct timespec *tp)
0278 {
0279 int r = clock_gettime(clk_id, tp);
0280
0281 if (unlikely(r != 0))
0282 err(EXIT_FAILURE, "clock_gettime() failed");
0283 }
0284
0285 static __always_inline void sigemptyset_or_die(sigset_t *s)
0286 {
0287 if (unlikely(sigemptyset(s) != 0)) {
0288 warn("sigemptyset() failed");
0289 cleanup_exit(EXIT_FAILURE);
0290 }
0291 }
0292
0293 static __always_inline void sigaddset_or_die(sigset_t *s, int signum)
0294 {
0295 if (unlikely(sigaddset(s, signum) != 0)) {
0296 warn("sigemptyset() failed");
0297 cleanup_exit(EXIT_FAILURE);
0298 }
0299 }
0300
0301 static __always_inline void sigaction_or_die(int signum,
0302 const struct sigaction *act,
0303 struct sigaction *oldact)
0304 {
0305 if (unlikely(sigaction(signum, act, oldact) != 0)) {
0306 warn("sigaction() failed");
0307 cleanup_exit(EXIT_FAILURE);
0308 }
0309 }
0310
0311 static void open_stdout(void)
0312 {
0313 if (setvbuf(stdout, NULL, _IONBF, 0) != 0)
0314 err(EXIT_FAILURE, "setvbuf() failed");
0315 fd_stdout = fileno(stdout);
0316 if (fd_stdout < 0)
0317 err(EXIT_FAILURE, "fileno() failed");
0318 }
0319
0320
0321
0322
0323
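/*
 * Thin wrappers around the pthread primitives that treat any failure as
 * fatal. The pthread functions return the error number instead of setting
 * errno, so the return value is stored in errno before err() is called.
 */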
0324 static __always_inline void mutex_lock(pthread_mutex_t *mtx)
0325 {
0326 errno = pthread_mutex_lock(mtx);
0327 if (unlikely(errno))
0328 err(EXIT_FAILURE, "pthread_mutex_lock() failed");
0329 }
0330
0331
0332 static __always_inline void mutex_unlock(pthread_mutex_t *mtx)
0333 {
0334 errno = pthread_mutex_unlock(mtx);
0335 if (unlikely(errno))
0336 err(EXIT_FAILURE, "pthread_mutex_unlock() failed");
0337 }
0338
0339 static __always_inline void cond_signal(pthread_cond_t *cond)
0340 {
0341 errno = pthread_cond_signal(cond);
0342 if (unlikely(errno))
0343 err(EXIT_FAILURE, "pthread_cond_signal() failed");
0344 }
0345
0346 static __always_inline void cond_wait(pthread_cond_t *restrict cond,
0347 pthread_mutex_t *restrict mutex)
0348 {
0349 errno = pthread_cond_wait(cond, mutex);
0350 if (unlikely(errno))
0351 err(EXIT_FAILURE, "pthread_cond_wait() failed");
0352 }
0353
0354 static __always_inline void cond_broadcast(pthread_cond_t *cond)
0355 {
0356 errno = pthread_cond_broadcast(cond);
0357 if (unlikely(errno))
0358 err(EXIT_FAILURE, "pthread_cond_broadcast() failed");
0359 }
0360
0361 static __always_inline void
0362 mutex_init(pthread_mutex_t *mutex,
0363 const pthread_mutexattr_t *attr)
0364 {
0365 errno = pthread_mutex_init(mutex, attr);
0366 if (errno)
0367 err(EXIT_FAILURE, "pthread_mutex_init() failed");
0368 }
0369
0370 static __always_inline void mutexattr_init(pthread_mutexattr_t *attr)
0371 {
0372 errno = pthread_mutexattr_init(attr);
0373 if (errno)
0374 err(EXIT_FAILURE, "pthread_mutexattr_init() failed");
0375 }
0376
0377 static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr)
0378 {
0379 errno = pthread_mutexattr_destroy(attr);
0380 if (errno)
0381 err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed");
0382 }
0383
0384 static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr,
0385 int type)
0386 {
0387 errno = pthread_mutexattr_settype(attr, type);
0388 if (errno)
0389 err(EXIT_FAILURE, "pthread_mutexattr_settype() failed");
0390 }
0391
0392 static __always_inline void condattr_init(pthread_condattr_t *attr)
0393 {
0394 errno = pthread_condattr_init(attr);
0395 if (errno)
0396 err(EXIT_FAILURE, "pthread_condattr_init() failed");
0397 }
0398
0399 static __always_inline void condattr_destroy(pthread_condattr_t *attr)
0400 {
0401 errno = pthread_condattr_destroy(attr);
0402 if (errno)
0403 err(EXIT_FAILURE, "pthread_condattr_destroy() failed");
0404 }
0405
0406 static __always_inline void condattr_setclock(pthread_condattr_t *attr,
0407 clockid_t clock_id)
0408 {
0409 errno = pthread_condattr_setclock(attr, clock_id);
0410 if (unlikely(errno))
0411 err(EXIT_FAILURE, "pthread_condattr_setclock() failed");
0412 }
0413
0414 static __always_inline void cond_init(pthread_cond_t *cond,
0415 const pthread_condattr_t *attr)
0416 {
0417 errno = pthread_cond_init(cond, attr);
0418 if (errno)
0419 err(EXIT_FAILURE, "pthread_cond_init() failed");
0420 }
0421
0422 static __always_inline int
0423 cond_timedwait(pthread_cond_t *restrict cond,
0424 pthread_mutex_t *restrict mutex,
0425 const struct timespec *restrict abstime)
0426 {
0427 errno = pthread_cond_timedwait(cond, mutex, abstime);
0428 if (errno && errno != ETIMEDOUT)
0429 err(EXIT_FAILURE, "pthread_cond_timedwait() failed");
0430 return errno;
0431 }
0432
0433 static void init_printstate(void)
0434 {
0435 pthread_condattr_t cattr;
0436
0437 printstate.ticket_counter = 0;
0438 printstate.ticket_completed = 0;
0439 printstate.cnt = 0;
0440
0441 mutex_init(&printstate.mutex, NULL);
0442
0443 condattr_init(&cattr);
0444 condattr_setclock(&cattr, CLOCK_MONOTONIC);
0445 cond_init(&printstate.cond, &cattr);
0446 condattr_destroy(&cattr);
0447 }
0448
0449 static void init_print_mtx(void)
0450 {
0451 pthread_mutexattr_t mattr;
0452
0453 mutexattr_init(&mattr);
0454 mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
0455 mutex_init(&print_mtx, &mattr);
0456 mutexattr_destroy(&mattr);
0457
0458 }
0459
0460 static void signal_blocking(int how)
0461 {
0462 sigset_t s;
0463
0464 sigemptyset_or_die(&s);
0465 sigaddset_or_die(&s, SIGHUP);
0466 sigaddset_or_die(&s, SIGTERM);
0467 sigaddset_or_die(&s, SIGINT);
0468
0469 errno = pthread_sigmask(how, &s, NULL);
0470 if (unlikely(errno)) {
0471 warn("pthread_sigmask() failed");
0472 cleanup_exit(EXIT_FAILURE);
0473 }
0474 }
0475
0476 static void signal_handler(int num)
0477 {
0478 signal_flag = num;
0479 }
0480
0481 static void setup_sig_handler(void)
0482 {
0483 struct sigaction sa;
0484
0485 memset(&sa, 0, sizeof(sa));
0486 sa.sa_handler = signal_handler;
0487
0488 sigaction_or_die(SIGHUP, &sa, NULL);
0489 sigaction_or_die(SIGTERM, &sa, NULL);
0490 sigaction_or_die(SIGINT, &sa, NULL);
0491 }
0492
0493 static void process_signal(int signal)
0494 {
0495 char *name;
0496
0497 name = strsignal(signal);
0498 if (name == NULL)
0499 printf("Received signal %d\n", signal);
0500 else
0501 printf("Received signal %d (%s)\n", signal, name);
0502 cleanup_exit(EXIT_SUCCESS);
0503 }
0504
0505 static __always_inline void check_signals(void)
0506 {
0507 int signal = signal_flag;
0508
0509 if (unlikely(signal))
0510 process_signal(signal);
0511 }
0512
0513 static __always_inline void get_time_in_future(struct timespec *future,
0514 long time_us)
0515 {
0516 long nsec;
0517
0518 clock_gettime_or_die(CLOCK_MONOTONIC, future);
0519 future->tv_sec += time_us / USEC_PER_SEC;
0520 nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC;
0521 future->tv_nsec = nsec % NSEC_PER_SEC;
0522 if (nsec >= NSEC_PER_SEC) {
0523 future->tv_sec += 1;
0524 }
0525 }
0526
0527 static __always_inline bool time_has_passed(const struct timespec *time)
0528 {
0529 struct timespec now;
0530
0531 clock_gettime_or_die(CLOCK_MONOTONIC, &now);
0532 if (now.tv_sec > time->tv_sec)
0533 return true;
0534 if (now.tv_sec < time->tv_sec)
0535 return false;
0536 return (now.tv_nsec >= time->tv_nsec);
0537 }
0538
0539 static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms)
0540 {
0541 long time_us = time_ms * USEC_PER_MSEC;
0542 struct timespec limit;
0543
0544 get_time_in_future(&limit, time_us);
0545 do {
0546 errno = pthread_mutex_trylock(mutex);
0547 if (errno && errno != EBUSY)
0548 err(EXIT_FAILURE, "pthread_mutex_trylock() failed");
0549 } while (errno && !time_has_passed(&limit));
0550 return errno == 0;
0551 }
0552
0553 static void restore_trace_opts(const struct ftrace_state *state,
0554 const bool *cur)
0555 {
0556 int i;
0557 int r;
0558
0559 for (i = 0; i < OPTIDX_NR; i++)
0560 if (state->opt_valid[i] && state->opt[i] != cur[i]) {
0561 r = set_trace_opt(optstr[i], state->opt[i]);
0562 if (r < 0)
0563 warnx("Failed to restore the %s option to %s",
0564 optstr[i], bool2str(state->opt[i]));
0565 else if (verbose_ftrace())
0566 printf("Restored the %s option in %s to %s\n",
0567 optstr[i], TR_OPTIONS,
0568 bool2str(state->opt[i]));
0569 }
0570 }
0571
0572 static char *read_file(const char *file, enum errhandling h)
0573 {
0574 int psize;
0575 char *r;
0576 static const char *emsg = "Failed to read the %s file";
0577
0578 r = tracefs_instance_file_read(NULL, file, &psize);
0579 if (!r) {
0580 if (h) {
0581 warn(emsg, file);
0582 if (h == ERR_CLEANUP)
0583 cleanup_exit(EXIT_FAILURE);
0584 } else
0585 errx(EXIT_FAILURE, emsg, file);
0586 }
0587
0588 if (r && r[psize - 1] == '\n')
0589 r[psize - 1] = '\0';
0590 return r;
0591 }
0592
0593 static void restore_file(const char *file, char **saved, const char *cur)
0594 {
0595 if (*saved && was_changed(*saved, cur)) {
0596 if (tracefs_instance_file_write(NULL, file, *saved) < 0)
0597 warnx("Failed to restore %s to %s!", file, *saved);
0598 else if (verbose_ftrace())
0599 printf("Restored %s to %s\n", file, *saved);
0600 free(*saved);
0601 *saved = NULL;
0602 }
0603 }
0604
0605 static void restore_ftrace(void)
0606 {
0607 mutex_lock(&save_state.mutex);
0608
0609 restore_file(TR_CURRENT, &save_state.tracer, current_tracer);
0610 restore_file(TR_THRESH, &save_state.thresh, threshold);
0611 restore_trace_opts(&save_state, use_options);
0612
0613 mutex_unlock(&save_state.mutex);
0614 }
0615
0616 static void cleanup_exit(int status)
0617 {
0618 char *maxlat;
0619
0620 if (!setup_ftrace)
0621 exit(status);
0622
0623
0624
0625
0626
0627 mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS);
0628
0629 maxlat = read_file(TR_MAXLAT, ERR_WARN);
0630 if (maxlat) {
0631 printf("The maximum detected latency was: %sus\n", maxlat);
0632 free(maxlat);
0633 }
0634
0635 restore_ftrace();
0636
0637
0638
0639
0640
0641
0642
0643 exit(status);
0644 }
0645
0646 static void init_save_state(void)
0647 {
0648 pthread_mutexattr_t mattr;
0649
0650 mutexattr_init(&mattr);
0651 mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE);
0652 mutex_init(&save_state.mutex, &mattr);
0653 mutexattr_destroy(&mattr);
0654
0655 save_state.tracer = NULL;
0656 save_state.thresh = NULL;
0657 save_state.opt_valid[OPTIDX_FUNC_TR] = false;
0658 save_state.opt_valid[OPTIDX_DISP_GR] = false;
0659 }
0660
0661 static int printstate_next_ticket(struct entry *req)
0662 {
0663 int r;
0664
0665 r = ++(printstate.ticket_counter);
0666 req->ticket = r;
0667 req->ticket_completed_ref = printstate.ticket_completed;
0668 cond_broadcast(&printstate.cond);
0669 return r;
0670 }
0671
0672 static __always_inline
0673 void printstate_mark_req_completed(const struct entry *req)
0674 {
0675 if (req->ticket > printstate.ticket_completed)
0676 printstate.ticket_completed = req->ticket;
0677 }
0678
0679 static __always_inline
0680 bool printstate_has_new_req_arrived(const struct entry *req)
0681 {
0682 return (printstate.ticket_counter != req->ticket);
0683 }
0684
0685 static __always_inline int printstate_cnt_inc(void)
0686 {
0687 int value;
0688
0689 mutex_lock(&printstate.cnt_mutex);
0690 value = ++printstate.cnt;
0691 mutex_unlock(&printstate.cnt_mutex);
0692 return value;
0693 }
0694
0695 static __always_inline int printstate_cnt_dec(void)
0696 {
0697 int value;
0698
0699 mutex_lock(&printstate.cnt_mutex);
0700 value = --printstate.cnt;
0701 mutex_unlock(&printstate.cnt_mutex);
0702 return value;
0703 }
0704
0705 static __always_inline int printstate_cnt_read(void)
0706 {
0707 int value;
0708
0709 mutex_lock(&printstate.cnt_mutex);
0710 value = printstate.cnt;
0711 mutex_unlock(&printstate.cnt_mutex);
0712 return value;
0713 }
0714
0715 static __always_inline
0716 bool prev_req_won_race(const struct entry *req)
0717 {
0718 return (printstate.ticket_completed != req->ticket_completed_ref);
0719 }
0720
0721 static void sleeptable_resize(int size, bool printout, struct short_msg *msg)
0722 {
0723 int bytes;
0724
0725 if (printout) {
0726 msg->len = 0;
0727 if (unlikely(size > PROB_TABLE_MAX_SIZE))
0728 bytes = snprintf(msg->buf, sizeof(msg->buf),
0729 "Cannot increase probability table to %d (maximum size reached)\n", size);
0730 else
0731 bytes = snprintf(msg->buf, sizeof(msg->buf),
0732 "Increasing probability table to %d\n", size);
0733 if (bytes < 0)
0734 warn("snprintf() failed");
0735 else
0736 msg->len = bytes;
0737 }
0738
0739 if (unlikely(size < 0)) {
0740
0741 warnx("Bad program state at %s:%d", __FILE__, __LINE__);
0742 cleanup_exit(EXIT_FAILURE);
0743 return;
0744 }
0745 sleeptable.size = size;
0746 sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size];
0747 }
0748
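/*
 * probabilities[] holds per-mille chances of printing a latency immediately
 * and is consumed from the end of the array: probabilities[999] = 1000/1,
 * probabilities[998] = 1000/2, and so on. sleeptable.table points 'size'
 * entries from the end, so a table of size N reads 1000/N, ..., 1000/2,
 * 1000/1. With the default size of 2 the table is {500, 1000}: the first
 * latency in a cluster sleeps with probability 1/2, while a second
 * outstanding latency is always printed immediately (and the table is grown
 * by one entry).
 */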
0749 static void init_probabilities(void)
0750 {
0751 int i;
0752 int j = 1000;
0753
0754 for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) {
0755 probabilities[i] = 1000 / j;
0756 j--;
0757 }
0758 mutex_init(&sleeptable.mutex, NULL);
0759 }
0760
0761 static int table_get_probability(const struct entry *req,
0762 struct short_msg *msg)
0763 {
0764 int diff = req->ticket - req->ticket_completed_ref;
0765 int rval = 0;
0766
0767 msg->len = 0;
0768 diff--;
0769
0770 if (unlikely(diff < 0)) {
0771 warnx("Programmer assumption error at %s:%d\n", __FILE__,
0772 __LINE__);
0773 cleanup_exit(EXIT_FAILURE);
0774 }
0775 mutex_lock(&sleeptable.mutex);
0776 if (diff >= (sleeptable.size - 1)) {
0777 rval = sleeptable.table[sleeptable.size - 1];
0778 sleeptable_resize(sleeptable.size + 1, verbose_sizechange(),
0779 msg);
0780 } else {
0781 rval = sleeptable.table[diff];
0782 }
0783 mutex_unlock(&sleeptable.mutex);
0784 return rval;
0785 }
0786
0787 static void init_queue(struct queue *q)
0788 {
0789 q->next_prod_idx = 0;
0790 q->next_cons_idx = 0;
0791 mutex_init(&q->mutex, NULL);
0792 errno = pthread_cond_init(&q->cond, NULL);
0793 if (errno)
0794 err(EXIT_FAILURE, "pthread_cond_init() failed");
0795 }
0796
0797 static __always_inline int queue_len(const struct queue *q)
0798 {
0799 if (q->next_prod_idx >= q->next_cons_idx)
0800 return q->next_prod_idx - q->next_cons_idx;
0801 else
0802 return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx;
0803 }
0804
0805 static __always_inline int queue_nr_free(const struct queue *q)
0806 {
0807 int nr_free = QUEUE_SIZE - queue_len(q);
0808
0809
0810
0811
0812
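/*
 * Treat a single free slot as a full queue. One slot is always left unused
 * so that next_prod_idx == next_cons_idx unambiguously means that the queue
 * is empty.
 */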
0813 if (nr_free == 1)
0814 nr_free = 0;
0815 return nr_free;
0816 }
0817
0818 static __always_inline void queue_idx_inc(int *idx)
0819 {
0820 *idx = (*idx + 1) % QUEUE_SIZE;
0821 }
0822
0823 static __always_inline void queue_push_to_back(struct queue *q,
0824 const struct entry *e)
0825 {
0826 q->entries[q->next_prod_idx] = *e;
0827 queue_idx_inc(&q->next_prod_idx);
0828 }
0829
0830 static __always_inline struct entry queue_pop_from_front(struct queue *q)
0831 {
0832 struct entry e = q->entries[q->next_cons_idx];
0833
0834 queue_idx_inc(&q->next_cons_idx);
0835 return e;
0836 }
0837
0838 static __always_inline void queue_cond_signal(struct queue *q)
0839 {
0840 cond_signal(&q->cond);
0841 }
0842
0843 static __always_inline void queue_cond_wait(struct queue *q)
0844 {
0845 cond_wait(&q->cond, &q->mutex);
0846 }
0847
0848 static __always_inline int queue_try_to_add_entry(struct queue *q,
0849 const struct entry *e)
0850 {
0851 int r = 0;
0852
0853 mutex_lock(&q->mutex);
0854 if (queue_nr_free(q) > 0) {
0855 queue_push_to_back(q, e);
0856 cond_signal(&q->cond);
0857 } else
0858 r = -1;
0859 mutex_unlock(&q->mutex);
0860 return r;
0861 }
0862
0863 static struct entry queue_wait_for_entry(struct queue *q)
0864 {
0865 struct entry e;
0866
0867 mutex_lock(&q->mutex);
0868 while (true) {
0869 if (queue_len(q) > 0) {
0870 e = queue_pop_from_front(q);
0871 break;
0872 }
0873 queue_cond_wait(q);
0874 }
0875 mutex_unlock(&q->mutex);
0876
0877 return e;
0878 }
0879
0880 static const struct policy *policy_from_name(const char *name)
0881 {
0882 const struct policy *p = &policies[0];
0883
0884 while (p->name != NULL) {
0885 if (!strcmp(name, p->name))
0886 return p;
0887 p++;
0888 }
0889 return NULL;
0890 }
0891
0892 static const char *policy_name(int policy)
0893 {
0894 const struct policy *p = &policies[0];
0895 static const char *rval = "unknown";
0896
0897 while (p->name != NULL) {
0898 if (p->policy == policy)
0899 return p->name;
0900 p++;
0901 }
0902 return rval;
0903 }
0904
0905 static bool is_relevant_tracer(const char *name)
0906 {
0907 unsigned int i;
0908
0909 for (i = 0; relevant_tracers[i]; i++)
0910 if (!strcmp(name, relevant_tracers[i]))
0911 return true;
0912 return false;
0913 }
0914
0915 static bool random_makes_sense(const char *name)
0916 {
0917 unsigned int i;
0918
0919 for (i = 0; random_tracers[i]; i++)
0920 if (!strcmp(name, random_tracers[i]))
0921 return true;
0922 return false;
0923 }
0924
0925 static void show_available(void)
0926 {
0927 char **tracers;
0928 int found = 0;
0929 int i;
0930
0931 tracers = tracefs_tracers(NULL);
0932 for (i = 0; tracers && tracers[i]; i++) {
0933 if (is_relevant_tracer(tracers[i]))
0934 found++;
0935 }
0936
0937 if (!tracers) {
0938 warnx(no_tracer_msg);
0939 return;
0940 }
0941
0942 if (!found) {
0943 warnx(no_latency_tr_msg);
0944 tracefs_list_free(tracers);
0945 return;
0946 }
0947
0948 printf("The following latency tracers are available on your system:\n");
0949 for (i = 0; tracers[i]; i++) {
0950 if (is_relevant_tracer(tracers[i]))
0951 printf("%s\n", tracers[i]);
0952 }
0953 tracefs_list_free(tracers);
0954 }
0955
0956 static bool tracer_valid(const char *name, bool *notracer)
0957 {
0958 char **tracers;
0959 int i;
0960 bool rval = false;
0961
0962 *notracer = false;
0963 tracers = tracefs_tracers(NULL);
0964 if (!tracers) {
0965 *notracer = true;
0966 return false;
0967 }
0968 for (i = 0; tracers[i]; i++)
0969 if (!strcmp(tracers[i], name)) {
0970 rval = true;
0971 break;
0972 }
0973 tracefs_list_free(tracers);
0974 return rval;
0975 }
0976
0977 static const char *find_default_tracer(void)
0978 {
0979 int i;
0980 bool notracer;
0981 bool valid;
0982
0983 for (i = 0; relevant_tracers[i]; i++) {
0984 valid = tracer_valid(relevant_tracers[i], &notracer);
0985 if (notracer)
0986 errx(EXIT_FAILURE, no_tracer_msg);
0987 if (valid)
0988 return relevant_tracers[i];
0989 }
0990 return NULL;
0991 }
0992
0993 static bool toss_coin(struct drand48_data *buffer, unsigned int prob)
0994 {
0995 long r;
0996
0997 if (unlikely(lrand48_r(buffer, &r))) {
0998 warnx("lrand48_r() failed");
0999 cleanup_exit(EXIT_FAILURE);
1000 }
1001 r = r % 1000L;
1002 if (r < prob)
1003 return true;
1004 else
1005 return false;
1006 }
1007
1008
1009 static long go_to_sleep(const struct entry *req)
1010 {
1011 struct timespec future;
1012 long delay = sleep_time;
1013
1014 get_time_in_future(&future, delay);
1015
1016 mutex_lock(&printstate.mutex);
1017 while (!printstate_has_new_req_arrived(req)) {
1018 cond_timedwait(&printstate.cond, &printstate.mutex, &future);
1019 if (time_has_passed(&future))
1020 break;
1021 }
1022
1023 if (printstate_has_new_req_arrived(req))
1024 delay = -1;
1025 mutex_unlock(&printstate.mutex);
1026
1027 return delay;
1028 }
1029
1030
1031 static void set_priority(void)
1032 {
1033 int r;
1034 pid_t pid;
1035 struct sched_param param;
1036
1037 memset(&param, 0, sizeof(param));
1038 param.sched_priority = sched_pri;
1039
1040 pid = getpid();
1041 r = sched_setscheduler(pid, sched_policy, &param);
1042
1043 if (r != 0)
1044 err(EXIT_FAILURE, "sched_setscheduler() failed");
1045 }
1046
1047 pid_t latency_collector_gettid(void)
1048 {
1049 return (pid_t) syscall(__NR_gettid);
1050 }
1051
1052 static void print_priority(void)
1053 {
1054 pid_t tid;
1055 int policy;
1056 int r;
1057 struct sched_param param;
1058
1059 tid = latency_collector_gettid();
1060 r = pthread_getschedparam(pthread_self(), &policy, &param);
1061 if (r != 0) {
1062 warn("pthread_getschedparam() failed");
1063 cleanup_exit(EXIT_FAILURE);
1064 }
1065 mutex_lock(&print_mtx);
1066 printf("Thread %d runs with scheduling policy %s and priority %d\n",
1067 tid, policy_name(policy), param.sched_priority);
1068 mutex_unlock(&print_mtx);
1069 }
1070
1071 static __always_inline
1072 void __print_skipmessage(const struct short_msg *resize_msg,
1073 const struct timespec *timestamp, char *buffer,
1074 size_t bufspace, const struct entry *req, bool excuse,
1075 const char *str)
1076 {
1077 ssize_t bytes = 0;
1078 char *p = &buffer[0];
1079 long us, sec;
1080 int r;
1081
1082 sec = timestamp->tv_sec;
1083 us = timestamp->tv_nsec / 1000;
1084
1085 if (resize_msg != NULL && resize_msg->len > 0) {
1086 strncpy(p, resize_msg->buf, resize_msg->len);
1087 bytes += resize_msg->len;
1088 p += resize_msg->len;
1089 bufspace -= resize_msg->len;
1090 }
1091
1092 if (excuse)
1093 r = snprintf(p, bufspace,
1094 "%ld.%06ld Latency %d printout skipped due to %s\n",
1095 sec, us, req->ticket, str);
1096 else
1097 r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n",
1098 sec, us, req->ticket);
1099
1100 if (r < 0)
1101 warn("snprintf() failed");
1102 else
1103 bytes += r;
1104
1105
1106 mutex_lock(&print_mtx);
1107 write_or_die(fd_stdout, buffer, bytes);
1108 mutex_unlock(&print_mtx);
1109 }
1110
1111 static void print_skipmessage(const struct short_msg *resize_msg,
1112 const struct timespec *timestamp, char *buffer,
1113 size_t bufspace, const struct entry *req,
1114 bool excuse)
1115 {
1116 __print_skipmessage(resize_msg, timestamp, buffer, bufspace, req,
1117 excuse, "random delay");
1118 }
1119
1120 static void print_lostmessage(const struct timespec *timestamp, char *buffer,
1121 size_t bufspace, const struct entry *req,
1122 const char *reason)
1123 {
1124 __print_skipmessage(NULL, timestamp, buffer, bufspace, req, true,
1125 reason);
1126 }
1127
1128 static void print_tracefile(const struct short_msg *resize_msg,
1129 const struct timespec *timestamp, char *buffer,
1130 size_t bufspace, long slept,
1131 const struct entry *req)
1132 {
1133 static const int reserve = 256;
1134 char *p = &buffer[0];
1135 ssize_t bytes = 0;
1136 ssize_t bytes_tot = 0;
1137 long us, sec;
1138 long slept_ms;
1139 int trace_fd;
1140
1141
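/*
 * Keep 'reserve' bytes of the buffer for the END marker; the space is handed
 * back (bufspace += reserve) before the final snprintf() below.
 */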
1142 bufspace = bufspace - reserve - 1;
1143
1144 if (resize_msg != NULL && resize_msg->len > 0) {
1145 bytes = resize_msg->len;
1146 strncpy(p, resize_msg->buf, bytes);
1147 bytes_tot += bytes;
1148 p += bytes;
1149 bufspace -= bytes;
1150 }
1151
1152 trace_fd = open(debug_tracefile, O_RDONLY);
1153
1154 if (trace_fd < 0) {
1155 warn("open() failed on %s", debug_tracefile);
1156 return;
1157 }
1158
1159 sec = timestamp->tv_sec;
1160 us = timestamp->tv_nsec / 1000;
1161
1162 if (slept != 0) {
1163 slept_ms = slept / 1000;
1164 bytes = snprintf(p, bufspace,
1165 "%ld.%06ld Latency %d randomly sleep for %ld ms before print\n",
1166 sec, us, req->ticket, slept_ms);
1167 } else {
1168 bytes = snprintf(p, bufspace,
1169 "%ld.%06ld Latency %d immediate print\n", sec,
1170 us, req->ticket);
1171 }
1172
1173 if (bytes < 0) {
1174 warn("snprintf() failed");
1175 return;
1176 }
1177 p += bytes;
1178 bufspace -= bytes;
1179 bytes_tot += bytes;
1180
1181 bytes = snprintf(p, bufspace,
1182 ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n"
1183 );
1184
1185 if (bytes < 0) {
1186 warn("snprintf() failed");
1187 return;
1188 }
1189
1190 p += bytes;
1191 bufspace -= bytes;
1192 bytes_tot += bytes;
1193
1194 do {
1195 bytes = read(trace_fd, p, bufspace);
1196 if (bytes < 0) {
1197 if (errno == EINTR)
1198 continue;
1199 warn("read() failed on %s", debug_tracefile);
1200 if (unlikely(close(trace_fd) != 0))
1201 warn("close() failed on %s", debug_tracefile);
1202 return;
1203 }
1204 if (bytes == 0)
1205 break;
1206 p += bytes;
1207 bufspace -= bytes;
1208 bytes_tot += bytes;
1209 } while (true);
1210
1211 if (unlikely(close(trace_fd) != 0))
1212 warn("close() failed on %s", debug_tracefile);
1213
1214 printstate_cnt_dec();
1215
1216 bufspace += reserve;
1217
1218 bytes = snprintf(p, bufspace,
1219 ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n");
1220
1221 if (bytes < 0) {
1222 warn("snprintf() failed");
1223 return;
1224 }
1225
1226 bytes_tot += bytes;
1227
1228
1229 mutex_lock(&print_mtx);
1230 write_or_die(fd_stdout, buffer, bytes_tot);
1231 mutex_unlock(&print_mtx);
1232 }
1233
1234 static char *get_no_opt(const char *opt)
1235 {
1236 char *no_opt;
1237 int s;
1238
1239 s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1;
1240
1241 no_opt = malloc_or_die_nocleanup(s);
1242 strcpy(no_opt, OPT_NO_PREFIX);
1243 strcat(no_opt, opt);
1244 return no_opt;
1245 }
1246
1247 static char *find_next_optstr(const char *allopt, const char **next)
1248 {
1249 const char *begin;
1250 const char *end;
1251 char *r;
1252 int s = 0;
1253
1254 if (allopt == NULL)
1255 return NULL;
1256
1257 for (begin = allopt; *begin != '\0'; begin++) {
1258 if (isgraph(*begin))
1259 break;
1260 }
1261
1262 if (*begin == '\0')
1263 return NULL;
1264
1265 for (end = begin; *end != '\0' && isgraph(*end); end++)
1266 s++;
1267
1268 r = malloc_or_die_nocleanup(s + 1);
1269 strncpy(r, begin, s);
1270 r[s] = '\0';
1271 *next = begin + s;
1272 return r;
1273 }
1274
1275 static bool get_trace_opt(const char *allopt, const char *opt, bool *found)
1276 {
1277 char *no_opt;
1278 char *str;
1279 const char *next = allopt;
1280 bool rval = false;
1281
1282 *found = false;
1283 no_opt = get_no_opt(opt);
1284
1285 do {
1286 str = find_next_optstr(next, &next);
1287 if (str == NULL)
1288 break;
1289 if (!strcmp(str, opt)) {
1290 *found = true;
1291 rval = true;
1292 free(str);
1293 break;
1294 }
1295 if (!strcmp(str, no_opt)) {
1296 *found = true;
1297 rval = false;
1298 free(str);
1299 break;
1300 }
1301 free(str);
1302 } while (true);
1303 free(no_opt);
1304
1305 return rval;
1306 }
1307
1308 static int set_trace_opt(const char *opt, bool value)
1309 {
1310 char *str;
1311 int r;
1312
1313 if (value)
1314 str = strdup(opt);
1315 else
1316 str = get_no_opt(opt);
1317
1318 r = tracefs_instance_file_write(NULL, TR_OPTIONS, str);
1319 free(str);
1320 return r;
1321 }
1322
1323 void save_trace_opts(struct ftrace_state *state)
1324 {
1325 char *allopt;
1326 int psize;
1327 int i;
1328
1329 allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize);
1330 if (!allopt)
1331 errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS);
1332
1333 for (i = 0; i < OPTIDX_NR; i++)
1334 state->opt[i] = get_trace_opt(allopt, optstr[i],
1335 &state->opt_valid[i]);
1336
1337 free(allopt);
1338 }
1339
1340 static void write_file(const char *file, const char *cur, const char *new,
1341 enum errhandling h)
1342 {
1343 int r;
1344 static const char *emsg = "Failed to write to the %s file!";
1345
1346
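/*
 * Skip the write if the file already contains the wanted value; passing
 * cur == NULL forces the write.
 */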
1347 if (cur && !needs_change(cur, new))
1348 return;
1349
1350 r = tracefs_instance_file_write(NULL, file, new);
1351 if (r < 0) {
1352 if (h) {
1353 warnx(emsg, file);
1354 if (h == ERR_CLEANUP)
1355 cleanup_exit(EXIT_FAILURE);
1356 } else
1357 errx(EXIT_FAILURE, emsg, file);
1358 }
1359 if (verbose_ftrace()) {
1360 mutex_lock(&print_mtx);
1361 printf("%s was set to %s\n", file, new);
1362 mutex_unlock(&print_mtx);
1363 }
1364 }
1365
1366 static void reset_max_latency(void)
1367 {
1368 write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP);
1369 }
1370
1371 static void save_and_disable_tracer(void)
1372 {
1373 char *orig_th;
1374 char *tracer;
1375 bool need_nop = false;
1376
1377 mutex_lock(&save_state.mutex);
1378
1379 save_trace_opts(&save_state);
1380 tracer = read_file(TR_CURRENT, ERR_EXIT);
1381 orig_th = read_file(TR_THRESH, ERR_EXIT);
1382
1383 if (needs_change(tracer, NOP_TRACER)) {
1384 mutex_lock(&print_mtx);
1385 if (force_tracer) {
1386 printf(
1387 "The %s tracer is already in use but proceeding anyway!\n",
1388 tracer);
1389 } else {
1390 printf(
1391 "The %s tracer is already in use, cowardly bailing out!\n"
1392 "This could indicate that another program or instance is tracing.\n"
1393 "Use the -F [--force] option to disregard the current tracer.\n", tracer);
1394 exit(0);
1395 }
1396 mutex_unlock(&print_mtx);
1397 need_nop = true;
1398 }
1399
1400 save_state.tracer = tracer;
1401 save_state.thresh = orig_th;
1402
1403 if (need_nop)
1404 write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT);
1405
1406 mutex_unlock(&save_state.mutex);
1407 }
1408
1409 void set_trace_opts(struct ftrace_state *state, bool *new)
1410 {
1411 int i;
1412 int r;
1413
1414
1415
1416
1417
1418
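/*
 * Only touch options whose current state could be parsed from trace_options
 * (opt_valid) and that differ from the requested value.
 */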
1419 for (i = 0; i < OPTIDX_NR; i++)
1420 if (state->opt_valid[i] &&
1421 state->opt[i] != new[i]) {
1422 r = set_trace_opt(optstr[i], new[i]);
1423 if (r < 0) {
1424 warnx("Failed to set the %s option to %s",
1425 optstr[i], bool2str(new[i]));
1426 cleanup_exit(EXIT_FAILURE);
1427 }
1428 if (verbose_ftrace()) {
1429 mutex_lock(&print_mtx);
1430 printf("%s in %s was set to %s\n", optstr[i],
1431 TR_OPTIONS, bool2str(new[i]));
1432 mutex_unlock(&print_mtx);
1433 }
1434 }
1435 }
1436
1437 static void enable_tracer(void)
1438 {
1439 mutex_lock(&save_state.mutex);
1440 set_trace_opts(&save_state, use_options);
1441
1442 write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP);
1443 write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP);
1444
1445 mutex_unlock(&save_state.mutex);
1446 }
1447
1448 static void tracing_loop(void)
1449 {
1450 int ifd = inotify_init();
1451 int wd;
1452 const ssize_t bufsize = sizeof(inotify_buffer);
1453 const ssize_t istructsize = sizeof(struct inotify_event);
1454 char *buf = &inotify_buffer[0];
1455 ssize_t nr_read;
1456 char *p;
1457 int modified;
1458 struct inotify_event *event;
1459 struct entry req;
1460 char *buffer;
1461 const size_t bufspace = PRINT_BUFFER_SIZE;
1462 struct timespec timestamp;
1463
1464 print_priority();
1465
1466 buffer = malloc_or_die(bufspace);
1467
1468 if (ifd < 0)
1469 err(EXIT_FAILURE, "inotify_init() failed!");
1470
1471
1472 if (setup_ftrace) {
1473
1474
1475
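/*
 * Save the current ftrace state and switch to the nop tracer. The tracer is
 * re-enabled further down, only after the inotify watch has been added.
 */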
1476 save_and_disable_tracer();
1477
1478
1479
1480
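/*
 * Zero tracing_max_latency so that latencies recorded by earlier tracing
 * sessions do not mask the ones we are about to watch for.
 */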
1481 reset_max_latency();
1482 }
1483
1484 wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY);
1485 if (wd < 0)
1486 err(EXIT_FAILURE, "inotify_add_watch() failed!");
1487
1488 if (setup_ftrace)
1489 enable_tracer();
1490
1491 signal_blocking(SIG_UNBLOCK);
1492
1493 while (true) {
1494 modified = 0;
1495 check_signals();
1496 nr_read = read(ifd, buf, bufsize);
1497 check_signals();
1498 if (nr_read < 0) {
1499 if (errno == EINTR)
1500 continue;
1501 warn("read() failed on inotify fd!");
1502 cleanup_exit(EXIT_FAILURE);
1503 }
1504 if (nr_read == bufsize)
1505 warnx("inotify() buffer filled, skipping events");
1506 if (nr_read < istructsize) {
1507 warnx("read() returned too few bytes on inotify fd");
1508 cleanup_exit(EXIT_FAILURE);
1509 }
1510
1511 for (p = buf; p < buf + nr_read;) {
1512 event = (struct inotify_event *) p;
1513 if ((event->mask & IN_MODIFY) != 0)
1514 modified++;
1515 p += istructsize + event->len;
1516 }
1517 while (modified > 0) {
1518 check_signals();
1519 mutex_lock(&printstate.mutex);
1520 check_signals();
1521 printstate_next_ticket(&req);
1522 if (printstate_cnt_read() > 0) {
1523 printstate_mark_req_completed(&req);
1524 mutex_unlock(&printstate.mutex);
1525 if (verbose_lostevent()) {
1526 clock_gettime_or_die(CLOCK_MONOTONIC,
1527 &timestamp);
1528 print_lostmessage(&timestamp, buffer,
1529 bufspace, &req,
1530 "inotify loop");
1531 }
1532 break;
1533 }
1534 mutex_unlock(&printstate.mutex);
1535 if (queue_try_to_add_entry(&printqueue, &req) != 0) {
1536
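/*
 * The print queue is full. Warn on stdout rather than blocking the inotify
 * loop; this request is dropped.
 */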
1537 check_signals();
1538 mutex_lock(&print_mtx);
1539 check_signals();
1540 write_or_die(fd_stdout, queue_full_warning,
1541 strlen(queue_full_warning));
1542 mutex_unlock(&print_mtx);
1543 }
1544 modified--;
1545 }
1546 }
1547 }
1548
1549 static void *do_printloop(void *arg)
1550 {
1551 const size_t bufspace = PRINT_BUFFER_SIZE;
1552 char *buffer;
1553 long *rseed = (long *) arg;
1554 struct drand48_data drandbuf;
1555 long slept = 0;
1556 struct entry req;
1557 int prob = 0;
1558 struct timespec timestamp;
1559 struct short_msg resize_msg;
1560
1561 print_priority();
1562
1563 if (srand48_r(*rseed, &drandbuf) != 0) {
1564 warn("srand48_r() failed!\n");
1565 cleanup_exit(EXIT_FAILURE);
1566 }
1567
1568 buffer = malloc_or_die(bufspace);
1569
1570 while (true) {
1571 req = queue_wait_for_entry(&printqueue);
1572 clock_gettime_or_die(CLOCK_MONOTONIC, &timestamp);
1573 mutex_lock(&printstate.mutex);
1574 if (prev_req_won_race(&req)) {
1575 printstate_mark_req_completed(&req);
1576 mutex_unlock(&printstate.mutex);
1577 if (verbose_lostevent())
1578 print_lostmessage(&timestamp, buffer, bufspace,
1579 &req, "print loop");
1580 continue;
1581 }
1582 mutex_unlock(&printstate.mutex);
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
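/*
 * With random sleep enabled, look up the per-mille chance of printing
 * immediately (based on how many requests are outstanding), toss a coin and
 * possibly sleep. go_to_sleep() returns -1 if a newer request arrives during
 * the sleep; in that case this printout is skipped and only a short skip
 * message is emitted below.
 */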
1594 if (trace_enable && use_random_sleep) {
1595 slept = 0;
1596 prob = table_get_probability(&req, &resize_msg);
1597 if (!toss_coin(&drandbuf, prob))
1598 slept = go_to_sleep(&req);
1599 if (slept >= 0) {
1600
1601 printstate_cnt_inc();
1602
1603
1604
1605
1606
1607 mutex_lock(&printstate.mutex);
1608 printstate_mark_req_completed(&req);
1609 mutex_unlock(&printstate.mutex);
1610 }
1611 }
1612 if (trace_enable) {
1613
1614
1615
1616
1617 if (slept >= 0)
1618
1619
1620
1621
1622 print_tracefile(&resize_msg, &timestamp, buffer,
1623 bufspace, slept, &req);
1624 else
1625 print_skipmessage(&resize_msg, &timestamp,
1626 buffer, bufspace, &req, true);
1627 } else {
1628 print_skipmessage(&resize_msg, &timestamp, buffer,
1629 bufspace, &req, false);
1630 }
1631 }
1632 return NULL;
1633 }
1634
1635 static void start_printthread(void)
1636 {
1637 unsigned int i;
1638 long *seed;
1639 int ufd;
1640
1641 ufd = open(DEV_URANDOM, O_RDONLY);
1642 if (nr_threads > MAX_THREADS) {
1643 warnx(
1644 "Number of requested print threads was %d, max number is %d\n",
1645 nr_threads, MAX_THREADS);
1646 nr_threads = MAX_THREADS;
1647 }
1648 for (i = 0; i < nr_threads; i++) {
1649 seed = malloc_or_die(sizeof(*seed));
1650 if (ufd < 0 ||
1651 read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) {
1652 printf(
1653 "Warning! Using trivial random number seed, since %s not available\n",
1654 DEV_URANDOM);
1655 fflush(stdout);
1656 *seed = i;
1657 }
1658 errno = pthread_create(&printthread[i], NULL, do_printloop,
1659 seed);
1660 if (errno)
1661 err(EXIT_FAILURE, "pthread_create()");
1662 }
1663 if (ufd > 0 && close(ufd) != 0)
1664 warn("close() failed");
1665 }
1666
1667 static void show_usage(void)
1668 {
1669 printf(
1670 "Usage: %s [OPTION]...\n\n"
1671 "Collect closely occurring latencies from %s\n"
1672 "with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, "
1673 "wakeup,\nwakeup_dl, or wakeup_rt.\n\n"
1674
1675 "The occurrence of a latency is detected by monitoring the file\n"
1676 "%s with inotify.\n\n"
1677
1678 "The following options are supported:\n\n"
1679
1680 "-l, --list\t\tList the latency tracers that are supported by the\n"
1681 "\t\t\tcurrently running Linux kernel. If you don't see the\n"
1682 "\t\t\ttracer that you want, you will probably need to\n"
1683 "\t\t\tchange your kernel config and build a new kernel.\n\n"
1684
1685 "-t, --tracer TR\t\tUse the tracer TR. The default is to use the first\n"
1686 "\t\t\ttracer that is supported by the kernel in the following\n"
1687 "\t\t\torder of precedence:\n\n"
1688 "\t\t\tpreemptirqsoff\n"
1689 "\t\t\tpreemptoff\n"
1690 "\t\t\tirqsoff\n"
1691 "\t\t\twakeup\n"
1692 "\t\t\twakeup_rt\n"
1693 "\t\t\twakeup_dl\n"
1694 "\n"
1695 "\t\t\tIf TR is not on the list above, then a warning will be\n"
1696 "\t\t\tprinted.\n\n"
1697
1698 "-F, --force\t\tProceed even if another ftrace tracer is active. Without\n"
1699 "\t\t\tthis option, the program will refuse to start tracing if\n"
1700 "\t\t\tany other tracer than the nop tracer is active.\n\n"
1701
1702 "-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n"
1703 "\t\t\tfor the tracer. The default is 0, which means that\n"
1704 "\t\t\ttracing_max_latency will be used. tracing_max_latency is\n"
1705 "\t\t\tset to 0 when the program is started and contains the\n"
1706 "\t\t\tmaximum of the latencies that have been encountered.\n\n"
1707
1708 "-f, --function\t\tEnable the function-trace option in trace_options. With\n"
1709 "\t\t\tthis option, ftrace will trace the functions that are\n"
1710 "\t\t\texecuted during a latency, without it we only get the\n"
1711 "\t\t\tbeginning, end, and backtrace.\n\n"
1712
1713 "-g, --graph\t\tEnable the display-graph option in trace_option. This\n"
1714 "\t\t\toption causes ftrace to show the graph of how functions\n"
1715 "\t\t\tare calling other functions.\n\n"
1716
1717 "-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n"
1718 "\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n"
1719 "\t\t\tusing rr or fifo, remember that these policies may cause\n"
1720 "\t\t\tother tasks to experience latencies.\n\n"
1721
1722 "-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n"
1723 "\t\t\tof PRI depends on the scheduling policy.\n\n"
1724
1725 "-n, --notrace\t\tIf latency is detected, do not print out the content of\n"
1726 "\t\t\tthe trace file to standard output\n\n"
1727
1728 "-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n"
1729
1730 "-r, --random\t\tArbitrarily sleep a certain amount of time, default\n"
1731 "\t\t\t%ld ms, before reading the trace file. The\n"
1732 "\t\t\tprobabilities for sleep are chosen so that the\n"
1733 "\t\t\tprobability of obtaining any of a cluster of closely\n"
1734 "\t\t\toccurring latencies are equal, i.e. we will randomly\n"
1735 "\t\t\tchoose which one we collect from the trace file.\n\n"
1736 "\t\t\tThis option is probably only useful with the irqsoff,\n"
1737 "\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n"
1738
1739 "-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n"
1740 "\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n"
1741 "\t\t\tlatencies are detected during a run, this value will\n"
1742 "\t\t\tautomatically be increased to NRLAT + 1 and then to\n"
1743 "\t\t\tNRLAT + 2 and so on. The default is %d. This option\n"
1744 "\t\t\timplies -r. We need to know this number in order to\n"
1745 "\t\t\tbe able to calculate the probabilities of sleeping.\n"
1746 "\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n"
1747 "\t\t\tdo an immediate printout will be:\n\n"
1748 "\t\t\t1/NRLAT 1/(NRLAT - 1) ... 1/3 1/2 1\n\n"
1749 "\t\t\tThe probability of sleeping will be:\n\n"
1750 "\t\t\t1 - P, where P is from the series above\n\n"
1751 "\t\t\tThis descending probability will cause us to choose\n"
1752 "\t\t\tan occurrence at random. Observe that the final\n"
1753 "\t\t\tprobability is 0, it is when we reach this probability\n"
1754 "\t\t\tthat we increase NRLAT automatically. As an example,\n"
1755 "\t\t\twith the default value of 2, the probabilities will be:\n\n"
1756 "\t\t\t1/2 0\n\n"
1757 "\t\t\tThis means, when a latency is detected we will sleep\n"
1758 "\t\t\twith 50%% probability. If we ever detect another latency\n"
1759 "\t\t\tduring the sleep period, then the probability of sleep\n"
1760 "\t\t\twill be 0%% and the table will be expanded to:\n\n"
1761 "\t\t\t1/3 1/2 0\n\n"
1762
1763 "-v, --verbose\t\tIncrease the verbosity. If this option is given once,\n"
1764 "\t\t\tthen print a message every time that the NRLAT value\n"
1765 "\t\t\tis automatically increased. It also causes a message to\n"
1766 "\t\t\tbe printed when the ftrace settings are changed. If this\n"
1767 "\t\t\toption is given at least twice, then also print a\n"
1768 "\t\t\twarning for lost events.\n\n"
1769
1770 "-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n"
1771 "\t\t\tprinting out the trace from the trace file. The default\n"
1772 "\t\t\tis %ld ms. This option implies -r.\n\n"
1773
1774 "-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n"
1775 "\t\t\tconfigures the ftrace files in sysfs such as\n"
1776 "\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n"
1777
1778 "-i, --tracefile FILE\tUse FILE as trace file. The default is\n"
1779 "\t\t\t%s.\n"
1780 "\t\t\tThis options implies -x\n\n"
1781
1782 "-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n"
1783 "\t\t\t%s.\n"
1784 "\t\t\tThis options implies -x\n\n"
1785 ,
1786 prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS,
1787 SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT,
1788 debug_tracefile_dflt, debug_maxlat_dflt);
1789 }
1790
1791 static void find_tracefiles(void)
1792 {
1793 debug_tracefile_dflt = tracefs_get_tracing_file("trace");
1794 if (debug_tracefile_dflt == NULL) {
1795
1796 debug_tracefile_dflt = DEBUG_NOFILE;
1797 }
1798
1799 debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency");
1800 if (debug_maxlat_dflt == NULL) {
1801
1802 debug_maxlat_dflt = DEBUG_NOFILE;
1803 }
1804
1805 debug_tracefile = debug_tracefile_dflt;
1806 debug_maxlat = debug_maxlat_dflt;
1807 }
1808
1809 bool alldigits(const char *s)
1810 {
1811 for (; *s != '\0'; s++)
1812 if (!isdigit(*s))
1813 return false;
1814 return true;
1815 }
1816
1817 void check_alldigits(const char *optarg, const char *argname)
1818 {
1819 if (!alldigits(optarg))
1820 errx(EXIT_FAILURE,
1821 "The %s parameter expects a decimal argument\n", argname);
1822 }
1823
1824 static void scan_arguments(int argc, char *argv[])
1825 {
1826 int c;
1827 int i;
1828 int option_idx = 0;
1829
1830 static struct option long_options[] = {
1831 { "list", no_argument, 0, 'l' },
1832 { "tracer", required_argument, 0, 't' },
1833 { "force", no_argument, 0, 'F' },
1834 { "threshold", required_argument, 0, 's' },
1835 { "function", no_argument, 0, 'f' },
1836 { "graph", no_argument, 0, 'g' },
1837 { "policy", required_argument, 0, 'c' },
1838 { "priority", required_argument, 0, 'p' },
1839 { "help", no_argument, 0, 'h' },
1840 { "notrace", no_argument, 0, 'n' },
1841 { "random", no_argument, 0, 'r' },
1842 { "nrlat", required_argument, 0, 'a' },
1843 { "threads", required_argument, 0, 'e' },
1844 { "time", required_argument, 0, 'u' },
1845 { "verbose", no_argument, 0, 'v' },
1846 { "no-ftrace", no_argument, 0, 'x' },
1847 { "tracefile", required_argument, 0, 'i' },
1848 { "max-lat", required_argument, 0, 'm' },
1849 { 0, 0, 0, 0 }
1850 };
1851 const struct policy *p;
1852 int max, min;
1853 int value;
1854 bool notracer, valid;
1855
1856
1857
1858
1859
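/*
 * Resolve the default trace file paths before parsing the options:
 * show_usage() prints them, and -i/-m may later override debug_tracefile and
 * debug_maxlat.
 */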
1860 find_tracefiles();
1861
1862 while (true) {
1863 c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:",
1864 long_options, &option_idx);
1865 if (c == -1)
1866 break;
1867
1868 switch (c) {
1869 case 'l':
1870 show_available();
1871 exit(0);
1872 break;
1873 case 't':
1874 current_tracer = strdup(optarg);
1875 if (!is_relevant_tracer(current_tracer)) {
1876 warnx("%s is not a known latency tracer!\n",
1877 current_tracer);
1878 }
1879 valid = tracer_valid(current_tracer, &notracer);
1880 if (notracer)
1881 errx(EXIT_FAILURE, no_tracer_msg);
1882 if (!valid)
1883 errx(EXIT_FAILURE,
1884 "The tracer %s is not supported by your kernel!\n", current_tracer);
1885 break;
1886 case 'F':
1887 force_tracer = true;
1888 break;
1889 case 's':
1890 check_alldigits(optarg, "-s [--threshold]");
1891 threshold = strdup(optarg);
1892 break;
1893 case 'f':
1894 use_options[OPTIDX_FUNC_TR] = true;
1895 break;
1896 case 'g':
1897 use_options[OPTIDX_DISP_GR] = true;
1898 break;
1899 case 'c':
1900 p = policy_from_name(optarg);
1901 if (p != NULL) {
1902 sched_policy = p->policy;
1903 sched_policy_set = true;
1904 if (!sched_pri_set) {
1905 sched_pri = p->default_pri;
1906 sched_pri_set = true;
1907 }
1908 } else {
1909 warnx("Unknown scheduling %s\n", optarg);
1910 show_usage();
1911 exit(0);
1912 }
1913 break;
1914 case 'p':
1915 check_alldigits(optarg, "-p [--priority]");
1916 sched_pri = atoi(optarg);
1917 sched_pri_set = true;
1918 break;
1919 case 'h':
1920 show_usage();
1921 exit(0);
1922 break;
1923 case 'n':
1924 trace_enable = false;
1925 use_random_sleep = false;
1926 break;
1927 case 'e':
1928 check_alldigits(optarg, "-e [--threads]");
1929 value = atoi(optarg);
1930 if (value > 0)
1931 nr_threads = value;
1932 else {
1933 warnx("NRTHR must be > 0\n");
1934 show_usage();
1935 exit(0);
1936 }
1937 break;
1938 case 'u':
1939 check_alldigits(optarg, "-u [--time]");
1940 value = atoi(optarg);
1941 if (value < 0) {
1942 warnx("TIME must be >= 0\n");
1943 show_usage();
1944 exit(0);
1945 }
1946 trace_enable = true;
1947 use_random_sleep = true;
1948 sleep_time = value * USEC_PER_MSEC;
1949 break;
1950 case 'v':
1951 verbosity++;
1952 break;
1953 case 'r':
1954 trace_enable = true;
1955 use_random_sleep = true;
1956 break;
1957 case 'a':
1958 check_alldigits(optarg, "-a [--nrlat]");
1959 value = atoi(optarg);
1960 if (value <= 0) {
1961 warnx("NRLAT must be > 0\n");
1962 show_usage();
1963 exit(0);
1964 }
1965 trace_enable = true;
1966 use_random_sleep = true;
1967 table_startsize = value;
1968 break;
1969 case 'x':
1970 setup_ftrace = false;
1971 break;
1972 case 'i':
1973 setup_ftrace = false;
1974 debug_tracefile = strdup(optarg);
1975 break;
1976 case 'm':
1977 setup_ftrace = false;
1978 debug_maxlat = strdup(optarg);
1979 break;
1980 default:
1981 show_usage();
1982 exit(0);
1983 break;
1984 }
1985 }
1986
1987 if (setup_ftrace) {
1988 if (!current_tracer) {
1989 current_tracer = find_default_tracer();
1990 if (!current_tracer)
1991 errx(EXIT_FAILURE,
1992 "No default tracer found and tracer not specified\n");
1993 }
1994
1995 if (use_random_sleep && !random_makes_sense(current_tracer)) {
1996 warnx("WARNING: The tracer is %s and random sleep has",
1997 current_tracer);
1998 fprintf(stderr,
1999 "been enabled. Random sleep is intended for the following tracers:\n");
2000 for (i = 0; random_tracers[i]; i++)
2001 fprintf(stderr, "%s\n", random_tracers[i]);
2002 fprintf(stderr, "\n");
2003 }
2004 }
2005
2006 if (debug_tracefile == DEBUG_NOFILE ||
2007 debug_maxlat == DEBUG_NOFILE)
2008 errx(EXIT_FAILURE,
2009 "Could not find tracing directory e.g. /sys/kernel/tracing\n");
2010
2011 if (!sched_policy_set) {
2012 sched_policy = SCHED_RR;
2013 sched_policy_set = true;
2014 if (!sched_pri_set) {
2015 sched_pri = RT_DEFAULT_PRI;
2016 sched_pri_set = true;
2017 }
2018 }
2019
2020 max = sched_get_priority_max(sched_policy);
2021 min = sched_get_priority_min(sched_policy);
2022
2023 if (sched_pri < min) {
2024 printf(
2025 "ATTENTION: Increasing priority to minimum, which is %d\n", min);
2026 sched_pri = min;
2027 }
2028 if (sched_pri > max) {
2029 printf(
2030 "ATTENTION: Reducing priority to maximum, which is %d\n", max);
2031 sched_pri = max;
2032 }
2033 }
2034
2035 static void show_params(void)
2036 {
2037 printf(
2038 "\n"
2039 "Running with scheduling policy %s and priority %d. Using %d print threads.\n",
2040 policy_name(sched_policy), sched_pri, nr_threads);
2041 if (trace_enable) {
2042 if (use_random_sleep) {
2043 printf(
2044 "%s will be printed with random delay\n"
2045 "Start size of the probability table:\t\t\t%d\n"
2046 "Print a message when the prob. table changes size:\t%s\n"
2047 "Print a warning when an event has been lost:\t\t%s\n"
2048 "Sleep time is:\t\t\t\t\t\t%ld ms\n",
2049 debug_tracefile,
2050 table_startsize,
2051 bool2str(verbose_sizechange()),
2052 bool2str(verbose_lostevent()),
2053 sleep_time / USEC_PER_MSEC);
2054 } else {
2055 printf("%s will be printed immediately\n",
2056 debug_tracefile);
2057 }
2058 } else {
2059 printf("%s will not be printed\n",
2060 debug_tracefile);
2061 }
2062 if (setup_ftrace) {
2063 printf("Tracer:\t\t\t\t\t\t\t%s\n"
2064 "%s option:\t\t\t\t\t%s\n"
2065 "%s option:\t\t\t\t\t%s\n",
2066 current_tracer,
2067 optstr[OPTIDX_FUNC_TR],
2068 bool2str(use_options[OPTIDX_FUNC_TR]),
2069 optstr[OPTIDX_DISP_GR],
2070 bool2str(use_options[OPTIDX_DISP_GR]));
2071 if (!strcmp(threshold, "0"))
2072 printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n");
2073 else
2074 printf("Threshold:\t\t\t\t\t\t%s\n", threshold);
2075 }
2076 printf("\n");
2077 }
2078
2079 int main(int argc, char *argv[])
2080 {
2081 init_save_state();
2082 signal_blocking(SIG_BLOCK);
2083 setup_sig_handler();
2084 open_stdout();
2085
2086 if (argc >= 1)
2087 prg_name = argv[0];
2088 else
2089 prg_name = prg_unknown;
2090
2091 scan_arguments(argc, argv);
2092 show_params();
2093
2094 init_printstate();
2095 init_print_mtx();
2096 if (use_random_sleep) {
2097 init_probabilities();
2098 if (verbose_sizechange())
2099 printf("Initializing probability table to %d\n",
2100 table_startsize);
2101 sleeptable_resize(table_startsize, false, NULL);
2102 }
2103 set_priority();
2104 init_queue(&printqueue);
2105 start_printthread();
2106 tracing_loop();
2107 return 0;
2108 }