0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #include "util/record.h"
0018 #include <traceevent/event-parse.h>
0019 #include <api/fs/tracing_path.h>
0020 #include <bpf/bpf.h>
0021 #include "util/bpf_map.h"
0022 #include "util/rlimit.h"
0023 #include "builtin.h"
0024 #include "util/cgroup.h"
0025 #include "util/color.h"
0026 #include "util/config.h"
0027 #include "util/debug.h"
0028 #include "util/dso.h"
0029 #include "util/env.h"
0030 #include "util/event.h"
0031 #include "util/evsel.h"
0032 #include "util/evsel_fprintf.h"
0033 #include "util/synthetic-events.h"
0034 #include "util/evlist.h"
0035 #include "util/evswitch.h"
0036 #include "util/mmap.h"
0037 #include <subcmd/pager.h>
0038 #include <subcmd/exec-cmd.h>
0039 #include "util/machine.h"
0040 #include "util/map.h"
0041 #include "util/symbol.h"
0042 #include "util/path.h"
0043 #include "util/session.h"
0044 #include "util/thread.h"
0045 #include <subcmd/parse-options.h>
0046 #include "util/strlist.h"
0047 #include "util/intlist.h"
0048 #include "util/thread_map.h"
0049 #include "util/stat.h"
0050 #include "util/tool.h"
0051 #include "util/util.h"
0052 #include "trace/beauty/beauty.h"
0053 #include "trace-event.h"
0054 #include "util/parse-events.h"
0055 #include "util/bpf-loader.h"
0056 #include "util/tracepoint.h"
0057 #include "callchain.h"
0058 #include "print_binary.h"
0059 #include "string2.h"
0060 #include "syscalltbl.h"
0061 #include "rb_resort.h"
0062 #include "../perf.h"
0063
0064 #include <errno.h>
0065 #include <inttypes.h>
0066 #include <poll.h>
0067 #include <signal.h>
0068 #include <stdlib.h>
0069 #include <string.h>
0070 #include <linux/err.h>
0071 #include <linux/filter.h>
0072 #include <linux/kernel.h>
0073 #include <linux/random.h>
0074 #include <linux/stringify.h>
0075 #include <linux/time64.h>
0076 #include <linux/zalloc.h>
0077 #include <fcntl.h>
0078 #include <sys/sysmacros.h>
0079
0080 #include <linux/ctype.h>
0081 #include <perf/mmap.h>
0082
/* Fallback definition, used only when no included header provides O_CLOEXEC. */
#if !defined(O_CLOEXEC)
#define O_CLOEXEC 02000000
#endif
0086
/* Fallback definition, used only when no included header provides it. */
#if !defined(F_LINUX_SPECIFIC_BASE)
#define F_LINUX_SPECIFIC_BASE 1024
#endif
0090
0091
0092
0093
0094 struct syscall_arg_fmt {
0095 size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
0096 bool (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
0097 unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
0098 void *parm;
0099 const char *name;
0100 u16 nr_entries;
0101 bool show_zero;
0102 };
0103
0104 struct syscall_fmt {
0105 const char *name;
0106 const char *alias;
0107 struct {
0108 const char *sys_enter,
0109 *sys_exit;
0110 } bpf_prog_name;
0111 struct syscall_arg_fmt arg[6];
0112 u8 nr_args;
0113 bool errpid;
0114 bool timeout;
0115 bool hexret;
0116 };
0117
0118 struct trace {
0119 struct perf_tool tool;
0120 struct syscalltbl *sctbl;
0121 struct {
0122 struct syscall *table;
0123 struct bpf_map *map;
0124 struct {
0125 struct bpf_map *sys_enter,
0126 *sys_exit;
0127 } prog_array;
0128 struct {
0129 struct evsel *sys_enter,
0130 *sys_exit,
0131 *augmented;
0132 } events;
0133 struct bpf_program *unaugmented_prog;
0134 } syscalls;
0135 struct {
0136 struct bpf_map *map;
0137 } dump;
0138 struct record_opts opts;
0139 struct evlist *evlist;
0140 struct machine *host;
0141 struct thread *current;
0142 struct bpf_object *bpf_obj;
0143 struct cgroup *cgroup;
0144 u64 base_time;
0145 FILE *output;
0146 unsigned long nr_events;
0147 unsigned long nr_events_printed;
0148 unsigned long max_events;
0149 struct evswitch evswitch;
0150 struct strlist *ev_qualifier;
0151 struct {
0152 size_t nr;
0153 int *entries;
0154 } ev_qualifier_ids;
0155 struct {
0156 size_t nr;
0157 pid_t *entries;
0158 struct bpf_map *map;
0159 } filter_pids;
0160 double duration_filter;
0161 double runtime_ms;
0162 struct {
0163 u64 vfs_getname,
0164 proc_getname;
0165 } stats;
0166 unsigned int max_stack;
0167 unsigned int min_stack;
0168 int raw_augmented_syscalls_args_size;
0169 bool raw_augmented_syscalls;
0170 bool fd_path_disabled;
0171 bool sort_events;
0172 bool not_ev_qualifier;
0173 bool live;
0174 bool full_time;
0175 bool sched;
0176 bool multiple_threads;
0177 bool summary;
0178 bool summary_only;
0179 bool errno_summary;
0180 bool failure_only;
0181 bool show_comm;
0182 bool print_sample;
0183 bool show_tool_stats;
0184 bool trace_syscalls;
0185 bool libtraceevent_print;
0186 bool kernel_syscallchains;
0187 s16 args_alignment;
0188 bool show_tstamp;
0189 bool show_duration;
0190 bool show_zeros;
0191 bool show_arg_names;
0192 bool show_string_prefix;
0193 bool force;
0194 bool vfs_getname;
0195 int trace_pgfaults;
0196 char *perfconfig_events;
0197 struct {
0198 struct ordered_events data;
0199 u64 last;
0200 } oe;
0201 };
0202
0203 struct tp_field {
0204 int offset;
0205 union {
0206 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
0207 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
0208 };
0209 };
0210
0211 #define TP_UINT_FIELD(bits) \
0212 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
0213 { \
0214 u##bits value; \
0215 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
0216 return value; \
0217 }
0218
0219 TP_UINT_FIELD(8);
0220 TP_UINT_FIELD(16);
0221 TP_UINT_FIELD(32);
0222 TP_UINT_FIELD(64);
0223
0224 #define TP_UINT_FIELD__SWAPPED(bits) \
0225 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
0226 { \
0227 u##bits value; \
0228 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
0229 return bswap_##bits(value);\
0230 }
0231
0232 TP_UINT_FIELD__SWAPPED(16);
0233 TP_UINT_FIELD__SWAPPED(32);
0234 TP_UINT_FIELD__SWAPPED(64);
0235
0236 static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
0237 {
0238 field->offset = offset;
0239
0240 switch (size) {
0241 case 1:
0242 field->integer = tp_field__u8;
0243 break;
0244 case 2:
0245 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
0246 break;
0247 case 4:
0248 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
0249 break;
0250 case 8:
0251 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
0252 break;
0253 default:
0254 return -1;
0255 }
0256
0257 return 0;
0258 }
0259
0260 static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
0261 {
0262 return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
0263 }
0264
0265 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
0266 {
0267 return sample->raw_data + field->offset;
0268 }
0269
0270 static int __tp_field__init_ptr(struct tp_field *field, int offset)
0271 {
0272 field->offset = offset;
0273 field->pointer = tp_field__ptr;
0274 return 0;
0275 }
0276
0277 static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
0278 {
0279 return __tp_field__init_ptr(field, format_field->offset);
0280 }
0281
0282 struct syscall_tp {
0283 struct tp_field id;
0284 union {
0285 struct tp_field args, ret;
0286 };
0287 };
0288
0289
0290
0291
0292
0293
0294 struct evsel_trace {
0295 struct syscall_tp sc;
0296 struct syscall_arg_fmt *fmt;
0297 };
0298
0299 static struct evsel_trace *evsel_trace__new(void)
0300 {
0301 return zalloc(sizeof(struct evsel_trace));
0302 }
0303
0304 static void evsel_trace__delete(struct evsel_trace *et)
0305 {
0306 if (et == NULL)
0307 return;
0308
0309 zfree(&et->fmt);
0310 free(et);
0311 }
0312
0313
0314
0315
0316
0317 static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel)
0318 {
0319 struct evsel_trace *et = evsel->priv;
0320
0321 return &et->sc;
0322 }
0323
0324 static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel)
0325 {
0326 if (evsel->priv == NULL) {
0327 evsel->priv = evsel_trace__new();
0328 if (evsel->priv == NULL)
0329 return NULL;
0330 }
0331
0332 return __evsel__syscall_tp(evsel);
0333 }
0334
0335
0336
0337
0338 static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel)
0339 {
0340 struct evsel_trace *et = evsel->priv;
0341
0342 return et->fmt;
0343 }
0344
0345 static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
0346 {
0347 struct evsel_trace *et = evsel->priv;
0348
0349 if (evsel->priv == NULL) {
0350 et = evsel->priv = evsel_trace__new();
0351
0352 if (et == NULL)
0353 return NULL;
0354 }
0355
0356 if (et->fmt == NULL) {
0357 et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
0358 if (et->fmt == NULL)
0359 goto out_delete;
0360 }
0361
0362 return __evsel__syscall_arg_fmt(evsel);
0363
0364 out_delete:
0365 evsel_trace__delete(evsel->priv);
0366 evsel->priv = NULL;
0367 return NULL;
0368 }
0369
0370 static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
0371 {
0372 struct tep_format_field *format_field = evsel__field(evsel, name);
0373
0374 if (format_field == NULL)
0375 return -1;
0376
0377 return tp_field__init_uint(field, format_field, evsel->needs_swap);
0378 }
0379
/*
 * Initialize the syscall_tp member @name of @evsel as an integer reader for
 * the tracepoint field of the same name. evsel->priv must already be set,
 * and @evsel is evaluated more than once.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc_fields = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_uint_field(evsel, &sc_fields->name, #name); })
0383
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it. Returns -1 when the field does not exist,
 * otherwise the result of tp_field__init_ptr().
 */
static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
	struct tep_format_field *ff = evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
0393
/*
 * Initialize the syscall_tp member @name of @evsel as a pointer accessor for
 * the tracepoint field of the same name. evsel->priv must already be set,
 * and @evsel is evaluated more than once.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_fields = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_ptr_field(evsel, &sc_fields->name, #name); })
0397
0398 static void evsel__delete_priv(struct evsel *evsel)
0399 {
0400 zfree(&evsel->priv);
0401 evsel__delete(evsel);
0402 }
0403
0404 static int evsel__init_syscall_tp(struct evsel *evsel)
0405 {
0406 struct syscall_tp *sc = evsel__syscall_tp(evsel);
0407
0408 if (sc != NULL) {
0409 if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
0410 evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
0411 return -ENOENT;
0412 return 0;
0413 }
0414
0415 return -ENOMEM;
0416 }
0417
0418 static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
0419 {
0420 struct syscall_tp *sc = evsel__syscall_tp(evsel);
0421
0422 if (sc != NULL) {
0423 struct tep_format_field *syscall_id = evsel__field(tp, "id");
0424 if (syscall_id == NULL)
0425 syscall_id = evsel__field(tp, "__syscall_nr");
0426 if (syscall_id == NULL ||
0427 __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
0428 return -EINVAL;
0429
0430 return 0;
0431 }
0432
0433 return -ENOMEM;
0434 }
0435
0436 static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
0437 {
0438 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
0439
0440 return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
0441 }
0442
0443 static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
0444 {
0445 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
0446
0447 return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
0448 }
0449
0450 static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
0451 {
0452 if (evsel__syscall_tp(evsel) != NULL) {
0453 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
0454 return -ENOENT;
0455
0456 evsel->handler = handler;
0457 return 0;
0458 }
0459
0460 return -ENOMEM;
0461 }
0462
/*
 * Create the tracepoint evsel "raw_syscalls:<direction>", falling back to
 * "syscalls:<direction>" when the first cannot be created, and initialize it
 * with @handler.
 *
 * Returns the new evsel, or NULL when neither tracepoint can be created or
 * the initialization fails (the evsel is destroyed in that case).
 */
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct evsel *evsel = evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		evsel = evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (evsel__init_raw_syscall_tp(evsel, handler) == 0)
		return evsel;

	evsel__delete_priv(evsel);
	return NULL;
}
0483
/*
 * Read the integer syscall_tp member @name of @evsel from @sample through its
 * installed reader. evsel->priv must already be set.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_fields = __evsel__syscall_tp(evsel); \
	   sc_fields->name.integer(&sc_fields->name, sample); })
0487
/*
 * Obtain the pointer for syscall_tp member @name of @evsel within @sample
 * through its installed accessor. evsel->priv must already be set.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_fields = __evsel__syscall_tp(evsel); \
	   sc_fields->name.pointer(&sc_fields->name, sample); })
0491
0492 size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val)
0493 {
0494 int idx = val - sa->offset;
0495
0496 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
0497 size_t printed = scnprintf(bf, size, intfmt, val);
0498 if (show_suffix)
0499 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
0500 return printed;
0501 }
0502
0503 return scnprintf(bf, size, "%s%s", sa->entries[idx], show_suffix ? sa->prefix : "");
0504 }
0505
0506 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
0507 {
0508 int idx = val - sa->offset;
0509
0510 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
0511 size_t printed = scnprintf(bf, size, intfmt, val);
0512 if (show_prefix)
0513 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
0514 return printed;
0515 }
0516
0517 return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
0518 }
0519
0520 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
0521 const char *intfmt,
0522 struct syscall_arg *arg)
0523 {
0524 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
0525 }
0526
/* strarray formatter that prints unnamed values in decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *intfmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, intfmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
0534
0535 bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
0536 {
0537 return strarray__strtoul(arg->parm, bf, size, ret);
0538 }
0539
0540 bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
0541 {
0542 return strarray__strtoul_flags(arg->parm, bf, size, ret);
0543 }
0544
0545 bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
0546 {
0547 return strarrays__strtoul(arg->parm, bf, size, ret);
0548 }
0549
0550 size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
0551 {
0552 return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
0553 }
0554
0555 size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
0556 {
0557 size_t printed;
0558 int i;
0559
0560 for (i = 0; i < sas->nr_entries; ++i) {
0561 struct strarray *sa = sas->entries[i];
0562 int idx = val - sa->offset;
0563
0564 if (idx >= 0 && idx < sa->nr_entries) {
0565 if (sa->entries[idx] == NULL)
0566 break;
0567 return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
0568 }
0569 }
0570
0571 printed = scnprintf(bf, size, intfmt, val);
0572 if (show_prefix)
0573 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
0574 return printed;
0575 }
0576
0577 bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret)
0578 {
0579 int i;
0580
0581 for (i = 0; i < sa->nr_entries; ++i) {
0582 if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') {
0583 *ret = sa->offset + i;
0584 return true;
0585 }
0586 }
0587
0588 return false;
0589 }
0590
0591 bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret)
0592 {
0593 u64 val = 0;
0594 char *tok = bf, *sep, *end;
0595
0596 *ret = 0;
0597
0598 while (size != 0) {
0599 int toklen = size;
0600
0601 sep = memchr(tok, '|', size);
0602 if (sep != NULL) {
0603 size -= sep - tok + 1;
0604
0605 end = sep - 1;
0606 while (end > tok && isspace(*end))
0607 --end;
0608
0609 toklen = end - tok + 1;
0610 }
0611
0612 while (isspace(*tok))
0613 ++tok;
0614
0615 if (isalpha(*tok) || *tok == '_') {
0616 if (!strarray__strtoul(sa, tok, toklen, &val))
0617 return false;
0618 } else {
0619 bool is_hexa = tok[0] == 0 && (tok[1] = 'x' || tok[1] == 'X');
0620
0621 val = strtoul(tok, NULL, is_hexa ? 16 : 0);
0622 }
0623
0624 *ret |= (1 << (val - 1));
0625
0626 if (sep == NULL)
0627 break;
0628 tok = sep + 1;
0629 }
0630
0631 return true;
0632 }
0633
0634 bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret)
0635 {
0636 int i;
0637
0638 for (i = 0; i < sas->nr_entries; ++i) {
0639 struct strarray *sa = sas->entries[i];
0640
0641 if (strarray__strtoul(sa, bf, size, ret))
0642 return true;
0643 }
0644
0645 return false;
0646 }
0647
0648 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
0649 struct syscall_arg *arg)
0650 {
0651 return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
0652 }
0653
/* Fallback definition, used only when no included header provides AT_FDCWD. */
#if !defined(AT_FDCWD)
#define AT_FDCWD -100
#endif
0657
0658 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
0659 struct syscall_arg *arg)
0660 {
0661 int fd = arg->val;
0662 const char *prefix = "AT_FD";
0663
0664 if (fd == AT_FDCWD)
0665 return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
0666
0667 return syscall_arg__scnprintf_fd(bf, size, arg);
0668 }
0669
0670 #define SCA_FDAT syscall_arg__scnprintf_fd_at
0671
/* Forward declaration; the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
0676
0677 size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
0678 {
0679 return scnprintf(bf, size, "%#lx", arg->val);
0680 }
0681
0682 size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
0683 {
0684 if (arg->val == 0)
0685 return scnprintf(bf, size, "NULL");
0686 return syscall_arg__scnprintf_hex(bf, size, arg);
0687 }
0688
0689 size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
0690 {
0691 return scnprintf(bf, size, "%d", arg->val);
0692 }
0693
0694 size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
0695 {
0696 return scnprintf(bf, size, "%ld", arg->val);
0697 }
0698
0699 static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg)
0700 {
0701
0702
0703
0704 return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries ?: arg->len, arg->val);
0705 }
0706
0707 #define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array
0708
0709 static const char *bpf_cmd[] = {
0710 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
0711 "MAP_GET_NEXT_KEY", "PROG_LOAD", "OBJ_PIN", "OBJ_GET", "PROG_ATTACH",
0712 "PROG_DETACH", "PROG_TEST_RUN", "PROG_GET_NEXT_ID", "MAP_GET_NEXT_ID",
0713 "PROG_GET_FD_BY_ID", "MAP_GET_FD_BY_ID", "OBJ_GET_INFO_BY_FD",
0714 "PROG_QUERY", "RAW_TRACEPOINT_OPEN", "BTF_LOAD", "BTF_GET_FD_BY_ID",
0715 "TASK_FD_QUERY", "MAP_LOOKUP_AND_DELETE_ELEM", "MAP_FREEZE",
0716 "BTF_GET_NEXT_ID", "MAP_LOOKUP_BATCH", "MAP_LOOKUP_AND_DELETE_BATCH",
0717 "MAP_UPDATE_BATCH", "MAP_DELETE_BATCH", "LINK_CREATE", "LINK_UPDATE",
0718 "LINK_GET_FD_BY_ID", "LINK_GET_NEXT_ID", "ENABLE_STATS", "ITER_CREATE",
0719 "LINK_DETACH", "PROG_BIND_MAP",
0720 };
0721 static DEFINE_STRARRAY(bpf_cmd, "BPF_");
0722
0723 static const char *fsmount_flags[] = {
0724 [1] = "CLOEXEC",
0725 };
0726 static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");
0727
0728 #include "trace/beauty/generated/fsconfig_arrays.c"
0729
0730 static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
0731
0732 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
0733 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);
0734
0735 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
0736 static DEFINE_STRARRAY(itimers, "ITIMER_");
0737
0738 static const char *keyctl_options[] = {
0739 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
0740 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
0741 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
0742 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
0743 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
0744 };
0745 static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");
0746
0747 static const char *whences[] = { "SET", "CUR", "END",
0748 #ifdef SEEK_DATA
0749 "DATA",
0750 #endif
0751 #ifdef SEEK_HOLE
0752 "HOLE",
0753 #endif
0754 };
0755 static DEFINE_STRARRAY(whences, "SEEK_");
0756
0757 static const char *fcntl_cmds[] = {
0758 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
0759 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
0760 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
0761 "GETOWNER_UIDS",
0762 };
0763 static DEFINE_STRARRAY(fcntl_cmds, "F_");
0764
0765 static const char *fcntl_linux_specific_cmds[] = {
0766 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
0767 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
0768 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
0769 };
0770
0771 static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);
0772
0773 static struct strarray *fcntl_cmds_arrays[] = {
0774 &strarray__fcntl_cmds,
0775 &strarray__fcntl_linux_specific_cmds,
0776 };
0777
0778 static DEFINE_STRARRAYS(fcntl_cmds_arrays);
0779
0780 static const char *rlimit_resources[] = {
0781 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
0782 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
0783 "RTTIME",
0784 };
0785 static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");
0786
0787 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
0788 static DEFINE_STRARRAY(sighow, "SIG_");
0789
0790 static const char *clockid[] = {
0791 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
0792 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
0793 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
0794 };
0795 static DEFINE_STRARRAY(clockid, "CLOCK_");
0796
0797 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
0798 struct syscall_arg *arg)
0799 {
0800 bool show_prefix = arg->show_string_prefix;
0801 const char *suffix = "_OK";
0802 size_t printed = 0;
0803 int mode = arg->val;
0804
0805 if (mode == F_OK)
0806 return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
0807 #define P_MODE(n) \
0808 if (mode & n##_OK) { \
0809 printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
0810 mode &= ~n##_OK; \
0811 }
0812
0813 P_MODE(R);
0814 P_MODE(W);
0815 P_MODE(X);
0816 #undef P_MODE
0817
0818 if (mode)
0819 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
0820
0821 return printed;
0822 }
0823
0824 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
0825
/* Forward declaration; the definition is not in this part of the file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
0830
0831 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
0832 struct syscall_arg *arg)
0833 {
0834 bool show_prefix = arg->show_string_prefix;
0835 const char *prefix = "O_";
0836 int printed = 0, flags = arg->val;
0837
0838 #define P_FLAG(n) \
0839 if (flags & O_##n) { \
0840 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
0841 flags &= ~O_##n; \
0842 }
0843
0844 P_FLAG(CLOEXEC);
0845 P_FLAG(NONBLOCK);
0846 #undef P_FLAG
0847
0848 if (flags)
0849 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
0850
0851 return printed;
0852 }
0853
0854 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
0855
/* Fallback definitions, used only when no included header provides them. */
#if !defined(GRND_NONBLOCK)
#define GRND_NONBLOCK 0x0001
#endif
#if !defined(GRND_RANDOM)
#define GRND_RANDOM 0x0002
#endif
0862
0863 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
0864 struct syscall_arg *arg)
0865 {
0866 bool show_prefix = arg->show_string_prefix;
0867 const char *prefix = "GRND_";
0868 int printed = 0, flags = arg->val;
0869
0870 #define P_FLAG(n) \
0871 if (flags & GRND_##n) { \
0872 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
0873 flags &= ~GRND_##n; \
0874 }
0875
0876 P_FLAG(RANDOM);
0877 P_FLAG(NONBLOCK);
0878 #undef P_FLAG
0879
0880 if (flags)
0881 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
0882
0883 return printed;
0884 }
0885
0886 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
0887
/*
 * Initializer for a syscall_arg_fmt that formats and parses through
 * strarray__<array>. The @name argument is not used by the expansion.
 */
#define STRARRAY(name, array)			\
	{					\
		.scnprintf = SCA_STRARRAY,	\
		.strtoul   = STUL_STRARRAY,	\
		.parm	   = &strarray__##array, }
0892
/*
 * Initializer for a syscall_arg_fmt that formats and parses flag lists
 * through strarray__<array>. The @name argument is not used by the expansion.
 */
#define STRARRAY_FLAGS(name, array)			\
	{						\
		.scnprintf = SCA_STRARRAY_FLAGS,	\
		.strtoul   = STUL_STRARRAY_FLAGS,	\
		.parm	   = &strarray__##array, }
0897
0898 #include "trace/beauty/arch_errno_names.c"
0899 #include "trace/beauty/eventfd.c"
0900 #include "trace/beauty/futex_op.c"
0901 #include "trace/beauty/futex_val3.c"
0902 #include "trace/beauty/mmap.c"
0903 #include "trace/beauty/mode_t.c"
0904 #include "trace/beauty/msg_flags.c"
0905 #include "trace/beauty/open_flags.c"
0906 #include "trace/beauty/perf_event_open.c"
0907 #include "trace/beauty/pid.c"
0908 #include "trace/beauty/sched_policy.c"
0909 #include "trace/beauty/seccomp.c"
0910 #include "trace/beauty/signum.c"
0911 #include "trace/beauty/socket_type.c"
0912 #include "trace/beauty/waitid_options.c"
0913
0914 static struct syscall_fmt syscall_fmts[] = {
0915 { .name = "access",
0916 .arg = { [1] = { .scnprintf = SCA_ACCMODE, }, }, },
0917 { .name = "arch_prctl",
0918 .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, },
0919 [1] = { .scnprintf = SCA_PTR, }, }, },
0920 { .name = "bind",
0921 .arg = { [0] = { .scnprintf = SCA_INT, },
0922 [1] = { .scnprintf = SCA_SOCKADDR, },
0923 [2] = { .scnprintf = SCA_INT, }, }, },
0924 { .name = "bpf",
0925 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
0926 { .name = "brk", .hexret = true,
0927 .arg = { [0] = { .scnprintf = SCA_PTR, }, }, },
0928 { .name = "clock_gettime",
0929 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
0930 { .name = "clone", .errpid = true, .nr_args = 5,
0931 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
0932 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
0933 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
0934 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
0935 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
0936 { .name = "close",
0937 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, }, }, },
0938 { .name = "connect",
0939 .arg = { [0] = { .scnprintf = SCA_INT, },
0940 [1] = { .scnprintf = SCA_SOCKADDR, },
0941 [2] = { .scnprintf = SCA_INT, }, }, },
0942 { .name = "epoll_ctl",
0943 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
0944 { .name = "eventfd2",
0945 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, }, }, },
0946 { .name = "fchmodat",
0947 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
0948 { .name = "fchownat",
0949 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
0950 { .name = "fcntl",
0951 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD,
0952 .strtoul = STUL_STRARRAYS,
0953 .parm = &strarrays__fcntl_cmds_arrays,
0954 .show_zero = true, },
0955 [2] = { .scnprintf = SCA_FCNTL_ARG, }, }, },
0956 { .name = "flock",
0957 .arg = { [1] = { .scnprintf = SCA_FLOCK, }, }, },
0958 { .name = "fsconfig",
0959 .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
0960 { .name = "fsmount",
0961 .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
0962 [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, }, }, },
0963 { .name = "fspick",
0964 .arg = { [0] = { .scnprintf = SCA_FDAT, },
0965 [1] = { .scnprintf = SCA_FILENAME, },
0966 [2] = { .scnprintf = SCA_FSPICK_FLAGS, }, }, },
0967 { .name = "fstat", .alias = "newfstat", },
0968 { .name = "fstatat", .alias = "newfstatat", },
0969 { .name = "futex",
0970 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, },
0971 [5] = { .scnprintf = SCA_FUTEX_VAL3, }, }, },
0972 { .name = "futimesat",
0973 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
0974 { .name = "getitimer",
0975 .arg = { [0] = STRARRAY(which, itimers), }, },
0976 { .name = "getpid", .errpid = true, },
0977 { .name = "getpgid", .errpid = true, },
0978 { .name = "getppid", .errpid = true, },
0979 { .name = "getrandom",
0980 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, }, }, },
0981 { .name = "getrlimit",
0982 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
0983 { .name = "getsockopt",
0984 .arg = { [1] = STRARRAY(level, socket_level), }, },
0985 { .name = "gettid", .errpid = true, },
0986 { .name = "ioctl",
0987 .arg = {
0988 #if defined(__i386__) || defined(__x86_64__)
0989
0990
0991
0992 [1] = { .scnprintf = SCA_IOCTL_CMD, },
0993 [2] = { .scnprintf = SCA_HEX, }, }, },
0994 #else
0995 [2] = { .scnprintf = SCA_HEX, }, }, },
0996 #endif
0997 { .name = "kcmp", .nr_args = 5,
0998 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
0999 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
1000 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
1001 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
1002 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
1003 { .name = "keyctl",
1004 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1005 { .name = "kill",
1006 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1007 { .name = "linkat",
1008 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1009 { .name = "lseek",
1010 .arg = { [2] = STRARRAY(whence, whences), }, },
1011 { .name = "lstat", .alias = "newlstat", },
1012 { .name = "madvise",
1013 .arg = { [0] = { .scnprintf = SCA_HEX, },
1014 [2] = { .scnprintf = SCA_MADV_BHV, }, }, },
1015 { .name = "mkdirat",
1016 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1017 { .name = "mknodat",
1018 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1019 { .name = "mmap", .hexret = true,
1020
1021 #if defined(__s390x__)
1022 .alias = "old_mmap",
1023 #endif
1024 .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, },
1025 [3] = { .scnprintf = SCA_MMAP_FLAGS,
1026 .strtoul = STUL_STRARRAY_FLAGS,
1027 .parm = &strarray__mmap_flags, },
1028 [5] = { .scnprintf = SCA_HEX, }, }, },
1029 { .name = "mount",
1030 .arg = { [0] = { .scnprintf = SCA_FILENAME, },
1031 [3] = { .scnprintf = SCA_MOUNT_FLAGS,
1032 .mask_val = SCAMV_MOUNT_FLAGS, }, }, },
1033 { .name = "move_mount",
1034 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1035 [1] = { .scnprintf = SCA_FILENAME, },
1036 [2] = { .scnprintf = SCA_FDAT, },
1037 [3] = { .scnprintf = SCA_FILENAME, },
1038 [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, }, }, },
1039 { .name = "mprotect",
1040 .arg = { [0] = { .scnprintf = SCA_HEX, },
1041 [2] = { .scnprintf = SCA_MMAP_PROT, }, }, },
1042 { .name = "mq_unlink",
1043 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1044 { .name = "mremap", .hexret = true,
1045 .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, }, }, },
1046 { .name = "name_to_handle_at",
1047 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1048 { .name = "newfstatat",
1049 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1050 { .name = "open",
1051 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1052 { .name = "open_by_handle_at",
1053 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1054 [2] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1055 { .name = "openat",
1056 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1057 [2] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1058 { .name = "perf_event_open",
1059 .arg = { [2] = { .scnprintf = SCA_INT, },
1060 [3] = { .scnprintf = SCA_FD, },
1061 [4] = { .scnprintf = SCA_PERF_FLAGS, }, }, },
1062 { .name = "pipe2",
1063 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, }, }, },
1064 { .name = "pkey_alloc",
1065 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, }, }, },
1066 { .name = "pkey_free",
1067 .arg = { [0] = { .scnprintf = SCA_INT, }, }, },
1068 { .name = "pkey_mprotect",
1069 .arg = { [0] = { .scnprintf = SCA_HEX, },
1070 [2] = { .scnprintf = SCA_MMAP_PROT, },
1071 [3] = { .scnprintf = SCA_INT, }, }, },
1072 { .name = "poll", .timeout = true, },
1073 { .name = "ppoll", .timeout = true, },
1074 { .name = "prctl",
1075 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION,
1076 .strtoul = STUL_STRARRAY,
1077 .parm = &strarray__prctl_options, },
1078 [1] = { .scnprintf = SCA_PRCTL_ARG2, },
1079 [2] = { .scnprintf = SCA_PRCTL_ARG3, }, }, },
1080 { .name = "pread", .alias = "pread64", },
1081 { .name = "preadv", .alias = "pread", },
1082 { .name = "prlimit64",
1083 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1084 { .name = "pwrite", .alias = "pwrite64", },
1085 { .name = "readlinkat",
1086 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1087 { .name = "recvfrom",
1088 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1089 { .name = "recvmmsg",
1090 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1091 { .name = "recvmsg",
1092 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1093 { .name = "renameat",
1094 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1095 [2] = { .scnprintf = SCA_FDAT, }, }, },
1096 { .name = "renameat2",
1097 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1098 [2] = { .scnprintf = SCA_FDAT, },
1099 [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, }, }, },
1100 { .name = "rt_sigaction",
1101 .arg = { [0] = { .scnprintf = SCA_SIGNUM, }, }, },
1102 { .name = "rt_sigprocmask",
1103 .arg = { [0] = STRARRAY(how, sighow), }, },
1104 { .name = "rt_sigqueueinfo",
1105 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1106 { .name = "rt_tgsigqueueinfo",
1107 .arg = { [2] = { .scnprintf = SCA_SIGNUM, }, }, },
1108 { .name = "sched_setscheduler",
1109 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, }, }, },
1110 { .name = "seccomp",
1111 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, },
1112 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, }, }, },
1113 { .name = "select", .timeout = true, },
1114 { .name = "sendfile", .alias = "sendfile64", },
1115 { .name = "sendmmsg",
1116 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1117 { .name = "sendmsg",
1118 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1119 { .name = "sendto",
1120 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, },
1121 [4] = { .scnprintf = SCA_SOCKADDR, }, }, },
1122 { .name = "set_tid_address", .errpid = true, },
1123 { .name = "setitimer",
1124 .arg = { [0] = STRARRAY(which, itimers), }, },
1125 { .name = "setrlimit",
1126 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1127 { .name = "setsockopt",
1128 .arg = { [1] = STRARRAY(level, socket_level), }, },
1129 { .name = "socket",
1130 .arg = { [0] = STRARRAY(family, socket_families),
1131 [1] = { .scnprintf = SCA_SK_TYPE, },
1132 [2] = { .scnprintf = SCA_SK_PROTO, }, }, },
1133 { .name = "socketpair",
1134 .arg = { [0] = STRARRAY(family, socket_families),
1135 [1] = { .scnprintf = SCA_SK_TYPE, },
1136 [2] = { .scnprintf = SCA_SK_PROTO, }, }, },
1137 { .name = "stat", .alias = "newstat", },
1138 { .name = "statx",
1139 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1140 [2] = { .scnprintf = SCA_STATX_FLAGS, } ,
1141 [3] = { .scnprintf = SCA_STATX_MASK, }, }, },
1142 { .name = "swapoff",
1143 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1144 { .name = "swapon",
1145 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1146 { .name = "symlinkat",
1147 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1148 { .name = "sync_file_range",
1149 .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, }, }, },
1150 { .name = "tgkill",
1151 .arg = { [2] = { .scnprintf = SCA_SIGNUM, }, }, },
1152 { .name = "tkill",
1153 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1154 { .name = "umount2", .alias = "umount",
1155 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1156 { .name = "uname", .alias = "newuname", },
1157 { .name = "unlinkat",
1158 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1159 { .name = "utimensat",
1160 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1161 { .name = "wait4", .errpid = true,
1162 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, }, }, },
1163 { .name = "waitid", .errpid = true,
1164 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, }, }, },
1165 };
1166
1167 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1168 {
1169 const struct syscall_fmt *fmt = fmtp;
1170 return strcmp(name, fmt->name);
1171 }
1172
1173 static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name)
1174 {
1175 return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1176 }
1177
1178 static struct syscall_fmt *syscall_fmt__find(const char *name)
1179 {
1180 const int nmemb = ARRAY_SIZE(syscall_fmts);
1181 return __syscall_fmt__find(syscall_fmts, nmemb, name);
1182 }
1183
1184 static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias)
1185 {
1186 int i;
1187
1188 for (i = 0; i < nmemb; ++i) {
1189 if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0)
1190 return &fmts[i];
1191 }
1192
1193 return NULL;
1194 }
1195
1196 static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
1197 {
1198 const int nmemb = ARRAY_SIZE(syscall_fmts);
1199 return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
1200 }
1201
1202
1203
1204
1205
1206
1207
/*
 * Per syscall state, one entry per syscall id in trace->syscalls.table,
 * filled in by trace__read_syscall_info().
 */
struct syscall {
	/* "syscalls:sys_enter_NAME" tracepoint format, may hold an ERR_PTR() */
	struct tep_event    *tp_format;
	/* Entries used in ->arg_fmt, not counting a leading syscall number field */
	int		    nr_args;
	/* offset + size of the last tracepoint field, see syscall__set_arg_fmts() */
	int		    args_size;
	/* NOTE(review): assigned outside this part of the file, presumably the
	 * BPF programs run on entry/exit of this syscall -- confirm */
	struct {
		struct bpf_program *sys_enter;
		struct bpf_program *sys_exit;
	}		    bpf_prog;
	/* Name is "exit" or "exit_group" */
	bool		    is_exit;
	/* Name is "open" or "openat" */
	bool		    is_open;
	/* The syscall table has no name for this id */
	bool		    nonexistent;
	/* Tracepoint fields describing the arguments, syscall number field skipped */
	struct tep_format_field *args;
	const char	    *name;
	/* Matching syscall_fmts[] entry, NULL if there is none */
	struct syscall_fmt  *fmt;
	/* Per argument formatters, allocated by syscall__alloc_arg_fmts() */
	struct syscall_arg_fmt *arg_fmt;
};
1224
1225
1226
1227
1228
1229
/*
 * NOTE(review): going by the name this is the value type of a BPF map with one
 * entry per syscall, in which case the layout is shared with the BPF side and
 * must not be changed here alone -- confirm against the users of this struct.
 */
struct bpf_map_syscall_entry {
	bool	enabled;
	/* One slot per syscall argument (6 of them) */
	u16	string_args_len[6];
};
1234
1235
1236
1237
1238
1239
1240
1241
1242 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
1243 {
1244 double duration = (double)t / NSEC_PER_MSEC;
1245 size_t printed = fprintf(fp, "(");
1246
1247 if (!calculated)
1248 printed += fprintf(fp, " ");
1249 else if (duration >= 1.0)
1250 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1251 else if (duration >= 0.01)
1252 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1253 else
1254 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1255 return printed + fprintf(fp, "): ");
1256 }
1257
1258
1259
1260
1261
1262
1263
1264
1265 struct thread_trace {
1266 u64 entry_time;
1267 bool entry_pending;
1268 unsigned long nr_events;
1269 unsigned long pfmaj, pfmin;
1270 char *entry_str;
1271 double runtime_ms;
1272 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1273 struct {
1274 unsigned long ptr;
1275 short int entry_str_pos;
1276 bool pending_open;
1277 unsigned int namelen;
1278 char *name;
1279 } filename;
1280 struct {
1281 int max;
1282 struct file *table;
1283 } files;
1284
1285 struct intlist *syscall_stats;
1286 };
1287
1288 static struct thread_trace *thread_trace__new(void)
1289 {
1290 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1291
1292 if (ttrace) {
1293 ttrace->files.max = -1;
1294 ttrace->syscall_stats = intlist__new(NULL);
1295 }
1296
1297 return ttrace;
1298 }
1299
1300 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1301 {
1302 struct thread_trace *ttrace;
1303
1304 if (thread == NULL)
1305 goto fail;
1306
1307 if (thread__priv(thread) == NULL)
1308 thread__set_priv(thread, thread_trace__new());
1309
1310 if (thread__priv(thread) == NULL)
1311 goto fail;
1312
1313 ttrace = thread__priv(thread);
1314 ++ttrace->nr_events;
1315
1316 return ttrace;
1317 fail:
1318 color_fprintf(fp, PERF_COLOR_RED,
1319 "WARNING: not enough memory, dropping samples!\n");
1320 return NULL;
1321 }
1322
1323
1324 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
1325 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
1326 {
1327 struct thread_trace *ttrace = thread__priv(arg->thread);
1328
1329 ttrace->ret_scnprintf = ret_scnprintf;
1330 }
1331
/* Bit flags, one for major and one for minor page faults */
#define TRACE_PFMAJ		0x1
#define TRACE_PFMIN		0x2

/* Size, in bytes, used for the per thread syscall entry string buffer */
static const size_t trace__entry_str_size = 2048;
1336
1337 static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
1338 {
1339 if (fd < 0)
1340 return NULL;
1341
1342 if (fd > ttrace->files.max) {
1343 struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
1344
1345 if (nfiles == NULL)
1346 return NULL;
1347
1348 if (ttrace->files.max != -1) {
1349 memset(nfiles + ttrace->files.max + 1, 0,
1350 (fd - ttrace->files.max) * sizeof(struct file));
1351 } else {
1352 memset(nfiles, 0, (fd + 1) * sizeof(struct file));
1353 }
1354
1355 ttrace->files.table = nfiles;
1356 ttrace->files.max = fd;
1357 }
1358
1359 return ttrace->files.table + fd;
1360 }
1361
/* Same as thread_trace__files_entry(), starting from the struct thread. */
struct file *thread__files_entry(struct thread *thread, int fd)
{
	struct thread_trace *ttrace = thread__priv(thread);

	return thread_trace__files_entry(ttrace, fd);
}
1366
1367 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1368 {
1369 struct thread_trace *ttrace = thread__priv(thread);
1370 struct file *file = thread_trace__files_entry(ttrace, fd);
1371
1372 if (file != NULL) {
1373 struct stat st;
1374 if (stat(pathname, &st) == 0)
1375 file->dev_maj = major(st.st_rdev);
1376 file->pathname = strdup(pathname);
1377 if (file->pathname)
1378 return 0;
1379 }
1380
1381 return -1;
1382 }
1383
1384 static int thread__read_fd_path(struct thread *thread, int fd)
1385 {
1386 char linkname[PATH_MAX], pathname[PATH_MAX];
1387 struct stat st;
1388 int ret;
1389
1390 if (thread->pid_ == thread->tid) {
1391 scnprintf(linkname, sizeof(linkname),
1392 "/proc/%d/fd/%d", thread->pid_, fd);
1393 } else {
1394 scnprintf(linkname, sizeof(linkname),
1395 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1396 }
1397
1398 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1399 return -1;
1400
1401 ret = readlink(linkname, pathname, sizeof(pathname));
1402
1403 if (ret < 0 || ret > st.st_size)
1404 return -1;
1405
1406 pathname[ret] = '\0';
1407 return trace__set_fd_pathname(thread, fd, pathname);
1408 }
1409
1410 static const char *thread__fd_path(struct thread *thread, int fd,
1411 struct trace *trace)
1412 {
1413 struct thread_trace *ttrace = thread__priv(thread);
1414
1415 if (ttrace == NULL || trace->fd_path_disabled)
1416 return NULL;
1417
1418 if (fd < 0)
1419 return NULL;
1420
1421 if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
1422 if (!trace->live)
1423 return NULL;
1424 ++trace->stats.proc_getname;
1425 if (thread__read_fd_path(thread, fd))
1426 return NULL;
1427 }
1428
1429 return ttrace->files.table[fd].pathname;
1430 }
1431
1432 size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
1433 {
1434 int fd = arg->val;
1435 size_t printed = scnprintf(bf, size, "%d", fd);
1436 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1437
1438 if (path)
1439 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1440
1441 return printed;
1442 }
1443
1444 size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1445 {
1446 size_t printed = scnprintf(bf, size, "%d", fd);
1447 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1448
1449 if (thread) {
1450 const char *path = thread__fd_path(thread, fd, trace);
1451
1452 if (path)
1453 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1454
1455 thread__put(thread);
1456 }
1457
1458 return printed;
1459 }
1460
1461 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1462 struct syscall_arg *arg)
1463 {
1464 int fd = arg->val;
1465 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1466 struct thread_trace *ttrace = thread__priv(arg->thread);
1467
1468 if (ttrace && fd >= 0 && fd <= ttrace->files.max)
1469 zfree(&ttrace->files.table[fd].pathname);
1470
1471 return printed;
1472 }
1473
1474 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1475 unsigned long ptr)
1476 {
1477 struct thread_trace *ttrace = thread__priv(thread);
1478
1479 ttrace->filename.ptr = ptr;
1480 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1481 }
1482
1483 static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1484 {
1485 struct augmented_arg *augmented_arg = arg->augmented.args;
1486 size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1487
1488
1489
1490
1491 int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1492
1493 arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1494 arg->augmented.size -= consumed;
1495
1496 return printed;
1497 }
1498
1499 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1500 struct syscall_arg *arg)
1501 {
1502 unsigned long ptr = arg->val;
1503
1504 if (arg->augmented.args)
1505 return syscall_arg__scnprintf_augmented_string(arg, bf, size);
1506
1507 if (!arg->trace->vfs_getname)
1508 return scnprintf(bf, size, "%#x", ptr);
1509
1510 thread__set_filename_pos(arg->thread, bf, ptr);
1511 return 0;
1512 }
1513
1514 static bool trace__filter_duration(struct trace *trace, double t)
1515 {
1516 return t < (trace->duration_filter * NSEC_PER_MSEC);
1517 }
1518
1519 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1520 {
1521 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1522
1523 return fprintf(fp, "%10.3f ", ts);
1524 }
1525
1526
1527
1528
1529
1530
1531
1532 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1533 {
1534 if (tstamp > 0)
1535 return __trace__fprintf_tstamp(trace, tstamp, fp);
1536
1537 return fprintf(fp, " ? ");
1538 }
1539
/* Compared against si_pid in the SIGCHLD handler, -1 until set elsewhere */
static pid_t workload_pid = -1;
/*
 * Both flags are written from signal handlers, so they must be of type
 * volatile sig_atomic_t: that is the only kind of object a handler may
 * portably store to and that the interrupted code is guaranteed to see
 * updated.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;
1543
1544 static void sighandler_interrupt(int sig __maybe_unused)
1545 {
1546 done = interrupted = true;
1547 }
1548
1549 static void sighandler_chld(int sig __maybe_unused, siginfo_t *info,
1550 void *context __maybe_unused)
1551 {
1552 if (info->si_pid == workload_pid)
1553 done = true;
1554 }
1555
1556 static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
1557 {
1558 size_t printed = 0;
1559
1560 if (trace->multiple_threads) {
1561 if (trace->show_comm)
1562 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1563 printed += fprintf(fp, "%d ", thread->tid);
1564 }
1565
1566 return printed;
1567 }
1568
1569 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1570 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1571 {
1572 size_t printed = 0;
1573
1574 if (trace->show_tstamp)
1575 printed = trace__fprintf_tstamp(trace, tstamp, fp);
1576 if (trace->show_duration)
1577 printed += fprintf_duration(duration, duration_calculated, fp);
1578 return printed + trace__fprintf_comm_tid(trace, thread, fp);
1579 }
1580
1581 static int trace__process_event(struct trace *trace, struct machine *machine,
1582 union perf_event *event, struct perf_sample *sample)
1583 {
1584 int ret = 0;
1585
1586 switch (event->header.type) {
1587 case PERF_RECORD_LOST:
1588 color_fprintf(trace->output, PERF_COLOR_RED,
1589 "LOST %" PRIu64 " events!\n", event->lost.lost);
1590 ret = machine__process_lost_event(machine, event, sample);
1591 break;
1592 default:
1593 ret = machine__process_event(machine, event, sample);
1594 break;
1595 }
1596
1597 return ret;
1598 }
1599
1600 static int trace__tool_process(struct perf_tool *tool,
1601 union perf_event *event,
1602 struct perf_sample *sample,
1603 struct machine *machine)
1604 {
1605 struct trace *trace = container_of(tool, struct trace, tool);
1606 return trace__process_event(trace, machine, event, sample);
1607 }
1608
1609 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1610 {
1611 struct machine *machine = vmachine;
1612
1613 if (machine->kptr_restrict_warned)
1614 return NULL;
1615
1616 if (symbol_conf.kptr_restrict) {
1617 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1618 "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1619 "Kernel samples will not be resolved.\n");
1620 machine->kptr_restrict_warned = true;
1621 return NULL;
1622 }
1623
1624 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1625 }
1626
1627 static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
1628 {
1629 int err = symbol__init(NULL);
1630
1631 if (err)
1632 return err;
1633
1634 trace->host = machine__new_host();
1635 if (trace->host == NULL)
1636 return -ENOMEM;
1637
1638 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1639 if (err < 0)
1640 goto out;
1641
1642 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1643 evlist->core.threads, trace__tool_process,
1644 true, false, 1);
1645 out:
1646 if (err)
1647 symbol__exit();
1648
1649 return err;
1650 }
1651
1652 static void trace__symbols__exit(struct trace *trace)
1653 {
1654 machine__exit(trace->host);
1655 trace->host = NULL;
1656
1657 symbol__exit();
1658 }
1659
1660 static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
1661 {
1662 int idx;
1663
1664 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1665 nr_args = sc->fmt->nr_args;
1666
1667 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
1668 if (sc->arg_fmt == NULL)
1669 return -1;
1670
1671 for (idx = 0; idx < nr_args; ++idx) {
1672 if (sc->fmt)
1673 sc->arg_fmt[idx] = sc->fmt->arg[idx];
1674 }
1675
1676 sc->nr_args = nr_args;
1677 return 0;
1678 }
1679
/*
 * Formatters selected by tracepoint field name, used as the last resort by
 * syscall_arg_fmt__init_array().
 *
 * Looked up with bsearch(), so entries must be kept sorted by ->name in
 * strcmp() order.
 */
static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
	{ .name = "msr",	.scnprintf = SCA_X86_MSR,	  .strtoul = STUL_X86_MSR,	   },
	{ .name = "vector",	.scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, },
};
1684
1685 static int syscall_arg_fmt__cmp(const void *name, const void *fmtp)
1686 {
1687 const struct syscall_arg_fmt *fmt = fmtp;
1688 return strcmp(name, fmt->name);
1689 }
1690
1691 static struct syscall_arg_fmt *
1692 __syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name)
1693 {
1694 return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp);
1695 }
1696
1697 static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
1698 {
1699 const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name);
1700 return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
1701 }
1702
1703 static struct tep_format_field *
1704 syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field)
1705 {
1706 struct tep_format_field *last_field = NULL;
1707 int len;
1708
1709 for (; field; field = field->next, ++arg) {
1710 last_field = field;
1711
1712 if (arg->scnprintf)
1713 continue;
1714
1715 len = strlen(field->name);
1716
1717 if (strcmp(field->type, "const char *") == 0 &&
1718 ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
1719 strstr(field->name, "path") != NULL))
1720 arg->scnprintf = SCA_FILENAME;
1721 else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
1722 arg->scnprintf = SCA_PTR;
1723 else if (strcmp(field->type, "pid_t") == 0)
1724 arg->scnprintf = SCA_PID;
1725 else if (strcmp(field->type, "umode_t") == 0)
1726 arg->scnprintf = SCA_MODE_T;
1727 else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstr(field->type, "char")) {
1728 arg->scnprintf = SCA_CHAR_ARRAY;
1729 arg->nr_entries = field->arraylen;
1730 } else if ((strcmp(field->type, "int") == 0 ||
1731 strcmp(field->type, "unsigned int") == 0 ||
1732 strcmp(field->type, "long") == 0) &&
1733 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
1734
1735
1736
1737
1738
1739
1740
1741 arg->scnprintf = SCA_FD;
1742 } else {
1743 struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name);
1744
1745 if (fmt) {
1746 arg->scnprintf = fmt->scnprintf;
1747 arg->strtoul = fmt->strtoul;
1748 }
1749 }
1750 }
1751
1752 return last_field;
1753 }
1754
1755 static int syscall__set_arg_fmts(struct syscall *sc)
1756 {
1757 struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args);
1758
1759 if (last_field)
1760 sc->args_size = last_field->offset + last_field->size;
1761
1762 return 0;
1763 }
1764
1765 static int trace__read_syscall_info(struct trace *trace, int id)
1766 {
1767 char tp_name[128];
1768 struct syscall *sc;
1769 const char *name = syscalltbl__name(trace->sctbl, id);
1770
1771 #ifdef HAVE_SYSCALL_TABLE_SUPPORT
1772 if (trace->syscalls.table == NULL) {
1773 trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
1774 if (trace->syscalls.table == NULL)
1775 return -ENOMEM;
1776 }
1777 #else
1778 if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
1779
1780 struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1781
1782 if (table == NULL)
1783 return -ENOMEM;
1784
1785
1786 if (trace->syscalls.table == NULL)
1787 memset(table, 0, (id + 1) * sizeof(*sc));
1788 else
1789 memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
1790
1791 trace->syscalls.table = table;
1792 trace->sctbl->syscalls.max_id = id;
1793 }
1794 #endif
1795 sc = trace->syscalls.table + id;
1796 if (sc->nonexistent)
1797 return 0;
1798
1799 if (name == NULL) {
1800 sc->nonexistent = true;
1801 return 0;
1802 }
1803
1804 sc->name = name;
1805 sc->fmt = syscall_fmt__find(sc->name);
1806
1807 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1808 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1809
1810 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1811 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1812 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1813 }
1814
1815 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1816 return -ENOMEM;
1817
1818 if (IS_ERR(sc->tp_format))
1819 return PTR_ERR(sc->tp_format);
1820
1821 sc->args = sc->tp_format->format.fields;
1822
1823
1824
1825
1826
1827 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1828 sc->args = sc->args->next;
1829 --sc->nr_args;
1830 }
1831
1832 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1833 sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
1834
1835 return syscall__set_arg_fmts(sc);
1836 }
1837
1838 static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
1839 {
1840 struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
1841
1842 if (fmt != NULL) {
1843 syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields);
1844 return 0;
1845 }
1846
1847 return -ENOMEM;
1848 }
1849
/*
 * qsort()/bsearch() comparator for arrays of int.
 *
 * Returns a negative value, zero or a positive value when *a is less than,
 * equal to or greater than *b.  The operands are compared rather than
 * subtracted, as the difference of two ints may not fit in an int and
 * signed overflow is undefined behaviour.
 */
static int intcmp(const void *a, const void *b)
{
	const int one = *(const int *)a, another = *(const int *)b;

	return (one > another) - (one < another);
}
1856
1857 static int trace__validate_ev_qualifier(struct trace *trace)
1858 {
1859 int err = 0;
1860 bool printed_invalid_prefix = false;
1861 struct str_node *pos;
1862 size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
1863
1864 trace->ev_qualifier_ids.entries = malloc(nr_allocated *
1865 sizeof(trace->ev_qualifier_ids.entries[0]));
1866
1867 if (trace->ev_qualifier_ids.entries == NULL) {
1868 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1869 trace->output);
1870 err = -EINVAL;
1871 goto out;
1872 }
1873
1874 strlist__for_each_entry(pos, trace->ev_qualifier) {
1875 const char *sc = pos->s;
1876 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
1877
1878 if (id < 0) {
1879 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1880 if (id >= 0)
1881 goto matches;
1882
1883 if (!printed_invalid_prefix) {
1884 pr_debug("Skipping unknown syscalls: ");
1885 printed_invalid_prefix = true;
1886 } else {
1887 pr_debug(", ");
1888 }
1889
1890 pr_debug("%s", sc);
1891 continue;
1892 }
1893 matches:
1894 trace->ev_qualifier_ids.entries[nr_used++] = id;
1895 if (match_next == -1)
1896 continue;
1897
1898 while (1) {
1899 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1900 if (id < 0)
1901 break;
1902 if (nr_allocated == nr_used) {
1903 void *entries;
1904
1905 nr_allocated += 8;
1906 entries = realloc(trace->ev_qualifier_ids.entries,
1907 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1908 if (entries == NULL) {
1909 err = -ENOMEM;
1910 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1911 goto out_free;
1912 }
1913 trace->ev_qualifier_ids.entries = entries;
1914 }
1915 trace->ev_qualifier_ids.entries[nr_used++] = id;
1916 }
1917 }
1918
1919 trace->ev_qualifier_ids.nr = nr_used;
1920 qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
1921 out:
1922 if (printed_invalid_prefix)
1923 pr_debug("\n");
1924 return err;
1925 out_free:
1926 zfree(&trace->ev_qualifier_ids.entries);
1927 trace->ev_qualifier_ids.nr = 0;
1928 goto out;
1929 }
1930
1931 static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
1932 {
1933 bool in_ev_qualifier;
1934
1935 if (trace->ev_qualifier_ids.nr == 0)
1936 return true;
1937
1938 in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
1939 trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
1940
1941 if (in_ev_qualifier)
1942 return !trace->not_ev_qualifier;
1943
1944 return trace->not_ev_qualifier;
1945 }
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955 unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1956 {
1957 unsigned long val;
1958 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
1959
1960 memcpy(&val, p, sizeof(val));
1961 return val;
1962 }
1963
1964 static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1965 struct syscall_arg *arg)
1966 {
1967 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1968 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1969
1970 return scnprintf(bf, size, "arg%d: ", arg->idx);
1971 }
1972
1973
1974
1975
1976
1977
1978 static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val)
1979 {
1980 if (fmt && fmt->mask_val)
1981 return fmt->mask_val(arg, val);
1982
1983 return val;
1984 }
1985
1986 static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size,
1987 struct syscall_arg *arg, unsigned long val)
1988 {
1989 if (fmt && fmt->scnprintf) {
1990 arg->val = val;
1991 if (fmt->parm)
1992 arg->parm = fmt->parm;
1993 return fmt->scnprintf(bf, size, arg);
1994 }
1995 return scnprintf(bf, size, "%ld", val);
1996 }
1997
1998 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1999 unsigned char *args, void *augmented_args, int augmented_args_size,
2000 struct trace *trace, struct thread *thread)
2001 {
2002 size_t printed = 0;
2003 unsigned long val;
2004 u8 bit = 1;
2005 struct syscall_arg arg = {
2006 .args = args,
2007 .augmented = {
2008 .size = augmented_args_size,
2009 .args = augmented_args,
2010 },
2011 .idx = 0,
2012 .mask = 0,
2013 .trace = trace,
2014 .thread = thread,
2015 .show_string_prefix = trace->show_string_prefix,
2016 };
2017 struct thread_trace *ttrace = thread__priv(thread);
2018
2019
2020
2021
2022
2023
2024 ttrace->ret_scnprintf = NULL;
2025
2026 if (sc->args != NULL) {
2027 struct tep_format_field *field;
2028
2029 for (field = sc->args; field;
2030 field = field->next, ++arg.idx, bit <<= 1) {
2031 if (arg.mask & bit)
2032 continue;
2033
2034 arg.fmt = &sc->arg_fmt[arg.idx];
2035 val = syscall_arg__val(&arg, arg.idx);
2036
2037
2038
2039
2040 val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val);
2041
2042
2043
2044
2045
2046
2047 if (val == 0 &&
2048 !trace->show_zeros &&
2049 !(sc->arg_fmt &&
2050 (sc->arg_fmt[arg.idx].show_zero ||
2051 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
2052 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
2053 sc->arg_fmt[arg.idx].parm))
2054 continue;
2055
2056 printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2057
2058 if (trace->show_arg_names)
2059 printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2060
2061 printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
2062 bf + printed, size - printed, &arg, val);
2063 }
2064 } else if (IS_ERR(sc->tp_format)) {
2065
2066
2067
2068
2069
2070 while (arg.idx < sc->nr_args) {
2071 if (arg.mask & bit)
2072 goto next_arg;
2073 val = syscall_arg__val(&arg, arg.idx);
2074 if (printed)
2075 printed += scnprintf(bf + printed, size - printed, ", ");
2076 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
2077 printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val);
2078 next_arg:
2079 ++arg.idx;
2080 bit <<= 1;
2081 }
2082 }
2083
2084 return printed;
2085 }
2086
/*
 * Signature of the per tracepoint sample handlers: called with the trace
 * session, the evsel the sample is for, the raw event and the parsed sample.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
2090
2091 static struct syscall *trace__syscall_info(struct trace *trace,
2092 struct evsel *evsel, int id)
2093 {
2094 int err = 0;
2095
2096 if (id < 0) {
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108 if (verbose > 1) {
2109 static u64 n;
2110 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
2111 id, evsel__name(evsel), ++n);
2112 }
2113 return NULL;
2114 }
2115
2116 err = -EINVAL;
2117
2118 #ifdef HAVE_SYSCALL_TABLE_SUPPORT
2119 if (id > trace->sctbl->syscalls.max_id) {
2120 #else
2121 if (id >= trace->sctbl->syscalls.max_id) {
2122
2123
2124
2125
2126
2127 err = trace__read_syscall_info(trace, id);
2128 if (err)
2129 #endif
2130 goto out_cant_read;
2131 }
2132
2133 if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
2134 (err = trace__read_syscall_info(trace, id)) != 0)
2135 goto out_cant_read;
2136
2137 if (trace->syscalls.table[id].name == NULL) {
2138 if (trace->syscalls.table[id].nonexistent)
2139 return NULL;
2140 goto out_cant_read;
2141 }
2142
2143 return &trace->syscalls.table[id];
2144
2145 out_cant_read:
2146 if (verbose > 0) {
2147 char sbuf[STRERR_BUFSIZE];
2148 fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
2149 if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
2150 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
2151 fputs(" information\n", trace->output);
2152 }
2153 return NULL;
2154 }
2155
/*
 * Per thread, per syscall statistics, stored as the ->priv of the nodes in
 * thread_trace->syscall_stats and maintained by thread__update_stats().
 */
struct syscall_stats {
	/* Fed with the duration of each call */
	struct stats stats;
	/* Number of calls that returned a negative value */
	u64	     nr_failures;
	/* Number of entries in ->errnos, i.e. the highest errno seen so far */
	int	     max_errno;
	/* errnos[e - 1] counts the failures with errno e, NULL until the first one */
	u32	     *errnos;
};
2162
2163 static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
2164 int id, struct perf_sample *sample, long err, bool errno_summary)
2165 {
2166 struct int_node *inode;
2167 struct syscall_stats *stats;
2168 u64 duration = 0;
2169
2170 inode = intlist__findnew(ttrace->syscall_stats, id);
2171 if (inode == NULL)
2172 return;
2173
2174 stats = inode->priv;
2175 if (stats == NULL) {
2176 stats = malloc(sizeof(*stats));
2177 if (stats == NULL)
2178 return;
2179
2180 stats->nr_failures = 0;
2181 stats->max_errno = 0;
2182 stats->errnos = NULL;
2183 init_stats(&stats->stats);
2184 inode->priv = stats;
2185 }
2186
2187 if (ttrace->entry_time && sample->time > ttrace->entry_time)
2188 duration = sample->time - ttrace->entry_time;
2189
2190 update_stats(&stats->stats, duration);
2191
2192 if (err < 0) {
2193 ++stats->nr_failures;
2194
2195 if (!errno_summary)
2196 return;
2197
2198 err = -err;
2199 if (err > stats->max_errno) {
2200 u32 *new_errnos = realloc(stats->errnos, err * sizeof(u32));
2201
2202 if (new_errnos) {
2203 memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32));
2204 } else {
2205 pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n",
2206 thread__comm_str(thread), thread->pid_, thread->tid);
2207 return;
2208 }
2209
2210 stats->errnos = new_errnos;
2211 stats->max_errno = err;
2212 }
2213
2214 ++stats->errnos[err - 1];
2215 }
2216 }
2217
2218 static int trace__printf_interrupted_entry(struct trace *trace)
2219 {
2220 struct thread_trace *ttrace;
2221 size_t printed;
2222 int len;
2223
2224 if (trace->failure_only || trace->current == NULL)
2225 return 0;
2226
2227 ttrace = thread__priv(trace->current);
2228
2229 if (!ttrace->entry_pending)
2230 return 0;
2231
2232 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
2233 printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);
2234
2235 if (len < trace->args_alignment - 4)
2236 printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");
2237
2238 printed += fprintf(trace->output, " ...\n");
2239
2240 ttrace->entry_pending = false;
2241 ++trace->nr_events_printed;
2242
2243 return printed;
2244 }
2245
2246 static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
2247 struct perf_sample *sample, struct thread *thread)
2248 {
2249 int printed = 0;
2250
2251 if (trace->print_sample) {
2252 double ts = (double)sample->time / NSEC_PER_MSEC;
2253
2254 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
2255 evsel__name(evsel), ts,
2256 thread__comm_str(thread),
2257 sample->pid, sample->tid, sample->cpu);
2258 }
2259
2260 return printed;
2261 }
2262
2263 static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
2264 {
2265 void *augmented_args = NULL;
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280 int args_size = raw_augmented_args_size ?: sc->args_size;
2281
2282 *augmented_args_size = sample->raw_size - args_size;
2283 if (*augmented_args_size > 0)
2284 augmented_args = sample->raw_data + args_size;
2285
2286 return augmented_args;
2287 }
2288
2289 static void syscall__exit(struct syscall *sc)
2290 {
2291 if (!sc)
2292 return;
2293
2294 free(sc->arg_fmt);
2295 }
2296
2297 static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
2298 union perf_event *event __maybe_unused,
2299 struct perf_sample *sample)
2300 {
2301 char *msg;
2302 void *args;
2303 int printed = 0;
2304 struct thread *thread;
2305 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2306 int augmented_args_size = 0;
2307 void *augmented_args = NULL;
2308 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2309 struct thread_trace *ttrace;
2310
2311 if (sc == NULL)
2312 return -1;
2313
2314 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2315 ttrace = thread__trace(thread, trace->output);
2316 if (ttrace == NULL)
2317 goto out_put;
2318
2319 trace__fprintf_sample(trace, evsel, sample, thread);
2320
2321 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2322
2323 if (ttrace->entry_str == NULL) {
2324 ttrace->entry_str = malloc(trace__entry_str_size);
2325 if (!ttrace->entry_str)
2326 goto out_put;
2327 }
2328
2329 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
2330 trace__printf_interrupted_entry(trace);
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341 if (evsel != trace->syscalls.events.sys_enter)
2342 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2343 ttrace->entry_time = sample->time;
2344 msg = ttrace->entry_str;
2345 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
2346
2347 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
2348 args, augmented_args, augmented_args_size, trace, thread);
2349
2350 if (sc->is_exit) {
2351 if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
2352 int alignment = 0;
2353
2354 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
2355 printed = fprintf(trace->output, "%s)", ttrace->entry_str);
2356 if (trace->args_alignment > printed)
2357 alignment = trace->args_alignment - printed;
2358 fprintf(trace->output, "%*s= ?\n", alignment, " ");
2359 }
2360 } else {
2361 ttrace->entry_pending = true;
2362
2363 ttrace->filename.pending_open = false;
2364 }
2365
2366 if (trace->current != thread) {
2367 thread__put(trace->current);
2368 trace->current = thread__get(thread);
2369 }
2370 err = 0;
2371 out_put:
2372 thread__put(thread);
2373 return err;
2374 }
2375
2376 static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
2377 struct perf_sample *sample)
2378 {
2379 struct thread_trace *ttrace;
2380 struct thread *thread;
2381 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2382 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2383 char msg[1024];
2384 void *args, *augmented_args = NULL;
2385 int augmented_args_size;
2386
2387 if (sc == NULL)
2388 return -1;
2389
2390 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2391 ttrace = thread__trace(thread, trace->output);
2392
2393
2394
2395
2396 if (ttrace == NULL)
2397 goto out_put;
2398
2399 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2400 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2401 syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
2402 fprintf(trace->output, "%s", msg);
2403 err = 0;
2404 out_put:
2405 thread__put(thread);
2406 return err;
2407 }
2408
2409 static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
2410 struct perf_sample *sample,
2411 struct callchain_cursor *cursor)
2412 {
2413 struct addr_location al;
2414 int max_stack = evsel->core.attr.sample_max_stack ?
2415 evsel->core.attr.sample_max_stack :
2416 trace->max_stack;
2417 int err;
2418
2419 if (machine__resolve(trace->host, &al, sample) < 0)
2420 return -1;
2421
2422 err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
2423 addr_location__put(&al);
2424 return err;
2425 }
2426
2427 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
2428 {
2429
2430 const unsigned int print_opts = EVSEL__PRINT_SYM |
2431 EVSEL__PRINT_DSO |
2432 EVSEL__PRINT_UNKNOWN_AS_ADDR;
2433
2434 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
2435 }
2436
/* Map errno value @err to its name for the architecture @evsel was recorded on. */
static const char *errno_to_name(struct evsel *evsel, int err)
{
	const char *arch = perf_env__arch(evsel__env(evsel));

	return arch_syscalls__strerrno(arch, err);
}
2444
2445 static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
2446 union perf_event *event __maybe_unused,
2447 struct perf_sample *sample)
2448 {
2449 long ret;
2450 u64 duration = 0;
2451 bool duration_calculated = false;
2452 struct thread *thread;
2453 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
2454 int alignment = trace->args_alignment;
2455 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2456 struct thread_trace *ttrace;
2457
2458 if (sc == NULL)
2459 return -1;
2460
2461 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2462 ttrace = thread__trace(thread, trace->output);
2463 if (ttrace == NULL)
2464 goto out_put;
2465
2466 trace__fprintf_sample(trace, evsel, sample, thread);
2467
2468 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2469
2470 if (trace->summary)
2471 thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);
2472
2473 if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
2474 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2475 ttrace->filename.pending_open = false;
2476 ++trace->stats.vfs_getname;
2477 }
2478
2479 if (ttrace->entry_time) {
2480 duration = sample->time - ttrace->entry_time;
2481 if (trace__filter_duration(trace, duration))
2482 goto out;
2483 duration_calculated = true;
2484 } else if (trace->duration_filter)
2485 goto out;
2486
2487 if (sample->callchain) {
2488 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2489 if (callchain_ret == 0) {
2490 if (callchain_cursor.nr < trace->min_stack)
2491 goto out;
2492 callchain_ret = 1;
2493 }
2494 }
2495
2496 if (trace->summary_only || (ret >= 0 && trace->failure_only))
2497 goto out;
2498
2499 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
2500
2501 if (ttrace->entry_pending) {
2502 printed = fprintf(trace->output, "%s", ttrace->entry_str);
2503 } else {
2504 printed += fprintf(trace->output, " ... [");
2505 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2506 printed += 9;
2507 printed += fprintf(trace->output, "]: %s()", sc->name);
2508 }
2509
2510 printed++;
2511
2512 if (alignment > printed)
2513 alignment -= printed;
2514 else
2515 alignment = 0;
2516
2517 fprintf(trace->output, ")%*s= ", alignment, " ");
2518
2519 if (sc->fmt == NULL) {
2520 if (ret < 0)
2521 goto errno_print;
2522 signed_print:
2523 fprintf(trace->output, "%ld", ret);
2524 } else if (ret < 0) {
2525 errno_print: {
2526 char bf[STRERR_BUFSIZE];
2527 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
2528 *e = errno_to_name(evsel, -ret);
2529
2530 fprintf(trace->output, "-1 %s (%s)", e, emsg);
2531 }
2532 } else if (ret == 0 && sc->fmt->timeout)
2533 fprintf(trace->output, "0 (Timeout)");
2534 else if (ttrace->ret_scnprintf) {
2535 char bf[1024];
2536 struct syscall_arg arg = {
2537 .val = ret,
2538 .thread = thread,
2539 .trace = trace,
2540 };
2541 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
2542 ttrace->ret_scnprintf = NULL;
2543 fprintf(trace->output, "%s", bf);
2544 } else if (sc->fmt->hexret)
2545 fprintf(trace->output, "%#lx", ret);
2546 else if (sc->fmt->errpid) {
2547 struct thread *child = machine__find_thread(trace->host, ret, ret);
2548
2549 if (child != NULL) {
2550 fprintf(trace->output, "%ld", ret);
2551 if (child->comm_set)
2552 fprintf(trace->output, " (%s)", thread__comm_str(child));
2553 thread__put(child);
2554 }
2555 } else
2556 goto signed_print;
2557
2558 fputc('\n', trace->output);
2559
2560
2561
2562
2563
2564 if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
2565 interrupted = true;
2566
2567 if (callchain_ret > 0)
2568 trace__fprintf_callchain(trace, sample);
2569 else if (callchain_ret < 0)
2570 pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
2571 out:
2572 ttrace->entry_pending = false;
2573 err = 0;
2574 out_put:
2575 thread__put(thread);
2576 return err;
2577 }
2578
2579 static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
2580 union perf_event *event __maybe_unused,
2581 struct perf_sample *sample)
2582 {
2583 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2584 struct thread_trace *ttrace;
2585 size_t filename_len, entry_str_len, to_move;
2586 ssize_t remaining_space;
2587 char *pos;
2588 const char *filename = evsel__rawptr(evsel, sample, "pathname");
2589
2590 if (!thread)
2591 goto out;
2592
2593 ttrace = thread__priv(thread);
2594 if (!ttrace)
2595 goto out_put;
2596
2597 filename_len = strlen(filename);
2598 if (filename_len == 0)
2599 goto out_put;
2600
2601 if (ttrace->filename.namelen < filename_len) {
2602 char *f = realloc(ttrace->filename.name, filename_len + 1);
2603
2604 if (f == NULL)
2605 goto out_put;
2606
2607 ttrace->filename.namelen = filename_len;
2608 ttrace->filename.name = f;
2609 }
2610
2611 strcpy(ttrace->filename.name, filename);
2612 ttrace->filename.pending_open = true;
2613
2614 if (!ttrace->filename.ptr)
2615 goto out_put;
2616
2617 entry_str_len = strlen(ttrace->entry_str);
2618 remaining_space = trace__entry_str_size - entry_str_len - 1;
2619 if (remaining_space <= 0)
2620 goto out_put;
2621
2622 if (filename_len > (size_t)remaining_space) {
2623 filename += filename_len - remaining_space;
2624 filename_len = remaining_space;
2625 }
2626
2627 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1;
2628 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2629 memmove(pos + filename_len, pos, to_move);
2630 memcpy(pos, filename, filename_len);
2631
2632 ttrace->filename.ptr = 0;
2633 ttrace->filename.entry_str_pos = 0;
2634 out_put:
2635 thread__put(thread);
2636 out:
2637 return 0;
2638 }
2639
2640 static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
2641 union perf_event *event __maybe_unused,
2642 struct perf_sample *sample)
2643 {
2644 u64 runtime = evsel__intval(evsel, sample, "runtime");
2645 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2646 struct thread *thread = machine__findnew_thread(trace->host,
2647 sample->pid,
2648 sample->tid);
2649 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2650
2651 if (ttrace == NULL)
2652 goto out_dump;
2653
2654 ttrace->runtime_ms += runtime_ms;
2655 trace->runtime_ms += runtime_ms;
2656 out_put:
2657 thread__put(thread);
2658 return 0;
2659
2660 out_dump:
2661 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2662 evsel->name,
2663 evsel__strval(evsel, sample, "comm"),
2664 (pid_t)evsel__intval(evsel, sample, "pid"),
2665 runtime,
2666 evsel__intval(evsel, sample, "vruntime"));
2667 goto out_put;
2668 }
2669
2670 static int bpf_output__printer(enum binary_printer_ops op,
2671 unsigned int val, void *extra __maybe_unused, FILE *fp)
2672 {
2673 unsigned char ch = (unsigned char)val;
2674
2675 switch (op) {
2676 case BINARY_PRINT_CHAR_DATA:
2677 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
2678 case BINARY_PRINT_DATA_BEGIN:
2679 case BINARY_PRINT_LINE_BEGIN:
2680 case BINARY_PRINT_ADDR:
2681 case BINARY_PRINT_NUM_DATA:
2682 case BINARY_PRINT_NUM_PAD:
2683 case BINARY_PRINT_SEP:
2684 case BINARY_PRINT_CHAR_PAD:
2685 case BINARY_PRINT_LINE_END:
2686 case BINARY_PRINT_DATA_END:
2687 default:
2688 break;
2689 }
2690
2691 return 0;
2692 }
2693
2694 static void bpf_output__fprintf(struct trace *trace,
2695 struct perf_sample *sample)
2696 {
2697 binary__fprintf(sample->raw_data, sample->raw_size, 8,
2698 bpf_output__printer, NULL, trace->output);
2699 ++trace->nr_events_printed;
2700 }
2701
2702 static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample,
2703 struct thread *thread, void *augmented_args, int augmented_args_size)
2704 {
2705 char bf[2048];
2706 size_t size = sizeof(bf);
2707 struct tep_format_field *field = evsel->tp_format->format.fields;
2708 struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
2709 size_t printed = 0;
2710 unsigned long val;
2711 u8 bit = 1;
2712 struct syscall_arg syscall_arg = {
2713 .augmented = {
2714 .size = augmented_args_size,
2715 .args = augmented_args,
2716 },
2717 .idx = 0,
2718 .mask = 0,
2719 .trace = trace,
2720 .thread = thread,
2721 .show_string_prefix = trace->show_string_prefix,
2722 };
2723
2724 for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) {
2725 if (syscall_arg.mask & bit)
2726 continue;
2727
2728 syscall_arg.len = 0;
2729 syscall_arg.fmt = arg;
2730 if (field->flags & TEP_FIELD_IS_ARRAY) {
2731 int offset = field->offset;
2732
2733 if (field->flags & TEP_FIELD_IS_DYNAMIC) {
2734 offset = format_field__intval(field, sample, evsel->needs_swap);
2735 syscall_arg.len = offset >> 16;
2736 offset &= 0xffff;
2737 if (field->flags & TEP_FIELD_IS_RELATIVE)
2738 offset += field->offset + field->size;
2739 }
2740
2741 val = (uintptr_t)(sample->raw_data + offset);
2742 } else
2743 val = format_field__intval(field, sample, evsel->needs_swap);
2744
2745
2746
2747
2748 val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);
2749
2750
2751
2752
2753
2754
2755 if (val == 0 &&
2756 !trace->show_zeros &&
2757 !((arg->show_zero ||
2758 arg->scnprintf == SCA_STRARRAY ||
2759 arg->scnprintf == SCA_STRARRAYS) &&
2760 arg->parm))
2761 continue;
2762
2763 printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2764
2765
2766
2767
2768
2769 if (1 || trace->show_arg_names)
2770 printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2771
2772 printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
2773 }
2774
2775 return printed + fprintf(trace->output, "%s", bf);
2776 }
2777
2778 static int trace__event_handler(struct trace *trace, struct evsel *evsel,
2779 union perf_event *event __maybe_unused,
2780 struct perf_sample *sample)
2781 {
2782 struct thread *thread;
2783 int callchain_ret = 0;
2784
2785
2786
2787
2788
2789
2790 if (evsel->disabled)
2791 return 0;
2792
2793 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2794
2795 if (sample->callchain) {
2796 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2797 if (callchain_ret == 0) {
2798 if (callchain_cursor.nr < trace->min_stack)
2799 goto out;
2800 callchain_ret = 1;
2801 }
2802 }
2803
2804 trace__printf_interrupted_entry(trace);
2805 trace__fprintf_tstamp(trace, sample->time, trace->output);
2806
2807 if (trace->trace_syscalls && trace->show_duration)
2808 fprintf(trace->output, "( ): ");
2809
2810 if (thread)
2811 trace__fprintf_comm_tid(trace, thread, trace->output);
2812
2813 if (evsel == trace->syscalls.events.augmented) {
2814 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
2815 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2816
2817 if (sc) {
2818 fprintf(trace->output, "%s(", sc->name);
2819 trace__fprintf_sys_enter(trace, evsel, sample);
2820 fputc(')', trace->output);
2821 goto newline;
2822 }
2823
2824
2825
2826
2827
2828
2829 }
2830
2831 fprintf(trace->output, "%s(", evsel->name);
2832
2833 if (evsel__is_bpf_output(evsel)) {
2834 bpf_output__fprintf(trace, sample);
2835 } else if (evsel->tp_format) {
2836 if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
2837 trace__fprintf_sys_enter(trace, evsel, sample)) {
2838 if (trace->libtraceevent_print) {
2839 event_format__fprintf(evsel->tp_format, sample->cpu,
2840 sample->raw_data, sample->raw_size,
2841 trace->output);
2842 } else {
2843 trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0);
2844 }
2845 }
2846 }
2847
2848 newline:
2849 fprintf(trace->output, ")\n");
2850
2851 if (callchain_ret > 0)
2852 trace__fprintf_callchain(trace, sample);
2853 else if (callchain_ret < 0)
2854 pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
2855
2856 ++trace->nr_events_printed;
2857
2858 if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
2859 evsel__disable(evsel);
2860 evsel__close(evsel);
2861 }
2862 out:
2863 thread__put(thread);
2864 return 0;
2865 }
2866
2867 static void print_location(FILE *f, struct perf_sample *sample,
2868 struct addr_location *al,
2869 bool print_dso, bool print_sym)
2870 {
2871
2872 if ((verbose > 0 || print_dso) && al->map)
2873 fprintf(f, "%s@", al->map->dso->long_name);
2874
2875 if ((verbose > 0 || print_sym) && al->sym)
2876 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2877 al->addr - al->sym->start);
2878 else if (al->map)
2879 fprintf(f, "0x%" PRIx64, al->addr);
2880 else
2881 fprintf(f, "0x%" PRIx64, sample->addr);
2882 }
2883
2884 static int trace__pgfault(struct trace *trace,
2885 struct evsel *evsel,
2886 union perf_event *event __maybe_unused,
2887 struct perf_sample *sample)
2888 {
2889 struct thread *thread;
2890 struct addr_location al;
2891 char map_type = 'd';
2892 struct thread_trace *ttrace;
2893 int err = -1;
2894 int callchain_ret = 0;
2895
2896 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2897
2898 if (sample->callchain) {
2899 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2900 if (callchain_ret == 0) {
2901 if (callchain_cursor.nr < trace->min_stack)
2902 goto out_put;
2903 callchain_ret = 1;
2904 }
2905 }
2906
2907 ttrace = thread__trace(thread, trace->output);
2908 if (ttrace == NULL)
2909 goto out_put;
2910
2911 if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2912 ttrace->pfmaj++;
2913 else
2914 ttrace->pfmin++;
2915
2916 if (trace->summary_only)
2917 goto out;
2918
2919 thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
2920
2921 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
2922
2923 fprintf(trace->output, "%sfault [",
2924 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2925 "maj" : "min");
2926
2927 print_location(trace->output, sample, &al, false, true);
2928
2929 fprintf(trace->output, "] => ");
2930
2931 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
2932
2933 if (!al.map) {
2934 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
2935
2936 if (al.map)
2937 map_type = 'x';
2938 else
2939 map_type = '?';
2940 }
2941
2942 print_location(trace->output, sample, &al, true, false);
2943
2944 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2945
2946 if (callchain_ret > 0)
2947 trace__fprintf_callchain(trace, sample);
2948 else if (callchain_ret < 0)
2949 pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
2950
2951 ++trace->nr_events_printed;
2952 out:
2953 err = 0;
2954 out_put:
2955 thread__put(thread);
2956 return err;
2957 }
2958
2959 static void trace__set_base_time(struct trace *trace,
2960 struct evsel *evsel,
2961 struct perf_sample *sample)
2962 {
2963
2964
2965
2966
2967
2968
2969
2970
2971 if (trace->base_time == 0 && !trace->full_time &&
2972 (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
2973 trace->base_time = sample->time;
2974 }
2975
2976 static int trace__process_sample(struct perf_tool *tool,
2977 union perf_event *event,
2978 struct perf_sample *sample,
2979 struct evsel *evsel,
2980 struct machine *machine __maybe_unused)
2981 {
2982 struct trace *trace = container_of(tool, struct trace, tool);
2983 struct thread *thread;
2984 int err = 0;
2985
2986 tracepoint_handler handler = evsel->handler;
2987
2988 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2989 if (thread && thread__is_filtered(thread))
2990 goto out;
2991
2992 trace__set_base_time(trace, evsel, sample);
2993
2994 if (handler) {
2995 ++trace->nr_events;
2996 handler(trace, evsel, event, sample);
2997 }
2998 out:
2999 thread__put(thread);
3000 return err;
3001 }
3002
3003 static int trace__record(struct trace *trace, int argc, const char **argv)
3004 {
3005 unsigned int rec_argc, i, j;
3006 const char **rec_argv;
3007 const char * const record_args[] = {
3008 "record",
3009 "-R",
3010 "-m", "1024",
3011 "-c", "1",
3012 };
3013 pid_t pid = getpid();
3014 char *filter = asprintf__tp_filter_pids(1, &pid);
3015 const char * const sc_args[] = { "-e", };
3016 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
3017 const char * const majpf_args[] = { "-e", "major-faults" };
3018 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
3019 const char * const minpf_args[] = { "-e", "minor-faults" };
3020 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
3021 int err = -1;
3022
3023
3024 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 3 +
3025 majpf_args_nr + minpf_args_nr + argc;
3026 rec_argv = calloc(rec_argc + 1, sizeof(char *));
3027
3028 if (rec_argv == NULL || filter == NULL)
3029 goto out_free;
3030
3031 j = 0;
3032 for (i = 0; i < ARRAY_SIZE(record_args); i++)
3033 rec_argv[j++] = record_args[i];
3034
3035 if (trace->trace_syscalls) {
3036 for (i = 0; i < sc_args_nr; i++)
3037 rec_argv[j++] = sc_args[i];
3038
3039
3040 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
3041 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
3042 else if (is_valid_tracepoint("syscalls:sys_enter"))
3043 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
3044 else {
3045 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
3046 goto out_free;
3047 }
3048 }
3049
3050 rec_argv[j++] = "--filter";
3051 rec_argv[j++] = filter;
3052
3053 if (trace->trace_pgfaults & TRACE_PFMAJ)
3054 for (i = 0; i < majpf_args_nr; i++)
3055 rec_argv[j++] = majpf_args[i];
3056
3057 if (trace->trace_pgfaults & TRACE_PFMIN)
3058 for (i = 0; i < minpf_args_nr; i++)
3059 rec_argv[j++] = minpf_args[i];
3060
3061 for (i = 0; i < (unsigned int)argc; i++)
3062 rec_argv[j++] = argv[i];
3063
3064 err = cmd_record(j, rec_argv);
3065 out_free:
3066 free(filter);
3067 free(rec_argv);
3068 return err;
3069 }
3070
3071 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
3072
3073 static bool evlist__add_vfs_getname(struct evlist *evlist)
3074 {
3075 bool found = false;
3076 struct evsel *evsel, *tmp;
3077 struct parse_events_error err;
3078 int ret;
3079
3080 parse_events_error__init(&err);
3081 ret = parse_events(evlist, "probe:vfs_getname*", &err);
3082 parse_events_error__exit(&err);
3083 if (ret)
3084 return false;
3085
3086 evlist__for_each_entry_safe(evlist, evsel, tmp) {
3087 if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
3088 continue;
3089
3090 if (evsel__field(evsel, "pathname")) {
3091 evsel->handler = trace__vfs_getname;
3092 found = true;
3093 continue;
3094 }
3095
3096 list_del_init(&evsel->core.node);
3097 evsel->evlist = NULL;
3098 evsel__delete(evsel);
3099 }
3100
3101 return found;
3102 }
3103
3104 static struct evsel *evsel__new_pgfault(u64 config)
3105 {
3106 struct evsel *evsel;
3107 struct perf_event_attr attr = {
3108 .type = PERF_TYPE_SOFTWARE,
3109 .mmap_data = 1,
3110 };
3111
3112 attr.config = config;
3113 attr.sample_period = 1;
3114
3115 event_attr_init(&attr);
3116
3117 evsel = evsel__new(&attr);
3118 if (evsel)
3119 evsel->handler = trace__pgfault;
3120
3121 return evsel;
3122 }
3123
3124 static void evlist__free_syscall_tp_fields(struct evlist *evlist)
3125 {
3126 struct evsel *evsel;
3127
3128 evlist__for_each_entry(evlist, evsel) {
3129 struct evsel_trace *et = evsel->priv;
3130
3131 if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls"))
3132 continue;
3133
3134 free(et->fmt);
3135 free(et);
3136 }
3137 }
3138
3139 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
3140 {
3141 const u32 type = event->header.type;
3142 struct evsel *evsel;
3143
3144 if (type != PERF_RECORD_SAMPLE) {
3145 trace__process_event(trace, trace->host, event, sample);
3146 return;
3147 }
3148
3149 evsel = evlist__id2evsel(trace->evlist, sample->id);
3150 if (evsel == NULL) {
3151 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
3152 return;
3153 }
3154
3155 if (evswitch__discard(&trace->evswitch, evsel))
3156 return;
3157
3158 trace__set_base_time(trace, evsel, sample);
3159
3160 if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
3161 sample->raw_data == NULL) {
3162 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
3163 evsel__name(evsel), sample->tid,
3164 sample->cpu, sample->raw_size);
3165 } else {
3166 tracepoint_handler handler = evsel->handler;
3167 handler(trace, evsel, event, sample);
3168 }
3169
3170 if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
3171 interrupted = true;
3172 }
3173
3174 static int trace__add_syscall_newtp(struct trace *trace)
3175 {
3176 int ret = -1;
3177 struct evlist *evlist = trace->evlist;
3178 struct evsel *sys_enter, *sys_exit;
3179
3180 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
3181 if (sys_enter == NULL)
3182 goto out;
3183
3184 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
3185 goto out_delete_sys_enter;
3186
3187 sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
3188 if (sys_exit == NULL)
3189 goto out_delete_sys_enter;
3190
3191 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
3192 goto out_delete_sys_exit;
3193
3194 evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
3195 evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
3196
3197 evlist__add(evlist, sys_enter);
3198 evlist__add(evlist, sys_exit);
3199
3200 if (callchain_param.enabled && !trace->kernel_syscallchains) {
3201
3202
3203
3204
3205
3206 sys_exit->core.attr.exclude_callchain_kernel = 1;
3207 }
3208
3209 trace->syscalls.events.sys_enter = sys_enter;
3210 trace->syscalls.events.sys_exit = sys_exit;
3211
3212 ret = 0;
3213 out:
3214 return ret;
3215
3216 out_delete_sys_exit:
3217 evsel__delete_priv(sys_exit);
3218 out_delete_sys_enter:
3219 evsel__delete_priv(sys_enter);
3220 goto out;
3221 }
3222
3223 static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
3224 {
3225 int err = -1;
3226 struct evsel *sys_exit;
3227 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
3228 trace->ev_qualifier_ids.nr,
3229 trace->ev_qualifier_ids.entries);
3230
3231 if (filter == NULL)
3232 goto out_enomem;
3233
3234 if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) {
3235 sys_exit = trace->syscalls.events.sys_exit;
3236 err = evsel__append_tp_filter(sys_exit, filter);
3237 }
3238
3239 free(filter);
3240 out:
3241 return err;
3242 out_enomem:
3243 errno = ENOMEM;
3244 goto out;
3245 }
3246
3247 #ifdef HAVE_LIBBPF_SUPPORT
3248 static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
3249 {
3250 if (trace->bpf_obj == NULL)
3251 return NULL;
3252
3253 return bpf_object__find_map_by_name(trace->bpf_obj, name);
3254 }
3255
3256 static void trace__set_bpf_map_filtered_pids(struct trace *trace)
3257 {
3258 trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
3259 }
3260
3261 static void trace__set_bpf_map_syscalls(struct trace *trace)
3262 {
3263 trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
3264 trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
3265 trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
3266 }
3267
3268 static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
3269 {
3270 struct bpf_program *pos, *prog = NULL;
3271 const char *sec_name;
3272
3273 if (trace->bpf_obj == NULL)
3274 return NULL;
3275
3276 bpf_object__for_each_program(pos, trace->bpf_obj) {
3277 sec_name = bpf_program__section_name(pos);
3278 if (sec_name && !strcmp(sec_name, name)) {
3279 prog = pos;
3280 break;
3281 }
3282 }
3283
3284 return prog;
3285 }
3286
3287 static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
3288 const char *prog_name, const char *type)
3289 {
3290 struct bpf_program *prog;
3291
3292 if (prog_name == NULL) {
3293 char default_prog_name[256];
3294 scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
3295 prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3296 if (prog != NULL)
3297 goto out_found;
3298 if (sc->fmt && sc->fmt->alias) {
3299 scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
3300 prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3301 if (prog != NULL)
3302 goto out_found;
3303 }
3304 goto out_unaugmented;
3305 }
3306
3307 prog = trace__find_bpf_program_by_title(trace, prog_name);
3308
3309 if (prog != NULL) {
3310 out_found:
3311 return prog;
3312 }
3313
3314 pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
3315 prog_name, type, sc->name);
3316 out_unaugmented:
3317 return trace->syscalls.unaugmented_prog;
3318 }
3319
3320 static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
3321 {
3322 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3323
3324 if (sc == NULL)
3325 return;
3326
3327 sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3328 sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit");
3329 }
3330
3331 static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
3332 {
3333 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3334 return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3335 }
3336
3337 static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
3338 {
3339 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3340 return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3341 }
3342
3343 static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
3344 {
3345 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3346 int arg = 0;
3347
3348 if (sc == NULL)
3349 goto out;
3350
3351 for (; arg < sc->nr_args; ++arg) {
3352 entry->string_args_len[arg] = 0;
3353 if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
3354
3355 entry->string_args_len[arg] = PATH_MAX;
3356 }
3357 }
3358 out:
3359 for (; arg < 6; ++arg)
3360 entry->string_args_len[arg] = 0;
3361 }
3362 static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
3363 {
3364 int fd = bpf_map__fd(trace->syscalls.map);
3365 struct bpf_map_syscall_entry value = {
3366 .enabled = !trace->not_ev_qualifier,
3367 };
3368 int err = 0;
3369 size_t i;
3370
3371 for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
3372 int key = trace->ev_qualifier_ids.entries[i];
3373
3374 if (value.enabled) {
3375 trace__init_bpf_map_syscall_args(trace, key, &value);
3376 trace__init_syscall_bpf_progs(trace, key);
3377 }
3378
3379 err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
3380 if (err)
3381 break;
3382 }
3383
3384 return err;
3385 }
3386
3387 static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
3388 {
3389 int fd = bpf_map__fd(trace->syscalls.map);
3390 struct bpf_map_syscall_entry value = {
3391 .enabled = enabled,
3392 };
3393 int err = 0, key;
3394
3395 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3396 if (enabled)
3397 trace__init_bpf_map_syscall_args(trace, key, &value);
3398
3399 err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
3400 if (err)
3401 break;
3402 }
3403
3404 return err;
3405 }
3406
3407 static int trace__init_syscalls_bpf_map(struct trace *trace)
3408 {
3409 bool enabled = true;
3410
3411 if (trace->ev_qualifier_ids.nr)
3412 enabled = trace->not_ev_qualifier;
3413
3414 return __trace__init_syscalls_bpf_map(trace, enabled);
3415 }
3416
/*
 * Look for another syscall whose sys_enter BPF augmenter can be reused for
 * @sc, by comparing the pointer arguments of both syscalls position by
 * position.
 *
 * Returns the BPF program of the first suitable pair, or NULL when @sc has
 * no pointer argument or no suitable pair exists.
 */
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
{
	struct tep_format_field *field, *candidate_field;
	int id;

	/*
	 * Only syscalls with at least one pointer argument are of interest.
	 */
	for (field = sc->args; field; field = field->next) {
		if (field->flags & TEP_FIELD_IS_POINTER)
			goto try_to_find_pair;
	}

	return NULL;

try_to_find_pair:
	for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
		struct syscall *pair = trace__syscall_info(trace, NULL, id);
		struct bpf_program *pair_prog;
		bool is_candidate = false;

		/* Skip unknown syscalls, @sc itself and pairs known to be unaugmented. */
		if (pair == NULL || pair == sc ||
		    pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
			continue;

		/* Walk both argument lists in lockstep. */
		for (field = sc->args, candidate_field = pair->args;
		     field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
			bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
			     candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;

			if (is_pointer) {
			       if (!candidate_is_pointer) {
					/* We have a pointer here that the candidate doesn't: tolerated, keep looking. */
					continue;
			       }
			} else {
				if (candidate_is_pointer) {
					/* The candidate has a pointer here that we don't: reject it. */
					goto next_candidate;
				}
				continue;
			}

			/* Both are pointers: the types must have the same name. */
			if (strcmp(field->type, candidate_field->type))
				goto next_candidate;

			is_candidate = true;
		}

		/* No common pointer argument of the same type was found. */
		if (!is_candidate)
			goto next_candidate;

		/*
		 * The candidate has more arguments than @sc: reject it if any of
		 * the remaining ones is a pointer.
		 *
		 * NOTE(review): the scan starts at candidate_field->next, so the
		 * first unmatched candidate argument itself is not checked.
		 * Confirm whether that is intended.
		 */
		if (candidate_field) {
			for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
				if (candidate_field->flags & TEP_FIELD_IS_POINTER)
					goto next_candidate;
		}

		pair_prog = pair->bpf_prog.sys_enter;
		/*
		 * A NULL sys_enter program means it has not been looked up yet
		 * for this pair, so look it up now. If that yields the
		 * unaugmented program there is nothing to reuse.
		 */
		if (pair_prog == NULL) {
			pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
			if (pair_prog == trace->syscalls.unaugmented_prog)
				goto next_candidate;
		}

		pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
		return pair_prog;
	next_candidate:
		continue;
	}

	return NULL;
}
3504
3505 static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
3506 {
3507 int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
3508 map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
3509 int err = 0, key;
3510
3511 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3512 int prog_fd;
3513
3514 if (!trace__syscall_enabled(trace, key))
3515 continue;
3516
3517 trace__init_syscall_bpf_progs(trace, key);
3518
3519
3520 prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
3521 err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3522 if (err)
3523 break;
3524 prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
3525 err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
3526 if (err)
3527 break;
3528 }
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3559 struct syscall *sc = trace__syscall_info(trace, NULL, key);
3560 struct bpf_program *pair_prog;
3561 int prog_fd;
3562
3563 if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
3564 continue;
3565
3566
3567
3568
3569
3570 if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
3571 continue;
3572
3573
3574
3575
3576
3577 pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
3578 if (pair_prog == NULL)
3579 continue;
3580
3581 sc->bpf_prog.sys_enter = pair_prog;
3582
3583
3584
3585
3586
3587 prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
3588 err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3589 if (err)
3590 break;
3591 }
3592
3593
3594 return err;
3595 }
3596
3597 static void trace__delete_augmented_syscalls(struct trace *trace)
3598 {
3599 struct evsel *evsel, *tmp;
3600
3601 evlist__remove(trace->evlist, trace->syscalls.events.augmented);
3602 evsel__delete(trace->syscalls.events.augmented);
3603 trace->syscalls.events.augmented = NULL;
3604
3605 evlist__for_each_entry_safe(trace->evlist, tmp, evsel) {
3606 if (evsel->bpf_obj == trace->bpf_obj) {
3607 evlist__remove(trace->evlist, evsel);
3608 evsel__delete(evsel);
3609 }
3610
3611 }
3612
3613 bpf_object__close(trace->bpf_obj);
3614 trace->bpf_obj = NULL;
3615 }
3616 #else
3617 static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
3618 const char *name __maybe_unused)
3619 {
3620 return NULL;
3621 }
3622
3623 static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused)
3624 {
3625 }
3626
3627 static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
3628 {
3629 }
3630
3631 static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
3632 {
3633 return 0;
3634 }
3635
3636 static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
3637 {
3638 return 0;
3639 }
3640
3641 static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
3642 const char *name __maybe_unused)
3643 {
3644 return NULL;
3645 }
3646
3647 static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
3648 {
3649 return 0;
3650 }
3651
3652 static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused)
3653 {
3654 }
3655 #endif
3656
3657 static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
3658 {
3659 struct evsel *evsel;
3660
3661 evlist__for_each_entry(trace->evlist, evsel) {
3662 if (evsel == trace->syscalls.events.augmented ||
3663 evsel->bpf_obj == trace->bpf_obj)
3664 continue;
3665
3666 return false;
3667 }
3668
3669 return true;
3670 }
3671
3672 static int trace__set_ev_qualifier_filter(struct trace *trace)
3673 {
3674 if (trace->syscalls.map)
3675 return trace__set_ev_qualifier_bpf_filter(trace);
3676 if (trace->syscalls.events.sys_enter)
3677 return trace__set_ev_qualifier_tp_filter(trace);
3678 return 0;
3679 }
3680
3681 static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
3682 size_t npids __maybe_unused, pid_t *pids __maybe_unused)
3683 {
3684 int err = 0;
3685 #ifdef HAVE_LIBBPF_SUPPORT
3686 bool value = true;
3687 int map_fd = bpf_map__fd(map);
3688 size_t i;
3689
3690 for (i = 0; i < npids; ++i) {
3691 err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
3692 if (err)
3693 break;
3694 }
3695 #endif
3696 return err;
3697 }
3698
3699 static int trace__set_filter_loop_pids(struct trace *trace)
3700 {
3701 unsigned int nr = 1, err;
3702 pid_t pids[32] = {
3703 getpid(),
3704 };
3705 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
3706
3707 while (thread && nr < ARRAY_SIZE(pids)) {
3708 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
3709
3710 if (parent == NULL)
3711 break;
3712
3713 if (!strcmp(thread__comm_str(parent), "sshd") ||
3714 strstarts(thread__comm_str(parent), "gnome-terminal")) {
3715 pids[nr++] = parent->tid;
3716 break;
3717 }
3718 thread = parent;
3719 }
3720
3721 err = evlist__append_tp_filter_pids(trace->evlist, nr, pids);
3722 if (!err && trace->filter_pids.map)
3723 err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
3724
3725 return err;
3726 }
3727
3728 static int trace__set_filter_pids(struct trace *trace)
3729 {
3730 int err = 0;
3731
3732
3733
3734
3735
3736
3737 if (trace->filter_pids.nr > 0) {
3738 err = evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
3739 trace->filter_pids.entries);
3740 if (!err && trace->filter_pids.map) {
3741 err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
3742 trace->filter_pids.entries);
3743 }
3744 } else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
3745 err = trace__set_filter_loop_pids(trace);
3746 }
3747
3748 return err;
3749 }
3750
3751 static int __trace__deliver_event(struct trace *trace, union perf_event *event)
3752 {
3753 struct evlist *evlist = trace->evlist;
3754 struct perf_sample sample;
3755 int err = evlist__parse_sample(evlist, event, &sample);
3756
3757 if (err)
3758 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
3759 else
3760 trace__handle_event(trace, event, &sample);
3761
3762 return 0;
3763 }
3764
3765 static int __trace__flush_events(struct trace *trace)
3766 {
3767 u64 first = ordered_events__first_time(&trace->oe.data);
3768 u64 flush = trace->oe.last - NSEC_PER_SEC;
3769
3770
3771 if (first && first < flush)
3772 return ordered_events__flush_time(&trace->oe.data, flush);
3773
3774 return 0;
3775 }
3776
3777 static int trace__flush_events(struct trace *trace)
3778 {
3779 return !trace->sort_events ? 0 : __trace__flush_events(trace);
3780 }
3781
3782 static int trace__deliver_event(struct trace *trace, union perf_event *event)
3783 {
3784 int err;
3785
3786 if (!trace->sort_events)
3787 return __trace__deliver_event(trace, event);
3788
3789 err = evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
3790 if (err && err != -1)
3791 return err;
3792
3793 err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0, NULL);
3794 if (err)
3795 return err;
3796
3797 return trace__flush_events(trace);
3798 }
3799
3800 static int ordered_events__deliver_event(struct ordered_events *oe,
3801 struct ordered_event *event)
3802 {
3803 struct trace *trace = container_of(oe, struct trace, oe.data);
3804
3805 return __trace__deliver_event(trace, event->event);
3806 }
3807
3808 static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
3809 {
3810 struct tep_format_field *field;
3811 struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
3812
3813 if (evsel->tp_format == NULL || fmt == NULL)
3814 return NULL;
3815
3816 for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
3817 if (strcmp(field->name, arg) == 0)
3818 return fmt;
3819
3820 return NULL;
3821 }
3822
3823 static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel)
3824 {
3825 char *tok, *left = evsel->filter, *new_filter = evsel->filter;
3826
3827 while ((tok = strpbrk(left, "=<>!")) != NULL) {
3828 char *right = tok + 1, *right_end;
3829
3830 if (*right == '=')
3831 ++right;
3832
3833 while (isspace(*right))
3834 ++right;
3835
3836 if (*right == '\0')
3837 break;
3838
3839 while (!isalpha(*left))
3840 if (++left == tok) {
3841
3842
3843
3844
3845 return 0;
3846 }
3847
3848 right_end = right + 1;
3849 while (isalnum(*right_end) || *right_end == '_' || *right_end == '|')
3850 ++right_end;
3851
3852 if (isalpha(*right)) {
3853 struct syscall_arg_fmt *fmt;
3854 int left_size = tok - left,
3855 right_size = right_end - right;
3856 char arg[128];
3857
3858 while (isspace(left[left_size - 1]))
3859 --left_size;
3860
3861 scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
3862
3863 fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
3864 if (fmt == NULL) {
3865 pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
3866 arg, evsel->name, evsel->filter);
3867 return -1;
3868 }
3869
3870 pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ",
3871 arg, (int)(right - tok), tok, right_size, right);
3872
3873 if (fmt->strtoul) {
3874 u64 val;
3875 struct syscall_arg syscall_arg = {
3876 .parm = fmt->parm,
3877 };
3878
3879 if (fmt->strtoul(right, right_size, &syscall_arg, &val)) {
3880 char *n, expansion[19];
3881 int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val);
3882 int expansion_offset = right - new_filter;
3883
3884 pr_debug("%s", expansion);
3885
3886 if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) {
3887 pr_debug(" out of memory!\n");
3888 free(new_filter);
3889 return -1;
3890 }
3891 if (new_filter != evsel->filter)
3892 free(new_filter);
3893 left = n + expansion_offset + expansion_lenght;
3894 new_filter = n;
3895 } else {
3896 pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3897 right_size, right, arg, evsel->name, evsel->filter);
3898 return -1;
3899 }
3900 } else {
3901 pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3902 arg, evsel->name, evsel->filter);
3903 return -1;
3904 }
3905
3906 pr_debug("\n");
3907 } else {
3908 left = right_end;
3909 }
3910 }
3911
3912 if (new_filter != evsel->filter) {
3913 pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
3914 evsel__set_filter(evsel, new_filter);
3915 free(new_filter);
3916 }
3917
3918 return 0;
3919 }
3920
3921 static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel)
3922 {
3923 struct evlist *evlist = trace->evlist;
3924 struct evsel *evsel;
3925
3926 evlist__for_each_entry(evlist, evsel) {
3927 if (evsel->filter == NULL)
3928 continue;
3929
3930 if (trace__expand_filter(trace, evsel)) {
3931 *err_evsel = evsel;
3932 return -1;
3933 }
3934 }
3935
3936 return 0;
3937 }
3938
/*
 * Live tracing: set up the events, optionally fork the workload given in
 * @argv (when @argc > 0), install filters, mmap the ring buffers and then
 * loop reading and delivering events until interrupted or drained.
 *
 * The evlist is deleted before returning, on both success and error paths.
 * Error paths print a message to trace->output (or via pr_err()).
 *
 * Returns 0 on success, non-zero on error.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct evlist *evlist = trace->evlist;
	struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	/* The syscall tracepoints are only added when not in raw augmented syscalls mode. */
	if (!trace->raw_augmented_syscalls) {
		if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
			goto out_error_raw_syscalls;

		if (trace->trace_syscalls)
			trace->vfs_getname = evlist__add_vfs_getname(evlist);
	}

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_min);
	}

	/* Ignore missing threads when a target uid or pid was specified. */
	trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid;

	if (trace->sched &&
	    evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	/* If a global cgroup was set, make it the evlist's default cgroup. */
	if (trace->cgroup)
		evlist__set_default_cgroup(trace->evlist, trace->cgroup);

	err = evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	evlist__config(evlist, &trace->opts, &callchain_param);

	if (forks) {
		err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
		workload_pid = evlist->workload.pid;
	}

	err = evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_error_open;
	}

	/* NOTE(review): any failure here is reported as "Not enough memory to run!". */
	err = trace__set_filter_pids(trace);
	if (err < 0)
		goto out_error_mem;

	/* NOTE(review): the return values of these two map initialisers are ignored. */
	if (trace->syscalls.map)
		trace__init_syscalls_bpf_map(trace);

	if (trace->syscalls.prog_array.sys_enter)
		trace__init_syscalls_bpf_prog_array_maps(trace);

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		if (trace->syscalls.events.sys_exit) {
			pr_debug("event qualifier tracepoint filter: %s\n",
				 trace->syscalls.events.sys_exit->filter);
		}
	}

	/*
	 * fd_path_disabled is set when the "close" syscall is not being
	 * traced.
	 */
	trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));

	/* Resolve symbolic values in the filters before applying them. */
	err = trace__expand_filters(trace, &evsel);
	if (err)
		goto out_delete_evlist;
	err = evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	if (trace->dump.map)
		bpf_map__fprintf(trace->dump.map, trace->output);

	err = evlist__mmap(evlist, trace->opts.mmap_pages);
	if (err < 0)
		goto out_error_mmap;

	/* Enable right away only when there is a target and no initial delay. */
	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
		evlist__enable(evlist);

	if (forks)
		evlist__start_workload(evlist);

	if (trace->opts.initial_delay) {
		/* usleep() takes microseconds; initial_delay is scaled by 1000. */
		usleep(trace->opts.initial_delay * 1000);
		evlist__enable(evlist);
	}

	trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
				  evlist->core.threads->nr > 1 ||
				  evlist__first(evlist)->core.attr.inherit;

	/*
	 * The events are already open at this point, so sample_max_stack is
	 * only adjusted in our copy of the attr: events with callchains and
	 * no per-event value inherit trace->max_stack.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel__has_callchain(evsel) &&
		    evsel->core.attr.sample_max_stack == 0)
			evsel->core.attr.sample_max_stack = trace->max_stack;
	}
again:
	/* Remember the event count to detect a pass that read nothing. */
	before = trace->nr_events;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		union perf_event *event;
		struct mmap *md;

		md = &evlist->mmap[i];
		if (perf_mmap__read_init(&md->core) < 0)
			continue;

		while ((event = perf_mmap__read_event(&md->core)) != NULL) {
			++trace->nr_events;

			err = trace__deliver_event(trace, event);
			if (err)
				goto out_disable;

			perf_mmap__consume(&md->core);

			if (interrupted)
				goto out_disable;

			/* Asked to finish: stop the events once, then keep draining. */
			if (done && !draining) {
				evlist__disable(evlist);
				draining = true;
			}
		}
		perf_mmap__read_done(&md->core);
	}

	if (trace->nr_events == before) {
		/* Nothing new was read: block in poll unless we were asked to finish. */
		int timeout = done ? 100 : -1;

		if (!draining && evlist__poll(evlist, timeout) > 0) {
			/* Start draining once evlist__filter_pollfd() returns 0. */
			if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
				draining = true;

			goto again;
		} else {
			if (trace__flush_events(trace))
				goto out_disable;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	evlist__disable(evlist);

	/* Deliver whatever is still sitting in the ordered events queue. */
	if (trace->sort_events)
		ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	/* Common teardown for both the success and the error paths. */
	trace__symbols__exit(trace);
	evlist__free_syscall_tp_fields(evlist);
	evlist__delete(evlist);
	cgroup__put(trace->cgroup);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Error labels below are only reachable via goto; the block scopes the
 * errbuf used to build their messages.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
4232
4233 static int trace__replay(struct trace *trace)
4234 {
4235 const struct evsel_str_handler handlers[] = {
4236 { "probe:vfs_getname", trace__vfs_getname, },
4237 };
4238 struct perf_data data = {
4239 .path = input_name,
4240 .mode = PERF_DATA_MODE_READ,
4241 .force = trace->force,
4242 };
4243 struct perf_session *session;
4244 struct evsel *evsel;
4245 int err = -1;
4246
4247 trace->tool.sample = trace__process_sample;
4248 trace->tool.mmap = perf_event__process_mmap;
4249 trace->tool.mmap2 = perf_event__process_mmap2;
4250 trace->tool.comm = perf_event__process_comm;
4251 trace->tool.exit = perf_event__process_exit;
4252 trace->tool.fork = perf_event__process_fork;
4253 trace->tool.attr = perf_event__process_attr;
4254 trace->tool.tracing_data = perf_event__process_tracing_data;
4255 trace->tool.build_id = perf_event__process_build_id;
4256 trace->tool.namespaces = perf_event__process_namespaces;
4257
4258 trace->tool.ordered_events = true;
4259 trace->tool.ordering_requires_timestamps = true;
4260
4261
4262 trace->multiple_threads = true;
4263
4264 session = perf_session__new(&data, &trace->tool);
4265 if (IS_ERR(session))
4266 return PTR_ERR(session);
4267
4268 if (trace->opts.target.pid)
4269 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
4270
4271 if (trace->opts.target.tid)
4272 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
4273
4274 if (symbol__init(&session->header.env) < 0)
4275 goto out;
4276
4277 trace->host = &session->machines.host;
4278
4279 err = perf_session__set_tracepoints_handlers(session, handlers);
4280 if (err)
4281 goto out;
4282
4283 evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
4284 trace->syscalls.events.sys_enter = evsel;
4285
4286 if (evsel == NULL)
4287 evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");
4288
4289 if (evsel &&
4290 (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
4291 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
4292 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
4293 goto out;
4294 }
4295
4296 evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
4297 trace->syscalls.events.sys_exit = evsel;
4298 if (evsel == NULL)
4299 evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
4300 if (evsel &&
4301 (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
4302 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
4303 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
4304 goto out;
4305 }
4306
4307 evlist__for_each_entry(session->evlist, evsel) {
4308 if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
4309 (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
4310 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
4311 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
4312 evsel->handler = trace__pgfault;
4313 }
4314
4315 setup_pager();
4316
4317 err = perf_session__process_events(session);
4318 if (err)
4319 pr_err("Failed to process events, error %d", err);
4320
4321 else if (trace->summary)
4322 trace__fprintf_thread_summary(trace, trace->output);
4323
4324 out:
4325 perf_session__delete(session);
4326
4327 return err;
4328 }
4329
/*
 * Print the heading of the per-thread summary to @fp.
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
4338
/*
 * Resorted view of a thread's per-syscall statistics, ordered with the
 * comparison "a->msecs > b->msecs".
 *
 * Each source node is an int_node: ->i is stored as the syscall id and
 * ->priv as the stats. msecs is the number of samples times the average
 * (scaled by NSEC_PER_MSEC), or 0 when the node has no stats attached.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct syscall_stats *stats;
	double		     msecs;
	int		     syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct syscall_stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats   = stats;
	entry->msecs   = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
}
4352
4353 static size_t thread__dump_stats(struct thread_trace *ttrace,
4354 struct trace *trace, FILE *fp)
4355 {
4356 size_t printed = 0;
4357 struct syscall *sc;
4358 struct rb_node *nd;
4359 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
4360
4361 if (syscall_stats == NULL)
4362 return 0;
4363
4364 printed += fprintf(fp, "\n");
4365
4366 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
4367 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
4368 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
4369
4370 resort_rb__for_each_entry(nd, syscall_stats) {
4371 struct syscall_stats *stats = syscall_stats_entry->stats;
4372 if (stats) {
4373 double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
4374 double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
4375 double avg = avg_stats(&stats->stats);
4376 double pct;
4377 u64 n = (u64)stats->stats.n;
4378
4379 pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
4380 avg /= NSEC_PER_MSEC;
4381
4382 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
4383 printed += fprintf(fp, " %-15s", sc->name);
4384 printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
4385 n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
4386 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
4387
4388 if (trace->errno_summary && stats->nr_failures) {
4389 const char *arch_name = perf_env__arch(trace->host->env);
4390 int e;
4391
4392 for (e = 0; e < stats->max_errno; ++e) {
4393 if (stats->errnos[e] != 0)
4394 fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]);
4395 }
4396 }
4397 }
4398 }
4399
4400 resort_rb__delete(syscall_stats);
4401 printed += fprintf(fp, "\n\n");
4402
4403 return printed;
4404 }
4405
4406 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
4407 {
4408 size_t printed = 0;
4409 struct thread_trace *ttrace = thread__priv(thread);
4410 double ratio;
4411
4412 if (ttrace == NULL)
4413 return 0;
4414
4415 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
4416
4417 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
4418 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
4419 printed += fprintf(fp, "%.1f%%", ratio);
4420 if (ttrace->pfmaj)
4421 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
4422 if (ttrace->pfmin)
4423 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
4424 if (trace->sched)
4425 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
4426 else if (fputc('\n', fp) != EOF)
4427 ++printed;
4428
4429 printed += thread__dump_stats(ttrace, trace, fp);
4430
4431 return printed;
4432 }
4433
4434 static unsigned long thread__nr_events(struct thread_trace *ttrace)
4435 {
4436 return ttrace ? ttrace->nr_events : 0;
4437 }
4438
/*
 * Resorted view of threads, ordered with the comparison
 * "nr_events(a) < nr_events(b)", where nr_events comes from each thread's
 * ->priv (threads without private data count as 0 events).
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
4445
4446 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
4447 {
4448 size_t printed = trace__fprintf_threads_header(fp);
4449 struct rb_node *nd;
4450 int i;
4451
4452 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
4453 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
4454
4455 if (threads == NULL) {
4456 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
4457 return 0;
4458 }
4459
4460 resort_rb__for_each_entry(nd, threads)
4461 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
4462
4463 resort_rb__delete(threads);
4464 }
4465 return printed;
4466 }
4467
4468 static int trace__set_duration(const struct option *opt, const char *str,
4469 int unset __maybe_unused)
4470 {
4471 struct trace *trace = opt->value;
4472
4473 trace->duration_filter = atof(str);
4474 return 0;
4475 }
4476
4477 static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
4478 int unset __maybe_unused)
4479 {
4480 int ret = -1;
4481 size_t i;
4482 struct trace *trace = opt->value;
4483
4484
4485
4486
4487 struct intlist *list = intlist__new(str);
4488
4489 if (list == NULL)
4490 return -1;
4491
4492 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
4493 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
4494
4495 if (trace->filter_pids.entries == NULL)
4496 goto out;
4497
4498 trace->filter_pids.entries[0] = getpid();
4499
4500 for (i = 1; i < trace->filter_pids.nr; ++i)
4501 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
4502
4503 intlist__delete(list);
4504 ret = 0;
4505 out:
4506 return ret;
4507 }
4508
4509 static int trace__open_output(struct trace *trace, const char *filename)
4510 {
4511 struct stat st;
4512
4513 if (!stat(filename, &st) && st.st_size) {
4514 char oldname[PATH_MAX];
4515
4516 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
4517 unlink(oldname);
4518 rename(filename, oldname);
4519 }
4520
4521 trace->output = fopen(filename, "w");
4522
4523 return trace->output == NULL ? -errno : 0;
4524 }
4525
4526 static int parse_pagefaults(const struct option *opt, const char *str,
4527 int unset __maybe_unused)
4528 {
4529 int *trace_pgfaults = opt->value;
4530
4531 if (strcmp(str, "all") == 0)
4532 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
4533 else if (strcmp(str, "maj") == 0)
4534 *trace_pgfaults |= TRACE_PFMAJ;
4535 else if (strcmp(str, "min") == 0)
4536 *trace_pgfaults |= TRACE_PFMIN;
4537 else
4538 return -1;
4539
4540 return 0;
4541 }
4542
4543 static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler)
4544 {
4545 struct evsel *evsel;
4546
4547 evlist__for_each_entry(evlist, evsel) {
4548 if (evsel->handler == NULL)
4549 evsel->handler = handler;
4550 }
4551 }
4552
4553 static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
4554 {
4555 struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
4556
4557 if (fmt) {
4558 struct syscall_fmt *scfmt = syscall_fmt__find(name);
4559
4560 if (scfmt) {
4561 int skip = 0;
4562
4563 if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
4564 strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
4565 ++skip;
4566
4567 memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
4568 }
4569 }
4570 }
4571
4572 static int evlist__set_syscall_tp_fields(struct evlist *evlist)
4573 {
4574 struct evsel *evsel;
4575
4576 evlist__for_each_entry(evlist, evsel) {
4577 if (evsel->priv || !evsel->tp_format)
4578 continue;
4579
4580 if (strcmp(evsel->tp_format->system, "syscalls")) {
4581 evsel__init_tp_arg_scnprintf(evsel);
4582 continue;
4583 }
4584
4585 if (evsel__init_syscall_tp(evsel))
4586 return -1;
4587
4588 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
4589 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
4590
4591 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
4592 return -1;
4593
4594 evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
4595 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
4596 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
4597
4598 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
4599 return -1;
4600
4601 evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
4602 }
4603 }
4604
4605 return 0;
4606 }
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616 static int trace__parse_events_option(const struct option *opt, const char *str,
4617 int unset __maybe_unused)
4618 {
4619 struct trace *trace = (struct trace *)opt->value;
4620 const char *s = str;
4621 char *sep = NULL, *lists[2] = { NULL, NULL, };
4622 int len = strlen(str) + 1, err = -1, list, idx;
4623 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
4624 char group_name[PATH_MAX];
4625 struct syscall_fmt *fmt;
4626
4627 if (strace_groups_dir == NULL)
4628 return -1;
4629
4630 if (*s == '!') {
4631 ++s;
4632 trace->not_ev_qualifier = true;
4633 }
4634
4635 while (1) {
4636 if ((sep = strchr(s, ',')) != NULL)
4637 *sep = '\0';
4638
4639 list = 0;
4640 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
4641 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
4642 list = 1;
4643 goto do_concat;
4644 }
4645
4646 fmt = syscall_fmt__find_by_alias(s);
4647 if (fmt != NULL) {
4648 list = 1;
4649 s = fmt->name;
4650 } else {
4651 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
4652 if (access(group_name, R_OK) == 0)
4653 list = 1;
4654 }
4655 do_concat:
4656 if (lists[list]) {
4657 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
4658 } else {
4659 lists[list] = malloc(len);
4660 if (lists[list] == NULL)
4661 goto out;
4662 strcpy(lists[list], s);
4663 }
4664
4665 if (!sep)
4666 break;
4667
4668 *sep = ',';
4669 s = sep + 1;
4670 }
4671
4672 if (lists[1] != NULL) {
4673 struct strlist_config slist_config = {
4674 .dirname = strace_groups_dir,
4675 };
4676
4677 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
4678 if (trace->ev_qualifier == NULL) {
4679 fputs("Not enough memory to parse event qualifier", trace->output);
4680 goto out;
4681 }
4682
4683 if (trace__validate_ev_qualifier(trace))
4684 goto out;
4685 trace->trace_syscalls = true;
4686 }
4687
4688 err = 0;
4689
4690 if (lists[0]) {
4691 struct option o = {
4692 .value = &trace->evlist,
4693 };
4694 err = parse_events_option(&o, lists[0], 0);
4695 }
4696 out:
4697 free(strace_groups_dir);
4698 free(lists[0]);
4699 free(lists[1]);
4700 if (sep)
4701 *sep = ',';
4702
4703 return err;
4704 }
4705
4706 static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
4707 {
4708 struct trace *trace = opt->value;
4709
4710 if (!list_empty(&trace->evlist->core.entries)) {
4711 struct option o = {
4712 .value = &trace->evlist,
4713 };
4714 return parse_cgroups(&o, str, unset);
4715 }
4716 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
4717
4718 return 0;
4719 }
4720
4721 static int trace__config(const char *var, const char *value, void *arg)
4722 {
4723 struct trace *trace = arg;
4724 int err = 0;
4725
4726 if (!strcmp(var, "trace.add_events")) {
4727 trace->perfconfig_events = strdup(value);
4728 if (trace->perfconfig_events == NULL) {
4729 pr_err("Not enough memory for %s\n", "trace.add_events");
4730 return -1;
4731 }
4732 } else if (!strcmp(var, "trace.show_timestamp")) {
4733 trace->show_tstamp = perf_config_bool(var, value);
4734 } else if (!strcmp(var, "trace.show_duration")) {
4735 trace->show_duration = perf_config_bool(var, value);
4736 } else if (!strcmp(var, "trace.show_arg_names")) {
4737 trace->show_arg_names = perf_config_bool(var, value);
4738 if (!trace->show_arg_names)
4739 trace->show_zeros = true;
4740 } else if (!strcmp(var, "trace.show_zeros")) {
4741 bool new_show_zeros = perf_config_bool(var, value);
4742 if (!trace->show_arg_names && !new_show_zeros) {
4743 pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
4744 goto out;
4745 }
4746 trace->show_zeros = new_show_zeros;
4747 } else if (!strcmp(var, "trace.show_prefix")) {
4748 trace->show_string_prefix = perf_config_bool(var, value);
4749 } else if (!strcmp(var, "trace.no_inherit")) {
4750 trace->opts.no_inherit = perf_config_bool(var, value);
4751 } else if (!strcmp(var, "trace.args_alignment")) {
4752 int args_alignment = 0;
4753 if (perf_config_int(&args_alignment, var, value) == 0)
4754 trace->args_alignment = args_alignment;
4755 } else if (!strcmp(var, "trace.tracepoint_beautifiers")) {
4756 if (strcasecmp(value, "libtraceevent") == 0)
4757 trace->libtraceevent_print = true;
4758 else if (strcasecmp(value, "libbeauty") == 0)
4759 trace->libtraceevent_print = false;
4760 }
4761 out:
4762 return err;
4763 }
4764
4765 static void trace__exit(struct trace *trace)
4766 {
4767 int i;
4768
4769 strlist__delete(trace->ev_qualifier);
4770 free(trace->ev_qualifier_ids.entries);
4771 if (trace->syscalls.table) {
4772 for (i = 0; i <= trace->sctbl->syscalls.max_id; i++)
4773 syscall__exit(&trace->syscalls.table[i]);
4774 free(trace->syscalls.table);
4775 }
4776 syscalltbl__delete(trace->sctbl);
4777 zfree(&trace->perfconfig_events);
4778 }
4779
/*
 * Entry point of 'perf trace'.
 *
 * Installs the signal handlers, allocates the evlist and the syscall table,
 * reads the "trace.*" config variables, parses the command line, wires up the
 * "__augmented_syscalls__" BPF output event when one is present, and finally
 * dispatches to trace__record() ('perf trace record'), trace__replay() (when
 * an input file was given) or trace__run() (live mode).
 *
 * @argc/@argv: command line, without the leading "trace".
 *
 * Returns the result of the selected mode, or a non-zero error value if the
 * setup fails.
 */
int cmd_trace(int argc, const char **argv)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	/*
	 * Defaults. The UINT_MAX values of mmap_pages and max_stack act as
	 * "not set by the user" sentinels, checked after option parsing below.
	 */
	struct trace trace = {
		.opts = {
			.target = {
				.uid = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering = true,
			.mmap_pages = UINT_MAX,
		},
		.output = stderr,
		.show_comm = true,
		.show_tstamp = true,
		.show_duration = true,
		.show_arg_names = true,
		.args_alignment = 70,
		.trace_syscalls = false,
		.kernel_syscallchains = false,
		.max_stack = UINT_MAX,
		.max_events = ULONG_MAX,
	};
	const char *map_dump_str = NULL;
	const char *output_name = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK('e', "event", &trace, "event",
		     "event/syscall selector. use 'perf list' to list available events",
		     trace__parse_events_option),
	OPT_CALLBACK(0, "filter", &trace.evlist, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
		     trace__parse_events_option),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		   "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		   "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids_from_option),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages", evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
#ifdef HAVE_LIBBPF_SUPPORT
	OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
#endif
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN(0, "failure", &trace.failure_only,
		    "Show only syscalls that failed"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
		    "Show errno stats per syscall, use with -s or -S"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
		     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_CALLBACK(0, "call-graph", &trace.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
		    "Use libtraceevent to print the tracepoint arguments."),
	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
		    "Show the kernel callchains on the syscall exit path"),
	OPT_ULONG(0, "max-events", &trace.max_events,
		"Set the maximum number of events to print, exit after that is reached. "),
	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
		     "Set the minimum stack depth when parsing the callchain, "
		     "anything below the specified depth will be ignored."),
	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
		     "Set the maximum stack depth when parsing the callchain, "
		     "anything beyond the specified depth will be ignored. "
		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
		    "Sort batch of events before processing, use if getting out of order events"),
	OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
		    "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
		     trace__parse_cgroups),
	OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
		    "ms to wait before starting measurement after program "
		    "start"),
	OPTS_EVSWITCH(&trace.evswitch),
	OPT_END()
	};
	/* Both are flipped to false below when the sentinel default is still in place. */
	bool __maybe_unused max_stack_user_set = true;
	bool mmap_pages_user_set = true;
	struct evsel *evsel;
	const char * const trace_subcommands[] = { "record", NULL };
	int err = -1;
	char bf[BUFSIZ];
	struct sigaction sigchld_act;

	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);
	signal(SIGINT, sighandler_interrupt);

	/* SIGCHLD uses sigaction() so that the handler receives a siginfo_t. */
	memset(&sigchld_act, 0, sizeof(sigchld_act));
	sigchld_act.sa_flags = SA_SIGINFO;
	sigchld_act.sa_sigaction = sighandler_chld;
	sigaction(SIGCHLD, &sigchld_act, NULL);

	trace.evlist = evlist__new();
	trace.sctbl = syscalltbl__new();

	if (trace.evlist == NULL || trace.sctbl == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	/*
	 * Done before reading the config and parsing events.
	 * NOTE(review): presumably because parsing events may create BPF maps
	 * that need a bigger RLIMIT_MEMLOCK - confirm in util/rlimit.c.
	 */
	rlimit__bump_memlock();

	err = perf_config(trace__config, &trace);
	if (err)
		goto out;

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	/*
	 * The option callbacks already ran at this point, so -e/--expr had
	 * the chance to set trace_syscalls and to add events to the evlist.
	 * Only when neither syscalls, page faults nor any other event was
	 * asked for do we fall back to tracing syscalls.
	 */
	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->core.nr_entries == 0 ) {
		trace.trace_syscalls = true;
	}

	/*
	 * Events from the "trace.add_events" config variable are parsed only
	 * now, after the command line options (--verbose included) are known.
	 */
	if (trace.perfconfig_events != NULL) {
		struct parse_events_error parse_err;

		parse_events_error__init(&parse_err);
		err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err);
		if (err)
			parse_events_error__print(&parse_err, trace.perfconfig_events);
		parse_events_error__exit(&parse_err);
		if (err)
			goto out;
	}

	if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
		usage_with_options_msg(trace_usage, trace_options,
				       "cgroup monitoring only available in system-wide mode");
	}

	/* Returns an error pointer on failure and NULL when there is no such event. */
	evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
	if (IS_ERR(evsel)) {
		bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
		pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
		goto out;
	}

	if (evsel) {
		trace.syscalls.events.augmented = evsel;

		/* The BPF object is found through its raw_syscalls:sys_enter evsel. */
		evsel = evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
		if (evsel == NULL) {
			pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
			goto out;
		}

		if (evsel->bpf_obj == NULL) {
			pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
			goto out;
		}

		trace.bpf_obj = evsel->bpf_obj;

		/*
		 * If the evlist holds nothing but the augmented syscalls
		 * evsels, syscall tracing is what was asked for.
		 */
		if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace))
			trace.trace_syscalls = true;

		/*
		 * Syscalls are not being traced: drop the augmented syscalls
		 * events. Otherwise set up the BPF maps (filtered pids,
		 * syscalls) and look up the "unaugmented" program.
		 */
		if (!trace.trace_syscalls) {
			trace__delete_augmented_syscalls(&trace);
		} else {
			trace__set_bpf_map_filtered_pids(&trace);
			trace__set_bpf_map_syscalls(&trace);
			trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
		}
	}

	err = bpf__setup_stdout(trace.evlist);
	if (err) {
		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
		goto out;
	}

	/* From here on a plain 'goto out' reports a generic failure. */
	err = -1;

	if (map_dump_str) {
		trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
		if (trace.dump.map == NULL) {
			pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
			goto out;
		}
	}

	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	/* Still at the sentinel: -m/--mmap-pages was not used. */
	if (trace.opts.mmap_pages == UINT_MAX)
		mmap_pages_user_set = false;

	/* Still at the sentinel: --max-stack was not used, pick a default. */
	if (trace.max_stack == UINT_MAX) {
		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
		max_stack_user_set = false;
	}

#ifdef HAVE_DWARF_UNWIND_SUPPORT
	/* Asking for stack depth limits implies callchains, default to "dwarf". */
	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
	}
#endif

	if (callchain_param.enabled) {
		/* With callchains, root gets a bigger default ring buffer. */
		if (!mmap_pages_user_set && geteuid() == 0)
			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;

		symbol_conf.use_callchain = true;
	}

	if (trace.evlist->core.nr_entries > 0) {
		evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
		if (evlist__set_syscall_tp_fields(trace.evlist)) {
			perror("failed to set syscalls:* tracepoint fields");
			goto out;
		}
	}

	if (trace.sort_events) {
		ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
		ordered_events__set_copy_on_queue(&trace.oe.data, true);
	}

	/*
	 * With an augmented syscalls event in place, hook the syscall enter
	 * and exit tracepoints present in the evlist to trace__sys_enter()
	 * and trace__sys_exit().
	 */
	if (trace.syscalls.events.augmented) {
		evlist__for_each_entry(trace.evlist, evsel) {
			bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;

			/* raw_syscalls:sys_exit shares the sys_exit_* setup below. */
			if (raw_syscalls_sys_exit) {
				trace.raw_augmented_syscalls = true;
				goto init_augmented_syscall_tp;
			}

			/* Done once: only while the augmented evsel has no priv yet. */
			if (trace.syscalls.events.augmented->priv == NULL &&
			    strstr(evsel__name(evsel), "syscalls:sys_enter")) {
				struct evsel *augmented = trace.syscalls.events.augmented;
				if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
				    evsel__init_augmented_syscall_tp_args(augmented))
					goto out;

				/* Augmented payloads are handled as syscall enter events. */
				augmented->handler = trace__sys_enter;

				/* Set up the sys_enter evsel itself in the same way. */
				if (evsel__init_augmented_syscall_tp(evsel, evsel) ||
				    evsel__init_augmented_syscall_tp_args(evsel))
					goto out;
				evsel->handler = trace__sys_enter;
			}

			if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) {
				struct syscall_tp *sc;
init_augmented_syscall_tp:
				if (evsel__init_augmented_syscall_tp(evsel, evsel))
					goto out;
				sc = __evsel__syscall_tp(evsel);

				/*
				 * With raw_syscalls, record the size of the
				 * fixed part of the payload: the id offset
				 * plus (6 + 1) longs.
				 * NOTE(review): presumably the syscall number
				 * and its 6 arguments - confirm.
				 */
				if (trace.raw_augmented_syscalls)
					trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
				evsel__init_augmented_syscall_tp_ret(evsel);
				evsel->handler = trace__sys_exit;
			}
		}
	}

	/*
	 * 'perf trace record ...'.
	 * NOTE(review): this returns directly, without going through
	 * trace__exit() at the 'out' label.
	 */
	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* --errno-summary alone implies -s. */
	if (trace.errno_summary && !trace.summary && !trace.summary_only)
		trace.summary_only = true;

	/* -s implies the summary itself. */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
	if (err)
		goto out_close;

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	/* No workload and no explicit target: trace the whole system. */
	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	/* trace.output is only ours to close when -o replaced the default stderr. */
	if (output_name != NULL)
		fclose(trace.output);
out:
	trace__exit(&trace);
	return err;
}