// SPDX-License-Identifier: GPL-2.0-only
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - later
 * analyzed via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/clockid.h"
#include "util/pmu-hybrid.h"
#include "util/evlist-hybrid.h"
#include "util/off_cpu.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool enabled;
	bool signal;
	unsigned long size;
	unsigned long time;
	const char *str;
	bool set;
	char **filenames;
	int num_files;
	int cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask maps;
	struct mmap_cpu_mask affinity;
};

struct record_thread {
	pid_t tid;
	struct thread_mask *mask;
	struct {
		int msg[2];
		int ack[2];
	} pipes;
	struct fdarray pollfd;
	int ctlfd_pos;
	int nr_mmaps;
	struct mmap **maps;
	struct mmap **overwrite_maps;
	struct record *rec;
	unsigned long long samples;
	unsigned long waking;
	u64 bytes_written;
	u64 bytes_transferred;
	u64 bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

struct record {
	struct perf_tool tool;
	struct record_opts opts;
	u64 bytes_written;
	struct perf_data data;
	struct auxtrace_record *itr;
	struct evlist *evlist;
	struct perf_session *session;
	struct evlist *sb_evlist;
	pthread_t thread_id;
	int realtime_prio;
	bool switch_output_event_set;
	bool no_buildid;
	bool no_buildid_set;
	bool no_buildid_cache;
	bool no_buildid_cache_set;
	bool buildid_all;
	bool buildid_mmap;
	bool timestamp_filename;
	bool timestamp_boundary;
	bool off_cpu;
	struct switch_output switch_output;
	unsigned long long samples;
	unsigned long output_max_size;
	struct perf_debuginfod debuginfod;
	int nr_threads;
	struct thread_mask *thread_masks;
	struct record_thread *thread_data;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	int t;
	u64 bytes_written = rec->bytes_written;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		bytes_written += thread_data[t].bytes_written;

	return bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file)
		thread->bytes_written += size;
	else
		rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
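		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push(), so
		 * decrement it because the request is now complete.
		 */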
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
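		/*
		 * The aio write request may need to be restarted with the
		 * remainder if the kernel didn't write the whole chunk
		 * at once.
		 */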
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 };
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
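				/*
				 * Started aio write is not complete yet,
				 * so it has to be waited on before the
				 * next allocation.
				 */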
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record *rec;
	void *data;
	size_t size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

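	/*
	 * map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, via perf_mmap__consume() called from
	 * perf_mmap__push().
	 *
	 * That lets the kernel proceed with storing more profiling data
	 * into the kernel buffer earlier than other per-cpu kernel buffers
	 * are handled.
	 *
	 * Copying can happen in two steps when the chunk of profiling data
	 * crosses the upper bound of the kernel buffer: first the part from
	 * map->start till the upper bound, then the remainder from the
	 * beginning of the kernel buffer till the end of the chunk.
	 */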
	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
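		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation: the map object can be released
		 * before the aio write request started on map->aio.data[]
		 * completes.
		 *
		 * The matching perf_mmap__put() is done in
		 * record__aio_complete() after the started aio request
		 * completes, or in record__aio_push() if the request failed
		 * to start.
		 */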
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

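	/*
	 * Call record__aio_sync() to wait till a map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */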
	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0)
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
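		/*
		 * Drop the map->refcount taken in record__aio_pushfn()
		 * because record__aio_write() failed to start the request;
		 * otherwise it is dropped in record__aio_complete() after
		 * the aio write operation finishes.
		 */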
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}

static int process_locked_synthesized_event(struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
	int ret;

	pthread_mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	pthread_mutex_unlock(&synth_lock);
	return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
		bf = map->data;
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int signr = -1;
static volatile int child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static int done_fd = -1;
#endif

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
{
	u64 tmp = 1;
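	/*
	 * It is possible for this signal handler to run after done is
	 * checked in the main loop, but before the perf counter fds are
	 * polled. If this happens, the poll() will continue to wait even
	 * though done is set, and will only break out if either another
	 * signal is received, or the counters are ready for read. To
	 * ensure the poll() doesn't miss the latch set above, use an
	 * eventfd to break out of the poll.
	 */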
	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
		pr_err("failed to signal wakeup fd, error: %m\n");
}
#endif
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

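	/* Pad the AUX data out to a multiple of 8 bytes */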
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	auxtrace_regroup_aux_output(rec->evlist);

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__config_text_poke(struct evlist *evlist)
{
	struct evsel *evsel;

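	/* Nothing to do if text poke is already configured */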
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.text_poke)
			return 0;
	}

	evsel = evlist__add_dummy_on_all_cpus(evlist);
	if (!evsel)
		return -ENOMEM;

	evsel->core.attr.text_poke = 1;
	evsel->core.attr.ksymbol = 1;
	evsel->immediate = true;
	evsel__set_sample_bit(evsel, TIME);

	return 0;
}

static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}

static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
	thread_data->pipes.msg[0] = -1;
	thread_data->pipes.msg[1] = -1;
	thread_data->pipes.ack[0] = -1;
	thread_data->pipes.ack[1] = -1;
}

static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		  thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		  thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}

static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
	if (thread_data->pipes.msg[0] != -1) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
	}
	if (thread_data->pipes.msg[1] != -1) {
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
	}
	if (thread_data->pipes.ack[0] != -1) {
		close(thread_data->pipes.ack[0]);
		thread_data->pipes.ack[0] = -1;
	}
	if (thread_data->pipes.ack[1] != -1) {
		close(thread_data->pipes.ack[1]);
		thread_data->pipes.ack[1] = -1;
	}
}

static bool evlist__per_thread(struct evlist *evlist)
{
	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}

static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}

static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					  thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				  thread_data, thread_data[t].ctlfd_pos,
				  thread_data[t].pipes.msg[0]);
		} else {
			thread_data[t].tid = gettid();
			if (evlist->ctl_fd.pos == -1)
				continue;
			ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
						      &evlist->core.pollfd);
			if (ret < 0) {
				pr_err("Failed to duplicate descriptor in main thread pollfd\n");
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				  thread_data, thread_data[t].ctlfd_pos,
				  evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}

static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
			    opts->auxtrace_mmap_pages,
			    auxtrace_overwrite,
			    opts->nr_cblocks, opts->affinity,
			    opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
			       str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

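	/*
	 * For initial_delay, system wide or a hybrid system, we need to add
	 * a dummy event so that we can track PERF_RECORD_MMAP to cover the
	 * delay of waiting or event synthesis.
	 */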
	if (opts->initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmu__has_hybrid()) {
		pos = evlist__get_tracking_event(evlist);
		if (!evsel__is_dummy_event(pos)) {
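			/* Set up dummy event. */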
			if (evlist__add_dummy(evlist))
				return -ENOMEM;
			pos = evlist__last(evlist);
			evlist__set_tracking_event(evlist, pos);
		}

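		/*
		 * Enable the dummy event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */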
		if (opts->initial_delay && !pos->immediate &&
		    !target__has_cpu(&opts->target))
			pos->core.attr.enable_on_exec = 1;
		else
			pos->immediate = 1;
	}

	evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
				pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

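	/*
	 * During this process, it'll load the kernel map and replace the
	 * dso->long_name with a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/$(uname -r)/build/vmlinux
	 * rather than the build-id path under the debug directory,
	 *   $HOME/.debug/.build-id/...
	 */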
	symbol_conf.ignore_vmlinux_buildid = true;

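	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */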
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

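	/*
	 * For the guest kernel, we arrange the module mmap prior to the
	 * guest kernel mmap and trigger a preload dso, because default
	 * guest module symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */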
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

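	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */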
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static struct perf_event_header finished_init_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_INIT,
};

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
				  (cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}

static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}

static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size)
{
	size_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed += compressed;
	}

	return compressed;
}

static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

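	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */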
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
					   void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
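			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */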
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						needs_mmap,
						rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int write_finished_init(struct record *rec, bool tail)
{
	if (rec->opts.tail_synthesize != tail)
		return 0;

	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

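	/* Placeholder with the same size as a generated timestamp */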
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

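	/* Output tracking events */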
	if (!at_exit) {
		record__synthesize(rec, false);

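		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data would
		 * contain no map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */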
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
		write_finished_init(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

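/*
 * Preparing the workload sends a SIGUSR1 if the fork fails, since we
 * asked for it by setting want_signal to true.
 */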
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_for_pipe(tool, session, data,
						      process_synthesized_event);
		if (err < 0)
			goto out;

		rec->bytes_written += err;
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

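	/* Synthesize id_index before auxtrace_info */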
	err = perf_event__synthesize_id_index(tool,
					      process_synthesized_event,
					      session->evlist, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0) {
		pr_warning("Couldn't synthesize bpf events.\n");
		err = 0;
	}

	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
						     machine);
		if (err < 0) {
			pr_warning("Couldn't synthesize cgroup events.\n");
			err = 0;
		}
	}

	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

	if (rec->opts.synth & PERF_SYNTH_TASK) {
		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

		err = __machine__synthesize_threads(machine, tool, &opts->target,
						    rec->evlist->core.threads,
						    f, needs_mmap, opts->sample_address,
						    rec->opts.nr_threads_synthesize);
	}

	if (rec->opts.nr_threads_synthesize > 1)
		perf_set_singlethreaded();

out:
	return err;
}

static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
	struct record *rec = data;
	pthread_kill(rec->thread_id, SIGUSR2);
	return 0;
}

static int record__setup_sb_evlist(struct record *rec)
{
	struct record_opts *opts = &rec->opts;

	if (rec->sb_evlist != NULL) {
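		/*
		 * We get here if --switch-output-event populated the
		 * sb_evlist.
		 */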
		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
		rec->thread_id = pthread_self();
	}
#ifdef HAVE_LIBBPF_SUPPORT
	if (!opts->no_bpf_event) {
		if (rec->sb_evlist == NULL) {
			rec->sb_evlist = evlist__new();

			if (rec->sb_evlist == NULL) {
				pr_err("Couldn't create side band evlist.\n");
				return -1;
			}
		}

		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
			return -1;
		}
	}
#endif
	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	return 0;
}

static int record__init_clock(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct timespec ref_clockid;
	struct timeval ref_tod;
	u64 ref;

	if (!rec->opts.use_clockid)
		return 0;

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;

	session->header.env.clock.clockid = rec->opts.clockid;

	if (gettimeofday(&ref_tod, NULL) != 0) {
		pr_err("gettimeofday failed, cannot set reference time.\n");
		return -1;
	}

	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
		pr_err("clock_gettime failed, cannot set reference time.\n");
		return -1;
	}

	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;

	session->header.env.clock.tod_ns = ref;

	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
	      (u64) ref_clockid.tv_nsec;

	session->header.env.clock.clockid_ns = ref;
	return 0;
}

static void hit_auxtrace_snapshot_trigger(struct record *rec)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}
}

static void record__uniquify_name(struct record *rec)
{
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	char *new_name;
	int ret;

	if (!perf_pmu__has_hybrid())
		return;

	evlist__for_each_entry(evlist, pos) {
		if (!evsel__is_hybrid(pos))
			continue;

		if (strchr(pos->name, '/'))
			continue;

		/* asprintf() returns -1 on failure, leaving new_name undefined */
		ret = asprintf(&new_name, "%s/%s/",
			       pos->pmu_name, pos->name);
		if (ret >= 0) {
			free(pos->name);
			pos->name = new_name;
		}
	}
}

static int record__terminate_thread(struct record_thread *thread_data)
{
	int err;
	enum thread_msg ack = THREAD_MSG__UNDEFINED;
	pid_t tid = thread_data->tid;

	close(thread_data->pipes.msg[1]);
	thread_data->pipes.msg[1] = -1;
	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
	if (err > 0)
		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
	else
		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
			   thread->tid, tid);

	return 0;
}

static int record__start_threads(struct record *rec)
{
	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
	struct record_thread *thread_data = rec->thread_data;
	sigset_t full, mask;
	pthread_t handle;
	pthread_attr_t attrs;

	thread = &thread_data[0];

	if (!record__threads_enabled(rec))
		return 0;

	sigfillset(&full);
	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
		return -1;
	}

	pthread_attr_init(&attrs);
	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);

	for (t = 1; t < nr_threads; t++) {
		enum thread_msg msg = THREAD_MSG__UNDEFINED;

#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
		pthread_attr_setaffinity_np(&attrs,
					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
#endif
		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
			for (tt = 1; tt < t; tt++)
				record__terminate_thread(&thread_data[tt]);
			pr_err("Failed to start threads: %s\n", strerror(errno));
			ret = -1;
			goto out_err;
		}

		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
		if (err > 0)
			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
				  thread_msg_tags[msg]);
		else
			pr_warning("threads[%d]: failed to receive start notification from %d\n",
				   thread->tid, rec->thread_data[t].tid);
	}

	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
			  (cpu_set_t *)thread->mask->affinity.bits);

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

out_err:
	pthread_attr_destroy(&attrs);

	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
		ret = -1;
	}

	return ret;
}

static int record__stop_threads(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 1; t < rec->nr_threads; t++)
		record__terminate_thread(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		rec->samples += thread_data[t].samples;
		if (!record__threads_enabled(rec))
			continue;
		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
			 thread_data[t].samples, thread_data[t].waking);
		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
		else
			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
	}

	return 0;
}

static unsigned long record__waking(struct record *rec)
{
	int t;
	unsigned long waking = 0;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		waking += thread_data[t].waking;

	return waking;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;
	float ratio = 0;
	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.record_cgroup) {
#ifdef HAVE_FILE_HANDLE
		tool->cgroup_events = true;
#else
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		return PTR_ERR(session);
	}

	if (record__threads_enabled(rec)) {
		if (perf_data__is_pipe(&rec->data)) {
			pr_err("Parallel trace streaming is not available in pipe mode.\n");
			return -1;
		}
		if (rec->opts.full_auxtrace) {
			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
			return -1;
		}
	}

	fd = perf_data__fd(data);
	rec->session = session;

	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}
#ifdef HAVE_EVENTFD_SUPPORT
	done_fd = eventfd(0, EFD_NONBLOCK);
	if (done_fd < 0) {
		pr_err("Failed to create wakeup eventfd, error: %m\n");
		status = -1;
		goto out_delete_session;
	}
	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
	if (err < 0) {
		pr_err("Failed to add wakeup eventfd to poll list\n");
		status = err;
		goto out_delete_session;
	}
#endif

	session->header.env.comp_type = PERF_COMP_ZSTD;
2263 session->header.env.comp_level = rec->opts.comp_level;
2264
2265 if (rec->opts.kcore &&
2266 !record__kcore_readable(&session->machines.host)) {
2267 pr_err("ERROR: kcore is not readable.\n");
2268 return -1;
2269 }
2270
2271 if (record__init_clock(rec))
2272 return -1;
2273
2274 record__init_features(rec);
2275
2276 if (forks) {
2277 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2278 workload_exec_failed_signal);
2279 if (err < 0) {
2280 pr_err("Couldn't run the workload!\n");
2281 status = err;
2282 goto out_delete_session;
2283 }
2284 }
2285
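	/*
	 * If we have just a single event and are sending data through a
	 * pipe, the ids allocation must be forced, because the event name
	 * is synthesized through the pipe and all events need to stay
	 * together.
	 */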
2292 if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2293 rec->opts.sample_id = true;
2294
2295 record__uniquify_name(rec);
2296
2297 if (record__open(rec) != 0) {
2298 err = -1;
2299 goto out_free_threads;
2300 }
2301 session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2302
2303 if (rec->opts.kcore) {
2304 err = record__kcore_copy(&session->machines.host, data);
2305 if (err) {
2306 pr_err("ERROR: Failed to copy kcore\n");
2307 goto out_free_threads;
2308 }
2309 }
2310
2311 err = bpf__apply_obj_config();
2312 if (err) {
2313 char errbuf[BUFSIZ];
2314
2315 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2316 pr_err("ERROR: Apply config to BPF failed: %s\n",
2317 errbuf);
2318 goto out_free_threads;
2319 }
2320
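	/*
	 * Normally perf_session__new() would do this, but it doesn't have
	 * the evlist at hand to check for sample_id_all support.
	 */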
2325 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2326 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2327 rec->tool.ordered_events = false;
2328 }
2329
2330 if (!rec->evlist->core.nr_groups)
2331 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2332
2333 if (data->is_pipe) {
2334 err = perf_header__write_pipe(fd);
2335 if (err < 0)
2336 goto out_free_threads;
2337 } else {
2338 err = perf_session__write_header(session, rec->evlist, fd, false);
2339 if (err < 0)
2340 goto out_free_threads;
2341 }
2342
2343 err = -1;
2344 if (!rec->no_buildid
2345 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2346 pr_err("Couldn't generate buildids. "
2347 "Use --no-buildid to profile anyway.\n");
2348 goto out_free_threads;
2349 }
2350
2351 err = record__setup_sb_evlist(rec);
2352 if (err)
2353 goto out_free_threads;
2354
2355 err = record__synthesize(rec, false);
2356 if (err < 0)
2357 goto out_free_threads;
2358
2359 if (rec->realtime_prio) {
2360 struct sched_param param;
2361
2362 param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2364 pr_err("Could not set realtime priority.\n");
2365 err = -1;
2366 goto out_free_threads;
2367 }
2368 }
2369
2370 if (record__start_threads(rec))
2371 goto out_free_threads;
2372
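	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set, so
	 * don't spoil it by prematurely enabling them.
	 */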
2378 if (!target__none(&opts->target) && !opts->initial_delay)
2379 evlist__enable(rec->evlist);
2380
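	/*
	 * Let the child rip.
	 */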
2384 if (forks) {
2385 struct machine *machine = &session->machines.host;
2386 union perf_event *event;
2387 pid_t tgid;
2388
2389 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2390 if (event == NULL) {
2391 err = -ENOMEM;
2392 goto out_child;
2393 }
2394
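		/*
		 * Some H/W events are generated before the COMM event, which
		 * is emitted during exec(), so perf script cannot see a
		 * correct process name for those events. Synthesize a COMM
		 * event to prevent that.
		 */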
2401 tgid = perf_event__synthesize_comm(tool, event,
2402 rec->evlist->workload.pid,
2403 process_synthesized_event,
2404 machine);
2405 free(event);
2406
2407 if (tgid == -1)
2408 goto out_child;
2409
2410 event = malloc(sizeof(event->namespaces) +
2411 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2412 machine->id_hdr_size);
2413 if (event == NULL) {
2414 err = -ENOMEM;
2415 goto out_child;
2416 }
2417
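		/*
		 * Synthesize the NAMESPACES event for the command specified.
		 */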
2421 perf_event__synthesize_namespaces(tool, event,
2422 rec->evlist->workload.pid,
2423 tgid, process_synthesized_event,
2424 machine);
2425 free(event);
2426
2427 evlist__start_workload(rec->evlist);
2428 }
2429
2430 if (opts->initial_delay) {
2431 pr_info(EVLIST_DISABLED_MSG);
2432 if (opts->initial_delay > 0) {
2433 usleep(opts->initial_delay * USEC_PER_MSEC);
2434 evlist__enable(rec->evlist);
2435 pr_info(EVLIST_ENABLED_MSG);
2436 }
2437 }
2438
2439 trigger_ready(&auxtrace_snapshot_trigger);
2440 trigger_ready(&switch_output_trigger);
2441 perf_hooks__invoke_record_start();
2442
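	/*
	 * Must write FINISHED_INIT so it will be seen after all other
	 * synthesized user events, but before any regular events.
	 */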
2447 err = write_finished_init(rec, false);
2448 if (err < 0)
2449 goto out_child;
2450
2451 for (;;) {
2452 unsigned long long hits = thread->samples;
2453
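		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the previous
		 * round.
		 *
		 * evlist__toggle_bkw_mmap() ensures BKW_MMAP_EMPTY is never
		 * converted to BKW_MMAP_DATA_PENDING.
		 */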
2462 if (trigger_is_hit(&switch_output_trigger) || done || draining)
2463 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2464
2465 if (record__mmap_read_all(rec, false) < 0) {
2466 trigger_error(&auxtrace_snapshot_trigger);
2467 trigger_error(&switch_output_trigger);
2468 err = -1;
2469 goto out_child;
2470 }
2471
2472 if (auxtrace_record__snapshot_started) {
2473 auxtrace_record__snapshot_started = 0;
2474 if (!trigger_is_error(&auxtrace_snapshot_trigger))
2475 record__read_auxtrace_snapshot(rec, false);
2476 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2477 pr_err("AUX area tracing snapshot failed\n");
2478 err = -1;
2479 goto out_child;
2480 }
2481 }
2482
2483 if (trigger_is_hit(&switch_output_trigger)) {
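			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 was raised after or during
			 * record__mmap_read_all(), it didn't collect data from
			 * the overwritable ring buffer. Read again.
			 */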
2493 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2494 continue;
2495 trigger_ready(&switch_output_trigger);
2496
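			/*
			 * Reenable events in the overwrite ring buffer after
			 * record__mmap_read_all(): the data from it should
			 * have been collected by now.
			 */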
2502 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2503
2504 if (!quiet)
2505 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2506 record__waking(rec));
2507 thread->waking = 0;
2508 fd = record__switch_output(rec, false);
2509 if (fd < 0) {
2510 pr_err("Failed to switch to new file\n");
2511 trigger_error(&switch_output_trigger);
2512 err = fd;
2513 goto out_child;
2514 }
2515
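			/* Re-arm the alarm for the next time-based switch. */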
2517 if (rec->switch_output.time)
2518 alarm(rec->switch_output.time);
2519 }
2520
2521 if (hits == thread->samples) {
2522 if (done || draining)
2523 break;
2524 err = fdarray__poll(&thread->pollfd, -1);
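			/*
			 * Propagate an error only if there is one. A positive
			 * number of returned events and an interrupted poll
			 * (EINTR) are not errors.
			 */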
2529 if (err > 0 || (err < 0 && errno == EINTR))
2530 err = 0;
2531 thread->waking++;
2532
2533 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2534 record__thread_munmap_filtered, NULL) == 0)
2535 draining = true;
2536
2537 evlist__ctlfd_update(rec->evlist,
2538 &thread->pollfd.entries[thread->ctlfd_pos]);
2539 }
2540
2541 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2542 switch (cmd) {
2543 case EVLIST_CTL_CMD_SNAPSHOT:
2544 hit_auxtrace_snapshot_trigger(rec);
2545 evlist__ctlfd_ack(rec->evlist);
2546 break;
2547 case EVLIST_CTL_CMD_STOP:
2548 done = 1;
2549 break;
2550 case EVLIST_CTL_CMD_ACK:
2551 case EVLIST_CTL_CMD_UNSUPPORTED:
2552 case EVLIST_CTL_CMD_ENABLE:
2553 case EVLIST_CTL_CMD_DISABLE:
2554 case EVLIST_CTL_CMD_EVLIST:
2555 case EVLIST_CTL_CMD_PING:
2556 default:
2557 break;
2558 }
2559 }
2560
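		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */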
2566 if (done && !disabled && !target__none(&opts->target)) {
2567 trigger_off(&auxtrace_snapshot_trigger);
2568 evlist__disable(rec->evlist);
2569 disabled = true;
2570 }
2571 }
2572
2573 trigger_off(&auxtrace_snapshot_trigger);
2574 trigger_off(&switch_output_trigger);
2575
2576 if (opts->auxtrace_snapshot_on_exit)
2577 record__auxtrace_snapshot_exit(rec);
2578
2579 if (forks && workload_exec_errno) {
2580 char msg[STRERR_BUFSIZE], strevsels[2048];
2581 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2582
2583 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2584
2585 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2586 strevsels, argv[0], emsg);
2587 err = -1;
2588 goto out_child;
2589 }
2590
2591 if (!quiet)
2592 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2593 record__waking(rec));
2594
2595 write_finished_init(rec, true);
2596
2597 if (target__none(&rec->opts.target))
2598 record__synthesize_workload(rec, true);
2599
2600 out_child:
2601 record__stop_threads(rec);
2602 record__mmap_read_all(rec, true);
2603 out_free_threads:
2604 record__free_thread_data(rec);
2605 evlist__finalize_ctlfd(rec->evlist);
2606 record__aio_mmap_read_sync(rec);
2607
2608 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2609 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2610 session->header.env.comp_ratio = ratio + 0.5;
2611 }
2612
2613 if (forks) {
2614 int exit_status;
2615
2616 if (!child_finished)
2617 kill(rec->evlist->workload.pid, SIGTERM);
2618
2619 wait(&exit_status);
2620
2621 if (err < 0)
2622 status = err;
2623 else if (WIFEXITED(exit_status))
2624 status = WEXITSTATUS(exit_status);
2625 else if (WIFSIGNALED(exit_status))
2626 signr = WTERMSIG(exit_status);
2627 } else
2628 status = err;
2629
2630 if (rec->off_cpu)
2631 rec->bytes_written += off_cpu_write(rec->session);
2632
2633 record__synthesize(rec, true);
2634
2635 rec->samples = 0;
2636
2637 if (!err) {
2638 if (!rec->timestamp_filename) {
2639 record__finish_output(rec);
2640 } else {
2641 fd = record__switch_output(rec, true);
2642 if (fd < 0) {
2643 status = fd;
2644 goto out_delete_session;
2645 }
2646 }
2647 }
2648
2649 perf_hooks__invoke_record_end();
2650
2651 if (!err && !quiet) {
2652 char samples[128];
2653 const char *postfix = rec->timestamp_filename ?
2654 ".<timestamp>" : "";
2655
2656 if (rec->samples && !rec->opts.full_auxtrace)
2657 scnprintf(samples, sizeof(samples),
2658 " (%" PRIu64 " samples)", rec->samples);
2659 else
2660 samples[0] = '\0';
2661
2662 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2663 perf_data__size(data) / 1024.0 / 1024.0,
2664 data->path, postfix, samples);
2665 if (ratio) {
2666 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2667 rec->session->bytes_transferred / 1024.0 / 1024.0,
2668 ratio);
2669 }
2670 fprintf(stderr, " ]\n");
2671 }
2672
2673 out_delete_session:
2674 #ifdef HAVE_EVENTFD_SUPPORT
2675 if (done_fd >= 0)
2676 close(done_fd);
2677 #endif
2678 zstd_fini(&session->zstd_data);
2679 perf_session__delete(session);
2680
2681 if (!opts->no_bpf_event)
2682 evlist__stop_sb_thread(rec->sb_evlist);
2683 return status;
2684 }
2685
2686 static void callchain_debug(struct callchain_param *callchain)
2687 {
2688 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2689
2690 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2691
2692 if (callchain->record_mode == CALLCHAIN_DWARF)
2693 pr_debug("callchain: stack dump size %d\n",
2694 callchain->dump_size);
2695 }
2696
2697 int record_opts__parse_callchain(struct record_opts *record,
2698 struct callchain_param *callchain,
2699 const char *arg, bool unset)
2700 {
2701 int ret;
2702 callchain->enabled = !unset;
2703
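	/* --no-call-graph: callchain recording was explicitly disabled. */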
2705 if (unset) {
2706 callchain->record_mode = CALLCHAIN_NONE;
2707 pr_debug("callchain: disabled\n");
2708 return 0;
2709 }
2710
2711 ret = parse_callchain_record_opt(arg, callchain);
2712 if (!ret) {
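		/* DWARF unwinding also needs data address sampling. */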
2714 if (callchain->record_mode == CALLCHAIN_DWARF)
2715 record->sample_address = true;
2716 callchain_debug(callchain);
2717 }
2718
2719 return ret;
2720 }
2721
2722 int record_parse_callchain_opt(const struct option *opt,
2723 const char *arg,
2724 int unset)
2725 {
2726 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2727 }
2728
2729 int record_callchain_opt(const struct option *opt,
2730 const char *arg __maybe_unused,
2731 int unset __maybe_unused)
2732 {
2733 struct callchain_param *callchain = opt->value;
2734
2735 callchain->enabled = true;
2736
2737 if (callchain->record_mode == CALLCHAIN_NONE)
2738 callchain->record_mode = CALLCHAIN_FP;
2739
2740 callchain_debug(callchain);
2741 return 0;
2742 }
2743
2744 static int perf_record_config(const char *var, const char *value, void *cb)
2745 {
2746 struct record *rec = cb;
2747
2748 if (!strcmp(var, "record.build-id")) {
2749 if (!strcmp(value, "cache"))
2750 rec->no_buildid_cache = false;
2751 else if (!strcmp(value, "no-cache"))
2752 rec->no_buildid_cache = true;
2753 else if (!strcmp(value, "skip"))
2754 rec->no_buildid = true;
2755 else if (!strcmp(value, "mmap"))
2756 rec->buildid_mmap = true;
2757 else
2758 return -1;
2759 return 0;
2760 }
2761 if (!strcmp(var, "record.call-graph")) {
2762 var = "call-graph.record-mode";
2763 return perf_default_config(var, value, cb);
2764 }
2765 #ifdef HAVE_AIO_SUPPORT
2766 if (!strcmp(var, "record.aio")) {
2767 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2768 if (!rec->opts.nr_cblocks)
2769 rec->opts.nr_cblocks = nr_cblocks_default;
2770 }
2771 #endif
2772 if (!strcmp(var, "record.debuginfod")) {
2773 rec->debuginfod.urls = strdup(value);
2774 if (!rec->debuginfod.urls)
2775 return -ENOMEM;
2776 rec->debuginfod.set = true;
2777 }
2778
2779 return 0;
2780 }
2781
2783 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2784 {
2785 struct record_opts *opts = (struct record_opts *)opt->value;
2786
2787 if (unset || !str)
2788 return 0;
2789
2790 if (!strcasecmp(str, "node"))
2791 opts->affinity = PERF_AFFINITY_NODE;
2792 else if (!strcasecmp(str, "cpu"))
2793 opts->affinity = PERF_AFFINITY_CPU;
2794
2795 return 0;
2796 }
2797
2798 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2799 {
2800 mask->nbits = nr_bits;
2801 mask->bits = bitmap_zalloc(mask->nbits);
2802 if (!mask->bits)
2803 return -ENOMEM;
2804
2805 return 0;
2806 }
2807
2808 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2809 {
2810 bitmap_free(mask->bits);
2811 mask->nbits = 0;
2812 }
2813
2814 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2815 {
2816 int ret;
2817
2818 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2819 if (ret) {
2820 mask->affinity.bits = NULL;
2821 return ret;
2822 }
2823
2824 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2825 if (ret) {
2826 record__mmap_cpu_mask_free(&mask->maps);
2827 mask->maps.bits = NULL;
2828 }
2829
2830 return ret;
2831 }
2832
2833 static void record__thread_mask_free(struct thread_mask *mask)
2834 {
2835 record__mmap_cpu_mask_free(&mask->maps);
2836 record__mmap_cpu_mask_free(&mask->affinity);
2837 }
2838
2839 static int record__parse_threads(const struct option *opt, const char *str, int unset)
2840 {
2841 int s;
2842 struct record_opts *opts = opt->value;
2843
2844 if (unset || !str || !strlen(str)) {
2845 opts->threads_spec = THREAD_SPEC__CPU;
2846 } else {
2847 for (s = 1; s < THREAD_SPEC__MAX; s++) {
2848 if (s == THREAD_SPEC__USER) {
2849 opts->threads_user_spec = strdup(str);
2850 if (!opts->threads_user_spec)
2851 return -ENOMEM;
2852 opts->threads_spec = THREAD_SPEC__USER;
2853 break;
2854 }
2855 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
2856 opts->threads_spec = s;
2857 break;
2858 }
2859 }
2860 }
2861
2862 if (opts->threads_spec == THREAD_SPEC__USER)
2863 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
2864 else
2865 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
2866
2867 return 0;
2868 }
2869
2870 static int parse_output_max_size(const struct option *opt,
2871 const char *str, int unset)
2872 {
2873 unsigned long *s = (unsigned long *)opt->value;
2874 static struct parse_tag tags_size[] = {
2875 { .tag = 'B', .mult = 1 },
2876 { .tag = 'K', .mult = 1 << 10 },
2877 { .tag = 'M', .mult = 1 << 20 },
2878 { .tag = 'G', .mult = 1 << 30 },
2879 { .tag = 0 },
2880 };
2881 unsigned long val;
2882
2883 if (unset) {
2884 *s = 0;
2885 return 0;
2886 }
2887
2888 val = parse_tag_value(str, tags_size);
2889 if (val != (unsigned long) -1) {
2890 *s = val;
2891 return 0;
2892 }
2893
2894 return -1;
2895 }
2896
2897 static int record__parse_mmap_pages(const struct option *opt,
2898 const char *str,
2899 int unset __maybe_unused)
2900 {
2901 struct record_opts *opts = opt->value;
2902 char *s, *p;
2903 unsigned int mmap_pages;
2904 int ret;
2905
2906 if (!str)
2907 return -EINVAL;
2908
2909 s = strdup(str);
2910 if (!s)
2911 return -ENOMEM;
2912
2913 p = strchr(s, ',');
2914 if (p)
2915 *p = '\0';
2916
2917 if (*s) {
2918 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
2919 if (ret)
2920 goto out_free;
2921 opts->mmap_pages = mmap_pages;
2922 }
2923
2924 if (!p) {
2925 ret = 0;
2926 goto out_free;
2927 }
2928
2929 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
2930 if (ret)
2931 goto out_free;
2932
2933 opts->auxtrace_mmap_pages = mmap_pages;
2934
2935 out_free:
2936 free(s);
2937 return ret;
2938 }
2939
2940 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
2941 {
2942 }
2943
2944 static int parse_control_option(const struct option *opt,
2945 const char *str,
2946 int unset __maybe_unused)
2947 {
2948 struct record_opts *opts = opt->value;
2949
2950 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
2951 }
2952
2953 static void switch_output_size_warn(struct record *rec)
2954 {
2955 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2956 struct switch_output *s = &rec->switch_output;
2957
2958 wakeup_size /= 2;
2959
2960 if (s->size < wakeup_size) {
2961 char buf[100];
2962
2963 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size is lower than "
			   "the wakeup kernel buffer size (%s); "
			   "expect bigger perf.data sizes\n", buf);
2967 }
2968 }
2969
2970 static int switch_output_setup(struct record *rec)
2971 {
2972 struct switch_output *s = &rec->switch_output;
2973 static struct parse_tag tags_size[] = {
2974 { .tag = 'B', .mult = 1 },
2975 { .tag = 'K', .mult = 1 << 10 },
2976 { .tag = 'M', .mult = 1 << 20 },
2977 { .tag = 'G', .mult = 1 << 30 },
2978 { .tag = 0 },
2979 };
2980 static struct parse_tag tags_time[] = {
2981 { .tag = 's', .mult = 1 },
2982 { .tag = 'm', .mult = 60 },
2983 { .tag = 'h', .mult = 60*60 },
2984 { .tag = 'd', .mult = 60*60*24 },
2985 { .tag = 0 },
2986 };
2987 unsigned long val;
2988
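	/*
	 * --switch-output-event implies --switch-output=signal: the side
	 * band thread sends SIGUSR2 to its parent to trigger the switch.
	 */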
2994 if (rec->switch_output_event_set) {
2995 if (record__threads_enabled(rec)) {
2996 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
2997 return 0;
2998 }
2999 goto do_signal;
3000 }
3001
3002 if (!s->set)
3003 return 0;
3004
3005 if (record__threads_enabled(rec)) {
3006 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3007 return 0;
3008 }
3009
3010 if (!strcmp(s->str, "signal")) {
3011 do_signal:
3012 s->signal = true;
3013 pr_debug("switch-output with SIGUSR2 signal\n");
3014 goto enabled;
3015 }
3016
3017 val = parse_tag_value(s->str, tags_size);
3018 if (val != (unsigned long) -1) {
3019 s->size = val;
3020 pr_debug("switch-output with %s size threshold\n", s->str);
3021 goto enabled;
3022 }
3023
3024 val = parse_tag_value(s->str, tags_time);
3025 if (val != (unsigned long) -1) {
3026 s->time = val;
3027 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3028 s->str, s->time);
3029 goto enabled;
3030 }
3031
3032 return -1;
3033
3034 enabled:
3035 rec->timestamp_filename = true;
3036 s->enabled = true;
3037
3038 if (s->size && !rec->opts.no_buffering)
3039 switch_output_size_warn(rec);
3040
3041 return 0;
3042 }
3043
3044 static const char * const __record_usage[] = {
3045 "perf record [<options>] [<command>]",
3046 "perf record [<options>] -- <command> [<options>]",
3047 NULL
3048 };
3049 const char * const *record_usage = __record_usage;
3050
3051 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3052 struct perf_sample *sample, struct machine *machine)
3053 {
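	/*
	 * The kernel maps are already in place, put there via
	 * perf_session__create_kernel_maps(); no need to add them twice.
	 */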
3058 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3059 return 0;
3060 return perf_event__process_mmap(tool, event, sample, machine);
3061 }
3062
3063 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3064 struct perf_sample *sample, struct machine *machine)
3065 {
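	/*
	 * The kernel maps are already in place, put there via
	 * perf_session__create_kernel_maps(); no need to add them twice.
	 */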
3070 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3071 return 0;
3072
3073 return perf_event__process_mmap2(tool, event, sample, machine);
3074 }
3075
3076 static int process_timestamp_boundary(struct perf_tool *tool,
3077 union perf_event *event __maybe_unused,
3078 struct perf_sample *sample,
3079 struct machine *machine __maybe_unused)
3080 {
3081 struct record *rec = container_of(tool, struct record, tool);
3082
3083 set_timestamp_boundary(rec, sample->time);
3084 return 0;
3085 }
3086
3087 static int parse_record_synth_option(const struct option *opt,
3088 const char *str,
3089 int unset __maybe_unused)
3090 {
3091 struct record_opts *opts = opt->value;
3092 char *p = strdup(str);
3093
3094 if (p == NULL)
3095 return -1;
3096
3097 opts->synth = parse_synth_opt(p);
3098 free(p);
3099
3100 if (opts->synth < 0) {
3101 pr_err("Invalid synth option: %s\n", str);
3102 return -1;
3103 }
3104 return 0;
3105 }
3106
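/*
 * Ideally this would be local to cmd_record() and passed around, but
 * record_options below needs to be accessible to builtin-script, so the
 * record state stays a global for now.
 */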
3117 static struct record record = {
3118 .opts = {
3119 .sample_time = true,
3120 .mmap_pages = UINT_MAX,
3121 .user_freq = UINT_MAX,
3122 .user_interval = ULLONG_MAX,
3123 .freq = 4000,
3124 .target = {
3125 .uses_mmap = true,
3126 .default_per_cpu = true,
3127 },
3128 .mmap_flush = MMAP_FLUSH_DEFAULT,
3129 .nr_threads_synthesize = 1,
3130 .ctl_fd = -1,
3131 .ctl_fd_ack = -1,
3132 .synth = PERF_SYNTH_ALL,
3133 },
3134 .tool = {
3135 .sample = process_sample_event,
3136 .fork = perf_event__process_fork,
3137 .exit = perf_event__process_exit,
3138 .comm = perf_event__process_comm,
3139 .namespaces = perf_event__process_namespaces,
3140 .mmap = build_id__process_mmap,
3141 .mmap2 = build_id__process_mmap2,
3142 .itrace_start = process_timestamp_boundary,
3143 .aux = process_timestamp_boundary,
3144 .ordered_events = true,
3145 },
3146 };
3147
3148 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3149 "\n\t\t\t\tDefault: fp";
3150
3151 static bool dry_run;
3152
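/*
 * Stays a global until builtin-script.c stops poking at it and switches to
 * the evlist library functions (record_opts, evlist__prepare_workload, etc.)
 * instead of fork+exec'ing 'perf record' over pipes.
 */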
3160 static struct option __record_options[] = {
3161 OPT_CALLBACK('e', "event", &record.evlist, "event",
3162 "event selector. use 'perf list' to list available events",
3163 parse_events_option),
3164 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3165 "event filter", parse_filter),
3166 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3167 NULL, "don't record events from perf itself",
3168 exclude_perf),
3169 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3170 "record events on existing process id"),
3171 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3172 "record events on existing thread id"),
3173 OPT_INTEGER('r', "realtime", &record.realtime_prio,
3174 "collect data with this RT SCHED_FIFO priority"),
3175 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3176 "collect data without buffering"),
3177 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3178 "collect raw sample records from all opened counters"),
3179 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3180 "system-wide collection from all CPUs"),
3181 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3182 "list of cpus to monitor"),
3183 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3184 OPT_STRING('o', "output", &record.data.path, "file",
3185 "output file name"),
3186 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3187 &record.opts.no_inherit_set,
3188 "child tasks do not inherit counters"),
3189 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3190 "synthesize non-sample events at the end of output"),
3191 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3192 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3193 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3194 "Fail if the specified frequency can't be used"),
3195 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3196 "profile at this frequency",
3197 record__parse_freq),
3198 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3199 "number of mmap data pages and AUX area tracing mmap pages",
3200 record__parse_mmap_pages),
3201 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3202 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3203 record__mmap_flush_parse),
3204 OPT_BOOLEAN(0, "group", &record.opts.group,
3205 "put the counters into a counter group"),
3206 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3207 NULL, "enables call-graph recording" ,
3208 &record_callchain_opt),
3209 OPT_CALLBACK(0, "call-graph", &record.opts,
3210 "record_mode[,record_size]", record_callchain_help,
3211 &record_parse_callchain_opt),
3212 OPT_INCR('v', "verbose", &verbose,
3213 "be more verbose (show counter open errors, etc)"),
3214 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
3215 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3216 "per thread counts"),
3217 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3218 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3219 "Record the sample physical addresses"),
3220 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3221 "Record the sampled data address data page size"),
3222 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3223 "Record the sampled code address (ip) page size"),
3224 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3225 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3226 "Record the sample identifier"),
3227 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3228 &record.opts.sample_time_set,
3229 "Record the sample timestamps"),
3230 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3231 "Record the sample period"),
3232 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3233 "don't sample"),
3234 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3235 &record.no_buildid_cache_set,
3236 "do not update the buildid cache"),
3237 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3238 &record.no_buildid_set,
3239 "do not collect buildids in perf.data"),
3240 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3241 "monitor event in cgroup name only",
3242 parse_cgroups),
3243 OPT_INTEGER('D', "delay", &record.opts.initial_delay,
3244 "ms to wait before starting measurement after program start (-1: start with events disabled)"),
3245 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3246 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3247 "user to profile"),
3248
3249 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3250 "branch any", "sample any taken branches",
3251 parse_branch_stack),
3252
3253 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3254 "branch filter mask", "branch stack filter modes",
3255 parse_branch_stack),
3256 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3257 "sample by weight (on special events only)"),
3258 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3259 "sample transaction flags (special events only)"),
3260 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3261 "use per-thread mmaps"),
3262 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3263 "sample selected machine registers on interrupt,"
3264 " use '-I?' to list register names", parse_intr_regs),
3265 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3266 "sample selected machine registers on interrupt,"
3267 " use '--user-regs=?' to list register names", parse_user_regs),
3268 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3269 "Record running/enabled time of read (:S) events"),
3270 OPT_CALLBACK('k', "clockid", &record.opts,
3271 "clockid", "clockid to use for events, see clock_gettime()",
3272 parse_clockid),
3273 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3274 "opts", "AUX area tracing Snapshot Mode", ""),
3275 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3276 "opts", "sample AUX area", ""),
3277 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3278 "per thread proc mmap processing timeout in ms"),
3279 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3280 "Record namespaces events"),
3281 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3282 "Record cgroup events"),
3283 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3284 &record.opts.record_switch_events_set,
3285 "Record context switch events"),
3286 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3287 "Configure all used events to run in kernel space.",
3288 PARSE_OPT_EXCLUSIVE),
3289 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3290 "Configure all used events to run in user space.",
3291 PARSE_OPT_EXCLUSIVE),
3292 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3293 "collect kernel callchains"),
3294 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3295 "collect user callchains"),
3296 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
3297 "clang binary to use for compiling BPF scriptlets"),
3298 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
3299 "options passed to clang when compiling BPF scriptlets"),
3300 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3301 "file", "vmlinux pathname"),
3302 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3303 "Record build-id of all DSOs regardless of hits"),
3304 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3305 "Record build-id in map events"),
3306 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3307 "append timestamp to output filename"),
3308 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3309 "Record timestamp boundary (time of first/last samples)"),
3310 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3311 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3312 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3313 "signal"),
3314 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
3315 "switch output event selector. use 'perf list' to list available events",
3316 parse_events_option_new_evlist),
3317 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3318 "Limit number of switch output generated files"),
3319 OPT_BOOLEAN(0, "dry-run", &dry_run,
3320 "Parse options then exit"),
3321 #ifdef HAVE_AIO_SUPPORT
3322 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3323 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3324 record__aio_parse),
3325 #endif
3326 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3327 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3328 record__parse_affinity),
3329 #ifdef HAVE_ZSTD_SUPPORT
3330 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3331 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3332 record__parse_comp_level),
3333 #endif
3334 OPT_CALLBACK(0, "max-size", &record.output_max_size,
3335 "size", "Limit the maximum size of the output file", parse_output_max_size),
3336 OPT_UINTEGER(0, "num-thread-synthesize",
3337 &record.opts.nr_threads_synthesize,
3338 "number of threads to run for event synthesis"),
3339 #ifdef HAVE_LIBPFM
3340 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3341 "libpfm4 event selector. use 'perf list' to list available events",
3342 parse_libpfm_events_option),
3343 #endif
3344 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3345 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3346 "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
3347 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3348 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3349 parse_control_option),
3350 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3351 "Fine-tune event synthesis: default=all", parse_record_synth_option),
3352 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3353 &record.debuginfod.set, "debuginfod urls",
3354 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3355 "system"),
3356 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3357 "write collected trace data into several data files using parallel threads",
3358 record__parse_threads),
3359 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3360 OPT_END()
3361 };
3362
3363 struct option *record_options = __record_options;
3364
3365 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3366 {
3367 struct perf_cpu cpu;
3368 int idx;
3369
3370 if (cpu_map__is_dummy(cpus))
3371 return 0;
3372
3373 perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
3374 if (cpu.cpu == -1)
3375 continue;
3376
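		/* Reject CPU numbers beyond the size of the allocated mask. */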
3377 if ((unsigned long)cpu.cpu > mask->nbits)
3378 return -ENODEV;
3379 set_bit(cpu.cpu, mask->bits);
3380 }
3381
3382 return 0;
3383 }
3384
3385 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3386 {
	struct perf_cpu_map *cpus;
	int ret;
3388
3389 cpus = perf_cpu_map__new(mask_spec);
3390 if (!cpus)
3391 return -ENOMEM;
3392
	bitmap_zero(mask->bits, mask->nbits);
	ret = record__mmap_cpu_mask_init(mask, cpus);
	perf_cpu_map__put(cpus);

	return ret ? -ENODEV : 0;
3400 }
3401
3402 static void record__free_thread_masks(struct record *rec, int nr_threads)
3403 {
3404 int t;
3405
3406 if (rec->thread_masks)
3407 for (t = 0; t < nr_threads; t++)
3408 record__thread_mask_free(&rec->thread_masks[t]);
3409
3410 zfree(&rec->thread_masks);
3411 }
3412
3413 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3414 {
3415 int t, ret;
3416
3417 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3418 if (!rec->thread_masks) {
3419 pr_err("Failed to allocate thread masks\n");
3420 return -ENOMEM;
3421 }
3422
3423 for (t = 0; t < nr_threads; t++) {
3424 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3425 if (ret) {
3426 pr_err("Failed to allocate thread masks[%d]\n", t);
3427 goto out_free;
3428 }
3429 }
3430
3431 return 0;
3432
3433 out_free:
3434 record__free_thread_masks(rec, nr_threads);
3435
3436 return ret;
3437 }
3438
3439 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3440 {
3441 int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3442
3443 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3444 if (ret)
3445 return ret;
3446
3447 rec->nr_threads = nr_cpus;
3448 pr_debug("nr_threads: %d\n", rec->nr_threads);
3449
3450 for (t = 0; t < rec->nr_threads; t++) {
3451 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3452 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3453 if (verbose) {
3454 pr_debug("thread_masks[%d]: ", t);
3455 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3456 pr_debug("thread_masks[%d]: ", t);
3457 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3458 }
3459 }
3460
3461 return 0;
3462 }
3463
3464 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3465 const char **maps_spec, const char **affinity_spec,
3466 u32 nr_spec)
3467 {
3468 u32 s;
3469 int ret = 0, t = 0;
3470 struct mmap_cpu_mask cpus_mask;
3471 struct thread_mask thread_mask, full_mask, *thread_masks;
3472
3473 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3474 if (ret) {
3475 pr_err("Failed to allocate CPUs mask\n");
3476 return ret;
3477 }
3478
3479 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3480 if (ret) {
3481 pr_err("Failed to init cpu mask\n");
3482 goto out_free_cpu_mask;
3483 }
3484
3485 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3486 if (ret) {
3487 pr_err("Failed to allocate full mask\n");
3488 goto out_free_cpu_mask;
3489 }
3490
3491 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3492 if (ret) {
3493 pr_err("Failed to allocate thread mask\n");
3494 goto out_free_full_and_cpu_masks;
3495 }
3496
3497 for (s = 0; s < nr_spec; s++) {
3498 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3499 if (ret) {
3500 pr_err("Failed to initialize maps thread mask\n");
3501 goto out_free;
3502 }
3503 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3504 if (ret) {
3505 pr_err("Failed to initialize affinity thread mask\n");
3506 goto out_free;
3507 }
3508
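		/* Ignore invalid CPUs but do not allow empty masks. */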
3510 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3511 cpus_mask.bits, thread_mask.maps.nbits)) {
3512 pr_err("Empty maps mask: %s\n", maps_spec[s]);
3513 ret = -EINVAL;
3514 goto out_free;
3515 }
3516 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3517 cpus_mask.bits, thread_mask.affinity.nbits)) {
3518 pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3519 ret = -EINVAL;
3520 goto out_free;
3521 }
3522
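		/* Do not allow intersection across the per-thread masks. */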
3524 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3525 thread_mask.maps.nbits)) {
3526 pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3527 ret = -EINVAL;
3528 goto out_free;
3529 }
3530 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3531 thread_mask.affinity.nbits)) {
3532 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3533 ret = -EINVAL;
3534 goto out_free;
3535 }
3536
3537 bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3538 thread_mask.maps.bits, full_mask.maps.nbits);
3539 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
			  thread_mask.affinity.bits, full_mask.affinity.nbits);
3541
3542 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3543 if (!thread_masks) {
3544 pr_err("Failed to reallocate thread masks\n");
3545 ret = -ENOMEM;
3546 goto out_free;
3547 }
3548 rec->thread_masks = thread_masks;
3549 rec->thread_masks[t] = thread_mask;
3550 if (verbose) {
3551 pr_debug("thread_masks[%d]: ", t);
3552 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3553 pr_debug("thread_masks[%d]: ", t);
3554 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3555 }
3556 t++;
3557 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3558 if (ret) {
3559 pr_err("Failed to allocate thread mask\n");
3560 goto out_free_full_and_cpu_masks;
3561 }
3562 }
3563 rec->nr_threads = t;
3564 pr_debug("nr_threads: %d\n", rec->nr_threads);
3565 if (!rec->nr_threads)
3566 ret = -EINVAL;
3567
3568 out_free:
3569 record__thread_mask_free(&thread_mask);
3570 out_free_full_and_cpu_masks:
3571 record__thread_mask_free(&full_mask);
3572 out_free_cpu_mask:
3573 record__mmap_cpu_mask_free(&cpus_mask);
3574
3575 return ret;
3576 }
3577
3578 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3579 {
3580 int ret;
3581 struct cpu_topology *topo;
3582
3583 topo = cpu_topology__new();
3584 if (!topo) {
3585 pr_err("Failed to allocate CPU topology\n");
3586 return -ENOMEM;
3587 }
3588
3589 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3590 topo->core_cpus_list, topo->core_cpus_lists);
3591 cpu_topology__delete(topo);
3592
3593 return ret;
3594 }
3595
3596 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3597 {
3598 int ret;
3599 struct cpu_topology *topo;
3600
3601 topo = cpu_topology__new();
3602 if (!topo) {
3603 pr_err("Failed to allocate CPU topology\n");
3604 return -ENOMEM;
3605 }
3606
3607 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3608 topo->package_cpus_list, topo->package_cpus_lists);
3609 cpu_topology__delete(topo);
3610
3611 return ret;
3612 }
3613
3614 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3615 {
3616 u32 s;
3617 int ret;
3618 const char **spec;
3619 struct numa_topology *topo;
3620
3621 topo = numa_topology__new();
3622 if (!topo) {
3623 pr_err("Failed to allocate NUMA topology\n");
3624 return -ENOMEM;
3625 }
3626
3627 spec = zalloc(topo->nr * sizeof(char *));
3628 if (!spec) {
3629 pr_err("Failed to allocate NUMA spec\n");
3630 ret = -ENOMEM;
3631 goto out_delete_topo;
3632 }
3633 for (s = 0; s < topo->nr; s++)
3634 spec[s] = topo->nodes[s].cpus;
3635
3636 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3637
3638 zfree(&spec);
3639
3640 out_delete_topo:
3641 numa_topology__delete(topo);
3642
3643 return ret;
3644 }
3645
3646 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3647 {
3648 int t, ret;
3649 u32 s, nr_spec = 0;
3650 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3651 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3652
3653 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3654 spec = strtok_r(user_spec, ":", &spec_ptr);
3655 if (spec == NULL)
3656 break;
3657 pr_debug2("threads_spec[%d]: %s\n", t, spec);
3658 mask = strtok_r(spec, "/", &mask_ptr);
3659 if (mask == NULL)
3660 break;
3661 pr_debug2(" maps mask: %s\n", mask);
3662 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3663 if (!tmp_spec) {
3664 pr_err("Failed to reallocate maps spec\n");
3665 ret = -ENOMEM;
3666 goto out_free;
3667 }
3668 maps_spec = tmp_spec;
3669 maps_spec[nr_spec] = dup_mask = strdup(mask);
3670 if (!maps_spec[nr_spec]) {
3671 pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3672 ret = -ENOMEM;
3673 goto out_free;
3674 }
3675 mask = strtok_r(NULL, "/", &mask_ptr);
3676 if (mask == NULL) {
3677 pr_err("Invalid thread maps or affinity specs\n");
3678 ret = -EINVAL;
3679 goto out_free;
3680 }
3681 pr_debug2(" affinity mask: %s\n", mask);
3682 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3683 if (!tmp_spec) {
3684 pr_err("Failed to reallocate affinity spec\n");
3685 ret = -ENOMEM;
3686 goto out_free;
3687 }
3688 affinity_spec = tmp_spec;
3689 affinity_spec[nr_spec] = strdup(mask);
3690 if (!affinity_spec[nr_spec]) {
3691 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3692 ret = -ENOMEM;
3693 goto out_free;
3694 }
3695 dup_mask = NULL;
3696 nr_spec++;
3697 }
3698
3699 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3700 (const char **)affinity_spec, nr_spec);
3701
3702 out_free:
3703 free(dup_mask);
3704 for (s = 0; s < nr_spec; s++) {
3705 if (maps_spec)
3706 free(maps_spec[s]);
3707 if (affinity_spec)
3708 free(affinity_spec[s]);
3709 }
3710 free(affinity_spec);
3711 free(maps_spec);
3712
3713 return ret;
3714 }
3715
3716 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3717 {
3718 int ret;
3719
3720 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3721 if (ret)
3722 return ret;
3723
3724 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
3725 return -ENODEV;
3726
3727 rec->nr_threads = 1;
3728
3729 return 0;
3730 }
3731
3732 static int record__init_thread_masks(struct record *rec)
3733 {
3734 int ret = 0;
3735 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
3736
3737 if (!record__threads_enabled(rec))
3738 return record__init_thread_default_masks(rec, cpus);
3739
3740 if (evlist__per_thread(rec->evlist)) {
3741 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
3742 return -EINVAL;
3743 }
3744
3745 switch (rec->opts.threads_spec) {
3746 case THREAD_SPEC__CPU:
3747 ret = record__init_thread_cpu_masks(rec, cpus);
3748 break;
3749 case THREAD_SPEC__CORE:
3750 ret = record__init_thread_core_masks(rec, cpus);
3751 break;
3752 case THREAD_SPEC__PACKAGE:
3753 ret = record__init_thread_package_masks(rec, cpus);
3754 break;
3755 case THREAD_SPEC__NUMA:
3756 ret = record__init_thread_numa_masks(rec, cpus);
3757 break;
3758 case THREAD_SPEC__USER:
3759 ret = record__init_thread_user_masks(rec, cpus);
3760 break;
3761 default:
3762 break;
3763 }
3764
3765 return ret;
3766 }
3767
3768 int cmd_record(int argc, const char **argv)
3769 {
3770 int err;
3771 struct record *rec = &record;
3772 char errbuf[BUFSIZ];
3773
3774 setlocale(LC_ALL, "");
3775
3776 #ifndef HAVE_LIBBPF_SUPPORT
3777 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
3778 set_nobuild('\0', "clang-path", true);
3779 set_nobuild('\0', "clang-opt", true);
3780 # undef set_nobuild
3781 #endif
3782
3783 #ifndef HAVE_BPF_PROLOGUE
3784 # if !defined (HAVE_DWARF_SUPPORT)
3785 # define REASON "NO_DWARF=1"
3786 # elif !defined (HAVE_LIBBPF_SUPPORT)
3787 # define REASON "NO_LIBBPF=1"
3788 # else
3789 # define REASON "this architecture doesn't support BPF prologue"
3790 # endif
3791 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
3792 set_nobuild('\0', "vmlinux", true);
3793 # undef set_nobuild
3794 # undef REASON
3795 #endif
3796
3797 #ifndef HAVE_BPF_SKEL
3798 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
3799 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
3800 # undef set_nobuild
3801 #endif
3802
3803 rec->opts.affinity = PERF_AFFINITY_SYS;
3804
3805 rec->evlist = evlist__new();
3806 if (rec->evlist == NULL)
3807 return -ENOMEM;
3808
3809 err = perf_config(perf_record_config, rec);
3810 if (err)
3811 return err;
3812
3813 argc = parse_options(argc, argv, record_options, record_usage,
3814 PARSE_OPT_STOP_AT_NON_OPTION);
3815 if (quiet)
3816 perf_quiet_option();
3817
3818 err = symbol__validate_sym_arguments();
3819 if (err)
3820 return err;
3821
3822 perf_debuginfod_setup(&record.debuginfod);
3823
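	/* Make system-wide (-a) the default target. */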
3825 if (!argc && target__none(&rec->opts.target))
3826 rec->opts.target.system_wide = true;
3827
3828 if (nr_cgroups && !rec->opts.target.system_wide) {
3829 usage_with_options_msg(record_usage, record_options,
3830 "cgroup monitoring only available in system-wide mode");
3831
3832 }
3833
3834 if (rec->buildid_mmap) {
3835 if (!perf_can_record_build_id()) {
3836 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
3837 err = -EINVAL;
3838 goto out_opts;
3839 }
3840 pr_debug("Enabling build id in mmap2 events.\n");
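		/* Enable mmap build id synthesizing. */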
3842 symbol_conf.buildid_mmap2 = true;
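		/* Enable perf_event_attr::build_id bit. */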
3844 rec->opts.build_id = true;
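		/* Disable build id cache. */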
3846 rec->no_buildid = true;
3847 }
3848
3849 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
3850 pr_err("Kernel has no cgroup sampling support.\n");
3851 err = -EINVAL;
3852 goto out_opts;
3853 }
3854
3855 if (rec->opts.kcore)
3856 rec->opts.text_poke = true;
3857
3858 if (rec->opts.kcore || record__threads_enabled(rec))
3859 rec->data.is_dir = true;
3860
3861 if (record__threads_enabled(rec)) {
3862 if (rec->opts.affinity != PERF_AFFINITY_SYS) {
3863 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
3865 }
3866 if (record__aio_enabled(rec)) {
3867 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
3869 }
3870 }
3871
3872 if (rec->opts.comp_level != 0) {
3873 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
3874 rec->no_buildid = true;
3875 }
3876
3877 if (rec->opts.record_switch_events &&
3878 !perf_can_record_switch_events()) {
3879 ui__error("kernel does not support recording context switch events\n");
3880 parse_options_usage(record_usage, record_options, "switch-events", 0);
3881 err = -EINVAL;
3882 goto out_opts;
3883 }
3884
3885 if (switch_output_setup(rec)) {
3886 parse_options_usage(record_usage, record_options, "switch-output", 0);
3887 err = -EINVAL;
3888 goto out_opts;
3889 }
3890
3891 if (rec->switch_output.time) {
3892 signal(SIGALRM, alarm_sig_handler);
3893 alarm(rec->switch_output.time);
3894 }
3895
3896 if (rec->switch_output.num_files) {
3897 rec->switch_output.filenames = calloc(sizeof(char *),
3898 rec->switch_output.num_files);
3899 if (!rec->switch_output.filenames) {
3900 err = -EINVAL;
3901 goto out_opts;
3902 }
3903 }
3904
3905 if (rec->timestamp_filename && record__threads_enabled(rec)) {
3906 rec->timestamp_filename = false;
3907 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
3908 }
3909
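	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */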
3914 symbol_conf.allow_aliases = true;
3915
3916 symbol__init(NULL);
3917
3918 err = record__auxtrace_init(rec);
3919 if (err)
3920 goto out;
3921
3922 if (dry_run)
3923 goto out;
3924
3925 err = bpf__setup_stdout(rec->evlist);
3926 if (err) {
3927 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
3928 pr_err("ERROR: Setup BPF stdout failed: %s\n",
3929 errbuf);
3930 goto out;
3931 }
3932
3933 err = -ENOMEM;
3934
3935 if (rec->no_buildid_cache || rec->no_buildid) {
3936 disable_buildid_cache();
3937 } else if (rec->switch_output.enabled) {
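		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * required via
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The code below is equivalent to:
		 *
		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *          disable_buildid_cache();
		 */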
3953 bool disable = true;
3954
3955 if (rec->no_buildid_set && !rec->no_buildid)
3956 disable = false;
3957 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
3958 disable = false;
3959 if (disable) {
3960 rec->no_buildid = true;
3961 rec->no_buildid_cache = true;
3962 disable_buildid_cache();
3963 }
3964 }
3965
3966 if (record.opts.overwrite)
3967 record.opts.tail_synthesize = true;
3968
3969 if (rec->evlist->core.nr_entries == 0) {
3970 if (perf_pmu__has_hybrid()) {
3971 err = evlist__add_default_hybrid(rec->evlist,
3972 !record.opts.no_samples);
3973 } else {
3974 err = __evlist__add_default(rec->evlist,
3975 !record.opts.no_samples);
3976 }
3977
3978 if (err < 0) {
3979 pr_err("Not enough memory for event selector list\n");
3980 goto out;
3981 }
3982 }
3983
3984 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
3985 rec->opts.no_inherit = true;
3986
3987 err = target__validate(&rec->opts.target);
3988 if (err) {
3989 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3990 ui__warning("%s\n", errbuf);
3991 }
3992
3993 err = target__parse_uid(&rec->opts.target);
3994 if (err) {
3995 int saved_errno = errno;
3996
3997 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3998 ui__error("%s", errbuf);
3999
4000 err = -saved_errno;
4001 goto out;
4002 }
4003
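	/* Enable ignoring missing threads when -u/-p option is defined. */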
4005 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
4006
4007 if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
4008 pr_err("failed to use cpu list %s\n",
4009 rec->opts.target.cpu_list);
		err = -EINVAL;
		goto out;
4011 }
4012
4013 rec->opts.target.hybrid = perf_pmu__has_hybrid();
4014
4015 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4016 arch__add_leaf_frame_record_opts(&rec->opts);
4017
4018 err = -ENOMEM;
4019 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4020 if (rec->opts.target.pid != NULL) {
4021 pr_err("Couldn't create thread/CPU maps: %s\n",
4022 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4023 goto out;
		} else {
			usage_with_options(record_usage, record_options);
		}
4027 }
4028
4029 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4030 if (err)
4031 goto out;
4032
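	/*
	 * Take all buildids when the file contains AUX area tracing data,
	 * because the trace is not decoded here (it would take too long).
	 */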
4038 if (rec->opts.full_auxtrace)
4039 rec->buildid_all = true;
4040
4041 if (rec->opts.text_poke) {
4042 err = record__config_text_poke(rec->evlist);
4043 if (err) {
4044 pr_err("record__config_text_poke failed, error %d\n", err);
4045 goto out;
4046 }
4047 }
4048
4049 if (rec->off_cpu) {
4050 err = record__config_off_cpu(rec);
4051 if (err) {
4052 pr_err("record__config_off_cpu failed, error %d\n", err);
4053 goto out;
4054 }
4055 }
4056
4057 if (record_opts__config(&rec->opts)) {
4058 err = -EINVAL;
4059 goto out;
4060 }
4061
4062 err = record__init_thread_masks(rec);
4063 if (err) {
4064 pr_err("Failed to initialize parallel data streaming masks\n");
4065 goto out;
4066 }
4067
4068 if (rec->opts.nr_cblocks > nr_cblocks_max)
4069 rec->opts.nr_cblocks = nr_cblocks_max;
4070 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4071
4072 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4073 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4074
4075 if (rec->opts.comp_level > comp_level_max)
4076 rec->opts.comp_level = comp_level_max;
4077 pr_debug("comp level: %d\n", rec->opts.comp_level);
4078
4079 err = __cmd_record(&record, argc, argv);
4080 out:
4081 evlist__delete(rec->evlist);
4082 symbol__exit();
4083 auxtrace_record__free(rec->itr);
4084 out_opts:
4085 record__free_thread_masks(rec, rec->nr_threads);
4086 rec->nr_threads = 0;
4087 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4088 return err;
4089 }
4090
4091 static void snapshot_sig_handler(int sig __maybe_unused)
4092 {
4093 struct record *rec = &record;
4094
4095 hit_auxtrace_snapshot_trigger(rec);
4096
4097 if (switch_output_signal(rec))
4098 trigger_hit(&switch_output_trigger);
4099 }
4100
4101 static void alarm_sig_handler(int sig __maybe_unused)
4102 {
4103 struct record *rec = &record;
4104
4105 if (switch_output_time(rec))
4106 trigger_hit(&switch_output_trigger);
4107 }